[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Italian stemmer following the Snowball algorithm description.
 *
 * The work is done on $this->word, which is rewritten in place by a prelude
 * and by steps 0, 1, 2, 3a and 3b, then restored by finish().
 *
 * @link http://snowball.tartarus.org/algorithms/italian/stemmer.html
 * @author wamania
 *
 */
class Italian extends Stem
{
    /**
     * All Italian vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'à', 'è', 'ì', 'ò', 'ù');

    /**
     * {@inheritdoc}
     *
     * @param string $word The word to stem; must be valid UTF-8.
     *
     * @return string The stemmed word, lower-cased.
     *
     * @throws \Exception When $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        // Vowels as one string, so they can be dropped into a regex character class.
        $this->plainVowels = implode('', self::$vowels);

        $this->word = UTF8::strtolower($word);

        // First, replace all acute accents by grave accents.
        $this->word = UTF8::str_replace(
            array('á', 'é', 'í', 'ó', 'ú'),
            array('à', 'è', 'ì', 'ò', 'ù'),
            $this->word
        );

        // Then put u after q, and u, i between vowels, into upper case.
        // The upper-case letters are not part of the vowel list above, and
        // finish() turns them back into lower case.
        $this->word = preg_replace('#([q])u#u', '$1U', $this->word);
        $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
        $this->word = preg_replace('#(['.$this->plainVowels.'])i(['.$this->plainVowels.'])#u', '$1I$2', $this->word);

        // Region setup; these methods are not defined in this class
        // (presumably inherited from Stem - confirm there).
        $this->rv();
        $this->r1();
        $this->r2();

        $this->step0();

        $beforeStep1 = $this->word;
        $this->step1();

        // Do step 2 if no ending was removed by step 1.
        // Strict comparison: both operands are strings and must never be
        // compared numerically.
        if ($beforeStep1 === $this->word) {
            $this->step2();
        }

        $this->step3a();
        $this->step3b();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 0: Attached pronoun
     *
     * Looks for a pronoun suffix at the end of the word. When the pronoun is
     * preceded by "ando"/"endo" (case a) it is deleted; when it is preceded by
     * "ar"/"er"/"ir" (case b) it is replaced by "e". In both cases the
     * preceding ending together with the pronoun must be found by
     * searchIfInRv().
     *
     * @return bool True when a pronoun suffix was found (even if the word was
     *              left unchanged), false otherwise.
     */
    private function step0()
    {
        // Search for the longest among the following suffixes
        $position = $this->search(array(
            'gliela', 'gliele', 'glieli', 'glielo', 'gliene',
            'sene', 'mela', 'mele', 'meli', 'melo', 'mene', 'tela', 'tele', 'teli', 'telo', 'tene', 'cela',
            'cele', 'celi', 'celo', 'cene', 'vela', 'vele', 'veli', 'velo', 'vene',
            'gli', 'la', 'le', 'li', 'lo', 'mi', 'ne', 'si', 'ti', 'vi', 'ci'
        ));

        if ($position === false) {
            return false;
        }

        $pronoun = UTF8::substr($this->word, $position);
        $withPronoun = function ($ending) use ($pronoun) {
            return $ending . $pronoun;
        };

        // (a) preceded by ando / endo: the pronoun is deleted.
        // The truncated word then ends in "ando"/"endo", so case (b) can no
        // longer apply and we can return right away.
        if ($this->searchIfInRv(array_map($withPronoun, array('ando', 'endo'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // (b) preceded by ar / er / ir: the pronoun is replaced by e.
        // The pronoun starts at $position and runs to the end of the word, so
        // no regular expression is needed to swap it.
        if ($this->searchIfInRv(array_map($withPronoun, array('ar', 'er', 'ir'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position) . 'e';
        }

        return true;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * The suffix groups are tried in order; the first group with a suffix at
     * the end of the word decides the outcome, whether or not the region
     * condition lets the word be modified.
     *
     * @return bool True when one of the step 1 suffixes ends the word, false
     *              otherwise.
     */
    private function step1()
    {
        // amente
        //      delete if in R1
        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        //      if preceded by os, ic or abil, delete if in R2
        if (($position = $this->search(array('amente'))) !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
            if (($position2 = $this->searchIfInR2(array('iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);

                if (($position3 = $this->searchIfInR2(array('at'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }

            // if preceded by os, ic or abil, delete if in R2
            } elseif (($position4 = $this->searchIfInR2(array('os', 'ic', 'abil'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position4);
            }

            return true;
        }

        // delete if in R2
        if (($position = $this->search(array(
            'ibili', 'atrice', 'abili', 'abile', 'ibile', 'atrici', 'mente',
            'anza', 'anze', 'iche', 'ichi', 'ismo', 'ismi', 'ista', 'iste', 'isti', 'istà', 'istè', 'istì', 'ante', 'anti',
            'ico', 'ici', 'ica', 'ice', 'oso', 'osi', 'osa', 'ose'
        ))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // azione azioni atore atori
        //      delete if in R2
        //      if preceded by ic, delete if in R2
        if (($position = $this->search(array('azione', 'azioni', 'atore', 'atori'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                if (($position2 = $this->search(array('ic'))) !== false) {
                    if ($this->inR2($position2)) {
                        $this->word = UTF8::substr($this->word, 0, $position2);
                    }
                }
            }

            return true;
        }

        // logia logie
        //      replace with log if in R2
        if (($position = $this->search(array('logia', 'logie'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(logia|logie)$#u', 'log', $this->word);
            }

            return true;
        }

        // uzione uzioni usione usioni
        //      replace with u if in R2
        if (($position = $this->search(array('uzione', 'uzioni', 'usione', 'usioni'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(uzione|uzioni|usione|usioni)$#u', 'u', $this->word);
            }

            return true;
        }

        // enza enze
        //      replace with ente if in R2
        if (($position = $this->search(array('enza', 'enze'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(enza|enze)$#u', 'ente', $this->word);
            }

            return true;
        }

        // amento amenti imento imenti
        //      delete if in RV
        if (($position = $this->search(array('amento', 'amenti', 'imento', 'imenti'))) !== false) {
            if ($this->inRv($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // ità
        //      delete if in R2
        //      if preceded by abil, ic or iv, delete if in R2
        if (($position = $this->search(array('ità'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if (($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // ivo ivi iva ive
        //      delete if in R2
        //      if preceded by at, delete if in R2 (and if further preceded by ic, delete if in R2)
        if (($position = $this->search(array('ivo', 'ivi', 'iva', 'ive'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if (($position2 = $this->searchIfInR2(array('at'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);

                if (($position3 = $this->searchIfInR2(array('ic'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2: Verb suffixes
     * Search for the longest among the following suffixes in RV, and if found, delete.
     *
     * The list is ordered so that a longer suffix always comes before any
     * shorter suffix it ends with.
     */
    private function step2()
    {
        if (($position = $this->searchIfInRv(array(
            'assimo', 'assero', 'eranno', 'erebbero', 'erebbe', 'eremmo', 'ereste', 'eresti', 'essero', 'iranno', 'irebbero', 'irebbe', 'iremmo',
            'iscano', 'ireste', 'iresti', 'iscono', 'issero',
            'avamo', 'arono', 'avano', 'avate', 'eremo', 'erete', 'erono', 'evamo', 'evano', 'evate', 'ivamo', 'ivano', 'ivate', 'iremo', 'irete', 'irono',
            'ammo', 'ando', 'asse', 'assi', 'emmo', 'enda', 'ende', 'endi', 'endo', 'erai', 'erei', 'Yamo', 'iamo', 'immo', 'irà', 'irai', 'irei',
            'isca', 'isce', 'isci', 'isco',
            'ano', 'are', 'ata', 'ate', 'ati', 'ato', 'ava', 'avi', 'avo', 'erà', 'ere', 'erò', 'ete', 'eva',
            'evi', 'evo', 'ire', 'ita', 'ite', 'iti', 'ito', 'iva', 'ivi', 'ivo', 'ono', 'uta', 'ute', 'uti', 'uto', 'irò', 'ar', 'ir'
        ))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
        }
    }

    /**
     * Step 3a
     * Delete a final a, e, i, o, à, è, ì or ò if it is in RV, and a preceding i if it is in RV
     *
     * @return bool True when a final vowel was removed, false otherwise.
     */
    private function step3a()
    {
        if ($this->searchIfInRv(array('a', 'e', 'i', 'o', 'à', 'è', 'ì', 'ò')) === false) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, -1);

        if ($this->searchIfInRv(array('i')) !== false) {
            $this->word = UTF8::substr($this->word, 0, -1);
        }

        return true;
    }

    /**
     * Step 3b
     * Replace final ch (or gh) with c (or g) if in RV (crocch -> crocc)
     */
    private function step3b()
    {
        if ($this->searchIfInRv(array('ch')) !== false) {
            $this->word = preg_replace('#(ch)$#u', 'c', $this->word);
        } elseif ($this->searchIfInRv(array('gh')) !== false) {
            $this->word = preg_replace('#(gh)$#u', 'g', $this->word);
        }
    }

    /**
     * Finally
     * turn I and U back into lower case
     */
    private function finish()
    {
        $this->word = UTF8::str_replace(array('I', 'U'), array('i', 'u'), $this->word);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |