[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Spanish stemmer following the Snowball algorithm.
 *
 * The word being stemmed lives in $this->word and is rewritten in place by
 * the private step methods. The region helpers used below (rv(), r1(), r2(),
 * search(), searchIfInRv(), searchIfInR2(), inR1(), inR2()) are inherited
 * from the parent class and are not defined in this file.
 *
 * @link http://snowball.tartarus.org/algorithms/spanish/stemmer.html
 * @author wamania
 */
class Spanish extends Stem
{
    /**
     * All spanish vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'ü');

    /**
     * {@inheritdoc}
     *
     * Lower-cases the word, then applies step 0, step 1, (step 2a and 2b when
     * step 1 left the word untouched), step 3 and the final accent removal.
     *
     * @param string $word Word to stem; must be valid UTF-8.
     *
     * @return string The stemmed word.
     *
     * @throws \Exception When $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // Inherited helpers; presumably they compute the RV/R1/R2 regions
        // consulted by the searchIfIn*/in* calls in the steps below.
        $this->rv();
        $this->r1();
        $this->r2();

        $this->step0();

        // Snapshot taken after step 0 so we can tell whether step 1 (and then
        // step 2a) actually removed anything.
        $beforeStep1 = $this->word;
        $this->step1();

        // Do step 2a if no ending was removed by step 1.
        if ($this->word === $beforeStep1) {
            $this->step2a();

            // Do Step 2b if step 2a was done, but failed to remove a suffix.
            if ($this->word === $beforeStep1) {
                $this->step2b();
            }
        }

        $this->step3();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 0: Attached pronoun
     *
     * Search for the longest among the following suffixes
     *      me se sela selo selas selos la le lo las les los nos
     *
     * and delete it, if comes after one of
     *      (a) iéndo ándo ár ér ír
     *      (b) ando iendo ar er ir
     *      (c) yendo following u
     *
     * in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
     * In the case of (a), deletion is followed by removing the acute accent
     * (for example, haciéndola -> haciendo).
     *
     * @return bool True when a pronoun was removed, false otherwise.
     */
    private function step0()
    {
        // Ordered so that a longer pronoun is always tried before any shorter
        // pronoun that is a suffix of it (selas before las, selo before lo...).
        $pronouns = array(
            'selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo',
        );

        $position = $this->searchIfInRv($pronouns);
        if ($position === false) {
            return false;
        }

        $pronoun = UTF8::substr($this->word, $position);

        // (a) accented gerund / infinitive ending followed by the pronoun
        $accentedEndings = array();
        foreach (array('iéndo', 'ándo', 'ár', 'ér', 'ír') as $ending) {
            $accentedEndings[] = $ending . $pronoun;
        }

        $endingPosition = $this->searchIfInRv($accentedEndings);
        if ($endingPosition !== false) {
            // Strip the accent from the ending first, then cut the pronoun off.
            $tail = UTF8::substr($this->word, $endingPosition);
            $tail = UTF8::to_utf8(UTF8::to_ascii($tail)); // unaccent
            $this->word = UTF8::substr($this->word, 0, $endingPosition) . $tail;
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // (b) unaccented gerund / infinitive ending followed by the pronoun
        $plainEndings = array();
        foreach (array('iendo', 'ando', 'ar', 'er', 'ir') as $ending) {
            $plainEndings[] = $ending . $pronoun;
        }

        if ($this->searchIfInRv($plainEndings) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // (c) "yendo" + pronoun, but only when the letter before "yendo" is u
        $endingPosition = $this->searchIfInRv(array('yendo' . $pronoun));
        if ($endingPosition !== false
            && UTF8::substr($this->word, $endingPosition - 1, 1) === 'u') {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        return false;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * The suffix groups are tried in turn; the first group that matches the end
     * of the word decides the outcome, whether or not its region condition then
     * allows the deletion.
     *
     * @return bool True when one of the suffix groups matched the end of the
     *              word (the word may still be unchanged), false otherwise.
     */
    private function step1()
    {
        // anza anzas ico ica icos icas ismo ismos able ables ible ibles ista
        // istas oso osa osos osas amiento amientos imiento imientos
        //      delete if in R2
        $position = $this->search(array(
            'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso', 'istas', 'ista', 'ibles',
            'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza',
        ));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // adora ador ación adoras adores aciones ante antes ancia ancias
        //      delete if in R2
        //      if preceded by ic, delete if in R2
        $position = $this->search(array(
            'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia',
        ));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            $precedingPosition = $this->searchIfInR2(array('ic'));
            if ($precedingPosition !== false) {
                $this->word = UTF8::substr($this->word, 0, $precedingPosition);
            }

            return true;
        }

        // logía logías
        //      replace with log if in R2
        $position = $this->search(array('logías', 'logía'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
            }

            return true;
        }

        // ución uciones
        //      replace with u if in R2
        $position = $this->search(array('uciones', 'ución'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
            }

            return true;
        }

        // encia encias
        //      replace with ente if in R2
        $position = $this->search(array('encias', 'encia'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(encias|encia)$#u', 'ente', $this->word);
            }

            return true;
        }

        // amente
        //      delete if in R1
        //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        //      if preceded by os, ic or ad, delete if in R2
        $position = $this->search(array('amente'));
        if ($position !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            $ivPosition = $this->searchIfInR2(array('iv'));
            if ($ivPosition !== false) {
                $this->word = UTF8::substr($this->word, 0, $ivPosition);

                $atPosition = $this->searchIfInR2(array('at'));
                if ($atPosition !== false) {
                    $this->word = UTF8::substr($this->word, 0, $atPosition);
                }
            } else {
                $precedingPosition = $this->searchIfInR2(array('os', 'ic', 'ad'));
                if ($precedingPosition !== false) {
                    $this->word = UTF8::substr($this->word, 0, $precedingPosition);
                }
            }

            return true;
        }

        // mente
        //      delete if in R2
        //      if preceded by ante, able or ible, delete if in R2
        $position = $this->search(array('mente'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            $precedingPosition = $this->searchIfInR2(array('ante', 'able', 'ible'));
            if ($precedingPosition !== false) {
                $this->word = UTF8::substr($this->word, 0, $precedingPosition);
            }

            return true;
        }

        // idad idades
        //      delete if in R2
        //      if preceded by abil, ic or iv, delete if in R2
        $position = $this->search(array('idades', 'idad'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            $precedingPosition = $this->searchIfInR2(array('abil', 'ic', 'iv'));
            if ($precedingPosition !== false) {
                $this->word = UTF8::substr($this->word, 0, $precedingPosition);
            }

            return true;
        }

        // iva ivo ivas ivos
        //      delete if in R2
        //      if preceded by at, delete if in R2
        $position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            $precedingPosition = $this->searchIfInR2(array('at'));
            if ($precedingPosition !== false) {
                $this->word = UTF8::substr($this->word, 0, $precedingPosition);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2a: Verb suffixes beginning y
     *
     * Search RV for the longest of
     *      ya ye yan yen yeron yendo yo yó yas yes yais yamos
     * and delete it if it is preceded by u
     * (the preceding u need not be in RV).
     *
     * @return bool True when a suffix was removed, false otherwise.
     */
    private function step2a()
    {
        $position = $this->searchIfInRv(array(
            'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye',
        ));

        if ($position !== false
            && UTF8::substr($this->word, $position - 1, 1) === 'u') {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        return false;
    }

    /**
     * Step 2b: Other verb suffixes
     *
     * Search for the longest among the following suffixes in RV, and perform
     * the action indicated.
     *
     * The delete-only list is kept sorted by decreasing length so that a longer
     * suffix is always listed before any shorter suffix it ends with (for
     * example íamos before amos). It includes ieses, abais, arais, aseis and
     * íamos, which belong to the Snowball verb-suffix list.
     *
     * @return bool True when a suffix was removed, false otherwise.
     */
    private function step2b()
    {
        // delete
        $position = $this->searchIfInRv(array(
            // 7 letters
            'iésemos', 'iéramos', 'iríamos', 'eríamos', 'aríamos',
            // 6 letters
            'ábamos', 'áramos', 'ásemos', 'eríais', 'iríais', 'aríais', 'aremos', 'eremos', 'iremos',
            'asteis', 'ieseis', 'ierais', 'isteis',
            // 5 letters
            'irían', 'erían', 'arían', 'irías', 'erías', 'arías', 'aréis', 'eréis', 'iréis',
            'ieran', 'iesen', 'ieron', 'iendo', 'ieras', 'ieses', 'abais', 'arais', 'aseis', 'íamos',
            // 4 letters
            'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais',
            'arán', 'arás', 'aría', 'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron',
            'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
            // 3 letters
            'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida',
            'ado', 'ido', 'ará',
            // 2 letters
            'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an',
        ));
        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // en es éis emos
        //      delete, and if preceded by gu delete the u (the gu need not be in RV)
        $position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'));
        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            $guPosition = $this->search(array('gu'));
            if ($guPosition !== false) {
                // Keep the g, drop the u.
                $this->word = UTF8::substr($this->word, 0, $guPosition + 1);
            }

            return true;
        }

        return false;
    }

    /**
     * Step 3: residual suffix
     *
     * Search for the longest among the following suffixes in RV, and perform
     * the action indicated.
     *
     * @return bool True when a suffix was removed, false otherwise.
     */
    private function step3()
    {
        // os a o á í ó
        //      delete if in RV
        $position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'));
        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // e é
        //      delete if in RV, and if preceded by gu with the u in RV delete the u
        $position = $this->searchIfInRv(array('e', 'é'));
        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            $uPosition = $this->searchIfInRv(array('u'));
            if ($uPosition !== false
                && UTF8::substr($this->word, $uPosition - 1, 1) === 'g') {
                $this->word = UTF8::substr($this->word, 0, $uPosition);
            }

            // The e/é itself was removed, whether or not a "gu" preceded it.
            return true;
        }

        return false;
    }

    /**
     * And finally:
     * Remove acute accents (á í ó é ú); ü is left untouched.
     */
    private function finish()
    {
        $this->word = UTF8::str_replace(
            array('á', 'í', 'ó', 'é', 'ú'),
            array('a', 'i', 'o', 'e', 'u'),
            $this->word
        );
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |