[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Portuguese stemmer following the Snowball algorithm description.
 *
 * The region helpers (rv, r1, r2, inRv, inR1, inR2) and the suffix lookups
 * (search, searchIfInRv, searchIfInR2) are not defined in this class; they are
 * used here as returning either a character offset into $this->word or false.
 *
 * @link http://snowball.tartarus.org/algorithms/portuguese/stemmer.html
 * @author wamania
 *
 */
class Portuguese extends Stem
{
    /**
     * All Portuguese vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'â', 'ê', 'ô');

    /**
     * {@inheritdoc}
     *
     * @param string $word The word to stem; must be valid UTF-8.
     *
     * @return string The stemmed, lower-cased word.
     *
     * @throws \Exception When $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // Rewrite the nasalised vowels as two-character placeholders; finish() reverses this.
        $this->word = UTF8::str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);

        $this->rv();
        $this->r1();
        $this->r2();

        // Snapshot used to detect whether step 1 / step 2 actually altered the word.
        $word = $this->word;
        $this->step1();

        // Strict comparison: these are always strings, and loose comparison would
        // compare numeric-looking strings by value instead of by content.
        if ($word === $this->word) {
            $this->step2();
        }

        // Step 3 runs when step 1 or step 2 changed the word, step 4 otherwise.
        if ($word !== $this->word) {
            $this->step3();
        } else {
            $this->step4();
        }

        $this->step5();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 1: Standard suffix removal
     *
     * Suffix groups are tried in order; the first group whose suffix is found ends
     * the step, whether or not the region test allowed the word to be modified.
     *
     * @return bool true when one of the suffix groups matched, false otherwise.
     */
    private function step1()
    {
        // delete if in R2
        $position = $this->search(array(
            'amentos', 'imentos', 'adoras', 'adores', 'amento', 'imento', 'adora', 'istas', 'ismos', 'antes', 'ância',
            'ezas', 'eza', 'icos', 'icas', 'ismo', 'ável', 'ível', 'ista', 'oso',
            'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es' , 'ante'
        ));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            return true;
        }

        // logía logías
        // replace with log if in R2
        // NOTE(review): these accented forms (and "ución/uciones" below) look Spanish; recent
        // Snowball definitions appear to list "logia(s)" and "ução/uções" instead — confirm
        // against upstream before changing, since it alters stemmer output.
        $position = $this->search(array('logías', 'logía'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
            }

            return true;
        }

        // ución uciones
        // replace with u if in R2
        $position = $this->search(array('uciones', 'ución'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
            }

            return true;
        }

        // ência ências
        // replace with ente if in R2
        $position = $this->search(array('ências', 'ência'));
        if ($position !== false) {
            if ($this->inR2($position)) {
                $this->word = preg_replace('#(ências|ência)$#u', 'ente', $this->word);
            }

            return true;
        }

        // amente
        // delete if in R1
        // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
        // if preceded by os, ic or ad, delete if in R2
        $position = $this->search(array('amente'));
        if ($position !== false) {
            // delete if in R1
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
            if (($position2 = $this->searchIfInR2(array('iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);

                if (($position3 = $this->searchIfInR2(array('at'))) !== false) {
                    $this->word = UTF8::substr($this->word, 0, $position3);
                }

            // if preceded by os, ic or ad, delete if in R2
            } elseif (($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position4);
            }

            return true;
        }

        // mente
        // delete if in R2
        // if preceded by ante, avel or ível, delete if in R2
        $position = $this->search(array('mente'));
        if ($position !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by ante, avel or ível, delete if in R2
            // Strict check, like every other branch: an offset of 0 must not be read as "not found".
            if (($position2 = $this->searchIfInR2(array('ante', 'avel', 'ível'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // idade idades
        // delete if in R2
        // if preceded by abil, ic or iv, delete if in R2
        $position = $this->search(array('idades', 'idade'));
        if ($position !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by abil, ic or iv, delete if in R2
            if (($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // iva ivo ivas ivos
        // delete if in R2
        // if preceded by at, delete if in R2
        $position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'));
        if ($position !== false) {
            // delete if in R2
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            // if preceded by at, delete if in R2
            if (($position2 = $this->searchIfInR2(array('at'))) !== false) {
                $this->word = UTF8::substr($this->word, 0, $position2);
            }

            return true;
        }

        // ira iras
        // replace with ir if in RV and preceded by e
        $position = $this->search(array('iras', 'ira'));
        if ($position !== false) {
            if ($this->inRv($position)) {
                // Character immediately before the matched suffix.
                $letter = UTF8::substr($this->word, $position - 1, 1);

                if ($letter === 'e') {
                    $this->word = preg_replace('#(iras|ira)$#u', 'ir', $this->word);
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Step 2: Verb suffixes
     * Search for the longest among the following suffixes in RV, and if found, delete.
     *
     * @return bool true when a suffix was found in RV and removed, false otherwise.
     */
    private function step2()
    {
        $position = $this->searchIfInRv(array(
            'aríamos', 'eríamos', 'iríamos', 'ássemos', 'êssemos', 'íssemos',
            'aríeis', 'eríeis', 'iríeis', 'ásseis', 'ésseis', 'ísseis', 'áramos', 'éramos', 'íramos', 'ávamos',
            'aremos', 'eremos', 'iremos',
            'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes',
            'asses', 'esses', 'isses', 'astes', 'estes', 'istes', 'áreis', 'areis', 'éreis', 'ereis', 'íreis', 'ireis',
            'áveis', 'íamos', 'armos', 'ermos', 'irmos',
            'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'adas', 'idas',
            'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'ara~o', 'era~o', 'ira~o',
            'arás', 'aras', 'erás', 'eras', 'irás', 'avas', 'ares', 'eres', 'ires', 'íeis', 'ados', 'idos', 'ámos', 'amos',
            'emos', 'imos', 'iras',
            'ada', 'ida', 'ará', 'ara', 'erá', 'era', 'irá', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira',
            'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou',
        ));

        if ($position === false) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, $position);

        return true;
    }

    /**
     * Step 3: d-suffixes
     *
     * @return bool true when the word ends with "i" in RV (even if nothing was deleted), false otherwise.
     */
    private function step3()
    {
        // Delete suffix i if in RV and preceded by c
        if ($this->searchIfInRv(array('i')) === false) {
            return false;
        }

        // Second-to-last character, i.e. the one preceding the final "i".
        $letter = UTF8::substr($this->word, -2, 1);

        if ($letter === 'c') {
            $this->word = UTF8::substr($this->word, 0, -1);
        }

        return true;
    }

    /**
     * Step 4: residual suffix
     *
     * @return bool true when a suffix was found in RV and removed, false otherwise.
     */
    private function step4()
    {
        // If the word ends with one of the suffixes "os a i o á í ó" in RV, delete it
        $position = $this->searchIfInRv(array('os', 'a', 'i', 'o','á', 'í', 'ó'));

        if ($position === false) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, $position);

        return true;
    }

    /**
     * Step 5: final "e" / cedilla handling
     *
     * @return bool true when the word was handled by either branch, false otherwise.
     */
    private function step5()
    {
        // If the word ends with one of "e é ê" in RV, delete it, and if preceded by gu (or ci)
        // with the u (or i) in RV, delete the u (or i).
        if ($this->searchIfInRv(array('e', 'é', 'ê')) !== false) {
            $this->word = UTF8::substr($this->word, 0, -1);

            if (($position2 = $this->search(array('gu', 'ci'))) !== false) {
                // $position2 + 1 is the offset of the trailing u / i of the matched pair.
                if ($this->inRv($position2 + 1)) {
                    $this->word = UTF8::substr($this->word, 0, -1);
                }
            }

            return true;
        }

        // Otherwise, a word ending in ç has the cedilla removed.
        if ($this->search(array('ç')) !== false) {
            $this->word = preg_replace('#(ç)$#u', 'c', $this->word);

            return true;
        }

        return false;
    }

    /**
     * Finally
     */
    private function finish()
    {
        // Turn the "a~" and "o~" placeholders introduced in stem() back into "ã" and "õ".
        $this->word = UTF8::str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |