[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Snowball stemmer for Russian.
 *
 * The word is lower-cased, its regions are computed, and then four steps
 * strip suffixes in turn. Every suffix lookup goes through the inherited
 * search helpers (searchIfInRv() / search() / inRv() / inR2()), which are
 * defined on the parent Stem class and are not visible in this file.
 *
 * @link http://snowball.tartarus.org/algorithms/russian/stemmer.html
 * @author wamania
 *
 */
class Russian extends Stem
{
    /**
     * All russian vowels
     */
    protected static $vowels = array('а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я');

    /**
     * PERFECTIVE GERUND endings.
     * Index 0 is "group 1" (only removed when preceded by а or я, see checkGroup1());
     * index 1 is "group 2" (removed unconditionally).
     */
    protected static $perfectiveGerund = array(
        array('вшись', 'вши', 'в'),
        array('ывшись', 'ившись', 'ывши', 'ивши', 'ив', 'ыв')
    );

    /**
     * ADJECTIVE endings.
     */
    protected static $adjective = array(
        'ыми', 'ими', 'ему', 'ому', 'его', 'ого', 'ее', 'ие', 'ые', 'ое', 'ей', 'ий',
        'ый', 'ой', 'ем', 'им', 'ым','ом','их', 'ых', 'ую', 'юю', 'ая', 'яя', 'ою', 'ею'
    );

    /**
     * PARTICIPLE endings, split into group 1 (index 0) and group 2 (index 1)
     * with the same meaning as for $perfectiveGerund.
     */
    protected static $participle = array(
        array('ем', 'нн', 'вш', 'ющ', 'щ'),
        array('ивш', 'ывш', 'ующ')
    );

    /**
     * REFLEXIVE endings.
     */
    protected static $reflexive = array('ся', 'сь');

    /**
     * VERB endings, split into group 1 (index 0) and group 2 (index 1)
     * with the same meaning as for $perfectiveGerund.
     */
    protected static $verb = array(
        array('ешь', 'нно', 'ете', 'йте', 'ла', 'на', 'ли', 'й', 'л', 'ем', 'н', 'ло', 'но', 'ет', 'ют', 'ны', 'ть'),
        array(
            'уйте', 'ило', 'ыло', 'ено','ила', 'ыла', 'ена', 'ейте', 'ены', 'ить', 'ыть', 'ишь', 'ите', 'или', 'ыли',
            'ует', 'уют', 'ей', 'уй', 'ил', 'ыл', 'им', 'ым', 'ен', 'ят', 'ит', 'ыт', 'ую', 'ю'
        )
    );

    /**
     * NOUN endings.
     */
    protected static $noun = array(
        'иями', 'ями', 'ами', 'ией', 'иям', 'ием', 'иях', 'ев', 'ов', 'ие', 'ье', 'еи', 'ии', 'ей', 'ой', 'ий', 'ям',
        'ем', 'ам', 'ом', 'ах', 'ях', 'ию', 'ью', 'ия', 'ья', 'я', 'а', 'е', 'ы', 'ь', 'и', 'о', 'у', 'й', 'ю'
    );

    /**
     * SUPERLATIVE endings.
     */
    protected static $superlative = array('ейше', 'ейш');

    /**
     * DERIVATIONAL endings.
     */
    protected static $derivational = array('ость', 'ост');

    /**
     * {@inheritdoc}
     *
     * @param string $word Word to stem; must be valid UTF-8.
     * @return string The lower-cased word with the recognised suffixes removed.
     * @throws \Exception When $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // Compute the regions before touching the word. r1() and r2() are
        // inherited from Stem; R2 is needed by step3() (via inR2()), and RV,
        // computed by rv() below, bounds the suffix searches of every step.
        $this->r1();
        $this->r2();
        $this->rv();

        // Apply steps 1, 2, 3 and 4 in order; each one edits $this->word in place.
        $this->step1();
        $this->step2();
        $this->step3();
        $this->step4();

        return $this->word;
    }

    /**
     * Step 1: Search for a PERFECTIVE GERUND ending. If one is found remove it, and that is then the end of step 1.
     * Otherwise try and remove a REFLEXIVE ending, and then search in turn for (1) an ADJECTIVAL, (2) a VERB or (3) a NOUN ending.
     * As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
     *
     * @return boolean true when a gerund, adjectival, verb or noun ending was removed
     *                 (removing only a reflexive ending still yields false)
     */
    private function step1()
    {
        // PERFECTIVE GERUND, group 1: only valid after а or я.
        $position = $this->searchIfInRv(self::$perfectiveGerund[0]);
        if ($position !== false && $this->inRv($position) && $this->checkGroup1($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        // PERFECTIVE GERUND, group 2.
        $position = $this->searchIfInRv(self::$perfectiveGerund[1]);
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        // Otherwise try and remove a REFLEXIVE ending; this does not end the step.
        $position = $this->searchIfInRv(self::$reflexive);
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
        }

        // (1) ADJECTIVAL: an adjective ending, optionally preceded by a participle ending.
        $position = $this->searchIfInRv(self::$adjective);
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);

            // Participle group 1 (needs а or я in front of it).
            $position2 = $this->search(self::$participle[0]);
            if ($position2 !== false && $this->inRv($position2) && $this->checkGroup1($position2)) {
                $this->word = UTF8::substr($this->word, 0, $position2);
                return true;
            }

            // Participle group 2.
            $position2 = $this->search(self::$participle[1]);
            if ($position2 !== false && $this->inRv($position2)) {
                $this->word = UTF8::substr($this->word, 0, $position2);
                return true;
            }

            // The adjective ending alone already terminates step 1.
            return true;
        }

        // (2) VERB, group 1 (needs а or я in front of it).
        $position = $this->searchIfInRv(self::$verb[0]);
        if ($position !== false && $this->inRv($position) && $this->checkGroup1($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        // (2) VERB, group 2.
        $position = $this->searchIfInRv(self::$verb[1]);
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        // (3) NOUN.
        $position = $this->searchIfInRv(self::$noun);
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        return false;
    }

    /**
     * Step 2: If the word ends with и (i), remove it.
     *
     * @return boolean true when the trailing и was removed
     */
    private function step2()
    {
        $position = $this->searchIfInRv(array('и'));
        if ($position !== false && $this->inRv($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        return false;
    }

    /**
     * Step 3: Search for a DERIVATIONAL ending in R2 (i.e. the entire ending must lie in R2),
     * and if one is found, remove it.
     *
     * @return boolean true when a derivational ending was removed
     */
    private function step3()
    {
        $position = $this->searchIfInRv(self::$derivational);
        if ($position !== false && $this->inR2($position)) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        // Explicit result so that every step returns a boolean.
        return false;
    }

    /**
     * Step 4: (1) Undouble н (n), or, (2) if the word ends with a SUPERLATIVE ending, remove it
     * and undouble н (n), or (3) if the word ends ь (') (soft sign) remove it.
     *
     * The three cases are alternatives: once a superlative ending has been
     * removed, the soft-sign rule (3) is not applied any more.
     *
     * @return boolean true when the word was modified
     */
    private function step4()
    {
        // (2) if the word ends with a SUPERLATIVE ending, remove it and undouble н (n)
        if (($position = $this->searchIfInRv(self::$superlative)) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
            $this->undoubleN();
            return true;
        }

        // (1) Undouble н (n)
        if ($this->undoubleN()) {
            return true;
        }

        // (3) if the word ends ь (') (soft sign) remove it
        if (($position = $this->searchIfInRv(array('ь'))) !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
            return true;
        }

        return false;
    }

    /**
     * If the word ends with нн, drop the last н.
     *
     * @return boolean true when a trailing нн was reduced to н
     */
    private function undoubleN()
    {
        if (($position = $this->searchIfInRv(array('нн'))) === false) {
            return false;
        }

        // Keep everything up to and including the first н of the pair.
        $this->word = UTF8::substr($this->word, 0, ($position + 1));

        return true;
    }

    /**
     * In any word, RV is the region after the first vowel, or the end of the word if it contains no vowel.
     *
     * Stores the region text in $this->rv and its start offset (in characters) in $this->rvIndex.
     *
     * @return boolean true when the word contains a vowel, false otherwise
     */
    protected function rv()
    {
        $length = UTF8::strlen($this->word);

        // Defaults for a word without any vowel: empty region starting at the end.
        $this->rv = '';
        $this->rvIndex = $length;

        for ($i = 0; $i < $length; $i++) {
            $letter = UTF8::substr($this->word, $i, 1);
            if (in_array($letter, self::$vowels, true)) {
                $this->rv = UTF8::substr($this->word, ($i + 1));
                $this->rvIndex = $i + 1;
                return true;
            }
        }

        return false;
    }

    /**
     * group 1 endings must follow а (a) or я (ia)
     *
     * The preceding letter itself has to lie inside RV as well.
     *
     * @param integer $position Character offset at which the candidate ending starts
     * @return boolean true when the letter just before $position is а or я and is inside RV
     */
    private function checkGroup1($position)
    {
        if (! $this->inRv(($position - 1))) {
            return false;
        }

        $letter = UTF8::substr($this->word, ($position - 1), 1);

        return $letter === 'а' || $letter === 'я';
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |