[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Shared state and helpers for computing and querying the RV, R1 and R2
 * regions of the word being stemmed.
 */
abstract class Stem implements Stemmer
{
    /**
     * Letters treated as vowels; read through static:: so that a subclass
     * may redeclare the list.
     * @var string[]
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y');

    /**
     * helper, contains stringified list of vowels
     * @var string
     */
    protected $plainVowels;

    /**
     * The word we are stemming
     * @var string
     */
    protected $word;

    /**
     * The original word, use to check if word has been modified
     * @var string
     */
    protected $originalWord;

    /**
     * RV value
     * @var string
     */
    protected $rv;

    /**
     * RV index (based on the beginning of the word)
     * @var int
     */
    protected $rvIndex;

    /**
     * R1 value (the text of the region, as returned by rx())
     * @var string
     */
    protected $r1;

    /**
     * R1 index (based on the beginning of the word)
     * @var int
     */
    protected $r1Index;

    /**
     * R2 value (the text of the region, as returned by rx())
     * @var string
     */
    protected $r2;

    /**
     * R2 index (based on the beginning of the word)
     * @var int
     */
    protected $r2Index;

    /**
     * Tells whether a position lies at or after the start of RV.
     *
     * @param int $position position based on the beginning of the word
     * @return bool
     */
    protected function inRv($position)
    {
        return ($position >= $this->rvIndex);
    }

    /**
     * Tells whether a position lies at or after the start of R1.
     *
     * @param int $position position based on the beginning of the word
     * @return bool
     */
    protected function inR1($position)
    {
        return ($position >= $this->r1Index);
    }

    /**
     * Tells whether a position lies at or after the start of R2.
     *
     * @param int $position position based on the beginning of the word
     * @return bool
     */
    protected function inR2($position)
    {
        return ($position >= $this->r2Index);
    }

    /**
     * Same as search(), with the lookup starting at the RV index.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInRv($suffixes)
    {
        return $this->search($suffixes, $this->rvIndex);
    }

    /**
     * Same as search(), with the lookup starting at the R1 index.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInR1($suffixes)
    {
        return $this->search($suffixes, $this->r1Index);
    }

    /**
     * Same as search(), with the lookup starting at the R2 index.
     *
     * @param array $suffixes
     * @return int|false
     */
    protected function searchIfInR2($suffixes)
    {
        return $this->search($suffixes, $this->r2Index);
    }

    /**
     * Looks for the first of the given suffixes that ends the word.
     *
     * Suffixes are tried in the order given. A suffix matches only when its
     * last occurrence (looked up from $offset) finishes exactly at the end
     * of the word.
     *
     * @param array $suffixes candidate suffixes
     * @param int   $offset   offset handed to the last-occurrence lookup
     * @return int|false position of the matching suffix, false when none matches
     */
    protected function search($suffixes, $offset = 0)
    {
        $length = UTF8::strlen($this->word);
        if ($offset > $length) {
            return false;
        }

        foreach ($suffixes as $suffixe) {
            $position = UTF8::strrpos($this->word, $suffixe, $offset);

            // the occurrence must reach the end of the word to count as a suffix
            if (($position !== false) && ((UTF8::strlen($suffixe) + $position) == $length)) {
                return $position;
            }
        }

        return false;
    }

    /**
     * R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel.
     */
    protected function r1()
    {
        list($this->r1Index, $this->r1) = $this->rx($this->word);
    }

    /**
     * R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel.
     */
    protected function r2()
    {
        list($index, $value) = $this->rx($this->r1);

        $this->r2 = $value;

        // rx() gives an index relative to R1, shift it to be relative to the word
        $this->r2Index = $this->r1Index + $index;
    }

    /**
     * Common function for R1 and R2
     * Search the region after the first non-vowel following a vowel in $word, or the end of the word if there is no such non-vowel.
     * R1 : $in = $this->word
     * R2 : $in = R1
     *
     * @param string $in
     * @return array array($index, $value): start of the region inside $in
     *               (never greater than the length of $in) and its text
     */
    protected function rx($in)
    {
        $length = UTF8::strlen($in);

        // defaults: empty region located at the end of the word
        $value = '';
        $index = $length;

        // we search all vowels
        $vowels = array();
        for ($i = 0; $i < $length; $i++) {
            $letter = UTF8::substr($in, $i, 1);
            if (in_array($letter, static::$vowels, true)) {
                $vowels[] = $i;
            }
        }

        // search the non-vowel following a vowel
        foreach ($vowels as $position) {
            $after = $position + 1;

            // A vowel in last position is followed by nothing: without this guard
            // the empty lookup would count as a non-vowel and the index would
            // point one past the end of the word.
            if ($after >= $length) {
                break;
            }

            $letter = UTF8::substr($in, $after, 1);

            if (! in_array($letter, static::$vowels, true)) {
                $index = $after + 1;
                $value = UTF8::substr($in, $index);

                break;
            }
        }

        return array($index, $value);
    }

    /**
     * Used by spanish, italian, portuguese, etc (but not by french)
     *
     * If the second letter is a consonant, RV is the region after the next following vowel,
     * or if the first two letters are vowels, RV is the region after the next consonant,
     * and otherwise (consonant-vowel case) RV is the region after the third letter.
     * But RV is the end of the word if these positions cannot be found.
     *
     * @return bool always true
     */
    protected function rv()
    {
        $length = UTF8::strlen($this->word);

        // defaults: empty region located at the end of the word
        $this->rv = '';
        $this->rvIndex = $length;

        if ($length < 3) {
            return true;
        }

        $first = UTF8::substr($this->word, 0, 1);
        $second = UTF8::substr($this->word, 1, 1);

        $firstIsVowel = in_array($first, static::$vowels, true);
        $secondIsVowel = in_array($second, static::$vowels, true);

        // If the second letter is a consonant, RV is the region after the next following vowel,
        if (! $secondIsVowel) {
            for ($i = 2; $i < $length; $i++) {
                $letter = UTF8::substr($this->word, $i, 1);
                if (in_array($letter, static::$vowels, true)) {
                    $this->rvIndex = $i + 1;
                    $this->rv = UTF8::substr($this->word, ($i + 1));

                    return true;
                }
            }
        }

        // or if the first two letters are vowels, RV is the region after the next consonant,
        if ($firstIsVowel && $secondIsVowel) {
            for ($i = 2; $i < $length; $i++) {
                $letter = UTF8::substr($this->word, $i, 1);
                if (! in_array($letter, static::$vowels, true)) {
                    $this->rvIndex = $i + 1;
                    $this->rv = UTF8::substr($this->word, ($i + 1));

                    return true;
                }
            }
        }

        // and otherwise (consonant-vowel case) RV is the region after the third letter.
        if ((! $firstIsVowel) && $secondIsVowel) {
            $this->rv = UTF8::substr($this->word, 3);
            $this->rvIndex = 3;

            return true;
        }

        // no position found: RV stays at the end of the word (defaults above)
        return true;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |