[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Stemmer for German words, following the Snowball German algorithm.
 *
 * The word being processed and its R1/R2 regions are kept in properties
 * ($word, $r1, $r1Index, ...) and helpers (search(), inR1(), inR2(), r1(),
 * r2()) that are not declared in this class; they are presumably provided
 * by the parent Stem class.
 *
 * @link http://snowball.tartarus.org/algorithms/german/stemmer.html
 * @author wamania
 *
 */
class German extends Stem
{
    /**
     * All German vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö', 'ü');

    /**
     * Letters that may precede a final "s" for it to be removed in step 1
     */
    protected static $sEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 'r' ,'t');

    /**
     * Letters that may precede a final "st" for it to be removed in step 2
     */
    protected static $stEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't');

    /**
     * Stem a German word.
     *
     * The word is lower-cased, "ß" is rewritten to "ss", "u"/"y" standing
     * between two vowels are marked by upper-casing them, the regions are
     * computed, and then steps 1 to 3 and the final clean-up are applied.
     *
     * @param string $word the word to stem; must be valid UTF-8
     *
     * @return string the stemmed word
     *
     * @throws \Exception when $word is not valid UTF-8
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->plainVowels = implode('', self::$vowels);

        $this->word = UTF8::strtolower($word);

        // First, replace ß by ss
        $this->word = UTF8::str_replace('ß', 'ss', $this->word);

        // put u and y between vowels into upper case
        // (the word was lower-cased above, so 'U' and 'Y' can only be these markers;
        // finish() turns them back into lower case)
        $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
        $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);

        // R1 and R2 are first set up in the standard way
        $this->r1();
        $this->r2();

        // but then R1 is adjusted so that the region before it contains at least 3 letters.
        if ($this->r1Index < 3) {
            $this->r1Index = 3;
            $this->r1 = UTF8::substr($this->word, 3);
        }

        $this->step1();
        $this->step2();
        $this->step3();
        $this->finish();

        return $this->word;
    }

    /**
     * Step 1: handle the endings "em ern er", "es en e" and "s".
     *
     * The groups are tried in that order and only the first group whose
     * ending is found is processed.
     *
     * @return bool true when one of the endings was found (whether or not
     *              it was deleted), false otherwise
     */
    private function step1()
    {
        // group (a): delete if in R1
        if ( ($position = $this->search(array('em', 'ern', 'er'))) !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }
            return true;
        }

        // group (b): delete if in R1
        if ( ($position = $this->search(array('es', 'en', 'e'))) !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);

                // If an ending of group (b) is deleted, and the ending is preceded by niss, delete the final s
                if ($this->search(array('niss')) !== false) {
                    $this->word = UTF8::substr($this->word, 0, -1);
                }
            }
            return true;
        }

        // group (c): s (preceded by a valid s-ending)
        if ( ($position = $this->search(array('s'))) !== false) {
            if ($this->inR1($position)) {
                $before = $position - 1;
                $letter = UTF8::substr($this->word, $before, 1);

                // strict comparison: only an exact letter match counts
                if (in_array($letter, self::$sEndings, true)) {
                    $this->word = UTF8::substr($this->word, 0, $position);
                }
            }
            return true;
        }

        return false;
    }

    /**
     * Step 2: handle the endings "en er est" and "st".
     *
     * @return bool true when one of the endings was found (whether or not
     *              it was deleted), false otherwise
     */
    private function step2()
    {
        // en er est
        // delete if in R1
        if ( ($position = $this->search(array('en', 'er', 'est'))) !== false) {
            if ($this->inR1($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }
            return true;
        }

        // st (preceded by a valid st-ending, itself preceded by at least 3 letters)
        // delete if in R1
        if ( ($position = $this->search(array('st'))) !== false) {
            if ($this->inR1($position)) {
                $before = $position - 1;
                // $before is the index of the letter in front of "st", so this
                // requires at least 3 letters ahead of that letter
                if ($before >= 3) {
                    $letter = UTF8::substr($this->word, $before, 1);

                    if (in_array($letter, self::$stEndings, true)) {
                        $this->word = UTF8::substr($this->word, 0, $position);
                    }
                }
            }
            return true;
        }
        return false;
    }

    /**
     * Step 3: d-suffixes
     *
     * Handles "end ung", "ig ik isch", "lich heit" and "keit", in that
     * order; only the first group whose ending is found is processed.
     *
     * @return bool true when one of the endings was found (whether or not
     *              it was deleted), false otherwise
     */
    private function step3()
    {
        // end ung
        // delete if in R2
        // if preceded by ig, delete if in R2 and not preceded by e
        if ( ($position = $this->search(array('end', 'ung'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if ( ($position2 = $this->search(array('ig'))) !== false) {
                $before = $position2 - 1;
                $letter = UTF8::substr($this->word, $before, 1);

                if ( ($this->inR2($position2)) && ($letter !== 'e') ) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }
            return true;
        }

        // ig ik isch
        // delete if in R2 and not preceded by e
        if ( ($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
            $before = $position - 1;
            $letter = UTF8::substr($this->word, $before, 1);

            if ( ($this->inR2($position)) && ($letter !== 'e') ) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }
            return true;
        }

        // lich heit
        // delete if in R2
        // if preceded by er or en, delete if in R1
        // (strict !== so that a match at position 0 is not mistaken for "not found")
        if ( ($position = $this->search(array('lich', 'heit'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if ( ($position2 = $this->search(array('er', 'en'))) !== false) {
                if ($this->inR1($position2)) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }
            return true;
        }

        // keit
        // delete if in R2
        // if preceded by lich or ig, delete if in R2
        // (strict !== so that a match at position 0 is not mistaken for "not found")
        if ( ($position = $this->search(array('keit'))) !== false) {
            if ($this->inR2($position)) {
                $this->word = UTF8::substr($this->word, 0, $position);
            }

            if ( ($position2 = $this->search(array('lich', 'ig'))) !== false) {
                if ($this->inR2($position2)) {
                    $this->word = UTF8::substr($this->word, 0, $position2);
                }
            }
            return true;
        }

        return false;
    }

    /**
     * Finally
     *
     * Undo the upper-case markers set in stem() and strip the umlauts.
     */
    private function finish()
    {
        // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
        $this->word = UTF8::str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |