[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Snowball stemmer for the Danish language.
 *
 * The word being stemmed is kept in $this->word and is rewritten in place by
 * four successive steps. Suffix matching is restricted to the R1 region,
 * which is computed by helpers that are not declared in this class
 * (presumably inherited from Stem - verify against the base class).
 *
 * @link http://snowball.tartarus.org/algorithms/danish/stemmer.html
 * @author wamania
 */
class Danish extends Stem
{
    /**
     * All danish vowels
     */
    protected static $vowels = ['a', 'e', 'i', 'o', 'u', 'y', 'æ', 'å', 'ø'];

    /**
     * {@inheritdoc}
     *
     * Lower-cases the word, establishes R1 and runs steps 1 to 4 in order.
     *
     * @param string $word the word to stem; must be valid UTF-8
     * @return string the stemmed, lower-cased word
     * @throws \Exception when $word is not valid UTF-8
     */
    public function stem($word): string
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // R2 is not used: R1 is defined in the same way as in the German stemmer
        $this->r1();

        // then R1 is adjusted so that the region before it contains at least 3 letters.
        if ($this->r1Index < 3) {
            $this->r1Index = 3;
            $this->r1 = UTF8::substr($this->word, 3);
        }

        // Do each of steps 1, 2, 3 and 4.
        $this->step1();
        $this->step2();
        $this->step3();
        $this->step4();

        return $this->word;
    }

    /**
     * Tell whether a word ends with a valid s-ending, i.e. one of
     * a b c d f g h j k l m n o p r t v y z å
     *
     * @param string $word the word whose last letter is inspected
     * @return bool true when the last letter of $word is a valid s-ending
     */
    private function hasValidSEnding($word): bool
    {
        $lastLetter = UTF8::substr($word, -1, 1);

        return in_array(
            $lastLetter,
            [
                'a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l',
                'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å',
            ],
            true
        );
    }

    /**
     * Step 1
     * Search for the longest among the following suffixes in R1, and perform the action indicated.
     *
     * @return bool true when the word was modified
     */
    private function step1(): bool
    {
        // hed ethed ered e erede ende erende ene erne ere en heden eren er heder erer
        // heds es endes erendes enes ernes eres ens hedens erens ers ets erets et eret
        //      delete
        // The list is ordered longest first so that the longest suffix is tried first.
        $position = $this->searchIfInR1([
            'erendes', 'erende', 'hedens', 'erede', 'ethed', 'heden', 'endes', 'erets', 'heder', 'ernes',
            'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds',
            'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e',
        ]);

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // s
        //      delete if preceded by a valid s-ending
        $position = $this->searchIfInR1(['s']);

        if ($position === false) {
            return false;
        }

        $withoutS = UTF8::substr($this->word, 0, $position);

        if (!$this->hasValidSEnding($withoutS)) {
            // The final "s" is kept: the letter before it is not a valid s-ending.
            return false;
        }

        $this->word = $withoutS;

        return true;
    }

    /**
     * Step 2
     * Search for one of the following suffixes in R1, and if found delete the last letter.
     *      gd dt gt kt
     *
     * @return bool true when the word was modified
     */
    private function step2(): bool
    {
        if ($this->searchIfInR1(['gd', 'dt', 'gt', 'kt']) === false) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, -1);

        return true;
    }

    /**
     * Step 3
     * Remove the final "st" of "igst", then handle the suffixes ig lig elig els and løst in R1.
     *
     * @return bool true when the word was modified
     */
    private function step3(): bool
    {
        $modified = false;

        // If the word ends igst, remove the final st.
        // This check uses search(), not searchIfInR1(): it is not limited to R1.
        if ($this->search(['igst']) !== false) {
            $this->word = UTF8::substr($this->word, 0, -2);
            $modified = true;
        }

        // Search for the longest among the following suffixes in R1, and perform the action indicated.
        // ig lig elig els
        //      delete, and then repeat step 2
        $position = $this->searchIfInR1(['elig', 'lig', 'ig', 'els']);

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);
            $this->step2();

            return true;
        }

        // løst
        //      replace with løs
        if ($this->searchIfInR1(['løst']) !== false) {
            $this->word = UTF8::substr($this->word, 0, -1);

            return true;
        }

        return $modified;
    }

    /**
     * Step 4: undouble
     * If the word ends with double consonant in R1, remove one of the consonants.
     *
     * @return bool true when the word was modified
     */
    private function step4(): bool
    {
        $length = UTF8::strlen($this->word);

        // Only the last letter has to lie in R1.
        if (!$this->inR1($length - 1)) {
            return false;
        }

        $lastLetter = UTF8::substr($this->word, -1, 1);

        if (in_array($lastLetter, self::$vowels, true)) {
            return false;
        }

        $beforeLastLetter = UTF8::substr($this->word, -2, 1);

        if ($lastLetter !== $beforeLastLetter) {
            return false;
        }

        $this->word = UTF8::substr($this->word, 0, -1);

        return true;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |