[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Wamania\Snowball\Stemmer;

use voku\helper\UTF8;

/**
 * Swedish stemmer following the Snowball algorithm description.
 *
 * The word being stemmed is kept in $this->word and is shortened in place by
 * three successive steps, each of which only considers suffixes located in
 * the R1 region.
 *
 * @link http://snowball.tartarus.org/algorithms/swedish/stemmer.html
 * @author wamania
 *
 */
class Swedish extends Stem
{
    /**
     * All swedish vowels
     */
    protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'å', 'ö');

    /**
     * {@inheritdoc}
     *
     * @param string $word Word to stem; it is lower-cased before processing.
     * @return string The stemmed, lower-cased word.
     * @throws \Exception If $word is not valid UTF-8.
     */
    public function stem($word)
    {
        // we do ALL in UTF-8
        if (!UTF8::is_utf8($word)) {
            throw new \Exception('Word must be in UTF-8');
        }

        $this->word = UTF8::strtolower($word);

        // R2 is not used: R1 is defined in the same way as in the German stemmer
        $this->r1();

        // then R1 is adjusted so that the region before it contains at least 3 letters.
        if ($this->r1Index < 3) {
            $this->r1Index = 3;
            $this->r1 = UTF8::substr($this->word, 3);
        }

        // Do each of steps 1, 2 and 3.
        $this->step1();
        $this->step2();
        $this->step3();

        return $this->word;
    }

    /**
     * Define a valid s-ending as one of
     * b c d f g h j k l m n o p r t v y
     *
     * @param string $word Candidate stem, i.e. the word with its trailing "s" already removed.
     * @return boolean True when the last letter of $word is a valid s-ending.
     */
    private function hasValidSEnding($word)
    {
        $lastLetter = UTF8::substr($word, -1, 1);

        // Strict comparison: only an exact one-letter string may match.
        return in_array(
            $lastLetter,
            array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y'),
            true
        );
    }

    /**
     * Step 1
     * Search for the longest among the following suffixes in R1, and perform the action indicated.
     *
     * @return boolean True when the word was shortened, false otherwise.
     */
    private function step1()
    {
        // a arna erna heterna orna ad e ade ande arne are aste en anden aren heten
        // ern ar er heter or as arnas ernas ornas es ades andes ens arens hetens
        // erns at andet het ast
        //      delete
        //
        // The list is ordered from longest to shortest suffix.
        // NOTE(review): presumably searchIfInR1() returns the first match in list
        // order, which is what makes this the "longest" match - confirm in Stem.
        $position = $this->searchIfInR1(array(
            'heterna', 'hetens', 'ornas', 'andes', 'arnas', 'heter', 'ernas', 'anden', 'heten', 'andet', 'arens',
            'orna', 'arna', 'erna', 'aren', 'ande', 'ades', 'arne', 'erns', 'aste', 'ade', 'ern', 'het',
            'ast', 'are', 'ens', 'or', 'es', 'ad', 'en', 'at', 'ar', 'as', 'er', 'a', 'e'
        ));

        if ($position !== false) {
            // Keep everything before the matched suffix.
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // s
        //      delete if preceded by a valid s-ending
        $position = $this->searchIfInR1(array('s'));

        if ($position !== false) {
            $word = UTF8::substr($this->word, 0, $position);

            if ($this->hasValidSEnding($word)) {
                $this->word = $word;

                return true;
            }
        }

        return false;
    }

    /**
     * Step 2
     * Search for one of the following suffixes in R1, and if found delete the last letter.
     *
     * @return boolean True when the word was shortened, false otherwise.
     */
    private function step2()
    {
        // dd gd nn dt gt kt tt
        if ($this->searchIfInR1(array('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt')) !== false) {
            $this->word = UTF8::substr($this->word, 0, -1);

            return true;
        }

        return false;
    }

    /**
     * Step 3:
     * Search for the longest among the following suffixes in R1, and perform the action indicated.
     *
     * @return boolean True when the word was shortened, false otherwise.
     */
    private function step3()
    {
        // lig ig els
        //      delete
        $position = $this->searchIfInR1(array('lig', 'ig', 'els'));

        if ($position !== false) {
            $this->word = UTF8::substr($this->word, 0, $position);

            return true;
        }

        // löst
        //      replace with lös
        // fullt
        //      replace with full
        //
        // Both replacements amount to dropping the final "t"; "löst" is tried first.
        foreach (array('löst', 'fullt') as $suffix) {
            if ($this->searchIfInR1(array($suffix)) !== false) {
                $this->word = UTF8::substr($this->word, 0, -1);

                return true;
            }
        }

        return false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |