[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Swedish.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  /**
   8   *
   9   * @link http://snowball.tartarus.org/algorithms/swedish/stemmer.html
  10   * @author wamania
  11   *
  12   */
  13  class Swedish extends Stem
  14  {
  15      /**
  16       * All swedish vowels
  17       */
  18      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'å', 'ö');
  19  
  20      /**
  21       * {@inheritdoc}
  22       */
  23      public function stem($word)
  24      {
  25          // we do ALL in UTF-8
  26          if (!UTF8::is_utf8($word)) {
  27              throw new \Exception('Word must be in UTF-8');
  28          }
  29  
  30          $this->word = UTF8::strtolower($word);
  31  
  32          // R2 is not used: R1 is defined in the same way as in the German stemmer
  33          $this->r1();
  34  
  35          // then R1 is adjusted so that the region before it contains at least 3 letters.
  36          if ($this->r1Index < 3) {
  37              $this->r1Index = 3;
  38              $this->r1 = UTF8::substr($this->word, 3);
  39          }
  40  
  41          // Do each of steps 1, 2 3 and 4.
  42          $this->step1();
  43          $this->step2();
  44          $this->step3();
  45  
  46          return $this->word;
  47      }
  48  
  49      /**
  50       * Define a valid s-ending as one of
  51       * b   c   d   f   g   h   j   k   l   m   n   o   p   r   t   v   y
  52       *
  53       * @param string $ending
  54       * @return boolean
  55       */
  56      private function hasValidSEnding($word)
  57      {
  58          $lastLetter = UTF8::substr($word, -1, 1);
  59          return in_array($lastLetter, array('b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y'));
  60      }
  61  
  62      /**
  63       * Step 1
  64       * Search for the longest among the following suffixes in R1, and perform the action indicated.
  65       */
  66      private function step1()
  67      {
  68          // a   arna   erna   heterna   orna   ad   e   ade   ande   arne   are   aste   en   anden   aren   heten
  69          // ern   ar   er   heter   or   as   arnas   ernas   ornas   es   ades   andes   ens   arens   hetens
  70          // erns   at   andet   het   ast
  71          //      delete
  72          if ( ($position = $this->searchIfInR1(array(
  73              'heterna', 'hetens', 'ornas', 'andes', 'arnas', 'heter', 'ernas', 'anden', 'heten', 'andet', 'arens',
  74              'orna', 'arna', 'erna', 'aren', 'ande', 'ades', 'arne', 'erns', 'aste', 'ade', 'ern', 'het',
  75              'ast', 'are', 'ens', 'or', 'es', 'ad', 'en', 'at', 'ar', 'as', 'er', 'a', 'e'
  76          ))) !== false) {
  77              $this->word = UTF8::substr($this->word, 0, $position);
  78              return true;
  79          }
  80  
  81          //  s
  82          //      delete if preceded by a valid s-ending
  83          if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
  84              $word = UTF8::substr($this->word, 0, $position);
  85              if ($this->hasValidSEnding($word)) {
  86                  $this->word = $word;
  87              }
  88          }
  89      }
  90  
  91      /**
  92       * Step 2
  93       * Search for one of the following suffixes in R1, and if found delete the last letter.
  94       */
  95      private function step2()
  96      {
  97          // dd   gd   nn   dt   gt   kt   tt
  98          if ($this->searchIfInR1(array('dd', 'gd', 'nn', 'dt', 'gt', 'kt', 'tt')) !== false) {
  99              $this->word = UTF8::substr($this->word, 0, -1);
 100          }
 101      }
 102  
 103      /**
 104       * Step 3:
 105       * Search for the longest among the following suffixes in R1, and perform the action indicated.
 106       */
 107      private function step3()
 108      {
 109          // lig   ig   els
 110          //      delete
 111          if ( ($position = $this->searchIfInR1(array('lig', 'ig', 'els'))) !== false) {
 112              $this->word = UTF8::substr($this->word, 0, $position);
 113              return true;
 114          }
 115  
 116          // löst
 117          //      replace with lös
 118          if ( ($this->searchIfInR1(array('löst'))) !== false) {
 119              $this->word = UTF8::substr($this->word, 0, -1);
 120              return true;
 121          }
 122  
 123          // fullt
 124          //      replace with full
 125          if ( ($this->searchIfInR1(array('fullt'))) !== false) {
 126              $this->word = UTF8::substr($this->word, 0, -1);
 127              return true;
 128          }
 129      }
 130  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer