[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Danish.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  /**
   8   *
   9   * @link http://snowball.tartarus.org/algorithms/danish/stemmer.html
  10   * @author wamania
  11   *
  12   */
  13  class Danish extends Stem
  14  {
  15      /**
  16       * All danish vowels
  17       */
  18      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'æ', 'å', 'ø');
  19  
  20      /**
  21       * {@inheritdoc}
  22       */
  23      public function stem($word): string
  24      {
  25          // we do ALL in UTF-8
  26          if (!UTF8::is_utf8($word)) {
  27              throw new \Exception('Word must be in UTF-8');
  28          }
  29  
  30          $this->word = UTF8::strtolower($word);
  31  
  32          // R2 is not used: R1 is defined in the same way as in the German stemmer
  33          $this->r1();
  34  
  35          // then R1 is adjusted so that the region before it contains at least 3 letters.
  36          if ($this->r1Index < 3) {
  37              $this->r1Index = 3;
  38              $this->r1 = UTF8::substr($this->word, 3);
  39          }
  40  
  41          // Do each of steps 1, 2 3 and 4.
  42          $this->step1();
  43          $this->step2();
  44          $this->step3();
  45          $this->step4();
  46  
  47          return $this->word;
  48      }
  49  
  50      /**
  51       * Define a valid s-ending as one of
  52       * a   b   c   d   f   g   h   j   k   l   m   n   o   p   r   t   v   y   z   å
  53       *
  54       * @param string $ending
  55       * @return boolean
  56       */
  57      private function hasValidSEnding($word)
  58      {
  59          $lastLetter = UTF8::substr($word, -1, 1);
  60          return in_array($lastLetter, array('a', 'b', 'c', 'd', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'r', 't', 'v', 'y', 'z', 'å'));
  61      }
  62  
  63      /**
  64       * Step 1
  65       * Search for the longest among the following suffixes in R1, and perform the action indicated.
  66       */
  67      private function step1()
  68      {
  69          // hed   ethed   ered   e   erede   ende   erende   ene   erne   ere   en   heden   eren   er   heder   erer
  70          // heds   es   endes   erendes   enes   ernes   eres   ens   hedens   erens   ers   ets   erets   et   eret
  71          //      delete
  72          if ( ($position = $this->searchIfInR1(array(
  73              'erendes', 'erende', 'hedens', 'erede', 'ethed', 'heden', 'endes', 'erets', 'heder', 'ernes',
  74              'erens', 'ered', 'ende', 'erne', 'eres', 'eren', 'eret', 'erer', 'enes', 'heds',
  75              'ens', 'ene', 'ere', 'ers', 'ets', 'hed', 'es', 'et', 'er', 'en', 'e'
  76          ))) !== false) {
  77              $this->word = UTF8::substr($this->word, 0, $position);
  78              return true;
  79          }
  80  
  81          // s
  82          //      delete if preceded by a valid s-ending
  83          if ( ($position = $this->searchIfInR1(array('s'))) !== false) {
  84              $word = UTF8::substr($this->word, 0, $position);
  85              if ($this->hasValidSEnding($word)) {
  86                  $this->word = $word;
  87              }
  88              return true;
  89          }
  90      }
  91  
  92      /**
  93       * Step 2
  94       * Search for one of the following suffixes in R1, and if found delete the last letter.
  95       *      gd   dt   gt   kt
  96       */
  97      private function step2()
  98      {
  99          if ($this->searchIfInR1(array('gd', 'dt', 'gt', 'kt')) !== false) {
 100              $this->word = UTF8::substr($this->word, 0, -1);
 101          }
 102      }
 103  
 104      /**
 105       * Step 3:
 106       */
 107      private function step3()
 108      {
 109          // If the word ends igst, remove the final st.
 110          if ($this->search(array('igst')) !== false) {
 111              $this->word = UTF8::substr($this->word, 0, -2);
 112          }
 113  
 114          // Search for the longest among the following suffixes in R1, and perform the action indicated.
 115          //  ig   lig   elig   els
 116          //      delete, and then repeat step 2
 117          if ( ($position = $this->searchIfInR1(array('elig', 'lig', 'ig', 'els'))) !== false) {
 118              $this->word = UTF8::substr($this->word, 0, $position);
 119              $this->step2();
 120              return true;
 121          }
 122  
 123          //  løst
 124          //      replace with løs
 125          if ($this->searchIfInR1(array('løst')) !== false) {
 126              $this->word = UTF8::substr($this->word, 0, -1);
 127          }
 128      }
 129  
 130      /**
 131       * Step 4: undouble
 132       * If the word ends with double consonant in R1, remove one of the consonants.
 133       */
 134      private function step4()
 135      {
 136          $length = UTF8::strlen($this->word);
 137          if (!$this->inR1(($length-1))) {
 138              return false;
 139          }
 140  
 141          $lastLetter = UTF8::substr($this->word, -1, 1);
 142          if (in_array($lastLetter, self::$vowels)) {
 143              return false;
 144          }
 145          $beforeLastLetter = UTF8::substr($this->word, -2, 1);
 146  
 147          if ($lastLetter == $beforeLastLetter) {
 148              $this->word = UTF8::substr($this->word, 0, -1);
 149          }
 150          return true;
 151      }
 152  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer