PHPXRef 0.7.1 : Joomla 4.2.2 documentation : /libraries/vendor/wamania/php-stemmer/src/Stemmer/German.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  /**
   8   *
   9   * @link http://snowball.tartarus.org/algorithms/german/stemmer.html
  10   * @author wamania
  11   *
  12   */
  13  class German extends Stem
  14  {
  15      /**
  16       * All German vowels
  17       */
  18      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y', 'ä', 'ö', 'ü');
  19  
  20      protected static $sEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 'r' ,'t');
  21  
  22      protected static $stEndings = array('b', 'd', 'f', 'g', 'h', 'k', 'l', 'm', 'n', 't');
  23  
  24      /**
  25       * {@inheritdoc}
  26       */
  27      public function stem($word)
  28      {
  29          // we do ALL in UTF-8
  30          if (!UTF8::is_utf8($word)) {
  31              throw new \Exception('Word must be in UTF-8');
  32          }
  33  
  34          $this->plainVowels = implode('', self::$vowels);
  35  
  36          $this->word = UTF8::strtolower($word);
  37  
  38          // First, replace ß by ss
  39          $this->word = UTF8::str_replace('ß', 'ss', $this->word);
  40  
  41          // put u and y between vowels into upper case
  42          $this->word = preg_replace('#(['.$this->plainVowels.'])y(['.$this->plainVowels.'])#u', '$1Y$2', $this->word);
  43          $this->word = preg_replace('#(['.$this->plainVowels.'])u(['.$this->plainVowels.'])#u', '$1U$2', $this->word);
  44  
  45          //  R1 and R2 are first set up in the standard way
  46          $this->r1();
  47          $this->r2();
  48  
  49          // but then R1 is adjusted so that the region before it contains at least 3 letters.
  50          if ($this->r1Index < 3) {
  51              $this->r1Index = 3;
  52              $this->r1 = UTF8::substr($this->word, 3);
  53          }
  54  
  55          $this->step1();
  56          $this->step2();
  57          $this->step3();
  58          $this->finish();
  59  
  60          return $this->word;
  61      }
  62  
  63      /**
  64       * Step 1
  65       */
  66      private function step1()
  67      {
  68          // delete if in R1
  69          if ( ($position = $this->search(array('em', 'ern', 'er'))) !== false) {
  70              if ($this->inR1($position)) {
  71                  $this->word = UTF8::substr($this->word, 0, $position);
  72              }
  73              return true;
  74          }
  75  
  76          // delete if in R1
  77          if ( ($position = $this->search(array('es', 'en', 'e'))) !== false) {
  78              if ($this->inR1($position)) {
  79                  $this->word = UTF8::substr($this->word, 0, $position);
  80  
  81                  //If an ending of group (b) is deleted, and the ending is preceded by niss, delete the final s
  82                  if ($this->search(array('niss')) !== false) {
  83                      $this->word = UTF8::substr($this->word, 0, -1);
  84                  }
  85              }
  86              return true;
  87          }
  88  
  89          // s (preceded by a valid s-ending)
  90          if ( ($position = $this->search(array('s'))) !== false) {
  91              if ($this->inR1($position)) {
  92                  $before = $position - 1;
  93                  $letter = UTF8::substr($this->word, $before, 1);
  94  
  95                  if (in_array($letter, self::$sEndings)) {
  96                      $this->word = UTF8::substr($this->word, 0, $position);
  97                  }
  98              }
  99              return true;
 100          }
 101  
 102          return false;
 103      }
 104  
 105      /**
 106       * Step 2
 107       */
 108      private function step2()
 109      {
 110          // en   er   est
 111          //      delete if in R1
 112          if ( ($position = $this->search(array('en', 'er', 'est'))) !== false) {
 113              if ($this->inR1($position)) {
 114                  $this->word = UTF8::substr($this->word, 0, $position);
 115              }
 116              return true;
 117          }
 118  
 119          // st (preceded by a valid st-ending, itself preceded by at least 3 letters)
 120          //      delete if in R1
 121          if ( ($position = $this->search(array('st'))) !== false) {
 122              if ($this->inR1($position)) {
 123                  $before = $position - 1;
 124                  if ($before >= 3) {
 125                      $letter = UTF8::substr($this->word, $before, 1);
 126  
 127                      if (in_array($letter, self::$stEndings)) {
 128                          $this->word = UTF8::substr($this->word, 0, $position);
 129                      }
 130                  }
 131              }
 132              return true;
 133          }
 134          return false;
 135      }
 136  
 137      /**
 138       * Step 3: d-suffixes
 139       */
 140      private function step3()
 141      {
 142          // end   ung
 143          //      delete if in R2
 144          //      if preceded by ig, delete if in R2 and not preceded by e
 145          if ( ($position = $this->search(array('end', 'ung'))) !== false) {
 146              if ($this->inR2($position)) {
 147                  $this->word = UTF8::substr($this->word, 0, $position);
 148              }
 149  
 150              if ( ($position2 = $this->search(array('ig'))) !== false) {
 151                  $before = $position2 - 1;
 152                  $letter = UTF8::substr($this->word, $before, 1);
 153  
 154                  if ( ($this->inR2($position2)) && ($letter != 'e') ) {
 155                      $this->word = UTF8::substr($this->word, 0, $position2);
 156                  }
 157              }
 158              return true;
 159          }
 160  
 161          // ig   ik   isch
 162          //      delete if in R2 and not preceded by e
 163          if ( ($position = $this->search(array('ig', 'ik', 'isch'))) !== false) {
 164              $before = $position - 1;
 165              $letter = UTF8::substr($this->word, $before, 1);
 166  
 167              if ( ($this->inR2($position)) && ($letter != 'e') ) {
 168                  $this->word = UTF8::substr($this->word, 0, $position);
 169              }
 170              return true;
 171          }
 172  
 173          // lich   heit
 174          //      delete if in R2
 175          //      if preceded by er or en, delete if in R1
 176          if ( ($position = $this->search(array('lich', 'heit'))) != false) {
 177              if ($this->inR2($position)) {
 178                  $this->word = UTF8::substr($this->word, 0, $position);
 179              }
 180  
 181              if ( ($position2 = $this->search(array('er', 'en'))) !== false) {
 182                  if ($this->inR1($position2)) {
 183                      $this->word = UTF8::substr($this->word, 0, $position2);
 184                  }
 185              }
 186              return true;
 187          }
 188  
 189          // keit
 190          //      delete if in R2
 191          //      if preceded by lich or ig, delete if in R2
 192          if ( ($position = $this->search(array('keit'))) != false) {
 193              if ($this->inR2($position)) {
 194                  $this->word = UTF8::substr($this->word, 0, $position);
 195              }
 196  
 197              if ( ($position2 = $this->search(array('lich', 'ig'))) !== false) {
 198                  if ($this->inR2($position2)) {
 199                      $this->word = UTF8::substr($this->word, 0, $position2);
 200                  }
 201              }
 202              return true;
 203          }
 204  
 205          return false;
 206      }
 207  
 208      /**
 209       * Finally
 210       */
 211      private function finish()
 212      {
 213          // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
 214          $this->word = UTF8::str_replace(array('U', 'Y', 'ä', 'ü', 'ö'), array('u', 'y', 'a', 'u', 'o'), $this->word);
 215      }
 216  }
PHP Cross Reference of Joomla 4.2.2 documentation

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> German.php (source)