[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Stem.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  abstract class Stem implements Stemmer
   8  {
   9      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'y');
  10  
  11      /**
  12       * helper, contains stringified list of vowels
  13       * @var string
  14       */
  15      protected $plainVowels;
  16  
  17      /**
  18       * The word we are stemming
  19       * @var string
  20       */
  21      protected $word;
  22  
  23      /**
  24       * The original word, use to check if word has been modified
  25       * @var string
  26       */
  27      protected $originalWord;
  28  
  29      /**
  30       * RV value
  31       * @var string
  32       */
  33      protected $rv;
  34  
  35      /**
  36       * RV index (based on the beginning of the word)
  37       * @var integer
  38       */
  39      protected $rvIndex;
  40  
  41      /**
  42       * R1 value
  43       * @var integer
  44       */
  45      protected $r1;
  46  
  47      /**
  48       * R1 index (based on the beginning of the word)
  49       * @var int
  50       */
  51      protected $r1Index;
  52  
  53      /**
  54       * R2 value
  55       * @var integer
  56       */
  57      protected $r2;
  58  
  59      /**
  60       * R2 index (based on the beginning of the word)
  61       * @var int
  62       */
  63      protected $r2Index;
  64  
  65      protected function inRv($position)
  66      {
  67          return ($position >= $this->rvIndex);
  68      }
  69  
  70      protected function inR1($position)
  71      {
  72          return ($position >= $this->r1Index);
  73      }
  74  
  75      protected function inR2($position)
  76      {
  77          return ($position >= $this->r2Index);
  78      }
  79  
  80      protected function searchIfInRv($suffixes)
  81      {
  82          return $this->search($suffixes, $this->rvIndex);
  83      }
  84  
  85      protected function searchIfInR1($suffixes)
  86      {
  87          return $this->search($suffixes, $this->r1Index);
  88      }
  89  
  90      protected function searchIfInR2($suffixes)
  91      {
  92          return $this->search($suffixes, $this->r2Index);
  93      }
  94  
  95      protected function search($suffixes, $offset = 0)
  96      {
  97          $length = UTF8::strlen($this->word);
  98          if ($offset > $length) {
  99              return false;
 100          }
 101          foreach ($suffixes as $suffixe) {
 102              if ( (($position = UTF8::strrpos($this->word, $suffixe, $offset)) !== false) && ((Utf8::strlen($suffixe)+$position) == $length) ) {
 103                  return $position;
 104              }
 105          }
 106  
 107          return false;
 108      }
 109  
 110      /**
 111       * R1 is the region after the first non-vowel following a vowel, or the end of the word if there is no such non-vowel.
 112       */
 113      protected function r1()
 114      {
 115          list($this->r1Index, $this->r1) = $this->rx($this->word);
 116      }
 117  
 118      /**
 119       * R2 is the region after the first non-vowel following a vowel in R1, or the end of the word if there is no such non-vowel.
 120       */
 121      protected function r2()
 122      {
 123          list($index, $value) = $this->rx($this->r1);
 124  
 125          $this->r2 = $value;
 126          $this->r2Index = $this->r1Index + $index;
 127      }
 128  
 129      /**
 130       * Common function for R1 and R2
 131       * Search the region after the first non-vowel following a vowel in $word, or the end of the word if there is no such non-vowel.
 132       * R1 : $in = $this->word
 133       * R2 : $in = R1
 134       */
 135      protected function rx($in)
 136      {
 137          $length = UTF8::strlen($in);
 138  
 139          // defaults
 140          $value = '';
 141          $index = $length;
 142  
 143          // we search all vowels
 144          $vowels = array();
 145          for ($i=0; $i<$length; $i++) {
 146              $letter = UTF8::substr($in, $i, 1);
 147              if (in_array($letter, static::$vowels)) {
 148                  $vowels[] = $i;
 149              }
 150          }
 151  
 152          // search the non-vowel following a vowel
 153          foreach ($vowels as $position) {
 154              $after = $position + 1;
 155              $letter = UTF8::substr($in, $after, 1);
 156  
 157              if (! in_array($letter, static::$vowels)) {
 158                  $index = $after + 1;
 159                  $value = UTF8::substr($in, ($after+1));
 160  
 161                  break;
 162              }
 163          }
 164  
 165          return array($index, $value);
 166      }
 167  
 168      /**
 169       * Used by spanish, italian, portuguese, etc (but not by french)
 170       *
 171       * If the second letter is a consonant, RV is the region after the next following vowel,
 172       * or if the first two letters are vowels, RV is the region after the next consonant,
 173       * and otherwise (consonant-vowel case) RV is the region after the third letter.
 174       * But RV is the end of the word if these positions cannot be found.
 175       */
 176      protected function rv()
 177      {
 178          $length = UTF8::strlen($this->word);
 179  
 180          $this->rv = '';
 181          $this->rvIndex = $length;
 182  
 183          if ($length < 3) {
 184              return true;
 185          }
 186  
 187          $first = UTF8::substr($this->word, 0, 1);
 188          $second = UTF8::substr($this->word, 1, 1);
 189  
 190          // If the second letter is a consonant, RV is the region after the next following vowel,
 191          if (!in_array($second, static::$vowels)) {
 192              for ($i=2; $i<$length; $i++) {
 193                  $letter = UTF8::substr($this->word, $i, 1);
 194                  if (in_array($letter, static::$vowels)) {
 195                      $this->rvIndex = $i + 1;
 196                      $this->rv = UTF8::substr($this->word, ($i+1));
 197                      return true;
 198                  }
 199              }
 200          }
 201  
 202          // or if the first two letters are vowels, RV is the region after the next consonant,
 203          if ( (in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
 204              for ($i=2; $i<$length; $i++) {
 205                  $letter = UTF8::substr($this->word, $i, 1);
 206                  if (! in_array($letter, static::$vowels)) {
 207                      $this->rvIndex = $i + 1;
 208                      $this->rv = UTF8::substr($this->word, ($i+1));
 209                      return true;
 210                  }
 211              }
 212          }
 213  
 214          // and otherwise (consonant-vowel case) RV is the region after the third letter.
 215          if ( (! in_array($first, static::$vowels)) && (in_array($second, static::$vowels)) ) {
 216              $this->rv = UTF8::substr($this->word, 3);
 217              $this->rvIndex = 3;
 218              return true;
 219          }
 220      }
 221  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer