[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Russian.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  /**
   8   *
   9   * @link http://snowball.tartarus.org/algorithms/russian/stemmer.html
  10   * @author wamania
  11   *
  12   */
  class Russian extends Stem
  {
      /**
       * All Russian vowels. The first vowel of a word marks where RV begins (see rv()).
       */
      protected static $vowels = array('а', 'е', 'и', 'о', 'у', 'ы', 'э', 'ю', 'я');

      /**
       * PERFECTIVE GERUND endings.
       *
       * Index 0 holds the "group 1" endings, which are only removed when preceded by а or я
       * (see checkGroup1()); index 1 holds the "group 2" endings, removed unconditionally.
       *
       * NOTE(review): the entries of every ending table in this class are listed with longer
       * endings before the shorter endings they contain. The inherited search helpers are not
       * visible here; if they return the first match, this order is significant - keep it.
       */
      protected static $perfectiveGerund = array(
          array('вшись', 'вши', 'в'),
          array('ывшись', 'ившись', 'ывши', 'ивши', 'ив', 'ыв')
      );

      /**
       * ADJECTIVE endings.
       */
      protected static $adjective = array(
          'ыми', 'ими', 'ему', 'ому', 'его', 'ого', 'ее', 'ие', 'ые', 'ое', 'ей', 'ий',
          'ый', 'ой', 'ем', 'им', 'ым','ом','их', 'ых', 'ую', 'юю', 'ая', 'яя', 'ою', 'ею'
      );

      /**
       * PARTICIPLE endings, looked for only after an ADJECTIVE ending has been removed.
       *
       * Index 0: group 1 (must follow а or я); index 1: group 2.
       */
      protected static $participle = array(
          array('ем', 'нн', 'вш', 'ющ', 'щ'),
          array('ивш', 'ывш', 'ующ')
      );

      /**
       * REFLEXIVE endings.
       */
      protected static $reflexive = array('ся', 'сь');

      /**
       * VERB endings.
       *
       * Index 0: group 1 (must follow а or я); index 1: group 2.
       */
      protected static $verb = array(
          array('ешь', 'нно', 'ете', 'йте', 'ла', 'на', 'ли', 'й', 'л', 'ем', 'н', 'ло', 'но', 'ет', 'ют', 'ны', 'ть'),
          array(
              'уйте', 'ило', 'ыло', 'ено','ила', 'ыла', 'ена', 'ейте', 'ены', 'ить', 'ыть', 'ишь', 'ите', 'или', 'ыли',
              'ует', 'уют', 'ей', 'уй', 'ил', 'ыл', 'им', 'ым', 'ен', 'ят', 'ит', 'ыт', 'ую', 'ю'
          )
      );

      /**
       * NOUN endings.
       */
      protected static $noun = array(
          'иями', 'ями', 'ами', 'ией', 'иям', 'ием', 'иях', 'ев', 'ов', 'ие', 'ье', 'еи', 'ии', 'ей', 'ой', 'ий', 'ям',
          'ем', 'ам', 'ом', 'ах', 'ях', 'ию', 'ью', 'ия', 'ья', 'я', 'а', 'е', 'ы', 'ь', 'и', 'о', 'у', 'й', 'ю'
      );

      /**
       * SUPERLATIVE endings (removed in step 4).
       */
      protected static $superlative = array('ейше', 'ейш');

      /**
       * DERIVATIONAL endings (removed in step 3 when they lie in R2).
       */
      protected static $derivational = array('ость', 'ост');

      /**
       * {@inheritdoc}
       *
       * Lower-cases the word, marks the regions and applies steps 1 to 4 in order.
       *
       * @param string $word the word to stem, encoded in UTF-8
       * @return string the stemmed, lower-cased word
       * @throws \Exception when $word is not valid UTF-8
       */
      public function stem($word)
      {
          // All string handling below works on UTF-8 characters, not bytes.
          if (!UTF8::is_utf8($word)) {
              throw new \Exception('Word must be in UTF-8');
          }

          $this->word = UTF8::strtolower($word);

          // Mark the regions. In this class RV is consulted by steps 1, 2 and 4 and R2 by step 3.
          // NOTE(review): r1() is not consulted directly here; presumably r2() in the parent
          // class builds on it - confirm before removing the call.
          $this->r1();
          $this->r2();
          $this->rv();

          // Do each of steps 1, 2, 3 and 4.
          $this->step1();
          $this->step2();
          $this->step3();
          $this->step4();

          return $this->word;
      }

      /**
       * Step 1: Search for a PERFECTIVE GERUND ending. If one is found remove it, and that is then the end of step 1.
       * Otherwise try and remove a REFLEXIVE ending, and then search in turn for (1) an ADJECTIVAL, (2) a VERB or
       * (3) a NOUN ending. As soon as one of the endings (1) to (3) is found remove it, and terminate step 1.
       *
       * @return boolean true when an ending was removed that terminates the step, false otherwise
       *                 (a REFLEXIVE ending may still have been removed when false is returned)
       */
      private function step1()
      {
          // PERFECTIVE GERUND, group 1: only removed when preceded by а or я.
          if (($position = $this->searchIfInRv(self::$perfectiveGerund[0])) !== false
              && $this->inRv($position)
              && $this->checkGroup1($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          // PERFECTIVE GERUND, group 2.
          if (($position = $this->searchIfInRv(self::$perfectiveGerund[1])) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          // Otherwise try and remove a REFLEXIVE ending; the step continues either way.
          if (($position = $this->searchIfInRv(self::$reflexive)) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
          }

          // (1) ADJECTIVAL: an ADJECTIVE ending, optionally preceded by a PARTICIPLE ending.
          if (($position = $this->searchIfInRv(self::$adjective)) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);

              // The participle is searched on the already shortened word; group 1 is tried first.
              if (($participle = $this->search(self::$participle[0])) !== false
                  && $this->inRv($participle)
                  && $this->checkGroup1($participle)
              ) {
                  $this->word = UTF8::substr($this->word, 0, $participle);
              } elseif (($participle = $this->search(self::$participle[1])) !== false
                  && $this->inRv($participle)
              ) {
                  $this->word = UTF8::substr($this->word, 0, $participle);
              }

              // The adjective ending alone is enough to terminate step 1.
              return true;
          }

          // (2) VERB, group 1: only removed when preceded by а or я.
          if (($position = $this->searchIfInRv(self::$verb[0])) !== false
              && $this->inRv($position)
              && $this->checkGroup1($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          // (2) VERB, group 2.
          if (($position = $this->searchIfInRv(self::$verb[1])) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          // (3) NOUN.
          if (($position = $this->searchIfInRv(self::$noun)) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          return false;
      }

      /**
       * Step 2: If the word ends with и (i), remove it.
       *
       * @return boolean true when the ending was removed
       */
      private function step2()
      {
          if (($position = $this->searchIfInRv(array('и'))) !== false
              && $this->inRv($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          return false;
      }

      /**
       * Step 3: Search for a DERIVATIONAL ending in R2 (i.e. the entire ending must lie in R2),
       * and if one is found, remove it.
       *
       * @return boolean true when the ending was removed
       */
      private function step3()
      {
          if (($position = $this->searchIfInRv(self::$derivational)) !== false
              && $this->inR2($position)
          ) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          return false;
      }

      /**
       * Step 4: (1) Undouble н (n), or, (2) if the word ends with a SUPERLATIVE ending, remove it
       * and undouble н (n), or (3) if the word ends ь (') (soft sign) remove it.
       *
       * The three cases are alternatives: exactly one of them is applied.
       *
       * @return boolean true when the word was modified
       */
      private function step4()
      {
          // (2) SUPERLATIVE ending: remove it, then undouble a trailing нн if there is one.
          // The soft-sign case (3) must not be applied afterwards.
          if (($position = $this->searchIfInRv(self::$superlative)) !== false) {
              $this->word = UTF8::substr($this->word, 0, $position);

              if (($position = $this->searchIfInRv(array('нн'))) !== false) {
                  // Keep the first н, drop the second.
                  $this->word = UTF8::substr($this->word, 0, ($position + 1));
              }

              return true;
          }

          // (1) Undouble н (n): keep the first н, drop the second.
          if (($position = $this->searchIfInRv(array('нн'))) !== false) {
              $this->word = UTF8::substr($this->word, 0, ($position + 1));
              return true;
          }

          // (3) if the word ends ь (') (soft sign) remove it
          if (($position = $this->searchIfInRv(array('ь'))) !== false) {
              $this->word = UTF8::substr($this->word, 0, $position);
              return true;
          }

          return false;
      }

      /**
       * In any word, RV is the region after the first vowel, or the end of the word if it contains no vowel.
       *
       * Sets $this->rv (the region text) and $this->rvIndex (its character offset in the word).
       *
       * @return boolean true when the word contains a vowel, false when RV is empty
       */
      protected function rv()
      {
          $length = UTF8::strlen($this->word);

          // Default: no vowel, so RV is the empty region at the end of the word.
          $this->rv = '';
          $this->rvIndex = $length;

          for ($i = 0; $i < $length; $i++) {
              $letter = UTF8::substr($this->word, $i, 1);

              if (in_array($letter, self::$vowels, true)) {
                  $this->rv = UTF8::substr($this->word, ($i + 1));
                  $this->rvIndex = $i + 1;
                  return true;
              }
          }

          return false;
      }

      /**
       * group 1 endings must follow а (a) or я (ia)
       *
       * The preceding letter itself has to lie in RV as well.
       *
       * @param integer $position character offset at which the candidate ending starts
       * @return boolean true when the letter just before $position is а or я and lies in RV
       */
      private function checkGroup1($position)
      {
          if (!$this->inRv(($position - 1))) {
              return false;
          }

          $letter = UTF8::substr($this->word, ($position - 1), 1);

          return $letter === 'а' || $letter === 'я';
      }
  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer