[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Spanish.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
/**
 * Spanish stemmer implementing the Snowball stemming algorithm.
 *
 * @link http://snowball.tartarus.org/algorithms/spanish/stemmer.html
 * @author wamania
 *
 */
  13  class Spanish extends Stem
  14  {
  15      /**
  16       * All spanish vowels
  17       */
  18      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'ü');
  19  
  20      /**
  21       * {@inheritdoc}
  22       */
  23      public function stem($word)
  24      {
  25          // we do ALL in UTF-8
  26          if (!UTF8::is_utf8($word)) {
  27              throw new \Exception('Word must be in UTF-8');
  28          }
  29  
  30          $this->word = UTF8::strtolower($word);
  31  
  32          $this->rv();
  33          $this->r1();
  34          $this->r2();
  35  
  36          $this->step0();
  37  
  38          $word = $this->word;
  39          $this->step1();
  40  
  41          // Do step 2a if no ending was removed by step 1.
  42          if ($this->word == $word) {
  43              $this->step2a();
  44  
  45              // Do Step 2b if step 2a was done, but failed to remove a suffix.
  46              if ($this->word == $word) {
  47                  $this->step2b();
  48              }
  49          }
  50  
  51          $this->step3();
  52          $this->finish();
  53  
  54          return $this->word;
  55      }
  56  
  57      /**
  58       * Step 0: Attached pronoun
  59       *
  60       * Search for the longest among the following suffixes
  61       *      me   se   sela   selo   selas   selos   la   le   lo   las   les   los   nos
  62       *
  63       * and delete it, if comes after one of
  64       *      (a) iéndo   ándo   ár   ér   ír
  65       *      (b) ando   iendo   ar   er   ir
  66       *      (c) yendo following u
  67       *
  68       *  in RV. In the case of (c), yendo must lie in RV, but the preceding u can be outside it.
  69       *  In the case of (a), deletion is followed by removing the acute accent (for example, haciéndola -> haciendo).
  70       */
  71      private function step0()
  72      {
  73          if ( ($position = $this->searchIfInRv(array('selas', 'selos', 'las', 'los', 'les', 'nos', 'selo', 'sela', 'me', 'se', 'la', 'le', 'lo' ))) != false) {
  74              $suffixe = UTF8::substr($this->word, $position);
  75  
  76              // a
  77              $a = array('iéndo', 'ándo', 'ár', 'ér', 'ír');
  78              $a = array_map(function($item) use ($suffixe) {
  79                  return $item . $suffixe;
  80              }, $a);
  81  
  82              if ( ($position2 = $this->searchIfInRv($a)) !== false) {
  83                  $suffixe2 = UTF8::substr($this->word, $position2);
  84                  $suffixe2 = UTF8::to_utf8(UTF8::to_ascii($suffixe2)); // unaccent
  85                  $this->word = UTF8::substr($this->word, 0, $position2);
  86                  $this->word .= $suffixe2;
  87                  $this->word = UTF8::substr($this->word, 0, $position);
  88                  return true;
  89              }
  90  
  91              // b
  92              $b = array('iendo', 'ando', 'ar', 'er', 'ir');
  93              $b = array_map(function($item) use ($suffixe) {
  94                  return $item . $suffixe;
  95              }, $b);
  96  
  97              if ( ($position2 = $this->searchIfInRv($b)) !== false) {
  98                  $this->word = UTF8::substr($this->word, 0, $position);
  99                  return true;
 100              }
 101  
 102              // c
 103              if ( ($position2 = $this->searchIfInRv(array('yendo' . $suffixe))) != false) {
 104                  $before = UTF8::substr($this->word, ($position2-1), 1);
 105                  if ( (isset($before)) && ($before == 'u') ) {
 106                      $this->word = UTF8::substr($this->word, 0, $position);
 107                      return true;
 108                  }
 109              }
 110          }
 111  
 112          return false;
 113      }
 114  
 115      /**
 116       * Step 1
 117       */
 118      private function step1()
 119      {
 120          // anza   anzas   ico   ica   icos   icas   ismo   ismos   able   ables   ible   ibles   ista
 121          // istas   oso   osa   osos   osas   amiento   amientos   imiento   imientos
 122          //      delete if in R2
 123          if ( ($position = $this->search(array(
 124              'imientos', 'imiento', 'amientos', 'amiento', 'osas', 'osos', 'osa', 'oso', 'istas', 'ista', 'ibles',
 125              'ible', 'ables', 'able', 'ismos', 'ismo', 'icas', 'icos', 'ica', 'ico', 'anzas', 'anza'))) != false) {
 126  
 127              if ($this->inR2($position)) {
 128                  $this->word = UTF8::substr($this->word, 0, $position);
 129              }
 130              return true;
 131          }
 132  
 133          // adora   ador   ación   adoras   adores   aciones   ante   antes   ancia   ancias
 134          //      delete if in R2
 135          //      if preceded by ic, delete if in R2
 136          if ( ($position = $this->search(array(
 137              'adoras', 'adora', 'aciones', 'ación', 'adores', 'ador', 'antes', 'ante', 'ancias', 'ancia'))) != false) {
 138  
 139              if ($this->inR2($position)) {
 140                  $this->word = UTF8::substr($this->word, 0, $position);
 141              }
 142  
 143              if ( ($position2 = $this->searchIfInR2(array('ic')))) {
 144                  $this->word = UTF8::substr($this->word, 0, $position2);
 145              }
 146              return true;
 147          }
 148  
 149          // logía   logías
 150          //      replace with log if in R2
 151          if ( ($position = $this->search(array('logías', 'logía'))) != false) {
 152              if ($this->inR2($position)) {
 153                  $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
 154              }
 155              return true;
 156          }
 157  
 158          // ución   uciones
 159          //      replace with u if in R2
 160          if ( ($position = $this->search(array('uciones', 'ución'))) != false) {
 161              if ($this->inR2($position)) {
 162                  $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
 163              }
 164              return true;
 165          }
 166  
 167          // encia   encias
 168          //      replace with ente if in R2
 169          if ( ($position = $this->search(array('encias', 'encia'))) != false) {
 170              if ($this->inR2($position)) {
 171                  $this->word = preg_replace('#(encias|encia)$#u', 'ente', $this->word);
 172              }
 173              return true;
 174          }
 175  
 176          // amente
 177          //      delete if in R1
 178          //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
 179          //      if preceded by os, ic or ad, delete if in R2
 180          if ( ($position = $this->search(array('amente'))) != false) {
 181  
 182              // delete if in R1
 183              if ($this->inR1($position)) {
 184                  $this->word = UTF8::substr($this->word, 0, $position);
 185              }
 186  
 187              // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
 188              if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
 189                  $this->word = UTF8::substr($this->word, 0, $position2);
 190                  if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
 191                      $this->word = UTF8::substr($this->word, 0, $position3);
 192                  }
 193  
 194              // if preceded by os, ic or ad, delete if in R2
 195              } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) != false) {
 196                  $this->word = UTF8::substr($this->word, 0, $position4);
 197              }
 198              return true;
 199          }
 200  
 201          // mente
 202          //      delete if in R2
 203          //      if preceded by ante, able or ible, delete if in R2
 204          if ( ($position = $this->search(array('mente'))) != false) {
 205  
 206              // delete if in R2
 207              if ($this->inR2($position)) {
 208                  $this->word = UTF8::substr($this->word, 0, $position);
 209              }
 210  
 211              // if preceded by ante, able or ible, delete if in R2
 212              if ( ($position2 = $this->searchIfInR2(array('ante', 'able', 'ible'))) != false) {
 213                  $this->word = UTF8::substr($this->word, 0, $position2);
 214              }
 215              return true;
 216          }
 217  
 218          // idad   idades
 219          //      delete if in R2
 220          //      if preceded by abil, ic or iv, delete if in R2
 221          if ( ($position = $this->search(array('idades', 'idad'))) != false) {
 222  
 223              // delete if in R2
 224              if ($this->inR2($position)) {
 225                  $this->word = UTF8::substr($this->word, 0, $position);
 226              }
 227  
 228              // if preceded by abil, ic or iv, delete if in R2
 229              if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) != false) {
 230                  $this->word = UTF8::substr($this->word, 0, $position2);
 231              }
 232              return true;
 233          }
 234  
 235          // iva   ivo   ivas   ivos
 236          //      delete if in R2
 237          //      if preceded by at, delete if in R2
 238          if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) != false) {
 239  
 240              // delete if in R2
 241              if ($this->inR2($position)) {
 242                  $this->word = UTF8::substr($this->word, 0, $position);
 243              }
 244  
 245              // if preceded by at, delete if in R2
 246              if ( ($position2 = $this->searchIfInR2(array('at'))) != false) {
 247                  $this->word = UTF8::substr($this->word, 0, $position2);
 248              }
 249              return true;
 250          }
 251  
 252          return false;
 253      }
 254  
 255      /**
 256       * Step 2a: Verb suffixes beginning y
 257       */
 258      private function step2a()
 259      {
 260          // if found, delete if preceded by u
 261          // (Note that the preceding u need not be in RV.)
 262          if ( ($position = $this->searchIfInRv(array(
 263              'yamos', 'yendo', 'yeron', 'yan', 'yen', 'yais', 'yas', 'yes', 'yo', 'yó', 'ya', 'ye'))) != false) {
 264  
 265              $before = UTF8::substr($this->word, ($position-1), 1);
 266              if ( (isset($before)) && ($before == 'u') ) {
 267                  $this->word = UTF8::substr($this->word, 0, $position);
 268                  return true;
 269              }
 270          }
 271  
 272          return false;
 273      }
 274  
 275      /**
 276       * Step 2b: Other verb suffixes
 277       *      Search for the longest among the following suffixes in RV, and perform the action indicated.
 278       */
 279      private function step2b()
 280      {
 281          //      delete
 282          if ( ($position = $this->searchIfInRv(array(
 283              'iésemos', 'iéramos', 'ábamos', 'iríamos', 'eríamos', 'aríamos', 'áramos', 'ásemos', 'eríais',
 284              'aremos', 'eremos', 'iremos', 'asteis', 'ieseis', 'ierais', 'isteis', 'aríais',
 285              'irían', 'aréis', 'erían', 'erías', 'eréis', 'iréis', 'irías', 'ieran', 'iesen', 'ieron', 'iendo', 'ieras',
 286              'iríais', 'arían', 'arías',
 287              'amos', 'imos', 'ados', 'idos', 'irán', 'irás', 'erán', 'erás', 'ería', 'iría', 'íais', 'arán', 'arás', 'aría',
 288              'iera', 'iese', 'aste', 'iste', 'aban', 'aran', 'asen', 'aron', 'ando', 'abas', 'adas', 'idas', 'ases', 'aras',
 289              'aré', 'erá', 'eré', 'áis', 'ías', 'irá', 'iré', 'aba', 'ían', 'ada', 'ara', 'ase', 'ida', 'ado', 'ido', 'ará',
 290              'ad', 'ed', 'id', 'ís', 'ió', 'ar', 'er', 'ir', 'as', 'ía', 'an'
 291          ))) != false) {
 292              $this->word = UTF8::substr($this->word, 0, $position);
 293              return true;
 294          }
 295  
 296          // en   es   éis   emos
 297          //      delete, and if preceded by gu delete the u (the gu need not be in RV)
 298          if ( ($position = $this->searchIfInRv(array('éis', 'emos', 'en', 'es'))) != false) {
 299              $this->word = UTF8::substr($this->word, 0, $position);
 300  
 301              if ( ($position2 = $this->search(array('gu'))) != false) {
 302                  $this->word = UTF8::substr($this->word, 0, ($position2+1));
 303              }
 304  
 305  
 306              return true;
 307          }
 308      }
 309  
 310      /**
 311       * Step 3: residual suffix
 312       * Search for the longest among the following suffixes in RV, and perform the action indicated.
 313       */
 314      private function step3()
 315      {
 316          // os   a   o   á   í   ó
 317          //      delete if in RV
 318          if ( ($position = $this->searchIfInRv(array('os', 'a', 'o', 'á', 'í', 'ó'))) != false) {
 319              $this->word = UTF8::substr($this->word, 0, $position);
 320              return true;
 321          }
 322  
 323          // e   é
 324          //      delete if in RV, and if preceded by gu with the u in RV delete the u
 325          if ( ($position = $this->searchIfInRv(array('e', 'é'))) != false) {
 326              $this->word = UTF8::substr($this->word, 0, $position);
 327  
 328              if ( ($position2 = $this->searchIfInRv(array('u'))) != false) {
 329                  $before = UTF8::substr($this->word, ($position2-1), 1);
 330                  if ( (isset($before)) && ($before == 'g') ) {
 331                      $this->word = UTF8::substr($this->word, 0, $position2);
 332                      return true;
 333                  }
 334              }
 335          }
 336  
 337          return false;
 338      }
 339  
 340      /**
 341       * And finally:
 342       * Remove acute accents
 343       */
 344      private function finish()
 345      {
 346          $this->word = UTF8::str_replace(array('á', 'í', 'ó', 'é', 'ú'), array('a', 'i', 'o', 'e', 'u'), $this->word);
 347      }
 348  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer