[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/wamania/php-stemmer/src/Stemmer/ -> Portuguese.php (source)

   1  <?php
   2  
   3  namespace Wamania\Snowball\Stemmer;
   4  
   5  use voku\helper\UTF8;
   6  
   7  /**
   8   *
   9   * @link http://snowball.tartarus.org/algorithms/portuguese/stemmer.html
  10   * @author wamania
  11   *
  12   */
  13  class Portuguese extends Stem
  14  {
  15      /**
  16       * All Portuguese vowels
  17       */
  18      protected static $vowels = array('a', 'e', 'i', 'o', 'u', 'á', 'é', 'í', 'ó', 'ú', 'â', 'ê', 'ô');
  19  
  20      /**
  21       * {@inheritdoc}
  22       */
  23      public function stem($word)
  24      {
  25          // we do ALL in UTF-8
  26          if (!UTF8::is_utf8($word)) {
  27              throw new \Exception('Word must be in UTF-8');
  28          }
  29  
  30          $this->word = UTF8::strtolower($word);
  31  
  32          $this->word = UTF8::str_replace(array('ã', 'õ'), array('a~', 'o~'), $this->word);
  33  
  34          $this->rv();
  35          $this->r1();
  36          $this->r2();
  37  
  38          $word = $this->word;
  39          $this->step1();
  40  
  41          if ($word == $this->word) {
  42              $this->step2();
  43          }
  44  
  45          if ($word != $this->word) {
  46              $this->step3();
  47          } else {
  48              $this->step4();
  49          }
  50  
  51          $this->step5();
  52          $this->finish();
  53  
  54          return $this->word;
  55      }
  56  
  57      /**
  58       * Step 1: Standard suffix removal
  59       */
  60      private function step1()
  61      {
  62          // delete if in R2
  63          if ( ($position = $this->search(array(
  64              'amentos', 'imentos', 'adoras', 'adores', 'amento', 'imento', 'adora', 'istas', 'ismos', 'antes', 'ância',
  65              'ezas', 'eza', 'icos', 'icas', 'ismo', 'ável', 'ível', 'ista', 'oso',
  66              'osos', 'osas', 'osa', 'ico', 'ica', 'ador', 'aça~o', 'aço~es' , 'ante'))) !== false) {
  67  
  68              if ($this->inR2($position)) {
  69                  $this->word = UTF8::substr($this->word, 0, $position);
  70              }
  71              return true;
  72          }
  73  
  74          // logía   logías
  75          //      replace with log if in R2
  76          if ( ($position = $this->search(array('logías', 'logía'))) !== false) {
  77              if ($this->inR2($position)) {
  78                  $this->word = preg_replace('#(logías|logía)$#u', 'log', $this->word);
  79              }
  80              return true;
  81          }
  82  
  83          // ución   uciones
  84          //      replace with u if in R2
  85          if ( ($position = $this->search(array('uciones', 'ución'))) !== false) {
  86              if ($this->inR2($position)) {
  87                  $this->word = preg_replace('#(uciones|ución)$#u', 'u', $this->word);
  88              }
  89              return true;
  90          }
  91  
  92          // ência    ências
  93          //      replace with ente if in R2
  94          if ( ($position = $this->search(array('ências', 'ência'))) !== false) {
  95              if ($this->inR2($position)) {
  96                  $this->word = preg_replace('#(ências|ência)$#u', 'ente', $this->word);
  97              }
  98              return true;
  99          }
 100  
 101          // amente
 102          //      delete if in R1
 103          //      if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
 104          //      if preceded by os, ic or ad, delete if in R2
 105          if ( ($position = $this->search(array('amente'))) !== false) {
 106  
 107              // delete if in R1
 108              if ($this->inR1($position)) {
 109                  $this->word = UTF8::substr($this->word, 0, $position);
 110              }
 111  
 112              // if preceded by iv, delete if in R2 (and if further preceded by at, delete if in R2), otherwise,
 113              if ( ($position2 = $this->searchIfInR2(array('iv'))) !== false) {
 114                  $this->word = UTF8::substr($this->word, 0, $position2);
 115                  if ( ($position3 = $this->searchIfInR2(array('at'))) !== false) {
 116                      $this->word = UTF8::substr($this->word, 0, $position3);
 117                  }
 118  
 119                  // if preceded by os, ic or ad, delete if in R2
 120              } elseif ( ($position4 = $this->searchIfInR2(array('os', 'ic', 'ad'))) !== false) {
 121                  $this->word = UTF8::substr($this->word, 0, $position4);
 122              }
 123              return true;
 124          }
 125  
 126          // mente
 127          //      delete if in R2
 128          //      if preceded by ante, avel or ível, delete if in R2
 129          if ( ($position = $this->search(array('mente'))) !== false) {
 130  
 131              // delete if in R2
 132              if ($this->inR2($position)) {
 133                  $this->word = UTF8::substr($this->word, 0, $position);
 134              }
 135  
 136              // if preceded by ante, avel or ível, delete if in R2
 137              if ( ($position2 = $this->searchIfInR2(array('ante', 'avel', 'ível'))) != false) {
 138                  $this->word = UTF8::substr($this->word, 0, $position2);
 139              }
 140              return true;
 141          }
 142  
 143          // idade   idades
 144          //      delete if in R2
 145          //      if preceded by abil, ic or iv, delete if in R2
 146          if ( ($position = $this->search(array('idades', 'idade'))) !== false) {
 147  
 148              // delete if in R2
 149              if ($this->inR2($position)) {
 150                  $this->word = UTF8::substr($this->word, 0, $position);
 151              }
 152  
 153              // if preceded by abil, ic or iv, delete if in R2
 154              if ( ($position2 = $this->searchIfInR2(array('abil', 'ic', 'iv'))) !== false) {
 155                  $this->word = UTF8::substr($this->word, 0, $position2);
 156              }
 157              return true;
 158          }
 159  
 160          // iva   ivo   ivas   ivos
 161          //      delete if in R2
 162          //      if preceded by at, delete if in R2
 163          if ( ($position = $this->search(array('ivas', 'ivos', 'iva', 'ivo'))) !== false) {
 164  
 165              // delete if in R2
 166              if ($this->inR2($position)) {
 167                  $this->word = UTF8::substr($this->word, 0, $position);
 168              }
 169  
 170              // if preceded by at, delete if in R2
 171              if ( ($position2 = $this->searchIfInR2(array('at'))) !== false) {
 172                  $this->word = UTF8::substr($this->word, 0, $position2);
 173              }
 174              return true;
 175          }
 176  
 177          // ira   iras
 178          //      replace with ir if in RV and preceded by e
 179          if ( ($position = $this->search(array('iras', 'ira'))) !== false) {
 180  
 181              if ($this->inRv($position)) {
 182                  $before = $position -1;
 183                  $letter = UTF8::substr($this->word, $before, 1);
 184  
 185                  if ($letter == 'e') {
 186                      $this->word = preg_replace('#(iras|ira)$#u', 'ir', $this->word);
 187                  }
 188              }
 189              return true;
 190          }
 191  
 192          return false;
 193      }
 194  
 195      /**
 196       * Step 2: Verb suffixes
 197       * Search for the longest among the following suffixes in RV, and if found, delete.
 198       */
 199      private function step2()
 200      {
 201          if ( ($position = $this->searchIfInRv(array(
 202              'aríamos', 'eríamos', 'iríamos', 'ássemos', 'êssemos', 'íssemos',
 203              'aríeis', 'eríeis', 'iríeis', 'ásseis', 'ésseis', 'ísseis', 'áramos', 'éramos', 'íramos', 'ávamos',
 204              'aremos', 'eremos', 'iremos',
 205              'ariam', 'eriam', 'iriam', 'assem', 'essem', 'issem', 'arias', 'erias', 'irias', 'ardes', 'erdes', 'irdes',
 206              'asses', 'esses', 'isses', 'astes', 'estes', 'istes', 'áreis', 'areis', 'éreis', 'ereis', 'íreis', 'ireis',
 207              'áveis', 'íamos', 'armos', 'ermos', 'irmos',
 208              'aria', 'eria', 'iria', 'asse', 'esse', 'isse', 'aste', 'este', 'iste', 'arei', 'erei', 'irei', 'adas', 'idas',
 209              'aram', 'eram', 'iram', 'avam', 'arem', 'erem', 'irem', 'ando', 'endo', 'indo', 'ara~o', 'era~o', 'ira~o',
 210              'arás', 'aras', 'erás', 'eras', 'irás', 'avas', 'ares', 'eres', 'ires', 'íeis', 'ados', 'idos', 'ámos', 'amos',
 211              'emos', 'imos', 'iras',
 212              'ada', 'ida', 'ará', 'ara', 'erá', 'era', 'irá', 'ava', 'iam', 'ado', 'ido', 'ias', 'ais', 'eis', 'ira',
 213              'ia', 'ei', 'am', 'em', 'ar', 'er', 'ir', 'as', 'es', 'is', 'eu', 'iu', 'ou',
 214          ))) !== false) {
 215  
 216              $this->word = UTF8::substr($this->word, 0, $position);
 217              return true;
 218          }
 219          return false;
 220      }
 221  
 222      /**
 223       * Step 3: d-suffixes
 224       *
 225       */
 226      private function step3()
 227      {
 228          // Delete suffix i if in RV and preceded by c
 229          if ($this->searchIfInRv(array('i')) !== false) {
 230              $letter = UTF8::substr($this->word, -2, 1);
 231  
 232              if ($letter == 'c') {
 233                  $this->word = UTF8::substr($this->word, 0, -1);
 234              }
 235              return true;
 236          }
 237          return false;
 238      }
 239  
 240      /**
 241       * Step 4
 242       */
 243      private function step4()
 244      {
 245          // If the word ends with one of the suffixes "os   a   i   o   á   í   ó" in RV, delete it
 246          if ( ($position = $this->searchIfInRv(array('os', 'a', 'i', 'o','á', 'í', 'ó'))) !== false) {
 247              $this->word = UTF8::substr($this->word, 0, $position);
 248              return true;
 249          }
 250          return false;
 251      }
 252  
 253      /**
 254       * Step 5
 255       */
 256      private function step5()
 257      {
 258          // If the word ends with one of "e   é   ê" in RV, delete it, and if preceded by gu (or ci) with the u (or i) in RV, delete the u (or i).
 259          if ($this->searchIfInRv(array('e', 'é', 'ê')) !== false) {
 260              $this->word = UTF8::substr($this->word, 0, -1);
 261  
 262              if ( ($position2 = $this->search(array('gu', 'ci'))) !== false) {
 263                  if ($this->inRv(($position2+1))) {
 264                      $this->word = UTF8::substr($this->word, 0, -1);
 265                  }
 266              }
 267              return true;
 268          } else if ($this->search(array('ç')) !== false) {
 269              $this->word = preg_replace('#(ç)$#u', 'c', $this->word);
 270              return true;
 271          }
 272          return false;
 273      }
 274  
 275      /**
 276       * Finally
 277       */
 278      private function finish()
 279      {
 280          // turn U and Y back into lower case, and remove the umlaut accent from a, o and u.
 281          $this->word = UTF8::str_replace(array('a~', 'o~'), array('ã', 'õ'), $this->word);
 282      }
 283  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer