[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/administrator/components/com_finder/src/Indexer/ -> Language.php (source)

   1  <?php
   2  
   3  /**
   4   * @package     Joomla.Administrator
   5   * @subpackage  com_finder
   6   *
   7   * @copyright   (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
   8   * @license     GNU General Public License version 2 or later; see LICENSE.txt
   9   */
  10  
  11  namespace Joomla\Component\Finder\Administrator\Indexer;
  12  
  13  use Joomla\String\StringHelper;
  14  use Wamania\Snowball\NotFoundException;
  15  use Wamania\Snowball\Stemmer\Stemmer;
  16  use Wamania\Snowball\StemmerFactory;
  17  
  18  // phpcs:disable PSR1.Files.SideEffects
  19  \defined('_JEXEC') or die;
  20  // phpcs:enable PSR1.Files.SideEffects
  21  
  22  /**
  23   * Language support class for the Finder indexer package.
  24   *
  25   * @since  4.0.0
  26   */
  27  class Language
  28  {
  29      /**
  30       * Language support instances container.
  31       *
  32       * @var    Language[]
  33       * @since  4.0.0
  34       */
  35      protected static $instances = array();
  36  
  37      /**
  38       * Language locale of the class
  39       *
  40       * @var    string
  41       * @since  4.0.0
  42       */
  43      public $language;
  44  
  45      /**
  46       * Spacer to use between terms
  47       *
  48       * @var    string
  49       * @since  4.0.0
  50       */
  51      public $spacer = ' ';
  52  
  53      /**
  54       * The stemmer object.
  55       *
  56       * @var    Stemmer
  57       * @since  4.0.0
  58       */
  59      protected $stemmer = null;
  60  
  61      /**
  62       * Method to construct the language object.
  63       *
  64       * @since   4.0.0
  65       */
  66      public function __construct($locale = null)
  67      {
  68          if ($locale !== null) {
  69              $this->language = $locale;
  70          }
  71  
  72          // Use our generic language handler if no language is set
  73          if ($this->language === null) {
  74              $this->language = '*';
  75          }
  76  
  77          try {
  78              $this->stemmer = StemmerFactory::create($this->language);
  79          } catch (NotFoundException $e) {
  80              // We don't have a stemmer for the language
  81          }
  82      }
  83  
  84      /**
  85       * Method to get a language support object.
  86       *
  87       * @param   string  $language  The language of the support object.
  88       *
  89       * @return  Language  A Language instance.
  90       *
  91       * @since   4.0.0
  92       */
  93      public static function getInstance($language)
  94      {
  95          if (isset(self::$instances[$language])) {
  96              return self::$instances[$language];
  97          }
  98  
  99          $locale = '*';
 100  
 101          if ($language !== '*') {
 102              $locale = Helper::getPrimaryLanguage($language);
 103              $class  = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);
 104  
 105              if (class_exists($class)) {
 106                  self::$instances[$language] = new $class();
 107  
 108                  return self::$instances[$language];
 109              }
 110          }
 111  
 112          self::$instances[$language] = new self($locale);
 113  
 114          return self::$instances[$language];
 115      }
 116  
 117      /**
 118       * Method to tokenise a text string.
 119       *
 120       * @param   string  $input  The input to tokenise.
 121       *
 122       * @return  array  An array of term strings.
 123       *
 124       * @since   4.0.0
 125       */
 126      public function tokenise($input)
 127      {
 128          $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');
 129  
 130          /*
 131           * Parsing the string input into terms is a multi-step process.
 132           *
 133           * Regexes:
 134           *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
 135           *  2. Remove plus, dash, period, and comma characters located before letter characters.
 136           *  3. Remove plus, dash, period, and comma characters located after other characters.
 137           *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
 138           *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
 139           *  6. Remove orphaned quote characters.
 140           *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
 141           *  8. Remove multiple space characters and replaces with a single space.
 142           */
 143          $input = StringHelper::strtolower($input);
 144          $input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);
 145          $input = preg_replace('#(^|\s)[+-.,]+([\pL\pM]+)#mui', ' $1', $input);
 146          $input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
 147          $input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
 148          $input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
 149          $input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
 150          $input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
 151          $input = preg_replace('#\s+#mui', ' ', $input);
 152          $input = trim($input);
 153  
 154          // Explode the normalized string to get the terms.
 155          $terms = explode(' ', $input);
 156  
 157          return $terms;
 158      }
 159  
 160      /**
 161       * Method to stem a token.
 162       *
 163       * @param   string  $token  The token to stem.
 164       *
 165       * @return  string  The stemmed token.
 166       *
 167       * @since   4.0.0
 168       */
 169      public function stem($token)
 170      {
 171          if ($this->stemmer !== null) {
 172              return $this->stemmer->stem($token);
 173          }
 174  
 175          return $token;
 176      }
 177  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer