[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/administrator/components/com_finder/src/Indexer/ -> Helper.php (source)

   1  <?php
   2  
   3  /**
   4   * @package     Joomla.Administrator
   5   * @subpackage  com_finder
   6   *
   7   * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
   8   * @license     GNU General Public License version 2 or later; see LICENSE.txt
   9   */
  10  
  11  namespace Joomla\Component\Finder\Administrator\Indexer;
  12  
  13  use Exception;
  14  use Joomla\CMS\Component\ComponentHelper;
  15  use Joomla\CMS\Factory;
  16  use Joomla\CMS\Language\Multilanguage;
  17  use Joomla\CMS\Plugin\PluginHelper;
  18  use Joomla\CMS\Table\Table;
  19  use Joomla\Registry\Registry;
  20  use Joomla\String\StringHelper;
  21  
  22  // phpcs:disable PSR1.Files.SideEffects
  23  \defined('_JEXEC') or die;
  24  // phpcs:enable PSR1.Files.SideEffects
  25  
  26  /**
  27   * Helper class for the Finder indexer package.
  28   *
  29   * @since  2.5
  30   */
  31  class Helper
  32  {
  33      /**
  34       * Method to parse input into plain text.
  35       *
  36       * @param   string  $input   The raw input.
  37       * @param   string  $format  The format of the input. [optional]
  38       *
  39       * @return  string  The parsed input.
  40       *
  41       * @since   2.5
  42       * @throws  Exception on invalid parser.
  43       */
  44      public static function parse($input, $format = 'html')
  45      {
  46          // Get a parser for the specified format and parse the input.
  47          return Parser::getInstance($format)->parse($input);
  48      }
  49  
  50      /**
  51       * Method to tokenize a text string.
  52       *
  53       * @param   string   $input   The input to tokenize.
  54       * @param   string   $lang    The language of the input.
  55       * @param   boolean  $phrase  Flag to indicate whether input could be a phrase. [optional]
  56       *
  57       * @return  Token[]  An array of Token objects.
  58       *
  59       * @since   2.5
  60       */
  61      public static function tokenize($input, $lang, $phrase = false)
  62      {
  63          static $cache = [], $tuplecount;
  64          static $multilingual;
  65          static $defaultLanguage;
  66  
  67          if (!$tuplecount) {
  68              $params = ComponentHelper::getParams('com_finder');
  69              $tuplecount = $params->get('tuplecount', 1);
  70          }
  71  
  72          if (is_null($multilingual)) {
  73              $multilingual = Multilanguage::isEnabled();
  74              $config = ComponentHelper::getParams('com_finder');
  75  
  76              if ($config->get('language_default', '') == '') {
  77                  $defaultLang = '*';
  78              } elseif ($config->get('language_default', '') == '-1') {
  79                  $defaultLang = self::getDefaultLanguage();
  80              } else {
  81                  $defaultLang = $config->get('language_default');
  82              }
  83  
  84              /*
  85               * The default language always has the language code '*'.
  86               * In order to not overwrite the language code of the language
  87               * object that we are using, we are cloning it here.
  88               */
  89              $obj = Language::getInstance($defaultLang);
  90              $defaultLanguage = clone $obj;
  91              $defaultLanguage->language = '*';
  92          }
  93  
  94          if (!$multilingual || $lang == '*') {
  95              $language = $defaultLanguage;
  96          } else {
  97              $language = Language::getInstance($lang);
  98          }
  99  
 100          if (!isset($cache[$lang])) {
 101              $cache[$lang] = [];
 102          }
 103  
 104          $tokens = array();
 105          $terms = $language->tokenise($input);
 106  
 107          // @todo: array_filter removes any number 0's from the terms. Not sure this is entirely intended
 108          $terms = array_filter($terms);
 109          $terms = array_values($terms);
 110  
 111          /*
 112           * If we have to handle the input as a phrase, that means we don't
 113           * tokenize the individual terms and we do not create the two and three
 114           * term combinations. The phrase must contain more than one word!
 115           */
 116          if ($phrase === true && count($terms) > 1) {
 117              // Create tokens from the phrase.
 118              $tokens[] = new Token($terms, $language->language, $language->spacer);
 119          } else {
 120              // Create tokens from the terms.
 121              for ($i = 0, $n = count($terms); $i < $n; $i++) {
 122                  if (isset($cache[$lang][$terms[$i]])) {
 123                      $tokens[] = $cache[$lang][$terms[$i]];
 124                  } else {
 125                      $token = new Token($terms[$i], $language->language);
 126                      $tokens[] = $token;
 127                      $cache[$lang][$terms[$i]] = $token;
 128                  }
 129              }
 130  
 131              // Create multi-word phrase tokens from the individual words.
 132              if ($tuplecount > 1) {
 133                  for ($i = 0, $n = count($tokens); $i < $n; $i++) {
 134                      $temp = array($tokens[$i]->term);
 135  
 136                      // Create tokens for 2 to $tuplecount length phrases
 137                      for ($j = 1; $j < $tuplecount; $j++) {
 138                          if ($i + $j >= $n || !isset($tokens[$i + $j])) {
 139                              break;
 140                          }
 141  
 142                          $temp[] = $tokens[$i + $j]->term;
 143                          $key = implode('::', $temp);
 144  
 145                          if (isset($cache[$lang][$key])) {
 146                              $tokens[] = $cache[$lang][$key];
 147                          } else {
 148                              $token = new Token($temp, $language->language, $language->spacer);
 149                              $token->derived = true;
 150                              $tokens[] = $token;
 151                              $cache[$lang][$key] = $token;
 152                          }
 153                      }
 154                  }
 155              }
 156          }
 157  
 158          // Prevent the cache to fill up the memory
 159          while (count($cache[$lang]) > 1024) {
 160              /**
 161               * We want to cache the most common words/tokens. At the same time
 162               * we don't want to cache too much. The most common words will also
 163               * be early in the text, so we are dropping all terms/tokens which
 164               * have been cached later.
 165               */
 166              array_pop($cache[$lang]);
 167          }
 168  
 169          return $tokens;
 170      }
 171  
 172      /**
 173       * Method to get the base word of a token.
 174       *
 175       * @param   string  $token  The token to stem.
 176       * @param   string  $lang   The language of the token.
 177       *
 178       * @return  string  The root token.
 179       *
 180       * @since   2.5
 181       */
 182      public static function stem($token, $lang)
 183      {
 184          static $multilingual;
 185          static $defaultStemmer;
 186  
 187          if (is_null($multilingual)) {
 188              $multilingual = Multilanguage::isEnabled();
 189              $config = ComponentHelper::getParams('com_finder');
 190  
 191              if ($config->get('language_default', '') == '') {
 192                  $defaultStemmer = Language::getInstance('*');
 193              } elseif ($config->get('language_default', '') == '-1') {
 194                  $defaultStemmer = Language::getInstance(self::getDefaultLanguage());
 195              } else {
 196                  $defaultStemmer = Language::getInstance($config->get('language_default'));
 197              }
 198          }
 199  
 200          if (!$multilingual || $lang == '*') {
 201              $language = $defaultStemmer;
 202          } else {
 203              $language = Language::getInstance($lang);
 204          }
 205  
 206          return $language->stem($token);
 207      }
 208  
 209      /**
 210       * Method to add a content type to the database.
 211       *
 212       * @param   string  $title  The type of content. For example: PDF
 213       * @param   string  $mime   The mime type of the content. For example: PDF [optional]
 214       *
 215       * @return  integer  The id of the content type.
 216       *
 217       * @since   2.5
 218       * @throws  Exception on database error.
 219       */
 220      public static function addContentType($title, $mime = null)
 221      {
 222          static $types;
 223  
 224          $db    = Factory::getDbo();
 225          $query = $db->getQuery(true);
 226  
 227          // Check if the types are loaded.
 228          if (empty($types)) {
 229              // Build the query to get the types.
 230              $query->select('*')
 231                  ->from($db->quoteName('#__finder_types'));
 232  
 233              // Get the types.
 234              $db->setQuery($query);
 235              $types = $db->loadObjectList('title');
 236          }
 237  
 238          // Check if the type already exists.
 239          if (isset($types[$title])) {
 240              return (int) $types[$title]->id;
 241          }
 242  
 243          // Add the type.
 244          $query->clear()
 245              ->insert($db->quoteName('#__finder_types'))
 246              ->columns(array($db->quoteName('title'), $db->quoteName('mime')))
 247              ->values($db->quote($title) . ', ' . $db->quote($mime));
 248          $db->setQuery($query);
 249          $db->execute();
 250  
 251          // Return the new id.
 252          return (int) $db->insertid();
 253      }
 254  
 255      /**
 256       * Method to check if a token is common in a language.
 257       *
 258       * @param   string  $token  The token to test.
 259       * @param   string  $lang   The language to reference.
 260       *
 261       * @return  boolean  True if common, false otherwise.
 262       *
 263       * @since   2.5
 264       */
 265      public static function isCommon($token, $lang)
 266      {
 267          static $data, $default, $multilingual;
 268  
 269          if (is_null($multilingual)) {
 270              $multilingual = Multilanguage::isEnabled();
 271              $config = ComponentHelper::getParams('com_finder');
 272  
 273              if ($config->get('language_default', '') == '') {
 274                  $default = '*';
 275              } elseif ($config->get('language_default', '') == '-1') {
 276                  $default = self::getPrimaryLanguage(self::getDefaultLanguage());
 277              } else {
 278                  $default = self::getPrimaryLanguage($config->get('language_default'));
 279              }
 280          }
 281  
 282          if (!$multilingual || $lang == '*') {
 283              $lang = $default;
 284          }
 285  
 286          // Load the common tokens for the language if necessary.
 287          if (!isset($data[$lang])) {
 288              $data[$lang] = self::getCommonWords($lang);
 289          }
 290  
 291          // Check if the token is in the common array.
 292          return in_array($token, $data[$lang], true);
 293      }
 294  
 295      /**
 296       * Method to get an array of common terms for a language.
 297       *
 298       * @param   string  $lang  The language to use.
 299       *
 300       * @return  array  Array of common terms.
 301       *
 302       * @since   2.5
 303       * @throws  Exception on database error.
 304       */
 305      public static function getCommonWords($lang)
 306      {
 307          $db = Factory::getDbo();
 308  
 309          // Create the query to load all the common terms for the language.
 310          $query = $db->getQuery(true)
 311              ->select($db->quoteName('term'))
 312              ->from($db->quoteName('#__finder_terms_common'))
 313              ->where($db->quoteName('language') . ' = ' . $db->quote($lang));
 314  
 315          // Load all of the common terms for the language.
 316          $db->setQuery($query);
 317  
 318          return $db->loadColumn();
 319      }
 320  
 321      /**
 322       * Method to get the default language for the site.
 323       *
 324       * @return  string  The default language string.
 325       *
 326       * @since   2.5
 327       */
 328      public static function getDefaultLanguage()
 329      {
 330          static $lang;
 331  
 332          // We need to go to com_languages to get the site default language, it's the best we can guess.
 333          if (empty($lang)) {
 334              $lang = ComponentHelper::getParams('com_languages')->get('site', 'en-GB');
 335          }
 336  
 337          return $lang;
 338      }
 339  
 340      /**
 341       * Method to parse a language/locale key and return a simple language string.
 342       *
 343       * @param   string  $lang  The language/locale key. For example: en-GB
 344       *
 345       * @return  string  The simple language string. For example: en
 346       *
 347       * @since   2.5
 348       */
 349      public static function getPrimaryLanguage($lang)
 350      {
 351          static $data;
 352  
 353          // Only parse the identifier if necessary.
 354          if (!isset($data[$lang])) {
 355              if (is_callable(array('Locale', 'getPrimaryLanguage'))) {
 356                  // Get the language key using the Locale package.
 357                  $data[$lang] = \Locale::getPrimaryLanguage($lang);
 358              } else {
 359                  // Get the language key using string position.
 360                  $data[$lang] = StringHelper::substr($lang, 0, StringHelper::strpos($lang, '-'));
 361              }
 362          }
 363  
 364          return $data[$lang];
 365      }
 366  
 367      /**
 368       * Method to get extra data for a content before being indexed. This is how
 369       * we add Comments, Tags, Labels, etc. that should be available to Finder.
 370       *
 371       * @param   Result  $item  The item to index as a Result object.
 372       *
 373       * @return  boolean  True on success, false on failure.
 374       *
 375       * @since   2.5
 376       * @throws  Exception on database error.
 377       */
 378      public static function getContentExtras(Result $item)
 379      {
 380          // Load the finder plugin group.
 381          PluginHelper::importPlugin('finder');
 382  
 383          Factory::getApplication()->triggerEvent('onPrepareFinderContent', array(&$item));
 384  
 385          return true;
 386      }
 387  
 388      /**
 389       * Method to process content text using the onContentPrepare event trigger.
 390       *
 391       * @param   string    $text    The content to process.
 392       * @param   Registry  $params  The parameters object. [optional]
 393       * @param   Result    $item    The item which get prepared. [optional]
 394       *
 395       * @return  string  The processed content.
 396       *
 397       * @since   2.5
 398       */
 399      public static function prepareContent($text, $params = null, Result $item = null)
 400      {
 401          static $loaded;
 402  
 403          // Load the content plugins if necessary.
 404          if (empty($loaded)) {
 405              PluginHelper::importPlugin('content');
 406              $loaded = true;
 407          }
 408  
 409          // Instantiate the parameter object if necessary.
 410          if (!($params instanceof Registry)) {
 411              $registry = new Registry($params);
 412              $params = $registry;
 413          }
 414  
 415          // Create a mock content object.
 416          $content       = Table::getInstance('Content');
 417          $content->text = $text;
 418  
 419          if ($item) {
 420              $content->bind((array) $item);
 421              $content->bind($item->getElements());
 422          }
 423  
 424          if ($item && !empty($item->context)) {
 425              $content->context = $item->context;
 426          }
 427  
 428          // Fire the onContentPrepare event.
 429          Factory::getApplication()->triggerEvent('onContentPrepare', array('com_finder.indexer', &$content, &$params, 0));
 430  
 431          return $content->text;
 432      }
 433  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer