[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/administrator/components/com_finder/src/Indexer/ -> Indexer.php (source)

   1  <?php
   2  
   3  /**
   4   * @package     Joomla.Administrator
   5   * @subpackage  com_finder
   6   *
   7   * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
   8   * @license     GNU General Public License version 2 or later; see LICENSE.txt
   9   */
  10  
  11  namespace Joomla\Component\Finder\Administrator\Indexer;
  12  
  13  use Exception;
  14  use Joomla\CMS\Component\ComponentHelper;
  15  use Joomla\CMS\Factory;
  16  use Joomla\CMS\Filesystem\File;
  17  use Joomla\CMS\Object\CMSObject;
  18  use Joomla\CMS\Plugin\PluginHelper;
  19  use Joomla\CMS\Profiler\Profiler;
  20  use Joomla\Database\DatabaseInterface;
  21  use Joomla\Database\ParameterType;
  22  use Joomla\Database\QueryInterface;
  23  use Joomla\String\StringHelper;
  24  
  25  // phpcs:disable PSR1.Files.SideEffects
  26  \defined('_JEXEC') or die;
  27  // phpcs:enable PSR1.Files.SideEffects
  28  
  29  /**
  30   * Main indexer class for the Finder indexer package.
  31   *
  32   * The indexer class provides the core functionality of the Finder
  33   * search engine. It is responsible for adding and updating the
  34   * content links table; extracting and scoring tokens; and maintaining
  35   * all referential information for the content.
  36   *
  37   * Note: All exceptions thrown from within this class should be caught
  38   * by the controller.
  39   *
  40   * @since  2.5
  41   */
  42  class Indexer
  43  {
    /**
     * The title context identifier.
     *
     * Used as a key of the weights lookup built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const TITLE_CONTEXT = 1;

    /**
     * The text context identifier.
     *
     * Used as a key of the weights lookup built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const TEXT_CONTEXT = 2;

    /**
     * The meta context identifier.
     *
     * Used as a key of the weights lookup built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const META_CONTEXT = 3;

    /**
     * The path context identifier.
     *
     * Used as a key of the weights lookup built in getState(). Content indexed
     * under this context has its file extension stripped and slashes/dashes
     * replaced by spaces before tokenizing (see index()).
     *
     * @var    integer
     * @since  2.5
     */
    public const PATH_CONTEXT = 4;

    /**
     * The misc context identifier.
     *
     * Used as a key of the weights lookup built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const MISC_CONTEXT = 5;

    /**
     * The indexer state object.
     *
     * Populated lazily by getState(), replaced by setState() and cleared by
     * resetState(). It is mirrored in the session under the '_finder.state' key.
     *
     * @var    CMSObject
     * @since  2.5
     */
    public static $state;

    /**
     * The indexer profiler object.
     *
     * Only assigned by getState() when the application 'debug' setting is
     * truthy; callers therefore test it before calling mark().
     *
     * @var    Profiler
     * @since  2.5
     */
    public static $profiler;

    /**
     * Database driver cache.
     *
     * Assigned in the constructor and used by the instance methods of this class.
     *
     * @var    \Joomla\Database\DatabaseDriver
     * @since  3.8.0
     */
    protected $db;

    /**
     * Reusable Query Template. To be used with clone.
     *
     * Built once in the constructor as an INSERT into #__finder_tokens with the
     * columns term, stem, common, phrase, weight, context and language.
     *
     * @var    QueryInterface
     * @since  3.8.0
     */
    protected $addTokensToDbQueryTemplate;
 115  
 116      /**
 117       * Indexer constructor.
 118       *
 119       * @param  DatabaseInterface  $db  The database
 120       *
 121       * @since  3.8.0
 122       */
 123      public function __construct(DatabaseInterface $db = null)
 124      {
 125          if ($db === null) {
 126              @trigger_error(sprintf('Database will be mandatory in 5.0.'), E_USER_DEPRECATED);
 127              $db = Factory::getContainer()->get(DatabaseInterface::class);
 128          }
 129  
 130          $this->db = $db;
 131  
 132          // Set up query template for addTokensToDb
 133          $this->addTokensToDbQueryTemplate = $db->getQuery(true)->insert($db->quoteName('#__finder_tokens'))
 134              ->columns(
 135                  array(
 136                      $db->quoteName('term'),
 137                      $db->quoteName('stem'),
 138                      $db->quoteName('common'),
 139                      $db->quoteName('phrase'),
 140                      $db->quoteName('weight'),
 141                      $db->quoteName('context'),
 142                      $db->quoteName('language')
 143                  )
 144              );
 145      }
 146  
 147      /**
 148       * Method to get the indexer state.
 149       *
 150       * @return  object  The indexer state object.
 151       *
 152       * @since   2.5
 153       */
 154      public static function getState()
 155      {
 156          // First, try to load from the internal state.
 157          if ((bool) static::$state) {
 158              return static::$state;
 159          }
 160  
 161          // If we couldn't load from the internal state, try the session.
 162          $session = Factory::getSession();
 163          $data = $session->get('_finder.state', null);
 164  
 165          // If the state is empty, load the values for the first time.
 166          if (empty($data)) {
 167              $data = new CMSObject();
 168              $data->force = false;
 169  
 170              // Load the default configuration options.
 171              $data->options = ComponentHelper::getParams('com_finder');
 172              $db = Factory::getDbo();
 173  
 174              if ($db->getServerType() == 'mysql') {
 175                  /**
 176                   * Try to calculate the heapsize for the memory table for indexing. If this fails,
 177                   * we fall back on a reasonable small size. We want to prevent the system to fail
 178                   * and block saving content.
 179                   */
 180                  try {
 181                      $db->setQuery('SHOW VARIABLES LIKE ' . $db->quote('max_heap_table_size'));
 182                      $heapsize = $db->loadObject();
 183  
 184                      /**
 185                       * In tests, the size of a row seems to have been around 720 bytes.
 186                       * We take 800 to be on the safe side.
 187                       */
 188                      $memory_table_limit = (int) ($heapsize->Value / 800);
 189                      $data->options->set('memory_table_limit', $memory_table_limit);
 190                  } catch (Exception $e) {
 191                      // Something failed. We fall back to a reasonable guess.
 192                      $data->options->set('memory_table_limit', 7500);
 193                  }
 194              } else {
 195                  // We are running on PostgreSQL and don't have this issue, so we set a rather high number.
 196                  $data->options->set('memory_table_limit', 50000);
 197              }
 198  
 199              // Setup the weight lookup information.
 200              $data->weights = array(
 201                  self::TITLE_CONTEXT => round($data->options->get('title_multiplier', 1.7), 2),
 202                  self::TEXT_CONTEXT  => round($data->options->get('text_multiplier', 0.7), 2),
 203                  self::META_CONTEXT  => round($data->options->get('meta_multiplier', 1.2), 2),
 204                  self::PATH_CONTEXT  => round($data->options->get('path_multiplier', 2.0), 2),
 205                  self::MISC_CONTEXT  => round($data->options->get('misc_multiplier', 0.3), 2)
 206              );
 207  
 208              // Set the current time as the start time.
 209              $data->startTime = Factory::getDate()->toSql();
 210  
 211              // Set the remaining default values.
 212              $data->batchSize   = (int) $data->options->get('batch_size', 50);
 213              $data->batchOffset = 0;
 214              $data->totalItems  = 0;
 215              $data->pluginState = array();
 216          }
 217  
 218          // Setup the profiler if debugging is enabled.
 219          if (Factory::getApplication()->get('debug')) {
 220              static::$profiler = Profiler::getInstance('FinderIndexer');
 221          }
 222  
 223          // Set the state.
 224          static::$state = $data;
 225  
 226          return static::$state;
 227      }
 228  
 229      /**
 230       * Method to set the indexer state.
 231       *
 232       * @param   CMSObject  $data  A new indexer state object.
 233       *
 234       * @return  boolean  True on success, false on failure.
 235       *
 236       * @since   2.5
 237       */
 238      public static function setState($data)
 239      {
 240          // Check the state object.
 241          if (empty($data) || !$data instanceof CMSObject) {
 242              return false;
 243          }
 244  
 245          // Set the new internal state.
 246          static::$state = $data;
 247  
 248          // Set the new session state.
 249          Factory::getSession()->set('_finder.state', $data);
 250  
 251          return true;
 252      }
 253  
 254      /**
 255       * Method to reset the indexer state.
 256       *
 257       * @return  void
 258       *
 259       * @since   2.5
 260       */
 261      public static function resetState()
 262      {
 263          // Reset the internal state to null.
 264          self::$state = null;
 265  
 266          // Reset the session state to null.
 267          Factory::getSession()->set('_finder.state', null);
 268      }
 269  
 270      /**
 271       * Method to index a content item.
 272       *
 273       * @param   Result  $item    The content item to index.
 274       * @param   string  $format  The format of the content. [optional]
 275       *
 276       * @return  integer  The ID of the record in the links table.
 277       *
 278       * @since   2.5
 279       * @throws  \Exception on database error.
 280       */
 281      public function index($item, $format = 'html')
 282      {
 283          // Mark beforeIndexing in the profiler.
 284          static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
 285          $db = $this->db;
 286          $serverType = strtolower($db->getServerType());
 287  
 288          // Check if the item is in the database.
 289          $query = $db->getQuery(true)
 290              ->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
 291              ->from($db->quoteName('#__finder_links'))
 292              ->where($db->quoteName('url') . ' = ' . $db->quote($item->url));
 293  
 294          // Load the item  from the database.
 295          $db->setQuery($query);
 296          $link = $db->loadObject();
 297  
 298          // Get the indexer state.
 299          $state = static::getState();
 300  
 301          // Get the signatures of the item.
 302          $curSig = static::getSignature($item);
 303          $oldSig = $link->md5sum ?? null;
 304  
 305          // Get the other item information.
 306          $linkId = empty($link->link_id) ? null : $link->link_id;
 307          $isNew = empty($link->link_id);
 308  
 309          // Check the signatures. If they match, the item is up to date.
 310          if (!$isNew && $curSig == $oldSig) {
 311              return $linkId;
 312          }
 313  
 314          /*
 315           * If the link already exists, flush all the term maps for the item.
 316           * Maps are stored in 16 tables so we need to iterate through and flush
 317           * each table one at a time.
 318           */
 319          if (!$isNew) {
 320              // Flush the maps for the link.
 321              $query->clear()
 322                  ->delete($db->quoteName('#__finder_links_terms'))
 323                  ->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
 324              $db->setQuery($query);
 325              $db->execute();
 326  
 327              // Remove the taxonomy maps.
 328              Taxonomy::removeMaps($linkId);
 329          }
 330  
 331          // Mark afterUnmapping in the profiler.
 332          static::$profiler ? static::$profiler->mark('afterUnmapping') : null;
 333  
 334          // Perform cleanup on the item data.
 335          $item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : null;
 336          $item->publish_end_date = (int) $item->publish_end_date != 0 ? $item->publish_end_date : null;
 337          $item->start_date = (int) $item->start_date != 0 ? $item->start_date : null;
 338          $item->end_date = (int) $item->end_date != 0 ? $item->end_date : null;
 339  
 340          // Prepare the item description.
 341          $item->description = Helper::parse($item->summary ?? '');
 342  
 343          /*
 344           * Now, we need to enter the item into the links table. If the item
 345           * already exists in the database, we need to use an UPDATE query.
 346           * Otherwise, we need to use an INSERT to get the link id back.
 347           */
 348          $entry = new \stdClass();
 349          $entry->url = $item->url;
 350          $entry->route = $item->route;
 351          $entry->title = $item->title;
 352  
 353          // We are shortening the description in order to not run into length issues with this field
 354          $entry->description = StringHelper::substr($item->description, 0, 32000);
 355          $entry->indexdate = Factory::getDate()->toSql();
 356          $entry->state = (int) $item->state;
 357          $entry->access = (int) $item->access;
 358          $entry->language = $item->language;
 359          $entry->type_id = (int) $item->type_id;
 360          $entry->object = '';
 361          $entry->publish_start_date = $item->publish_start_date;
 362          $entry->publish_end_date = $item->publish_end_date;
 363          $entry->start_date = $item->start_date;
 364          $entry->end_date = $item->end_date;
 365          $entry->list_price = (double) ($item->list_price ?: 0);
 366          $entry->sale_price = (double) ($item->sale_price ?: 0);
 367  
 368          if ($isNew) {
 369              // Insert the link and get its id.
 370              $db->insertObject('#__finder_links', $entry);
 371              $linkId = (int) $db->insertid();
 372          } else {
 373              // Update the link.
 374              $entry->link_id = $linkId;
 375              $db->updateObject('#__finder_links', $entry, 'link_id');
 376          }
 377  
 378          // Set up the variables we will need during processing.
 379          $count = 0;
 380  
 381          // Mark afterLinking in the profiler.
 382          static::$profiler ? static::$profiler->mark('afterLinking') : null;
 383  
 384          // Truncate the tokens tables.
 385          $db->truncateTable('#__finder_tokens');
 386  
 387          // Truncate the tokens aggregate table.
 388          $db->truncateTable('#__finder_tokens_aggregate');
 389  
 390          /*
 391           * Process the item's content. The items can customize their
 392           * processing instructions to define extra properties to process
 393           * or rearrange how properties are weighted.
 394           */
 395          foreach ($item->getInstructions() as $group => $properties) {
 396              // Iterate through the properties of the group.
 397              foreach ($properties as $property) {
 398                  // Check if the property exists in the item.
 399                  if (empty($item->$property)) {
 400                      continue;
 401                  }
 402  
 403                  // Tokenize the property.
 404                  if (is_array($item->$property)) {
 405                      // Tokenize an array of content and add it to the database.
 406                      foreach ($item->$property as $ip) {
 407                          /*
 408                           * If the group is path, we need to a few extra processing
 409                           * steps to strip the extension and convert slashes and dashes
 410                           * to spaces.
 411                           */
 412                          if ($group === static::PATH_CONTEXT) {
 413                              $ip = File::stripExt($ip);
 414                              $ip = str_replace(array('/', '-'), ' ', $ip);
 415                          }
 416  
 417                          // Tokenize a string of content and add it to the database.
 418                          $count += $this->tokenizeToDb($ip, $group, $item->language, $format, $count);
 419  
 420                          // Check if we're approaching the memory limit of the token table.
 421                          if ($count > static::$state->options->get('memory_table_limit', 7500)) {
 422                              $this->toggleTables(false);
 423                          }
 424                      }
 425                  } else {
 426                      /*
 427                       * If the group is path, we need to a few extra processing
 428                       * steps to strip the extension and convert slashes and dashes
 429                       * to spaces.
 430                       */
 431                      if ($group === static::PATH_CONTEXT) {
 432                          $item->$property = File::stripExt($item->$property);
 433                          $item->$property = str_replace('/', ' ', $item->$property);
 434                          $item->$property = str_replace('-', ' ', $item->$property);
 435                      }
 436  
 437                      // Tokenize a string of content and add it to the database.
 438                      $count += $this->tokenizeToDb($item->$property, $group, $item->language, $format, $count);
 439  
 440                      // Check if we're approaching the memory limit of the token table.
 441                      if ($count > static::$state->options->get('memory_table_limit', 30000)) {
 442                          $this->toggleTables(false);
 443                      }
 444                  }
 445              }
 446          }
 447  
 448          /*
 449           * Process the item's taxonomy. The items can customize their
 450           * taxonomy mappings to define extra properties to map.
 451           */
 452          foreach ($item->getTaxonomy() as $branch => $nodes) {
 453              // Iterate through the nodes and map them to the branch.
 454              foreach ($nodes as $node) {
 455                  // Add the node to the tree.
 456                  if ($node->nested) {
 457                      $nodeId = Taxonomy::addNestedNode($branch, $node->node, $node->state, $node->access, $node->language);
 458                  } else {
 459                      $nodeId = Taxonomy::addNode($branch, $node->title, $node->state, $node->access, $node->language);
 460                  }
 461  
 462                  // Add the link => node map.
 463                  Taxonomy::addMap($linkId, $nodeId);
 464                  $node->id = $nodeId;
 465              }
 466          }
 467  
 468          // Mark afterProcessing in the profiler.
 469          static::$profiler ? static::$profiler->mark('afterProcessing') : null;
 470  
 471          /*
 472           * At this point, all of the item's content has been parsed, tokenized
 473           * and inserted into the #__finder_tokens table. Now, we need to
 474           * aggregate all the data into that table into a more usable form. The
 475           * aggregated data will be inserted into #__finder_tokens_aggregate
 476           * table.
 477           */
 478          $query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
 479              ' (' . $db->quoteName('term_id') .
 480              ', ' . $db->quoteName('term') .
 481              ', ' . $db->quoteName('stem') .
 482              ', ' . $db->quoteName('common') .
 483              ', ' . $db->quoteName('phrase') .
 484              ', ' . $db->quoteName('term_weight') .
 485              ', ' . $db->quoteName('context') .
 486              ', ' . $db->quoteName('context_weight') .
 487              ', ' . $db->quoteName('total_weight') .
 488              ', ' . $db->quoteName('language') . ')' .
 489              ' SELECT' .
 490              ' COALESCE(t.term_id, 0), t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
 491              ' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, 0, t1.language' .
 492              ' FROM (' .
 493              '   SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
 494              '   FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
 495              '   WHERE t1.context = %d' .
 496              ' ) AS t1' .
 497              ' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term AND t2.language = t1.language' .
 498              ' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term AND t.language = t1.language' .
 499              ' WHERE t2.context = %d' .
 500              ' GROUP BY t1.term, t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
 501              ' ORDER BY t1.term DESC';
 502  
 503          // Iterate through the contexts and aggregate the tokens per context.
 504          foreach ($state->weights as $context => $multiplier) {
 505              // Run the query to aggregate the tokens for this context..
 506              $db->setQuery(sprintf($query, $multiplier, $context, $context));
 507              $db->execute();
 508          }
 509  
 510          // Mark afterAggregating in the profiler.
 511          static::$profiler ? static::$profiler->mark('afterAggregating') : null;
 512  
 513          /*
 514           * When we pulled down all of the aggregate data, we did a LEFT JOIN
 515           * over the terms table to try to find all the term ids that
 516           * already exist for our tokens. If any of the rows in the aggregate
 517           * table have a term of 0, then no term record exists for that
 518           * term so we need to add it to the terms table.
 519           */
 520          $db->setQuery(
 521              'INSERT INTO ' . $db->quoteName('#__finder_terms') .
 522              ' (' . $db->quoteName('term') .
 523              ', ' . $db->quoteName('stem') .
 524              ', ' . $db->quoteName('common') .
 525              ', ' . $db->quoteName('phrase') .
 526              ', ' . $db->quoteName('weight') .
 527              ', ' . $db->quoteName('soundex') .
 528              ', ' . $db->quoteName('language') . ')' .
 529              ' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
 530              ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
 531              ' WHERE ta.term_id = 0' .
 532              ' GROUP BY ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language'
 533          );
 534          $db->execute();
 535  
 536          /*
 537           * Now, we just inserted a bunch of new records into the terms table
 538           * so we need to go back and update the aggregate table with all the
 539           * new term ids.
 540           */
 541          $query = $db->getQuery(true)
 542              ->update($db->quoteName('#__finder_tokens_aggregate', 'ta'))
 543              ->innerJoin($db->quoteName('#__finder_terms', 't'), 't.term = ta.term AND t.language = ta.language')
 544              ->where('ta.term_id = 0');
 545  
 546          if ($serverType == 'mysql') {
 547              $query->set($db->quoteName('ta.term_id') . ' = ' . $db->quoteName('t.term_id'));
 548          } else {
 549              $query->set($db->quoteName('term_id') . ' = ' . $db->quoteName('t.term_id'));
 550          }
 551  
 552          $db->setQuery($query);
 553          $db->execute();
 554  
 555          // Mark afterTerms in the profiler.
 556          static::$profiler ? static::$profiler->mark('afterTerms') : null;
 557  
 558          /*
 559           * After we've made sure that all of the terms are in the terms table
 560           * and the aggregate table has the correct term ids, we need to update
 561           * the links counter for each term by one.
 562           */
 563          $query->clear()
 564              ->update($db->quoteName('#__finder_terms', 't'))
 565              ->innerJoin($db->quoteName('#__finder_tokens_aggregate', 'ta'), 'ta.term_id = t.term_id');
 566  
 567          if ($serverType == 'mysql') {
 568              $query->set($db->quoteName('t.links') . ' = t.links + 1');
 569          } else {
 570              $query->set($db->quoteName('links') . ' = t.links + 1');
 571          }
 572  
 573          $db->setQuery($query);
 574          $db->execute();
 575  
 576          // Mark afterTerms in the profiler.
 577          static::$profiler ? static::$profiler->mark('afterTerms') : null;
 578  
 579          /*
 580           * At this point, the aggregate table contains a record for each
 581           * term in each context. So, we're going to pull down all of that
 582           * data while grouping the records by term and add all of the
 583           * sub-totals together to arrive at the final total for each token for
 584           * this link. Then, we insert all of that data into the mapping table.
 585           */
 586          $db->setQuery(
 587              'INSERT INTO ' . $db->quoteName('#__finder_links_terms') .
 588              ' (' . $db->quoteName('link_id') .
 589              ', ' . $db->quoteName('term_id') .
 590              ', ' . $db->quoteName('weight') . ')' .
 591              ' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
 592              ' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
 593              ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
 594              ' GROUP BY ' . $db->quoteName('term') . ', ' . $db->quoteName('term_id') .
 595              ' ORDER BY ' . $db->quoteName('term') . ' DESC'
 596          );
 597          $db->execute();
 598  
 599          // Mark afterMapping in the profiler.
 600          static::$profiler ? static::$profiler->mark('afterMapping') : null;
 601  
 602          // Update the signature.
 603          $object = serialize($item);
 604          $query->clear()
 605              ->update($db->quoteName('#__finder_links'))
 606              ->set($db->quoteName('md5sum') . ' = :md5sum')
 607              ->set($db->quoteName('object') . ' = :object')
 608              ->where($db->quoteName('link_id') . ' = :linkid')
 609              ->bind(':md5sum', $curSig)
 610              ->bind(':object', $object, ParameterType::LARGE_OBJECT)
 611              ->bind(':linkid', $linkId, ParameterType::INTEGER);
 612          $db->setQuery($query);
 613          $db->execute();
 614  
 615          // Mark afterSigning in the profiler.
 616          static::$profiler ? static::$profiler->mark('afterSigning') : null;
 617  
 618          // Truncate the tokens tables.
 619          $db->truncateTable('#__finder_tokens');
 620  
 621          // Truncate the tokens aggregate table.
 622          $db->truncateTable('#__finder_tokens_aggregate');
 623  
 624          // Toggle the token tables back to memory tables.
 625          $this->toggleTables(true);
 626  
 627          // Mark afterTruncating in the profiler.
 628          static::$profiler ? static::$profiler->mark('afterTruncating') : null;
 629  
 630          // Trigger a plugin event after indexing
 631          PluginHelper::importPlugin('finder');
 632          Factory::getApplication()->triggerEvent('onFinderIndexAfterIndex', array($item, $linkId));
 633  
 634          return $linkId;
 635      }
 636  
 637      /**
 638       * Method to remove a link from the index.
 639       *
 640       * @param   integer  $linkId            The id of the link.
 641       * @param   bool     $removeTaxonomies  Remove empty taxonomies
 642       *
 643       * @return  boolean  True on success.
 644       *
 645       * @since   2.5
 646       * @throws  Exception on database error.
 647       */
 648      public function remove($linkId, $removeTaxonomies = true)
 649      {
 650          $db     = $this->db;
 651          $query  = $db->getQuery(true);
 652          $linkId = (int) $linkId;
 653  
 654          // Update the link counts for the terms.
 655          $query->clear()
 656              ->update($db->quoteName('#__finder_terms', 't'))
 657              ->join('INNER', $db->quoteName('#__finder_links_terms', 'm'), $db->quoteName('m.term_id') . ' = ' . $db->quoteName('t.term_id'))
 658              ->set($db->quoteName('links') . ' = ' . $db->quoteName('links') . ' - 1')
 659              ->where($db->quoteName('m.link_id') . ' = :linkid')
 660              ->bind(':linkid', $linkId, ParameterType::INTEGER);
 661          $db->setQuery($query)->execute();
 662  
 663          // Remove all records from the mapping tables.
 664          $query->clear()
 665              ->delete($db->quoteName('#__finder_links_terms'))
 666              ->where($db->quoteName('link_id') . ' = :linkid')
 667              ->bind(':linkid', $linkId, ParameterType::INTEGER);
 668          $db->setQuery($query)->execute();
 669  
 670          // Delete all orphaned terms.
 671          $query->clear()
 672              ->delete($db->quoteName('#__finder_terms'))
 673              ->where($db->quoteName('links') . ' <= 0');
 674          $db->setQuery($query)->execute();
 675  
 676          // Delete the link from the index.
 677          $query->clear()
 678              ->delete($db->quoteName('#__finder_links'))
 679              ->where($db->quoteName('link_id') . ' = :linkid')
 680              ->bind(':linkid', $linkId, ParameterType::INTEGER);
 681          $db->setQuery($query)->execute();
 682  
 683          // Remove the taxonomy maps.
 684          Taxonomy::removeMaps($linkId);
 685  
 686          // Remove the orphaned taxonomy nodes.
 687          if ($removeTaxonomies) {
 688              Taxonomy::removeOrphanNodes();
 689          }
 690  
 691          PluginHelper::importPlugin('finder');
 692          Factory::getApplication()->triggerEvent('onFinderIndexAfterDelete', array($linkId));
 693  
 694          return true;
 695      }
 696  
 697      /**
 698       * Method to optimize the index. We use this method to remove unused terms
 699       * and any other optimizations that might be necessary.
 700       *
 701       * @return  boolean  True on success.
 702       *
 703       * @since   2.5
 704       * @throws  Exception on database error.
 705       */
 706      public function optimize()
 707      {
 708          // Get the database object.
 709          $db = $this->db;
 710          $serverType = strtolower($db->getServerType());
 711          $query = $db->getQuery(true);
 712  
 713          // Delete all orphaned terms.
 714          $query->delete($db->quoteName('#__finder_terms'))
 715              ->where($db->quoteName('links') . ' <= 0');
 716          $db->setQuery($query);
 717          $db->execute();
 718  
 719          // Delete all broken links. (Links missing the object)
 720          $query = $db->getQuery(true)
 721              ->delete('#__finder_links')
 722              ->where($db->quoteName('object') . ' = ' . $db->quote(''));
 723          $db->setQuery($query);
 724          $db->execute();
 725  
 726          // Delete all orphaned mappings of terms to links
 727          $query2 = $db->getQuery(true)
 728              ->select($db->quoteName('link_id'))
 729              ->from($db->quoteName('#__finder_links'));
 730          $query = $db->getQuery(true)
 731              ->delete($db->quoteName('#__finder_links_terms'))
 732              ->where($db->quoteName('link_id') . ' NOT IN (' . $query2 . ')');
 733          $db->setQuery($query);
 734          $db->execute();
 735  
 736          // Delete all orphaned terms
 737          $query2 = $db->getQuery(true)
 738              ->select($db->quoteName('term_id'))
 739              ->from($db->quoteName('#__finder_links_terms'));
 740          $query = $db->getQuery(true)
 741              ->delete($db->quoteName('#__finder_terms'))
 742              ->where($db->quoteName('term_id') . ' NOT IN (' . $query2 . ')');
 743          $db->setQuery($query);
 744          $db->execute();
 745  
 746          // Delete all orphaned taxonomies
 747          Taxonomy::removeOrphanMaps();
 748          Taxonomy::removeOrphanNodes();
 749  
 750          // Optimize the tables.
 751          $tables = [
 752              '#__finder_links',
 753              '#__finder_links_terms',
 754              '#__finder_filters',
 755              '#__finder_terms_common',
 756              '#__finder_types',
 757              '#__finder_taxonomy_map',
 758              '#__finder_taxonomy'
 759          ];
 760  
 761          foreach ($tables as $table) {
 762              if ($serverType == 'mysql') {
 763                  $db->setQuery('OPTIMIZE TABLE ' . $db->quoteName($table));
 764                  $db->execute();
 765              } else {
 766                  $db->setQuery('VACUUM ' . $db->quoteName($table));
 767                  $db->execute();
 768                  $db->setQuery('REINDEX TABLE ' . $db->quoteName($table));
 769                  $db->execute();
 770              }
 771          }
 772  
 773          return true;
 774      }
 775  
 776      /**
 777       * Method to get a content item's signature.
 778       *
 779       * @param   object  $item  The content item to index.
 780       *
 781       * @return  string  The content item's signature.
 782       *
 783       * @since   2.5
 784       */
 785      protected static function getSignature($item)
 786      {
 787          // Get the indexer state.
 788          $state = static::getState();
 789  
 790          // Get the relevant configuration variables.
 791          $config = array(
 792              $state->weights,
 793              $state->options->get('tuplecount', 1),
 794              $state->options->get('language_default', '')
 795          );
 796  
 797          return md5(serialize(array($item, $config)));
 798      }
 799  
 800      /**
 801       * Method to parse input, tokenize it, and then add it to the database.
 802       *
 803       * @param   mixed    $input    String or resource to use as input. A resource input will automatically be chunked to conserve
 804       *                             memory. Strings will be chunked if longer than 2K in size.
 805       * @param   integer  $context  The context of the input. See context constants.
 806       * @param   string   $lang     The language of the input.
 807       * @param   string   $format   The format of the input.
 808       * @param   integer  $count    Number of words indexed so far.
 809       *
 810       * @return  integer  The number of tokens extracted from the input.
 811       *
 812       * @since   2.5
 813       */
 814      protected function tokenizeToDb($input, $context, $lang, $format, $count = 0)
 815      {
 816          $buffer = null;
 817  
 818          if (empty($input)) {
 819              return $count;
 820          }
 821  
 822          // If the input is a resource, batch the process out.
 823          if (is_resource($input)) {
 824              // Batch the process out to avoid memory limits.
 825              while (!feof($input)) {
 826                  // Read into the buffer.
 827                  $buffer .= fread($input, 2048);
 828  
 829                  /*
 830                   * If we haven't reached the end of the file, seek to the last
 831                   * space character and drop whatever is after that to make sure
 832                   * we didn't truncate a term while reading the input.
 833                   */
 834                  if (!feof($input)) {
 835                      // Find the last space character.
 836                      $ls = strrpos($buffer, ' ');
 837  
 838                      // Adjust string based on the last space character.
 839                      if ($ls) {
 840                          // Truncate the string to the last space character.
 841                          $string = substr($buffer, 0, $ls);
 842  
 843                          // Adjust the buffer based on the last space for the next iteration and trim.
 844                          $buffer = StringHelper::trim(substr($buffer, $ls));
 845                      } else {
 846                          // No space character was found.
 847                          $string = $buffer;
 848                      }
 849                  } else {
 850                      // We've reached the end of the file, so parse whatever remains.
 851                      $string = $buffer;
 852                  }
 853  
 854                  // Parse, tokenise and add tokens to the database.
 855                  $count = $this->tokenizeToDbShort($string, $context, $lang, $format, $count);
 856  
 857                  unset($string);
 858              }
 859  
 860              return $count;
 861          }
 862  
 863          // Parse, tokenise and add tokens to the database.
 864          $count = $this->tokenizeToDbShort($input, $context, $lang, $format, $count);
 865  
 866          return $count;
 867      }
 868  
 869      /**
 870       * Method to parse input, tokenise it, then add the tokens to the database.
 871       *
 872       * @param   string   $input    String to parse, tokenise and add to database.
 873       * @param   integer  $context  The context of the input. See context constants.
 874       * @param   string   $lang     The language of the input.
 875       * @param   string   $format   The format of the input.
 876       * @param   integer  $count    The number of tokens processed so far.
 877       *
 878       * @return  integer  Cumulative number of tokens extracted from the input so far.
 879       *
 880       * @since   3.7.0
 881       */
 882      private function tokenizeToDbShort($input, $context, $lang, $format, $count)
 883      {
 884          static $filterCommon, $filterNumeric;
 885  
 886          if (is_null($filterCommon)) {
 887              $params = ComponentHelper::getParams('com_finder');
 888              $filterCommon = $params->get('filter_commonwords', false);
 889              $filterNumeric = $params->get('filter_numerics', false);
 890          }
 891  
 892          // Parse the input.
 893          $input = Helper::parse($input, $format);
 894  
 895          // Check the input.
 896          if (empty($input)) {
 897              return $count;
 898          }
 899  
 900          // Tokenize the input.
 901          $tokens = Helper::tokenize($input, $lang);
 902  
 903          if (count($tokens) == 0) {
 904              return $count;
 905          }
 906  
 907          $query = clone $this->addTokensToDbQueryTemplate;
 908  
 909          // Break into chunks of no more than 128 items
 910          $chunks = array_chunk($tokens, 128);
 911  
 912          foreach ($chunks as $tokens) {
 913              $query->clear('values');
 914  
 915              foreach ($tokens as $token) {
 916                  // Database size for a term field
 917                  if ($token->length > 75) {
 918                      continue;
 919                  }
 920  
 921                  if ($filterCommon && $token->common) {
 922                      continue;
 923                  }
 924  
 925                  if ($filterNumeric && $token->numeric) {
 926                      continue;
 927                  }
 928  
 929                  $query->values(
 930                      $this->db->quote($token->term) . ', '
 931                      . $this->db->quote($token->stem) . ', '
 932                      . (int) $token->common . ', '
 933                      . (int) $token->phrase . ', '
 934                      . $this->db->quote($token->weight) . ', '
 935                      . (int) $context . ', '
 936                      . $this->db->quote($token->language)
 937                  );
 938                  $count++;
 939              }
 940  
 941              // Check if we're approaching the memory limit of the token table.
 942              if ($count > static::$state->options->get('memory_table_limit', 7500)) {
 943                  $this->toggleTables(false);
 944              }
 945  
 946              // Only execute the query if there are tokens to insert
 947              if ($query->values !== null) {
 948                  $this->db->setQuery($query)->execute();
 949              }
 950          }
 951  
 952          return $count;
 953      }
 954  
 955      /**
 956       * Method to switch the token tables from Memory tables to Disk tables
 957       * when they are close to running out of memory.
 958       * Since this is not supported/implemented in all DB-drivers, the default is a stub method, which simply returns true.
 959       *
 960       * @param   boolean  $memory  Flag to control how they should be toggled.
 961       *
 962       * @return  boolean  True on success.
 963       *
 964       * @since   2.5
 965       * @throws  Exception on database error.
 966       */
 967      protected function toggleTables($memory)
 968      {
 969          if (strtolower($this->db->getServerType()) != 'mysql') {
 970              return true;
 971          }
 972  
 973          static $state;
 974  
 975          // Get the database adapter.
 976          $db = $this->db;
 977  
 978          // Check if we are setting the tables to the Memory engine.
 979          if ($memory === true && $state !== true) {
 980              // Set the tokens table to Memory.
 981              $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MEMORY');
 982              $db->execute();
 983  
 984              // Set the tokens aggregate table to Memory.
 985              $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MEMORY');
 986              $db->execute();
 987  
 988              // Set the internal state.
 989              $state = $memory;
 990          } elseif ($memory === false && $state !== false) {
 991              // We must be setting the tables to the InnoDB engine.
 992              // Set the tokens table to InnoDB.
 993              $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = INNODB');
 994              $db->execute();
 995  
 996              // Set the tokens aggregate table to InnoDB.
 997              $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = INNODB');
 998              $db->execute();
 999  
1000              // Set the internal state.
1001              $state = $memory;
1002          }
1003  
1004          return true;
1005      }
1006  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer