[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

use Exception;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Filesystem\File;
use Joomla\CMS\Object\CMSObject;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Profiler\Profiler;
use Joomla\Database\DatabaseInterface;
use Joomla\Database\ParameterType;
use Joomla\Database\QueryInterface;
use Joomla\String\StringHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects

/**
 * Main indexer class for the Finder indexer package.
 *
 * The indexer class provides the core functionality of the Finder
 * search engine. It is responsible for adding and updating the
 * content links table; extracting and scoring tokens; and maintaining
 * all referential information for the content.
 *
 * Note: All exceptions thrown from within this class should be caught
 * by the controller.
 *
 * @since  2.5
 */
class Indexer
{
    /**
     * The title context identifier.
     *
     * @var    integer
     * @since  2.5
     */
    public const TITLE_CONTEXT = 1;

    /**
     * The text context identifier.
     *
     * @var    integer
     * @since  2.5
     */
    public const TEXT_CONTEXT = 2;

    /**
     * The meta context identifier.
     *
     * @var    integer
     * @since  2.5
     */
    public const META_CONTEXT = 3;

    /**
     * The path context identifier.
     *
     * @var    integer
     * @since  2.5
     */
    public const PATH_CONTEXT = 4;

    /**
     * The misc context identifier.
     *
     * @var    integer
     * @since  2.5
     */
    public const MISC_CONTEXT = 5;

    /**
     * The indexer state object.
     *
     * @var    CMSObject
     * @since  2.5
     */
    public static $state;

    /**
     * The indexer profiler object.
     *
     * @var    Profiler
     * @since  2.5
     */
    public static $profiler;

    /**
     * Database driver cache.
     *
     * @var    \Joomla\Database\DatabaseDriver
     * @since  3.8.0
     */
    protected $db;

    /**
     * Reusable Query Template. To be used with clone.
     *
     * @var    QueryInterface
     * @since  3.8.0
     */
    protected $addTokensToDbQueryTemplate;

    /**
     * Indexer constructor.
     *
     * When no database is passed, a deprecation notice is raised and the
     * database is fetched from the application container instead.
     *
     * @param   DatabaseInterface|null  $db  The database
     *
     * @since  3.8.0
     */
    public function __construct(?DatabaseInterface $db = null)
    {
        if ($db === null) {
            @trigger_error('Database will be mandatory in 5.0.', E_USER_DEPRECATED);
            $db = Factory::getContainer()->get(DatabaseInterface::class);
        }

        $this->db = $db;

        // Set up query template for addTokensToDb
        $this->addTokensToDbQueryTemplate = $db->getQuery(true)->insert($db->quoteName('#__finder_tokens'))
            ->columns(
                array(
                    $db->quoteName('term'),
                    $db->quoteName('stem'),
                    $db->quoteName('common'),
                    $db->quoteName('phrase'),
                    $db->quoteName('weight'),
                    $db->quoteName('context'),
                    $db->quoteName('language')
                )
            );
    }

    /**
     * Method to get the indexer state.
     *
     * The state is taken from the static cache first, then from the session
     * key `_finder.state`, and is built from the component parameters when
     * neither is available.
     *
     * @return  object  The indexer state object.
     *
     * @since   2.5
     */
    public static function getState()
    {
        // First, try to load from the internal state.
        if ((bool) static::$state) {
            return static::$state;
        }

        // If we couldn't load from the internal state, try the session.
        $session = Factory::getSession();
        $data    = $session->get('_finder.state', null);

        // If the state is empty, load the values for the first time.
        if (empty($data)) {
            $data        = new CMSObject();
            $data->force = false;

            // Load the default configuration options.
            $data->options = ComponentHelper::getParams('com_finder');
            $db            = Factory::getDbo();

            // Lower-cased to match the server type checks in the rest of this class.
            if (strtolower($db->getServerType()) === 'mysql') {
                /**
                 * Try to calculate the heapsize for the memory table for indexing. If this fails,
                 * we fall back on a reasonable small size. We want to prevent the system to fail
                 * and block saving content.
                 */
                $memoryTableLimit = 7500;

                try {
                    $db->setQuery('SHOW VARIABLES LIKE ' . $db->quote('max_heap_table_size'));
                    $heapsize = $db->loadObject();

                    /**
                     * In tests, the size of a row seems to have been around 720 bytes.
                     * We take 800 to be on the safe side.
                     *
                     * A missing row or value keeps the fallback instead of producing a limit
                     * of 0, which would push every indexing run onto disk tables.
                     */
                    if (!empty($heapsize->Value)) {
                        $memoryTableLimit = (int) ($heapsize->Value / 800);
                    }
                } catch (Exception $e) {
                    // Something failed. We fall back to a reasonable guess.
                    $memoryTableLimit = 7500;
                }

                $data->options->set('memory_table_limit', $memoryTableLimit);
            } else {
                // We are running on PostgreSQL and don't have this issue, so we set a rather high number.
                $data->options->set('memory_table_limit', 50000);
            }

            // Setup the weight lookup information.
            $data->weights = array(
                self::TITLE_CONTEXT => round($data->options->get('title_multiplier', 1.7), 2),
                self::TEXT_CONTEXT  => round($data->options->get('text_multiplier', 0.7), 2),
                self::META_CONTEXT  => round($data->options->get('meta_multiplier', 1.2), 2),
                self::PATH_CONTEXT  => round($data->options->get('path_multiplier', 2.0), 2),
                self::MISC_CONTEXT  => round($data->options->get('misc_multiplier', 0.3), 2)
            );

            // Set the current time as the start time.
            $data->startTime = Factory::getDate()->toSql();

            // Set the remaining default values.
            $data->batchSize   = (int) $data->options->get('batch_size', 50);
            $data->batchOffset = 0;
            $data->totalItems  = 0;
            $data->pluginState = array();
        }

        // Setup the profiler if debugging is enabled.
        if (Factory::getApplication()->get('debug')) {
            static::$profiler = Profiler::getInstance('FinderIndexer');
        }

        // Set the state.
        static::$state = $data;

        return static::$state;
    }

    /**
     * Method to set the indexer state.
     *
     * @param   CMSObject  $data  A new indexer state object.
     *
     * @return  boolean  True on success, false on failure.
     *
     * @since   2.5
     */
    public static function setState($data)
    {
        // Check the state object. Anything that is not a CMSObject (including null) is rejected.
        if (!$data instanceof CMSObject) {
            return false;
        }

        // Set the new internal state.
        static::$state = $data;

        // Set the new session state.
        Factory::getSession()->set('_finder.state', $data);

        return true;
    }

    /**
     * Method to reset the indexer state.
     *
     * @return  void
     *
     * @since   2.5
     */
    public static function resetState()
    {
        // Reset the internal state to null (static:: to match getState()/setState()).
        static::$state = null;

        // Reset the session state to null.
        Factory::getSession()->set('_finder.state', null);
    }

    /**
     * Method to index a content item.
     *
     * The item is skipped when its signature matches the stored one. Otherwise
     * its link record is inserted or updated, its content is tokenised into the
     * temporary token tables, the tokens are aggregated and mapped to the link,
     * and the `onFinderIndexAfterIndex` event is triggered.
     *
     * Note: several properties of $item (dates, description, path properties)
     * are normalised in place before the item is serialised and stored.
     *
     * @param   Result  $item    The content item to index.
     * @param   string  $format  The format of the content. [optional]
     *
     * @return  integer  The ID of the record in the links table.
     *
     * @since   2.5
     * @throws  \Exception on database error.
     */
    public function index($item, $format = 'html')
    {
        // Mark beforeIndexing in the profiler.
        static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
        $db         = $this->db;
        $serverType = strtolower($db->getServerType());

        // Check if the item is in the database.
        $query = $db->getQuery(true)
            ->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
            ->from($db->quoteName('#__finder_links'))
            ->where($db->quoteName('url') . ' = ' . $db->quote($item->url));

        // Load the item from the database.
        $db->setQuery($query);
        $link = $db->loadObject();

        // Get the indexer state.
        $state = static::getState();

        // Get the signatures of the item.
        $curSig = static::getSignature($item);
        $oldSig = $link->md5sum ?? null;

        // Get the other item information.
        $linkId = empty($link->link_id) ? null : $link->link_id;
        $isNew  = empty($link->link_id);

        // Check the signatures. If they match, the item is up to date.
        if (!$isNew && $curSig == $oldSig) {
            return $linkId;
        }

        /*
         * If the link already exists, flush all the term maps for the item
         * before they are rebuilt below.
         */
        if (!$isNew) {
            // Flush the maps for the link.
            $query->clear()
                ->delete($db->quoteName('#__finder_links_terms'))
                ->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
            $db->setQuery($query);
            $db->execute();

            // Remove the taxonomy maps.
            Taxonomy::removeMaps($linkId);
        }

        // Mark afterUnmapping in the profiler.
        static::$profiler ? static::$profiler->mark('afterUnmapping') : null;

        // Perform cleanup on the item data.
        $item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : null;
        $item->publish_end_date   = (int) $item->publish_end_date != 0 ? $item->publish_end_date : null;
        $item->start_date         = (int) $item->start_date != 0 ? $item->start_date : null;
        $item->end_date           = (int) $item->end_date != 0 ? $item->end_date : null;

        // Prepare the item description.
        $item->description = Helper::parse($item->summary ?? '');

        /*
         * Now, we need to enter the item into the links table. If the item
         * already exists in the database, we need to use an UPDATE query.
         * Otherwise, we need to use an INSERT to get the link id back.
         */
        $entry        = new \stdClass();
        $entry->url   = $item->url;
        $entry->route = $item->route;
        $entry->title = $item->title;

        // We are shortening the description in order to not run into length issues with this field
        $entry->description        = StringHelper::substr($item->description, 0, 32000);
        $entry->indexdate          = Factory::getDate()->toSql();
        $entry->state              = (int) $item->state;
        $entry->access             = (int) $item->access;
        $entry->language           = $item->language;
        $entry->type_id            = (int) $item->type_id;
        $entry->object             = '';
        $entry->publish_start_date = $item->publish_start_date;
        $entry->publish_end_date   = $item->publish_end_date;
        $entry->start_date         = $item->start_date;
        $entry->end_date           = $item->end_date;
        $entry->list_price         = (double) ($item->list_price ?: 0);
        $entry->sale_price         = (double) ($item->sale_price ?: 0);

        if ($isNew) {
            // Insert the link and get its id.
            $db->insertObject('#__finder_links', $entry);
            $linkId = (int) $db->insertid();
        } else {
            // Update the link.
            $entry->link_id = $linkId;
            $db->updateObject('#__finder_links', $entry, 'link_id');
        }

        // Running total of tokens written to the tokens table for this item.
        $count = 0;

        // Threshold above which the token tables are moved from memory to disk.
        $memoryTableLimit = $state->options->get('memory_table_limit', 7500);

        // Mark afterLinking in the profiler.
        static::$profiler ? static::$profiler->mark('afterLinking') : null;

        // Truncate the tokens tables.
        $db->truncateTable('#__finder_tokens');

        // Truncate the tokens aggregate table.
        $db->truncateTable('#__finder_tokens_aggregate');

        /*
         * Process the item's content. The items can customize their
         * processing instructions to define extra properties to process
         * or rearrange how properties are weighted.
         */
        foreach ($item->getInstructions() as $group => $properties) {
            // Iterate through the properties of the group.
            foreach ($properties as $property) {
                // Check if the property exists in the item.
                if (empty($item->$property)) {
                    continue;
                }

                // Tokenize the property.
                if (is_array($item->$property)) {
                    // Tokenize an array of content and add it to the database.
                    foreach ($item->$property as $ip) {
                        /*
                         * If the group is path, we need to do a few extra processing
                         * steps to strip the extension and convert slashes and dashes
                         * to spaces.
                         */
                        if ($group === static::PATH_CONTEXT) {
                            $ip = File::stripExt($ip);
                            $ip = str_replace(array('/', '-'), ' ', $ip);
                        }

                        /*
                         * Tokenize a string of content and add it to the database.
                         * tokenizeToDb() receives the running total and returns the
                         * updated running total, so the result is assigned, not added.
                         */
                        $count = $this->tokenizeToDb($ip, $group, $item->language, $format, $count);

                        // Check if we're approaching the memory limit of the token table.
                        if ($count > $memoryTableLimit) {
                            $this->toggleTables(false);
                        }
                    }
                } else {
                    /*
                     * If the group is path, we need to do a few extra processing
                     * steps to strip the extension and convert slashes and dashes
                     * to spaces. The normalised value is written back to the item.
                     */
                    if ($group === static::PATH_CONTEXT) {
                        $item->$property = File::stripExt($item->$property);
                        $item->$property = str_replace(array('/', '-'), ' ', $item->$property);
                    }

                    // Tokenize a string of content and add it to the database (returns the running total).
                    $count = $this->tokenizeToDb($item->$property, $group, $item->language, $format, $count);

                    // Check if we're approaching the memory limit of the token table.
                    if ($count > $memoryTableLimit) {
                        $this->toggleTables(false);
                    }
                }
            }
        }

        /*
         * Process the item's taxonomy. The items can customize their
         * taxonomy mappings to define extra properties to map.
         */
        foreach ($item->getTaxonomy() as $branch => $nodes) {
            // Iterate through the nodes and map them to the branch.
            foreach ($nodes as $node) {
                // Add the node to the tree.
                if ($node->nested) {
                    $nodeId = Taxonomy::addNestedNode($branch, $node->node, $node->state, $node->access, $node->language);
                } else {
                    $nodeId = Taxonomy::addNode($branch, $node->title, $node->state, $node->access, $node->language);
                }

                // Add the link => node map.
                Taxonomy::addMap($linkId, $nodeId);
                $node->id = $nodeId;
            }
        }

        // Mark afterProcessing in the profiler.
        static::$profiler ? static::$profiler->mark('afterProcessing') : null;

        /*
         * At this point, all of the item's content has been parsed, tokenized
         * and inserted into the #__finder_tokens table. Now, we need to
         * aggregate all the data into that table into a more usable form. The
         * aggregated data will be inserted into #__finder_tokens_aggregate
         * table.
         *
         * The statement is a sprintf() template: %F is the context multiplier
         * and both %d placeholders are the context identifier.
         */
        $query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
            ' (' . $db->quoteName('term_id') .
            ', ' . $db->quoteName('term') .
            ', ' . $db->quoteName('stem') .
            ', ' . $db->quoteName('common') .
            ', ' . $db->quoteName('phrase') .
            ', ' . $db->quoteName('term_weight') .
            ', ' . $db->quoteName('context') .
            ', ' . $db->quoteName('context_weight') .
            ', ' . $db->quoteName('total_weight') .
            ', ' . $db->quoteName('language') . ')' .
            ' SELECT' .
            ' COALESCE(t.term_id, 0), t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
            ' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, 0, t1.language' .
            ' FROM (' .
            ' SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
            ' FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
            ' WHERE t1.context = %d' .
            ' ) AS t1' .
            ' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term AND t2.language = t1.language' .
            ' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term AND t.language = t1.language' .
            ' WHERE t2.context = %d' .
            ' GROUP BY t1.term, t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
            ' ORDER BY t1.term DESC';

        // Iterate through the contexts and aggregate the tokens per context.
        foreach ($state->weights as $context => $multiplier) {
            // Run the query to aggregate the tokens for this context.
            $db->setQuery(sprintf($query, $multiplier, $context, $context));
            $db->execute();
        }

        // Mark afterAggregating in the profiler.
        static::$profiler ? static::$profiler->mark('afterAggregating') : null;

        /*
         * When we pulled down all of the aggregate data, we did a LEFT JOIN
         * over the terms table to try to find all the term ids that
         * already exist for our tokens. If any of the rows in the aggregate
         * table have a term id of 0, then no term record exists for that
         * term so we need to add it to the terms table.
         */
        $db->setQuery(
            'INSERT INTO ' . $db->quoteName('#__finder_terms') .
            ' (' . $db->quoteName('term') .
            ', ' . $db->quoteName('stem') .
            ', ' . $db->quoteName('common') .
            ', ' . $db->quoteName('phrase') .
            ', ' . $db->quoteName('weight') .
            ', ' . $db->quoteName('soundex') .
            ', ' . $db->quoteName('language') . ')' .
            ' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
            ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
            ' WHERE ta.term_id = 0' .
            ' GROUP BY ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language'
        );
        $db->execute();

        /*
         * Now, we just inserted a bunch of new records into the terms table
         * so we need to go back and update the aggregate table with all the
         * new term ids.
         */
        $query = $db->getQuery(true)
            ->update($db->quoteName('#__finder_tokens_aggregate', 'ta'))
            ->innerJoin($db->quoteName('#__finder_terms', 't'), 't.term = ta.term AND t.language = ta.language')
            ->where('ta.term_id = 0');

        // The SET target is alias-qualified on MySQL only.
        if ($serverType == 'mysql') {
            $query->set($db->quoteName('ta.term_id') . ' = ' . $db->quoteName('t.term_id'));
        } else {
            $query->set($db->quoteName('term_id') . ' = ' . $db->quoteName('t.term_id'));
        }

        $db->setQuery($query);
        $db->execute();

        // Mark afterTerms in the profiler.
        static::$profiler ? static::$profiler->mark('afterTerms') : null;

        /*
         * After we've made sure that all of the terms are in the terms table
         * and the aggregate table has the correct term ids, we need to update
         * the links counter for each term by one.
         */
        $query->clear()
            ->update($db->quoteName('#__finder_terms', 't'))
            ->innerJoin($db->quoteName('#__finder_tokens_aggregate', 'ta'), 'ta.term_id = t.term_id');

        if ($serverType == 'mysql') {
            $query->set($db->quoteName('t.links') . ' = t.links + 1');
        } else {
            $query->set($db->quoteName('links') . ' = t.links + 1');
        }

        $db->setQuery($query);
        $db->execute();

        // Mark in the profiler. NOTE: this reuses the 'afterTerms' label from the previous step.
        static::$profiler ? static::$profiler->mark('afterTerms') : null;

        /*
         * At this point, the aggregate table contains a record for each
         * term in each context. So, we're going to pull down all of that
         * data while grouping the records by term and add all of the
         * sub-totals together to arrive at the final total for each token for
         * this link. Then, we insert all of that data into the mapping table.
         */
        $db->setQuery(
            'INSERT INTO ' . $db->quoteName('#__finder_links_terms') .
            ' (' . $db->quoteName('link_id') .
            ', ' . $db->quoteName('term_id') .
            ', ' . $db->quoteName('weight') . ')' .
            ' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
            ' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
            ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
            ' GROUP BY ' . $db->quoteName('term') . ', ' . $db->quoteName('term_id') .
            ' ORDER BY ' . $db->quoteName('term') . ' DESC'
        );
        $db->execute();

        // Mark afterMapping in the profiler.
        static::$profiler ? static::$profiler->mark('afterMapping') : null;

        // Update the signature and store the serialised item.
        $object = serialize($item);
        $query->clear()
            ->update($db->quoteName('#__finder_links'))
            ->set($db->quoteName('md5sum') . ' = :md5sum')
            ->set($db->quoteName('object') . ' = :object')
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':md5sum', $curSig)
            ->bind(':object', $object, ParameterType::LARGE_OBJECT)
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($query);
        $db->execute();

        // Mark afterSigning in the profiler.
        static::$profiler ? static::$profiler->mark('afterSigning') : null;

        // Truncate the tokens tables.
        $db->truncateTable('#__finder_tokens');

        // Truncate the tokens aggregate table.
        $db->truncateTable('#__finder_tokens_aggregate');

        // Toggle the token tables back to memory tables.
        $this->toggleTables(true);

        // Mark afterTruncating in the profiler.
        static::$profiler ? static::$profiler->mark('afterTruncating') : null;

        // Trigger a plugin event after indexing
        PluginHelper::importPlugin('finder');
        Factory::getApplication()->triggerEvent('onFinderIndexAfterIndex', array($item, $linkId));

        return $linkId;
    }

    /**
     * Method to remove a link from the index.
     *
     * Decrements the link counters of the mapped terms, deletes the term maps,
     * orphaned terms, the link itself and its taxonomy maps, then triggers the
     * `onFinderIndexAfterDelete` event.
     *
     * @param   integer  $linkId            The id of the link.
     * @param   bool     $removeTaxonomies  Remove empty taxonomies
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    public function remove($linkId, $removeTaxonomies = true)
    {
        $db     = $this->db;
        $query  = $db->getQuery(true);
        $linkId = (int) $linkId;

        // Update the link counts for the terms.
        $query->clear()
            ->update($db->quoteName('#__finder_terms', 't'))
            ->join('INNER', $db->quoteName('#__finder_links_terms', 'm'), $db->quoteName('m.term_id') . ' = ' . $db->quoteName('t.term_id'))
            ->set($db->quoteName('links') . ' = ' . $db->quoteName('links') . ' - 1')
            ->where($db->quoteName('m.link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($query)->execute();

        // Remove all records from the mapping tables.
        $query->clear()
            ->delete($db->quoteName('#__finder_links_terms'))
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($query)->execute();

        // Delete all orphaned terms.
        $query->clear()
            ->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('links') . ' <= 0');
        $db->setQuery($query)->execute();

        // Delete the link from the index.
        $query->clear()
            ->delete($db->quoteName('#__finder_links'))
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($query)->execute();

        // Remove the taxonomy maps.
        Taxonomy::removeMaps($linkId);

        // Remove the orphaned taxonomy nodes.
        if ($removeTaxonomies) {
            Taxonomy::removeOrphanNodes();
        }

        PluginHelper::importPlugin('finder');
        Factory::getApplication()->triggerEvent('onFinderIndexAfterDelete', array($linkId));

        return true;
    }

    /**
     * Method to optimize the index. We use this method to remove unused terms
     * and any other optimizations that might be necessary.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    public function optimize()
    {
        // Get the database object.
        $db         = $this->db;
        $serverType = strtolower($db->getServerType());
        $query      = $db->getQuery(true);

        // Delete all orphaned terms.
        $query->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('links') . ' <= 0');
        $db->setQuery($query);
        $db->execute();

        // Delete all broken links. (Links missing the object)
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links'))
            ->where($db->quoteName('object') . ' = ' . $db->quote(''));
        $db->setQuery($query);
        $db->execute();

        // Delete all orphaned mappings of terms to links
        $query2 = $db->getQuery(true)
            ->select($db->quoteName('link_id'))
            ->from($db->quoteName('#__finder_links'));
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links_terms'))
            ->where($db->quoteName('link_id') . ' NOT IN (' . $query2 . ')');
        $db->setQuery($query);
        $db->execute();

        // Delete all orphaned terms
        $query2 = $db->getQuery(true)
            ->select($db->quoteName('term_id'))
            ->from($db->quoteName('#__finder_links_terms'));
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('term_id') . ' NOT IN (' . $query2 . ')');
        $db->setQuery($query);
        $db->execute();

        // Delete all orphaned taxonomies
        Taxonomy::removeOrphanMaps();
        Taxonomy::removeOrphanNodes();

        // Optimize the tables.
        $tables = [
            '#__finder_links',
            '#__finder_links_terms',
            '#__finder_filters',
            '#__finder_terms_common',
            '#__finder_types',
            '#__finder_taxonomy_map',
            '#__finder_taxonomy'
        ];

        foreach ($tables as $table) {
            if ($serverType == 'mysql') {
                $db->setQuery('OPTIMIZE TABLE ' . $db->quoteName($table));
                $db->execute();
            } else {
                $db->setQuery('VACUUM ' . $db->quoteName($table));
                $db->execute();
                $db->setQuery('REINDEX TABLE ' . $db->quoteName($table));
                $db->execute();
            }
        }

        return true;
    }

    /**
     * Method to get a content item's signature.
     *
     * The signature is the MD5 hash of the serialised item together with the
     * context weights and the `tuplecount` and `language_default` options.
     *
     * @param   object  $item  The content item to index.
     *
     * @return  string  The content item's signature.
     *
     * @since   2.5
     */
    protected static function getSignature($item)
    {
        // Get the indexer state.
        $state = static::getState();

        // Get the relevant configuration variables.
        $config = array(
            $state->weights,
            $state->options->get('tuplecount', 1),
            $state->options->get('language_default', '')
        );

        return md5(serialize(array($item, $config)));
    }

    /**
     * Method to parse input, tokenize it, and then add it to the database.
     *
     * @param   mixed    $input    String or resource to use as input. A resource input is read in 2K chunks
     *                             that are cut at the last space, so that terms are not split between chunks.
     * @param   integer  $context  The context of the input. See context constants.
     * @param   string   $lang     The language of the input.
     * @param   string   $format   The format of the input.
     * @param   integer  $count    Number of words indexed so far.
     *
     * @return  integer  Cumulative number of tokens indexed so far, i.e. $count plus the tokens added by this call.
     *
     * @since   2.5
     */
    protected function tokenizeToDb($input, $context, $lang, $format, $count = 0)
    {
        if (empty($input)) {
            return $count;
        }

        // Anything that is not a resource is parsed in one go.
        if (!is_resource($input)) {
            return $this->tokenizeToDbShort($input, $context, $lang, $format, $count);
        }

        $buffer = '';

        // Batch the process out to avoid memory limits.
        while (!feof($input)) {
            // Read into the buffer.
            $chunk = fread($input, 2048);

            // Stop on a read error instead of spinning on a stream that never reaches EOF.
            if ($chunk === false) {
                break;
            }

            $buffer .= $chunk;

            // At the end of the file the remainder is flushed after the loop.
            if (feof($input)) {
                break;
            }

            /*
             * If we haven't reached the end of the file, seek to the last
             * space character and hold back whatever is after that to make sure
             * we didn't truncate a term while reading the input.
             */
            $ls = strrpos($buffer, ' ');

            /*
             * No usable space yet: keep reading. Tokenising the buffer here without
             * emptying it would index the same text again on the next iteration.
             */
            if (!$ls) {
                continue;
            }

            // Truncate the string to the last space character.
            $string = substr($buffer, 0, $ls);

            // Adjust the buffer based on the last space for the next iteration and trim.
            $buffer = StringHelper::trim(substr($buffer, $ls));

            // Parse, tokenise and add tokens to the database.
            $count = $this->tokenizeToDbShort($string, $context, $lang, $format, $count);

            unset($string);
        }

        // Parse whatever remains once the input is exhausted.
        if ($buffer !== '') {
            $count = $this->tokenizeToDbShort($buffer, $context, $lang, $format, $count);
        }

        return $count;
    }

    /**
     * Method to parse input, tokenise it, then add the tokens to the database.
     *
     * Tokens longer than 75 characters are skipped, as are common and numeric
     * tokens when the matching component filter options are enabled. Tokens are
     * inserted in batches of at most 128.
     *
     * @param   string   $input    String to parse, tokenise and add to database.
     * @param   integer  $context  The context of the input. See context constants.
     * @param   string   $lang     The language of the input.
     * @param   string   $format   The format of the input.
     * @param   integer  $count    The number of tokens processed so far.
     *
     * @return  integer  Cumulative number of tokens extracted from the input so far.
     *
     * @since   3.7.0
     */
    private function tokenizeToDbShort($input, $context, $lang, $format, $count)
    {
        // The filter options are read once and cached for subsequent calls.
        static $filterCommon, $filterNumeric;

        if (is_null($filterCommon)) {
            $params        = ComponentHelper::getParams('com_finder');
            $filterCommon  = $params->get('filter_commonwords', false);
            $filterNumeric = $params->get('filter_numerics', false);
        }

        // Parse the input.
        $input = Helper::parse($input, $format);

        // Check the input.
        if (empty($input)) {
            return $count;
        }

        // Tokenize the input.
        $tokens = Helper::tokenize($input, $lang);

        if (count($tokens) == 0) {
            return $count;
        }

        $query = clone $this->addTokensToDbQueryTemplate;

        // Break into chunks of no more than 128 items
        $chunks = array_chunk($tokens, 128);

        foreach ($chunks as $tokens) {
            $query->clear('values');

            foreach ($tokens as $token) {
                // Database size for a term field
                if ($token->length > 75) {
                    continue;
                }

                if ($filterCommon && $token->common) {
                    continue;
                }

                if ($filterNumeric && $token->numeric) {
                    continue;
                }

                $query->values(
                    $this->db->quote($token->term) . ', '
                    . $this->db->quote($token->stem) . ', '
                    . (int) $token->common . ', '
                    . (int) $token->phrase . ', '
                    . $this->db->quote($token->weight) . ', '
                    . (int) $context . ', '
                    . $this->db->quote($token->language)
                );
                $count++;
            }

            // Check if we're approaching the memory limit of the token table.
            if ($count > static::$state->options->get('memory_table_limit', 7500)) {
                $this->toggleTables(false);
            }

            // Only execute the query if there are tokens to insert
            if ($query->values !== null) {
                $this->db->setQuery($query)->execute();
            }
        }

        return $count;
    }

    /**
     * Method to switch the token tables from Memory tables to Disk tables
     * when they are close to running out of memory.
     * Since this is not supported/implemented in all DB-drivers, the method
     * does nothing and returns true on servers other than MySQL.
     *
     * @param   boolean  $memory  Flag to control how they should be toggled.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function toggleTables($memory)
    {
        if (strtolower($this->db->getServerType()) != 'mysql') {
            return true;
        }

        // Remembers the last engine that was set so the ALTER TABLE statements are not repeated.
        static $state;

        // Get the database adapter.
        $db = $this->db;

        // Check if we are setting the tables to the Memory engine.
        if ($memory === true && $state !== true) {
            // Set the tokens table to Memory.
            $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = MEMORY');
            $db->execute();

            // Set the tokens aggregate table to Memory.
            $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = MEMORY');
            $db->execute();

            // Set the internal state.
            $state = $memory;
        } elseif ($memory === false && $state !== false) {
            // We must be setting the tables to the InnoDB engine.
            // Set the tokens table to InnoDB.
            $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens') . ' ENGINE = INNODB');
            $db->execute();

            // Set the tokens aggregate table to InnoDB.
            $db->setQuery('ALTER TABLE ' . $db->quoteName('#__finder_tokens_aggregate') . ' ENGINE = INNODB');
            $db->execute();

            // Set the internal state.
            $state = $memory;
        }

        return true;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |