<?php

/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   (C) 2011 Open Source Matters, Inc. <https://www.joomla.org>
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

use Exception;
use Joomla\CMS\Component\ComponentHelper;
use Joomla\CMS\Factory;
use Joomla\CMS\Filesystem\File;
use Joomla\CMS\Object\CMSObject;
use Joomla\CMS\Plugin\PluginHelper;
use Joomla\CMS\Profiler\Profiler;
use Joomla\Database\DatabaseInterface;
use Joomla\Database\ParameterType;
use Joomla\Database\QueryInterface;
use Joomla\String\StringHelper;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects

/**
 * Main indexer class for the Finder indexer package.
 *
 * The indexer class provides the core functionality of the Finder
 * search engine. It is responsible for adding and updating the
 * content links table; extracting and scoring tokens; and maintaining
 * all referential information for the content.
 *
 * Note: All exceptions thrown from within this class should be caught
 * by the controller.
 *
 * @since  2.5
 */
class Indexer
{
    /**
     * The title context identifier.
     *
     * Used as a key of the context weight map built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const TITLE_CONTEXT = 1;

    /**
     * The text context identifier.
     *
     * Used as a key of the context weight map built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const TEXT_CONTEXT = 2;

    /**
     * The meta context identifier.
     *
     * Used as a key of the context weight map built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const META_CONTEXT = 3;

    /**
     * The path context identifier.
     *
     * Used as a key of the context weight map built in getState(). Content
     * indexed under this context has its file extension stripped and its
     * slashes and dashes converted to spaces by index().
     *
     * @var    integer
     * @since  2.5
     */
    public const PATH_CONTEXT = 4;

    /**
     * The misc context identifier.
     *
     * Used as a key of the context weight map built in getState().
     *
     * @var    integer
     * @since  2.5
     */
    public const MISC_CONTEXT = 5;

    /**
     * The indexer state object.
     *
     * Populated lazily by getState(), replaced by setState() and set back
     * to null by resetState().
     *
     * @var    CMSObject|null
     * @since  2.5
     */
    public static $state;

    /**
     * The indexer profiler object.
     *
     * Only assigned by getState() when the application's 'debug' setting is
     * enabled; callers therefore check it for truthiness before marking.
     *
     * @var    Profiler|null
     * @since  2.5
     */
    public static $profiler;

    /**
     * Database driver cache.
     *
     * Assigned once in the constructor.
     *
     * @var    \Joomla\Database\DatabaseDriver
     * @since  3.8.0
     */
    protected $db;

    /**
     * Reusable Query Template. To be used with clone.
     *
     * Built in the constructor as an INSERT into #__finder_tokens with the
     * column list already set; consumers clone it and only add values.
     *
     * @var    QueryInterface
     * @since  3.8.0
     */
    protected $addTokensToDbQueryTemplate;

    /**
     * Indexer constructor.
     *
     * @param  DatabaseInterface  $db  The database
     *
     * @since  3.8.0
     */
    public function __construct(?DatabaseInterface $db = null)
    {
        // The nullable type is spelled out explicitly: relying on "= null" to make
        // a typed parameter nullable is deprecated as of PHP 8.4.
        if ($db === null) {
            // Not passing a database is still supported, but flagged as deprecated.
            @trigger_error('Database will be mandatory in 5.0.', E_USER_DEPRECATED);
            $db = Factory::getContainer()->get(DatabaseInterface::class);
        }

        $this->db = $db;

        // Set up query template for addTokensToDb. Only the target table and the
        // column list are fixed here; values are added on a clone of this query.
        $this->addTokensToDbQueryTemplate = $db->getQuery(true)->insert($db->quoteName('#__finder_tokens'))
            ->columns(
                [
                    $db->quoteName('term'),
                    $db->quoteName('stem'),
                    $db->quoteName('common'),
                    $db->quoteName('phrase'),
                    $db->quoteName('weight'),
                    $db->quoteName('context'),
                    $db->quoteName('language'),
                ]
            );
    }

    /**
     * Method to get the indexer state.
     *
     * @return  object  The indexer state object.
     *
     * @since   2.5
     */
    public static function getState()
    {
        // First, try to load from the internal state.
        if ((bool) static::$state) {
            return static::$state;
        }

        // If we couldn't load from the internal state, try the session.
        $session = Factory::getSession();
        $data = $session->get('_finder.state', null);

        // If the state is empty, load the values for the first time.
        if (empty($data)) {
            $data = new CMSObject();
            $data->force = false;

            // Load the default configuration options.
            $data->options = ComponentHelper::getParams('com_finder');
            $db = Factory::getDbo();

            // Conservative row limit used whenever the real heap size cannot be determined.
            $fallbackLimit = 7500;

            if (strtolower($db->getServerType()) === 'mysql') {
                /**
                 * Try to calculate the heapsize for the memory table for indexing. If this fails,
                 * we fall back on a reasonable small size. We want to prevent the system to fail
                 * and block saving content.
                 */
                try {
                    $db->setQuery('SHOW VARIABLES LIKE ' . $db->quote('max_heap_table_size'));
                    $heapsize = $db->loadObject();

                    /**
                     * In tests, the size of a row seems to have been around 720 bytes.
                     * We take 800 to be on the safe side.
                     *
                     * The query may legitimately return no row (or no usable value); in that
                     * case the computed limit would be 0 and every indexed property would
                     * trigger a table toggle, so the fallback is used instead.
                     */
                    $memoryTableLimit = isset($heapsize->Value) ? intdiv((int) $heapsize->Value, 800) : 0;

                    if ($memoryTableLimit <= 0) {
                        $memoryTableLimit = $fallbackLimit;
                    }

                    $data->options->set('memory_table_limit', $memoryTableLimit);
                } catch (Exception $e) {
                    // Something failed. We fall back to a reasonable guess.
                    $data->options->set('memory_table_limit', $fallbackLimit);
                }
            } else {
                // We are running on PostgreSQL and don't have this issue, so we set a rather high number.
                $data->options->set('memory_table_limit', 50000);
            }

            // Setup the weight lookup information, keyed by context identifier.
            $data->weights = [
                self::TITLE_CONTEXT => round($data->options->get('title_multiplier', 1.7), 2),
                self::TEXT_CONTEXT  => round($data->options->get('text_multiplier', 0.7), 2),
                self::META_CONTEXT  => round($data->options->get('meta_multiplier', 1.2), 2),
                self::PATH_CONTEXT  => round($data->options->get('path_multiplier', 2.0), 2),
                self::MISC_CONTEXT  => round($data->options->get('misc_multiplier', 0.3), 2),
            ];

            // Set the current time as the start time.
            $data->startTime = Factory::getDate()->toSql();

            // Set the remaining default values.
            $data->batchSize   = (int) $data->options->get('batch_size', 50);
            $data->batchOffset = 0;
            $data->totalItems  = 0;
            $data->pluginState = [];
        }

        // Setup the profiler if debugging is enabled.
        if (Factory::getApplication()->get('debug')) {
            static::$profiler = Profiler::getInstance('FinderIndexer');
        }

        // Cache the state so later calls return early without touching the session.
        static::$state = $data;

        return static::$state;
    }

    /**
     * Method to set the indexer state.
     *
     * @param   CMSObject  $data  A new indexer state object.
     *
     * @return  boolean  True on success, false on failure.
     *
     * @since   2.5
     */
    public static function setState($data)
    {
        // Only a non-empty CMSObject is accepted as a state object.
        $isUsable = !empty($data) && $data instanceof CMSObject;

        if ($isUsable) {
            // Keep the in-memory copy and the session copy in step.
            static::$state = $data;
            Factory::getSession()->set('_finder.state', $data);

            return true;
        }

        return false;
    }

    /**
     * Method to reset the indexer state.
     *
     * @return  void
     *
     * @since   2.5
     */
    public static function resetState()
    {
        // Reset the internal state to null. Late static binding is used here
        // because getState() and setState() also address the property through
        // static::, so all three methods act on the same storage even when a
        // subclass redeclares $state.
        static::$state = null;

        // Reset the session state to null.
        Factory::getSession()->set('_finder.state', null);
    }

    /**
     * Method to index a content item.
     *
     * @param   Result  $item    The content item to index.
     * @param   string  $format  The format of the content. [optional]
     *
     * @return  integer  The ID of the record in the links table.
     *
     * @since   2.5
     * @throws  \Exception on database error.
     */
    public function index($item, $format = 'html')
    {
        // Mark beforeIndexing in the profiler.
        static::$profiler ? static::$profiler->mark('beforeIndexing') : null;
        $db = $this->db;
        $serverType = strtolower($db->getServerType());

        // Check if the item is in the database.
        $query = $db->getQuery(true)
            ->select($db->quoteName('link_id') . ', ' . $db->quoteName('md5sum'))
            ->from($db->quoteName('#__finder_links'))
            ->where($db->quoteName('url') . ' = ' . $db->quote($item->url));

        // Load the item from the database.
        $db->setQuery($query);
        $link = $db->loadObject();

        // Get the indexer state.
        $state = static::getState();

        // Get the signatures of the item.
        $curSig = static::getSignature($item);
        $oldSig = $link->md5sum ?? null;

        // Get the other item information.
        $linkId = empty($link->link_id) ? null : $link->link_id;
        $isNew = empty($link->link_id);

        /*
         * Check the signatures. If they match, the item is up to date.
         * The comparison is strict: both values are md5 hex strings, and a loose
         * comparison would treat two different "0e<digits>" hashes as equal numbers.
         */
        if (!$isNew && $curSig === $oldSig) {
            return $linkId;
        }

        // If the link already exists, flush all the term maps for the item.
        if (!$isNew) {
            // Flush the maps for the link.
            $query->clear()
                ->delete($db->quoteName('#__finder_links_terms'))
                ->where($db->quoteName('link_id') . ' = ' . (int) $linkId);
            $db->setQuery($query);
            $db->execute();

            // Remove the taxonomy maps.
            Taxonomy::removeMaps($linkId);
        }

        // Mark afterUnmapping in the profiler.
        static::$profiler ? static::$profiler->mark('afterUnmapping') : null;

        // Perform cleanup on the item data: dates that cast to 0 are stored as null.
        $item->publish_start_date = (int) $item->publish_start_date != 0 ? $item->publish_start_date : null;
        $item->publish_end_date = (int) $item->publish_end_date != 0 ? $item->publish_end_date : null;
        $item->start_date = (int) $item->start_date != 0 ? $item->start_date : null;
        $item->end_date = (int) $item->end_date != 0 ? $item->end_date : null;

        // Prepare the item description.
        $item->description = Helper::parse($item->summary ?? '');

        /*
         * Now, we need to enter the item into the links table. If the item
         * already exists in the database, we need to use an UPDATE query.
         * Otherwise, we need to use an INSERT to get the link id back.
         */
        $entry = new \stdClass();
        $entry->url = $item->url;
        $entry->route = $item->route;
        $entry->title = $item->title;

        // We are shortening the description in order to not run into length issues with this field
        $entry->description = StringHelper::substr($item->description, 0, 32000);
        $entry->indexdate = Factory::getDate()->toSql();
        $entry->state = (int) $item->state;
        $entry->access = (int) $item->access;
        $entry->language = $item->language;
        $entry->type_id = (int) $item->type_id;

        // The serialised object is written at the very end, together with the signature.
        $entry->object = '';
        $entry->publish_start_date = $item->publish_start_date;
        $entry->publish_end_date = $item->publish_end_date;
        $entry->start_date = $item->start_date;
        $entry->end_date = $item->end_date;
        $entry->list_price = (float) ($item->list_price ?: 0);
        $entry->sale_price = (float) ($item->sale_price ?: 0);

        if ($isNew) {
            // Insert the link and get its id.
            $db->insertObject('#__finder_links', $entry);
            $linkId = (int) $db->insertid();
        } else {
            // Update the link.
            $entry->link_id = $linkId;
            $db->updateObject('#__finder_links', $entry, 'link_id');
        }

        // Running total of tokens written to the tokens table for this item.
        $count = 0;

        // Token count above which the token tables are toggled (see toggleTables()).
        $memoryTableLimit = $state->options->get('memory_table_limit', 7500);

        // Mark afterLinking in the profiler.
        static::$profiler ? static::$profiler->mark('afterLinking') : null;

        // Truncate the tokens tables.
        $db->truncateTable('#__finder_tokens');

        // Truncate the tokens aggregate table.
        $db->truncateTable('#__finder_tokens_aggregate');

        /*
         * Process the item's content. The items can customize their
         * processing instructions to define extra properties to process
         * or rearrange how properties are weighted.
         */
        foreach ($item->getInstructions() as $group => $properties) {
            // Iterate through the properties of the group.
            foreach ($properties as $property) {
                // Check if the property exists in the item.
                if (empty($item->$property)) {
                    continue;
                }

                // Tokenize the property.
                if (is_array($item->$property)) {
                    // Tokenize an array of content and add it to the database.
                    foreach ($item->$property as $ip) {
                        /*
                         * If the group is path, we need to do a few extra processing
                         * steps to strip the extension and convert slashes and dashes
                         * to spaces.
                         */
                        if ($group === static::PATH_CONTEXT) {
                            $ip = File::stripExt($ip);
                            $ip = str_replace(['/', '-'], ' ', $ip);
                        }

                        /*
                         * Tokenize a string of content and add it to the database.
                         * tokenizeToDb() receives the running total and returns the
                         * updated running total, so the result is assigned, not added.
                         */
                        $count = $this->tokenizeToDb($ip, $group, $item->language, $format, $count);

                        // Check if we're approaching the memory limit of the token table.
                        if ($count > $memoryTableLimit) {
                            $this->toggleTables(false);
                        }
                    }
                } else {
                    /*
                     * If the group is path, we need to do a few extra processing
                     * steps to strip the extension and convert slashes and dashes
                     * to spaces. The item property itself is rewritten here.
                     */
                    if ($group === static::PATH_CONTEXT) {
                        $item->$property = File::stripExt($item->$property);
                        $item->$property = str_replace(['/', '-'], ' ', $item->$property);
                    }

                    // Tokenize a string of content and add it to the database (returns the running total).
                    $count = $this->tokenizeToDb($item->$property, $group, $item->language, $format, $count);

                    // Check if we're approaching the memory limit of the token table.
                    if ($count > $memoryTableLimit) {
                        $this->toggleTables(false);
                    }
                }
            }
        }

        /*
         * Process the item's taxonomy. The items can customize their
         * taxonomy mappings to define extra properties to map.
         */
        foreach ($item->getTaxonomy() as $branch => $nodes) {
            // Iterate through the nodes and map them to the branch.
            foreach ($nodes as $node) {
                // Add the node to the tree.
                if ($node->nested) {
                    $nodeId = Taxonomy::addNestedNode($branch, $node->node, $node->state, $node->access, $node->language);
                } else {
                    $nodeId = Taxonomy::addNode($branch, $node->title, $node->state, $node->access, $node->language);
                }

                // Add the link => node map.
                Taxonomy::addMap($linkId, $nodeId);
                $node->id = $nodeId;
            }
        }

        // Mark afterProcessing in the profiler.
        static::$profiler ? static::$profiler->mark('afterProcessing') : null;

        /*
         * At this point, all of the item's content has been parsed, tokenized
         * and inserted into the #__finder_tokens table. Now, we need to
         * aggregate all the data in that table into a more usable form. The
         * aggregated data will be inserted into the #__finder_tokens_aggregate
         * table. The statement is an sprintf() template: %F receives the context
         * multiplier and both %d placeholders receive the context identifier.
         */
        $query = 'INSERT INTO ' . $db->quoteName('#__finder_tokens_aggregate') .
            ' (' . $db->quoteName('term_id') .
            ', ' . $db->quoteName('term') .
            ', ' . $db->quoteName('stem') .
            ', ' . $db->quoteName('common') .
            ', ' . $db->quoteName('phrase') .
            ', ' . $db->quoteName('term_weight') .
            ', ' . $db->quoteName('context') .
            ', ' . $db->quoteName('context_weight') .
            ', ' . $db->quoteName('total_weight') .
            ', ' . $db->quoteName('language') . ')' .
            ' SELECT' .
            ' COALESCE(t.term_id, 0), t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context,' .
            ' ROUND( t1.weight * COUNT( t2.term ) * %F, 8 ) AS context_weight, 0, t1.language' .
            ' FROM (' .
            '   SELECT DISTINCT t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
            '   FROM ' . $db->quoteName('#__finder_tokens') . ' AS t1' .
            '   WHERE t1.context = %d' .
            ' ) AS t1' .
            ' JOIN ' . $db->quoteName('#__finder_tokens') . ' AS t2 ON t2.term = t1.term AND t2.language = t1.language' .
            ' LEFT JOIN ' . $db->quoteName('#__finder_terms') . ' AS t ON t.term = t1.term AND t.language = t1.language' .
            ' WHERE t2.context = %d' .
            ' GROUP BY t1.term, t.term_id, t1.term, t1.stem, t1.common, t1.phrase, t1.weight, t1.context, t1.language' .
            ' ORDER BY t1.term DESC';

        // Iterate through the contexts and aggregate the tokens per context.
        foreach ($state->weights as $context => $multiplier) {
            // Run the query to aggregate the tokens for this context.
            $db->setQuery(sprintf($query, $multiplier, $context, $context));
            $db->execute();
        }

        // Mark afterAggregating in the profiler.
        static::$profiler ? static::$profiler->mark('afterAggregating') : null;

        /*
         * When we pulled down all of the aggregate data, we did a LEFT JOIN
         * over the terms table to try to find all the term ids that
         * already exist for our tokens. If any of the rows in the aggregate
         * table have a term id of 0, then no term record exists for that
         * term so we need to add it to the terms table.
         */
        $db->setQuery(
            'INSERT INTO ' . $db->quoteName('#__finder_terms') .
            ' (' . $db->quoteName('term') .
            ', ' . $db->quoteName('stem') .
            ', ' . $db->quoteName('common') .
            ', ' . $db->quoteName('phrase') .
            ', ' . $db->quoteName('weight') .
            ', ' . $db->quoteName('soundex') .
            ', ' . $db->quoteName('language') . ')' .
            ' SELECT ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language' .
            ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') . ' AS ta' .
            ' WHERE ta.term_id = 0' .
            ' GROUP BY ta.term, ta.stem, ta.common, ta.phrase, ta.term_weight, SOUNDEX(ta.term), ta.language'
        );
        $db->execute();

        /*
         * Now, we just inserted a bunch of new records into the terms table
         * so we need to go back and update the aggregate table with all the
         * new term ids.
         */
        $query = $db->getQuery(true)
            ->update($db->quoteName('#__finder_tokens_aggregate', 'ta'))
            ->innerJoin($db->quoteName('#__finder_terms', 't'), 't.term = ta.term AND t.language = ta.language')
            ->where('ta.term_id = 0');

        // The target column is alias-qualified on MySQL only.
        if ($serverType === 'mysql') {
            $query->set($db->quoteName('ta.term_id') . ' = ' . $db->quoteName('t.term_id'));
        } else {
            $query->set($db->quoteName('term_id') . ' = ' . $db->quoteName('t.term_id'));
        }

        $db->setQuery($query);
        $db->execute();

        // Mark afterTerms in the profiler.
        static::$profiler ? static::$profiler->mark('afterTerms') : null;

        /*
         * After we've made sure that all of the terms are in the terms table
         * and the aggregate table has the correct term ids, we need to update
         * the links counter for each term by one.
         */
        $query->clear()
            ->update($db->quoteName('#__finder_terms', 't'))
            ->innerJoin($db->quoteName('#__finder_tokens_aggregate', 'ta'), 'ta.term_id = t.term_id');

        if ($serverType === 'mysql') {
            $query->set($db->quoteName('t.links') . ' = t.links + 1');
        } else {
            $query->set($db->quoteName('links') . ' = t.links + 1');
        }

        $db->setQuery($query);
        $db->execute();

        // Mark afterTerms in the profiler (the same label is recorded a second time here).
        static::$profiler ? static::$profiler->mark('afterTerms') : null;

        /*
         * At this point, the aggregate table contains a record for each
         * term in each context. So, we're going to pull down all of that
         * data while grouping the records by term and add all of the
         * sub-totals together to arrive at the final total for each token for
         * this link. Then, we insert all of that data into the mapping table.
         */
        $db->setQuery(
            'INSERT INTO ' . $db->quoteName('#__finder_links_terms') .
            ' (' . $db->quoteName('link_id') .
            ', ' . $db->quoteName('term_id') .
            ', ' . $db->quoteName('weight') . ')' .
            ' SELECT ' . (int) $linkId . ', ' . $db->quoteName('term_id') . ',' .
            ' ROUND(SUM(' . $db->quoteName('context_weight') . '), 8)' .
            ' FROM ' . $db->quoteName('#__finder_tokens_aggregate') .
            ' GROUP BY ' . $db->quoteName('term') . ', ' . $db->quoteName('term_id') .
            ' ORDER BY ' . $db->quoteName('term') . ' DESC'
        );
        $db->execute();

        // Mark afterMapping in the profiler.
        static::$profiler ? static::$profiler->mark('afterMapping') : null;

        // Update the signature and store the serialised item alongside it.
        $object = serialize($item);
        $query->clear()
            ->update($db->quoteName('#__finder_links'))
            ->set($db->quoteName('md5sum') . ' = :md5sum')
            ->set($db->quoteName('object') . ' = :object')
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':md5sum', $curSig)
            ->bind(':object', $object, ParameterType::LARGE_OBJECT)
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($query);
        $db->execute();

        // Mark afterSigning in the profiler.
        static::$profiler ? static::$profiler->mark('afterSigning') : null;

        // Truncate the tokens tables.
        $db->truncateTable('#__finder_tokens');

        // Truncate the tokens aggregate table.
        $db->truncateTable('#__finder_tokens_aggregate');

        // Toggle the token tables back to memory tables.
        $this->toggleTables(true);

        // Mark afterTruncating in the profiler.
        static::$profiler ? static::$profiler->mark('afterTruncating') : null;

        // Trigger a plugin event after indexing
        PluginHelper::importPlugin('finder');
        Factory::getApplication()->triggerEvent('onFinderIndexAfterIndex', [$item, $linkId]);

        return $linkId;
    }

    /**
     * Method to remove a link from the index.
     *
     * @param   integer  $linkId            The id of the link.
     * @param   bool     $removeTaxonomies  Remove empty taxonomies
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    public function remove($linkId, $removeTaxonomies = true)
    {
        $linkId = (int) $linkId;
        $db     = $this->db;

        // Step 1: every term mapped to this link loses one from its link counter.
        $decrementLinks = $db->getQuery(true)
            ->update($db->quoteName('#__finder_terms', 't'))
            ->join('INNER', $db->quoteName('#__finder_links_terms', 'm'), $db->quoteName('m.term_id') . ' = ' . $db->quoteName('t.term_id'))
            ->set($db->quoteName('links') . ' = ' . $db->quoteName('links') . ' - 1')
            ->where($db->quoteName('m.link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($decrementLinks)->execute();

        // Step 2: drop the link's rows from the term mapping table.
        $deleteMappings = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links_terms'))
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($deleteMappings)->execute();

        // Step 3: terms whose counter has dropped to zero (or below) are orphans.
        $deleteOrphanTerms = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('links') . ' <= 0');
        $db->setQuery($deleteOrphanTerms)->execute();

        // Step 4: finally remove the link record itself.
        $deleteLink = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links'))
            ->where($db->quoteName('link_id') . ' = :linkid')
            ->bind(':linkid', $linkId, ParameterType::INTEGER);
        $db->setQuery($deleteLink)->execute();

        // Taxonomy maps always go; orphaned nodes only when the caller asks for it.
        Taxonomy::removeMaps($linkId);

        if ($removeTaxonomies) {
            Taxonomy::removeOrphanNodes();
        }

        // Let finder plugins react to the deletion.
        PluginHelper::importPlugin('finder');
        Factory::getApplication()->triggerEvent('onFinderIndexAfterDelete', [$linkId]);

        return true;
    }

    /**
     * Method to optimize the index. We use this method to remove unused terms
     * and any other optimizations that might be necessary.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    public function optimize()
    {
        // Get the database object.
        $db = $this->db;
        $serverType = strtolower($db->getServerType());
        $query = $db->getQuery(true);

        // Delete all orphaned terms (link counter at or below zero).
        $query->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('links') . ' <= 0');
        $db->setQuery($query);
        $db->execute();

        // Delete all broken links. (Links missing the object)
        // The table name is quoted like in every other query of this method.
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links'))
            ->where($db->quoteName('object') . ' = ' . $db->quote(''));
        $db->setQuery($query);
        $db->execute();

        // Delete all orphaned mappings of terms to links
        $query2 = $db->getQuery(true)
            ->select($db->quoteName('link_id'))
            ->from($db->quoteName('#__finder_links'));
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_links_terms'))
            ->where($db->quoteName('link_id') . ' NOT IN (' . $query2 . ')');
        $db->setQuery($query);
        $db->execute();

        // Delete all terms that are no longer referenced by any mapping
        $query2 = $db->getQuery(true)
            ->select($db->quoteName('term_id'))
            ->from($db->quoteName('#__finder_links_terms'));
        $query = $db->getQuery(true)
            ->delete($db->quoteName('#__finder_terms'))
            ->where($db->quoteName('term_id') . ' NOT IN (' . $query2 . ')');
        $db->setQuery($query);
        $db->execute();

        // Delete all orphaned taxonomies
        Taxonomy::removeOrphanMaps();
        Taxonomy::removeOrphanNodes();

        // Optimize the tables.
        $tables = [
            '#__finder_links',
            '#__finder_links_terms',
            '#__finder_filters',
            '#__finder_terms_common',
            '#__finder_types',
            '#__finder_taxonomy_map',
            '#__finder_taxonomy',
        ];

        // MySQL gets a single OPTIMIZE TABLE; any other server type gets VACUUM followed by REINDEX TABLE.
        $maintenanceCommands = $serverType === 'mysql' ? ['OPTIMIZE TABLE '] : ['VACUUM ', 'REINDEX TABLE '];

        foreach ($tables as $table) {
            foreach ($maintenanceCommands as $command) {
                $db->setQuery($command . $db->quoteName($table));
                $db->execute();
            }
        }

        return true;
    }

    /**
     * Method to get a content item's signature.
     *
     * @param   object  $item  The content item to index.
     *
     * @return  string  The content item's signature.
     *
     * @since   2.5
     */
    protected static function getSignature($item)
    {
        $indexerState = static::getState();
        $options      = $indexerState->options;

        // Configuration values that are hashed together with the item, so a
        // change to any of them yields a different signature.
        $config = [
            $indexerState->weights,
            $options->get('tuplecount', 1),
            $options->get('language_default', ''),
        ];

        $payload = serialize([$item, $config]);

        return md5($payload);
    }

    /**
     * Method to parse input, tokenize it, and then add it to the database.
     *
     * @param   mixed    $input    String or resource to use as input. A resource input is read in 2 KB chunks to conserve
     *                             memory. A string input is passed on for parsing in one piece.
     * @param   integer  $context  The context of the input. See context constants.
     * @param   string   $lang     The language of the input.
     * @param   string   $format   The format of the input.
     * @param   integer  $count    Number of words indexed so far.
     *
     * @return  integer  The cumulative token count: the given $count plus the tokens extracted from the input.
     *
     * @since   2.5
     */
    protected function tokenizeToDb($input, $context, $lang, $format, $count = 0)
    {
        // Nothing to tokenise: hand the running total back unchanged.
        if (empty($input)) {
            return $count;
        }

        // Anything that is not a resource is parsed and tokenised in one pass.
        if (!is_resource($input)) {
            return $this->tokenizeToDbShort($input, $context, $lang, $format, $count);
        }

        // Holds data that has been read but not yet handed to the tokeniser.
        $buffer = '';

        // Batch the process out to avoid memory limits.
        while (!feof($input)) {
            $chunk = fread($input, 2048);

            // A failed read will not advance towards EOF; stop instead of looping forever.
            if ($chunk === false) {
                break;
            }

            $buffer .= $chunk;

            if (!feof($input)) {
                /*
                 * We haven't reached the end of the file, so only process up to the
                 * last space character to make sure we don't cut a term in half.
                 */
                $ls = strrpos($buffer, ' ');

                /*
                 * No usable split point yet (no space at all, or only a leading one):
                 * keep reading. Tokenising the buffer here without emptying it would
                 * feed the same content to the tokeniser again on the next pass.
                 */
                if ($ls === false || $ls === 0) {
                    continue;
                }

                // Process everything before the last space, carry the rest over.
                $string = substr($buffer, 0, $ls);
                $buffer = StringHelper::trim(substr($buffer, $ls));
            } else {
                // We've reached the end of the file, so parse whatever remains.
                $string = $buffer;
                $buffer = '';
            }

            // Parse, tokenise and add tokens to the database.
            $count = $this->tokenizeToDbShort($string, $context, $lang, $format, $count);

            unset($string);
        }

        // Data is only left over here when a read failed before EOF; do not drop it.
        if ($buffer !== '') {
            $count = $this->tokenizeToDbShort($buffer, $context, $lang, $format, $count);
        }

        return $count;
    }

    /**
     * Method to parse input, tokenise it, then add the tokens to the database.
     *
     * @param   string   $input    String to parse, tokenise and add to database.
     * @param   integer  $context  The context of the input. See context constants.
     * @param   string   $lang     The language of the input.
     * @param   string   $format   The format of the input.
     * @param   integer  $count    The number of tokens processed so far.
     *
     * @return  integer  Cumulative number of tokens extracted from the input so far.
     *
     * @since   3.7.0
     */
    private function tokenizeToDbShort($input, $context, $lang, $format, $count)
    {
        // The two filter switches are read from the component parameters once and then reused.
        static $skipCommon, $skipNumeric;

        if ($skipCommon === null) {
            $config      = ComponentHelper::getParams('com_finder');
            $skipCommon  = $config->get('filter_commonwords', false);
            $skipNumeric = $config->get('filter_numerics', false);
        }

        // Run the raw input through the parser for the given format.
        $parsed = Helper::parse($input, $format);

        if (empty($parsed)) {
            return $count;
        }

        // Split the parsed text into token objects.
        $tokens = Helper::tokenize($parsed, $lang);

        if (count($tokens) === 0) {
            return $count;
        }

        $db     = $this->db;
        $insert = clone $this->addTokensToDbQueryTemplate;

        // Insert in batches of at most 128 tokens.
        foreach (array_chunk($tokens, 128) as $batch) {
            $insert->clear('values');

            foreach ($batch as $token) {
                /*
                 * Skip terms longer than the 75 character limit applied to the
                 * term field, and - when the respective filter is switched on -
                 * common words and numeric terms.
                 */
                if (
                    $token->length > 75
                    || ($skipCommon && $token->common)
                    || ($skipNumeric && $token->numeric)
                ) {
                    continue;
                }

                $row = [
                    $db->quote($token->term),
                    $db->quote($token->stem),
                    (int) $token->common,
                    (int) $token->phrase,
                    $db->quote($token->weight),
                    (int) $context,
                    $db->quote($token->language),
                ];

                $insert->values(implode(', ', $row));
                $count++;
            }

            // Past the configured limit (7500 if unset), ask for the token tables to be toggled with `false`.
            if ($count > static::$state->options->get('memory_table_limit', 7500)) {
                $this->toggleTables(false);
            }

            // A batch whose tokens were all filtered out has no values and is not executed.
            if ($insert->values !== null) {
                $db->setQuery($insert)->execute();
            }
        }

        return $count;
    }

    /**
     * Method to switch the token tables from Memory tables to Disk tables
     * when they are close to running out of memory.
     * Since this is not supported/implemented in all DB-drivers, the default is a stub method, which simply returns true.
     *
     * @param   boolean  $memory  Flag to control how they should be toggled.
     *
     * @return  boolean  True on success.
     *
     * @since   2.5
     * @throws  Exception on database error.
     */
    protected function toggleTables($memory)
    {
        // Remembers the last engine switch so the ALTER statements are not repeated needlessly.
        static $state;

        $db = $this->db;

        // Only the MySQL server type is handled; every other driver is a no-op.
        if (strtolower($db->getServerType()) != 'mysql') {
            return true;
        }

        // Pick the engine clause; anything other than a strict boolean changes nothing.
        if ($memory === true) {
            $engineClause = ' ENGINE = MEMORY';
        } elseif ($memory === false) {
            $engineClause = ' ENGINE = INNODB';
        } else {
            return true;
        }

        // The tables were already switched to the requested engine by an earlier call.
        if ($state === $memory) {
            return true;
        }

        // Switch the tokens table first, then the tokens aggregate table.
        foreach (['#__finder_tokens', '#__finder_tokens_aggregate'] as $table) {
            $db->setQuery('ALTER TABLE ' . $db->quoteName($table) . $engineClause);
            $db->execute();
        }

        // Record the new state only after both statements went through.
        $state = $memory;

        return true;
    }
}