[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   (C) 2018 Open Source Matters, Inc. <https://www.joomla.org>
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

namespace Joomla\Component\Finder\Administrator\Indexer;

use Joomla\String\StringHelper;
use Wamania\Snowball\NotFoundException;
use Wamania\Snowball\Stemmer\Stemmer;
use Wamania\Snowball\StemmerFactory;

// phpcs:disable PSR1.Files.SideEffects
\defined('_JEXEC') or die;
// phpcs:enable PSR1.Files.SideEffects

/**
 * Language support class for the Finder indexer package.
 *
 * Provides the generic tokeniser and stemmer. getInstance() prefers a class
 * named after the primary language inside the Language sub-namespace when
 * one exists and falls back to this generic implementation otherwise.
 *
 * @since  4.0.0
 */
class Language
{
    /**
     * Language support instances container, keyed by the language string
     * that was passed to getInstance().
     *
     * @var    Language[]
     * @since  4.0.0
     */
    protected static $instances = [];

    /**
     * Language locale of the class
     *
     * @var    string
     * @since  4.0.0
     */
    public $language;

    /**
     * Spacer to use between terms
     *
     * @var    string
     * @since  4.0.0
     */
    public $spacer = ' ';

    /**
     * The stemmer object. Stays null when no stemmer exists for the language.
     *
     * @var    Stemmer
     * @since  4.0.0
     */
    protected $stemmer = null;

    /**
     * Method to construct the language object.
     *
     * @param   string|null  $locale  The locale to use. When null, the value already
     *                                present in $language is kept; if that is null
     *                                as well, the generic handler '*' is used.
     *
     * @since   4.0.0
     */
    public function __construct($locale = null)
    {
        if ($locale !== null) {
            $this->language = $locale;
        }

        // Use our generic language handler if no language is set
        if ($this->language === null) {
            $this->language = '*';
        }

        try {
            $this->stemmer = StemmerFactory::create($this->language);
        } catch (NotFoundException $e) {
            // We don't have a stemmer for the language; stem() then returns tokens unchanged.
        }
    }

    /**
     * Method to get a language support object.
     *
     * Instances are cached per $language value, so repeated calls with the
     * same argument return the same object.
     *
     * @param   string  $language  The language of the support object.
     *
     * @return  Language  A Language instance.
     *
     * @since   4.0.0
     */
    public static function getInstance($language)
    {
        if (isset(self::$instances[$language])) {
            return self::$instances[$language];
        }

        $locale = '*';

        if ($language !== '*') {
            $locale = Helper::getPrimaryLanguage($language);
            $class  = '\\Joomla\\Component\\Finder\\Administrator\\Indexer\\Language\\' . ucfirst($locale);

            // A dedicated class for the language takes precedence; it is created without arguments.
            if (class_exists($class)) {
                self::$instances[$language] = new $class();

                return self::$instances[$language];
            }
        }

        // No dedicated class (or the generic '*' was requested): use this generic implementation.
        self::$instances[$language] = new self($locale);

        return self::$instances[$language];
    }

    /**
     * Method to tokenise a text string.
     *
     * @param   string  $input  The input to tokenise.
     *
     * @return  array  An array of term strings. An input that normalises to an empty
     *                 string yields an array holding a single empty string.
     *
     * @since   4.0.0
     */
    public function tokenise($input)
    {
        // The typographic single quotes (U+2018, U+2019) and the ASCII apostrophe, decoded from their entities.
        $quotes = html_entity_decode('&#8216;&#8217;&#39;', ENT_QUOTES, 'UTF-8');

        /*
         * Parsing the string input into terms is a multi-step process.
         *
         * Regexes:
         *  1. Remove everything except letters, numbers, quotes, apostrophe, plus, dash, period, and comma.
         *  2. Remove plus, dash, period, and comma characters located before letter characters.
         *  3. Remove plus, dash, period, and comma characters located after other characters.
         *  4. Remove plus, period, and comma characters enclosed in alphabetical characters. Ungreedy.
         *  5. Remove orphaned apostrophe, plus, dash, period, and comma characters.
         *  6. Remove orphaned quote characters.
         *  7. Replace the assorted single quotation marks with the ASCII standard single quotation.
         *  8. Remove multiple space characters and replaces with a single space.
         *
         * Note: inside the character classes "+-." is a range from U+002B to U+002E,
         * which is exactly the four characters plus, comma, dash and period.
         */
        $input = StringHelper::strtolower($input);
        $input = preg_replace('#[^\pL\pM\pN\p{Pi}\p{Pf}\'+-.,]+#mui', ' ', $input);

        // Keep the letters ($2) and drop only the punctuation in front of them.
        $input = preg_replace('#(^|\s)[+-.,]+([\pL\pM]+)#mui', ' $2', $input);
        $input = preg_replace('#([\pL\pM\pN]+)[+-.,]+(\s|$)#mui', '$1 ', $input);
        $input = preg_replace('#([\pL\pM]+)[+.,]+([\pL\pM]+)#muiU', '$1 $2', $input);
        $input = preg_replace('#(^|\s)[\'+-.,]+(\s|$)#mui', ' ', $input);
        $input = preg_replace('#(^|\s)[\p{Pi}\p{Pf}]+(\s|$)#mui', ' ', $input);
        $input = preg_replace('#[' . $quotes . ']+#mui', '\'', $input);
        $input = preg_replace('#\s+#mui', ' ', $input);
        $input = trim($input);

        // Explode the normalized string to get the terms.
        $terms = explode(' ', $input);

        return $terms;
    }

    /**
     * Method to stem a token.
     *
     * @param   string  $token  The token to stem.
     *
     * @return  string  The stemmed token, or the token unchanged when no stemmer is available.
     *
     * @since   4.0.0
     */
    public function stem($token)
    {
        if ($this->stemmer !== null) {
            return $this->stemmer->stem($token);
        }

        return $token;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |