PHPXRef 0.7.1 : Joomla 4.2.2 documentation : /libraries/vendor/voku/portable-ascii/src/voku/helper/ASCII.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  declare(strict_types=1);
   4  
   5  namespace voku\helper;
   6  
   7  /**
   8   * @psalm-immutable
   9   */
  10  final class ASCII
  11  {
    //
    // INFO: https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
    //
    // Every constant in this class is collected via reflection by getAllLanguages(),
    // so adding a constant here also adds an entry to that list.
    //
    // The constant values are used as keys into the per-language replacement maps
    // (see e.g. charsArrayWithOneLanguage()).
    //

    const UZBEK_LANGUAGE_CODE = 'uz';

    const TURKMEN_LANGUAGE_CODE = 'tk';

    const THAI_LANGUAGE_CODE = 'th';

    const PASHTO_LANGUAGE_CODE = 'ps';

    const ORIYA_LANGUAGE_CODE = 'or';

    const MONGOLIAN_LANGUAGE_CODE = 'mn';

    const KOREAN_LANGUAGE_CODE = 'ko';

    const KIRGHIZ_LANGUAGE_CODE = 'ky';

    const ARMENIAN_LANGUAGE_CODE = 'hy';

    const BENGALI_LANGUAGE_CODE = 'bn';

    const BELARUSIAN_LANGUAGE_CODE = 'be';

    const AMHARIC_LANGUAGE_CODE = 'am';

    const JAPANESE_LANGUAGE_CODE = 'ja';

    const CHINESE_LANGUAGE_CODE = 'zh';

    const DUTCH_LANGUAGE_CODE = 'nl';

    const ITALIAN_LANGUAGE_CODE = 'it';

    const MACEDONIAN_LANGUAGE_CODE = 'mk';

    const PORTUGUESE_LANGUAGE_CODE = 'pt';

    // NOTE(review): the "<base>__<variant>" values (here and for Russian / Serbian below)
    // presumably select an alternative transliteration table for the same base
    // language - confirm against the data files.
    const GREEKLISH_LANGUAGE_CODE = 'el__greeklish';

    const GREEK_LANGUAGE_CODE = 'el';

    const HINDI_LANGUAGE_CODE = 'hi';

    const SWEDISH_LANGUAGE_CODE = 'sv';

    const TURKISH_LANGUAGE_CODE = 'tr';

    const BULGARIAN_LANGUAGE_CODE = 'bg';

    const HUNGARIAN_LANGUAGE_CODE = 'hu';

    const MYANMAR_LANGUAGE_CODE = 'my';

    const CROATIAN_LANGUAGE_CODE = 'hr';

    const FINNISH_LANGUAGE_CODE = 'fi';

    const GEORGIAN_LANGUAGE_CODE = 'ka';

    const RUSSIAN_LANGUAGE_CODE = 'ru';

    const RUSSIAN_PASSPORT_2013_LANGUAGE_CODE = 'ru__passport_2013';

    const RUSSIAN_GOST_2000_B_LANGUAGE_CODE = 'ru__gost_2000_b';

    const UKRAINIAN_LANGUAGE_CODE = 'uk';

    const KAZAKH_LANGUAGE_CODE = 'kk';

    const CZECH_LANGUAGE_CODE = 'cs';

    const DANISH_LANGUAGE_CODE = 'da';

    const POLISH_LANGUAGE_CODE = 'pl';

    const ROMANIAN_LANGUAGE_CODE = 'ro';

    const ESPERANTO_LANGUAGE_CODE = 'eo';

    const ESTONIAN_LANGUAGE_CODE = 'et';

    const LATVIAN_LANGUAGE_CODE = 'lv';

    const LITHUANIAN_LANGUAGE_CODE = 'lt';

    const NORWEGIAN_LANGUAGE_CODE = 'no';

    const VIETNAMESE_LANGUAGE_CODE = 'vi';

    const ARABIC_LANGUAGE_CODE = 'ar';

    const PERSIAN_LANGUAGE_CODE = 'fa';

    const SERBIAN_LANGUAGE_CODE = 'sr';

    const SERBIAN_CYRILLIC_LANGUAGE_CODE = 'sr__cyr';

    const SERBIAN_LATIN_LANGUAGE_CODE = 'sr__lat';

    const AZERBAIJANI_LANGUAGE_CODE = 'az';

    const SLOVAK_LANGUAGE_CODE = 'sk';

    const FRENCH_LANGUAGE_CODE = 'fr';

    const FRENCH_AUSTRIAN_LANGUAGE_CODE = 'fr_at';

    const FRENCH_SWITZERLAND_LANGUAGE_CODE = 'fr_ch';

    const GERMAN_LANGUAGE_CODE = 'de';

    const GERMAN_AUSTRIAN_LANGUAGE_CODE = 'de_at';

    const GERMAN_SWITZERLAND_LANGUAGE_CODE = 'de_ch';

    const ENGLISH_LANGUAGE_CODE = 'en';

    // The EXTRA_* constants are not languages: they are keys of additional
    // replacement tables. getAllLanguages() keeps their full (lower-cased)
    // constant name as the array key instead of stripping "_LANGUAGE_CODE".
    const EXTRA_LATIN_CHARS_LANGUAGE_CODE = 'latin';

    // Key (a single space) of the whitespace table read by normalize_whitespace().
    const EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE = ' ';

    // Key of the MS Word character table read by normalize_msword().
    const EXTRA_MSWORD_CHARS_LANGUAGE_CODE = 'msword';
 137  
    /**
     * Lazily filled cache of the per-language replacement maps
     * (language code => [character => ASCII replacement]).
     *
     * NULL until prepareAsciiMaps() has been called; readers in this class
     * therefore fall back to "?? []".
     *
     * @var array<string, array<string, string>>|null
     */
    private static $ASCII_MAPS;

    /**
     * Lazily filled cache used when "$replace_extra_symbols" is requested.
     *
     * NULL until prepareAsciiAndExtrasMaps() has been called.
     *
     * @var array<string, array<string, string>>|null
     */
    private static $ASCII_MAPS_AND_EXTRAS;

    /**
     * Extra-symbol tables (language code => [symbol => replacement]);
     * to_ascii() only uses the symbol keys to extend its match pattern.
     *
     * @var array<string, array<string, string>>|null
     */
    private static $ASCII_EXTRAS;

    /**
     * NOTE(review): not referenced in the visible part of this class -
     * presumably a character => code point lookup; confirm against its users.
     *
     * @var array<string, int>|null
     */
    private static $ORD;

    /**
     * Per-language maximum key length, loaded on demand in to_ascii() via
     * getData('ascii_language_max_key').
     *
     * @var array<string, int>|null
     */
    private static $LANGUAGE_MAX_KEY;

    /**
     * Character class matching every byte that is NOT one of:
     * 0x09, 0x10, 0x13, 0x0A, 0x0D or the printable range 0x20-0x7E.
     *
     * It is embedded between "/" delimiters by is_ascii() and to_ascii().
     *
     * NOTE(review): "\x10" and "\x13" are hexadecimal (DLE and DC3); they look
     * like the decimal values 10 (LF) and 13 (CR) written with a hex escape,
     * and "\x0A" / "\x0D" are already listed. Confirm before changing, because
     * is_ascii() results for those two bytes depend on it.
     *
     * url: https://en.wikipedia.org/wiki/Wikipedia:ASCII#ASCII_printable_characters
     *
     * @var string
     */
    private static $REGEX_ASCII = "[^\x09\x10\x13\x0A\x0D\x20-\x7E]";

    /**
     * bidirectional text chars
     *
     * Keys are the decimal Unicode code points (e.g. 8234 === 0x202A), values
     * are the UTF-8 byte sequences of the same characters. normalize_whitespace()
     * passes this array to str_replace() as the search list, so only the values
     * are used there.
     *
     * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
     *
     * @var array<int, string>
     */
    private static $BIDI_UNI_CODE_CONTROLS_TABLE = [
        // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
        8234 => "\xE2\x80\xAA",
        // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
        8235 => "\xE2\x80\xAB",
        // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
        8236 => "\xE2\x80\xAC",
        // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
        8237 => "\xE2\x80\xAD",
        // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
        8238 => "\xE2\x80\xAE",
        // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
        8294 => "\xE2\x81\xA6",
        // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
        8295 => "\xE2\x81\xA7",
        // FIRST STRONG ISOLATE // (use -> dir = "auto")
        8296 => "\xE2\x81\xA8",
        // POP DIRECTIONAL ISOLATE
        8297 => "\xE2\x81\xA9",
    ];
 197  
 198      /**
 199       * Get all languages from the constants "ASCII::.*LANGUAGE_CODE".
 200       *
 201       * @return string[]
 202       *
 203       * @psalm-return array<string, string>
 204       */
 205      public static function getAllLanguages(): array
 206      {
 207          // init
 208          static $LANGUAGES = [];
 209  
 210          if ($LANGUAGES !== []) {
 211              return $LANGUAGES;
 212          }
 213  
 214          foreach ((new \ReflectionClass(__CLASS__))->getConstants() as $constant => $lang) {
 215              if (\strpos($constant, 'EXTRA') !== false) {
 216                  $LANGUAGES[\strtolower($constant)] = $lang;
 217              } else {
 218                  $LANGUAGES[\strtolower(\str_replace('_LANGUAGE_CODE', '', $constant))] = $lang;
 219              }
 220          }
 221  
 222          return $LANGUAGES;
 223      }
 224  
 225      /**
 226       * Returns an replacement array for ASCII methods.
 227       *
 228       * EXAMPLE: <code>
 229       * $array = ASCII::charsArray();
 230       * var_dump($array['ru']['б']); // 'b'
 231       * </code>
 232       *
 233       * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
 234       *
 235       * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
 236       *
 237       * @psalm-pure
 238       *
 239       * @return array
 240       *
 241       * @psalm-return array<string, array<string , string>>
 242       */
 243      public static function charsArray(bool $replace_extra_symbols = false): array
 244      {
 245          if ($replace_extra_symbols) {
 246              self::prepareAsciiAndExtrasMaps();
 247  
 248              return self::$ASCII_MAPS_AND_EXTRAS ?? [];
 249          }
 250  
 251          self::prepareAsciiMaps();
 252  
 253          return self::$ASCII_MAPS ?? [];
 254      }
 255  
 256      /**
 257       * Returns an replacement array for ASCII methods with a mix of multiple languages.
 258       *
 259       * EXAMPLE: <code>
 260       * $array = ASCII::charsArrayWithMultiLanguageValues();
 261       * var_dump($array['b']); // ['β', 'б', 'ဗ', 'ბ', 'ب']
 262       * </code>
 263       *
 264       * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
 265       *
 266       * @psalm-pure
 267       *
 268       * @return array
 269       *               <p>An array of replacements.</p>
 270       *
 271       * @psalm-return array<string, array<int, string>>
 272       */
 273      public static function charsArrayWithMultiLanguageValues(bool $replace_extra_symbols = false): array
 274      {
 275          /**
 276           * @var array<string, array>
 277           */
 278          static $CHARS_ARRAY = [];
 279          $cacheKey = '' . $replace_extra_symbols;
 280  
 281          if (isset($CHARS_ARRAY[$cacheKey])) {
 282              return $CHARS_ARRAY[$cacheKey];
 283          }
 284  
 285          // init
 286          $return = [];
 287          $language_all_chars = self::charsArrayWithSingleLanguageValues(
 288              $replace_extra_symbols,
 289              false
 290          );
 291  
 292          /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
 293          /** @var array<string, string> $language_all_chars */
 294          $language_all_chars = $language_all_chars;
 295  
 296          /** @noinspection AlterInForeachInspection */
 297          foreach ($language_all_chars as $key => &$value) {
 298              $return[$value][] = $key;
 299          }
 300  
 301          $CHARS_ARRAY[$cacheKey] = $return;
 302  
 303          /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */
 304          /** @var array<string, array<int, string>> $return */
 305          $return = $return;
 306  
 307          return $return;
 308      }
 309  
 310      /**
 311       * Returns an replacement array for ASCII methods with one language.
 312       *
 313       * For example, German will map 'ä' to 'ae', while other languages
 314       * will simply return e.g. 'a'.
 315       *
 316       * EXAMPLE: <code>
 317       * $array = ASCII::charsArrayWithOneLanguage('ru');
 318       * $tmpKey = \array_search('yo', $array['replace']);
 319       * echo $array['orig'][$tmpKey]; // 'ё'
 320       * </code>
 321       *
 322       * @psalm-suppress InvalidNullableReturnType - we use the prepare* methods here, so we don't get NULL here
 323       *
 324       * @param string $language              [optional] <p>Language of the source string e.g.: en, de_at, or de-ch.
 325       *                                      (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
 326       * @param bool   $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
 327       * @param bool   $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
 328       *                                      array</p>
 329       *
 330       * @psalm-pure
 331       *
 332       * @return array
 333       *               <p>An array of replacements.</p>
 334       *
 335       * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
 336       */
 337      public static function charsArrayWithOneLanguage(
 338          string $language = self::ENGLISH_LANGUAGE_CODE,
 339          bool $replace_extra_symbols = false,
 340          bool $asOrigReplaceArray = true
 341      ): array {
 342          $language = self::get_language($language);
 343  
 344          // init
 345          /**
 346           * @var array<string, array>
 347           */
 348          static $CHARS_ARRAY = [];
 349          $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
 350  
 351          // check static cache
 352          if (isset($CHARS_ARRAY[$cacheKey][$language])) {
 353              return $CHARS_ARRAY[$cacheKey][$language];
 354          }
 355  
 356          if ($replace_extra_symbols) {
 357              self::prepareAsciiAndExtrasMaps();
 358  
 359              /** @noinspection DuplicatedCode */
 360              if (isset(self::$ASCII_MAPS_AND_EXTRAS[$language])) {
 361                  $tmpArray = self::$ASCII_MAPS_AND_EXTRAS[$language];
 362  
 363                  if ($asOrigReplaceArray) {
 364                      $CHARS_ARRAY[$cacheKey][$language] = [
 365                          'orig'    => \array_keys($tmpArray),
 366                          'replace' => \array_values($tmpArray),
 367                      ];
 368                  } else {
 369                      $CHARS_ARRAY[$cacheKey][$language] = $tmpArray;
 370                  }
 371              } else {
 372                  /** @noinspection NestedPositiveIfStatementsInspection */
 373                  if ($asOrigReplaceArray) {
 374                      $CHARS_ARRAY[$cacheKey][$language] = [
 375                          'orig'    => [],
 376                          'replace' => [],
 377                      ];
 378                  } else {
 379                      $CHARS_ARRAY[$cacheKey][$language] = [];
 380                  }
 381              }
 382          } else {
 383              self::prepareAsciiMaps();
 384  
 385              /** @noinspection DuplicatedCode */
 386              if (isset(self::$ASCII_MAPS[$language])) {
 387                  $tmpArray = self::$ASCII_MAPS[$language];
 388  
 389                  if ($asOrigReplaceArray) {
 390                      $CHARS_ARRAY[$cacheKey][$language] = [
 391                          'orig'    => \array_keys($tmpArray),
 392                          'replace' => \array_values($tmpArray),
 393                      ];
 394                  } else {
 395                      $CHARS_ARRAY[$cacheKey][$language] = $tmpArray;
 396                  }
 397              } else {
 398                  /** @noinspection NestedPositiveIfStatementsInspection */
 399                  if ($asOrigReplaceArray) {
 400                      $CHARS_ARRAY[$cacheKey][$language] = [
 401                          'orig'    => [],
 402                          'replace' => [],
 403                      ];
 404                  } else {
 405                      $CHARS_ARRAY[$cacheKey][$language] = [];
 406                  }
 407              }
 408          }
 409  
 410          return $CHARS_ARRAY[$cacheKey][$language] ?? ['orig' => [], 'replace' => []];
 411      }
 412  
 413      /**
 414       * Returns an replacement array for ASCII methods with multiple languages.
 415       *
 416       * EXAMPLE: <code>
 417       * $array = ASCII::charsArrayWithSingleLanguageValues();
 418       * $tmpKey = \array_search('hnaik', $array['replace']);
 419       * echo $array['orig'][$tmpKey]; // '၌'
 420       * </code>
 421       *
 422       * @param bool $replace_extra_symbols [optional] <p>Add some more replacements e.g. "£" with " pound ".</p>
 423       * @param bool $asOrigReplaceArray    [optional] <p>TRUE === return {orig: string[], replace: string[]}
 424       *                                    array</p>
 425       *
 426       * @psalm-pure
 427       *
 428       * @return array
 429       *               <p>An array of replacements.</p>
 430       *
 431       * @psalm-return array{orig: string[], replace: string[]}|array<string, string>
 432       */
 433      public static function charsArrayWithSingleLanguageValues(
 434          bool $replace_extra_symbols = false,
 435          bool $asOrigReplaceArray = true
 436      ): array {
 437          // init
 438          /**
 439           * @var array<string,array>
 440           */
 441          static $CHARS_ARRAY = [];
 442          $cacheKey = '' . $replace_extra_symbols . '-' . $asOrigReplaceArray;
 443  
 444          if (isset($CHARS_ARRAY[$cacheKey])) {
 445              return $CHARS_ARRAY[$cacheKey];
 446          }
 447  
 448          if ($replace_extra_symbols) {
 449              self::prepareAsciiAndExtrasMaps();
 450  
 451              /** @noinspection AlterInForeachInspection */
 452              /** @psalm-suppress PossiblyNullIterator - we use the prepare* methods here, so we don't get NULL here */
 453              foreach (self::$ASCII_MAPS_AND_EXTRAS ?? [] as &$map) {
 454                  $CHARS_ARRAY[$cacheKey][] = $map;
 455              }
 456          } else {
 457              self::prepareAsciiMaps();
 458  
 459              /** @noinspection AlterInForeachInspection */
 460              /** @psalm-suppress PossiblyNullIterator - we use the prepare* methods here, so we don't get NULL here */
 461              foreach (self::$ASCII_MAPS ?? [] as &$map) {
 462                  $CHARS_ARRAY[$cacheKey][] = $map;
 463              }
 464          }
 465  
 466          $CHARS_ARRAY[$cacheKey] = \array_merge([], ...$CHARS_ARRAY[$cacheKey]);
 467  
 468          if ($asOrigReplaceArray) {
 469              $CHARS_ARRAY[$cacheKey] = [
 470                  'orig'    => \array_keys($CHARS_ARRAY[$cacheKey]),
 471                  'replace' => \array_values($CHARS_ARRAY[$cacheKey]),
 472              ];
 473          }
 474  
 475          return $CHARS_ARRAY[$cacheKey];
 476      }
 477  
 478      /**
 479       * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
 480       *
 481       * @param string $str                         <p>The string to be sanitized.</p>
 482       * @param bool   $normalize_whitespace        [optional] <p>Set to true, if you need to normalize the
 483       *                                            whitespace.</p>
 484       * @param bool   $normalize_msword            [optional] <p>Set to true, if you need to normalize MS Word chars
 485       *                                            e.g.: "…"
 486       *                                            => "..."</p>
 487       * @param bool   $keep_non_breaking_space     [optional] <p>Set to true, to keep non-breaking-spaces, in
 488       *                                            combination with
 489       *                                            $normalize_whitespace</p>
 490       * @param bool   $remove_invisible_characters [optional] <p>Set to false, if you not want to remove invisible
 491       *                                            characters e.g.: "\0"</p>
 492       *
 493       * @psalm-pure
 494       *
 495       * @return string
 496       *                <p>A clean UTF-8 string.</p>
 497       */
 498      public static function clean(
 499          string $str,
 500          bool $normalize_whitespace = true,
 501          bool $keep_non_breaking_space = false,
 502          bool $normalize_msword = true,
 503          bool $remove_invisible_characters = true
 504      ): string {
 505          // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
 506          // caused connection reset problem on larger strings
 507  
 508          $regex = '/
 509            (
 510              (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
 511              |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
 512              |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
 513              |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
 514              ){1,100}                      # ...one or more times
 515            )
 516          | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
 517          | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
 518          /x';
 519          $str = (string) \preg_replace($regex, '$1', $str);
 520  
 521          if ($normalize_whitespace) {
 522              $str = self::normalize_whitespace($str, $keep_non_breaking_space);
 523          }
 524  
 525          if ($normalize_msword) {
 526              $str = self::normalize_msword($str);
 527          }
 528  
 529          if ($remove_invisible_characters) {
 530              $str = self::remove_invisible_characters($str);
 531          }
 532  
 533          return $str;
 534      }
 535  
 536      /**
 537       * Checks if a string is 7 bit ASCII.
 538       *
 539       * EXAMPLE: <code>
 540       * ASCII::is_ascii('白'); // false
 541       * </code>
 542       *
 543       * @param string $str <p>The string to check.</p>
 544       *
 545       * @psalm-pure
 546       *
 547       * @return bool
 548       *              <p>
 549       *              <strong>true</strong> if it is ASCII<br>
 550       *              <strong>false</strong> otherwise
 551       *              </p>
 552       */
 553      public static function is_ascii(string $str): bool
 554      {
 555          if ($str === '') {
 556              return true;
 557          }
 558  
 559          return !\preg_match('/' . self::$REGEX_ASCII . '/', $str);
 560      }
 561  
 562      /**
 563       * Returns a string with smart quotes, ellipsis characters, and dashes from
 564       * Windows-1252 (commonly used in Word documents) replaced by their ASCII
 565       * equivalents.
 566       *
 567       * EXAMPLE: <code>
 568       * ASCII::normalize_msword('„Abcdef…”'); // '"Abcdef..."'
 569       * </code>
 570       *
 571       * @param string $str <p>The string to be normalized.</p>
 572       *
 573       * @psalm-pure
 574       *
 575       * @return string
 576       *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
 577       */
 578      public static function normalize_msword(string $str): string
 579      {
 580          if ($str === '') {
 581              return '';
 582          }
 583  
 584          /**
 585           * @var array{orig: string[], replace: string[]}
 586           */
 587          static $MSWORD_CACHE = ['orig' => [], 'replace' => []];
 588  
 589          if (empty($MSWORD_CACHE['orig'])) {
 590              self::prepareAsciiMaps();
 591  
 592              /**
 593               * @psalm-suppress PossiblyNullArrayAccess - we use the prepare* methods here, so we don't get NULL here
 594               *
 595               * @var array<string, string>
 596               */
 597              $map = self::$ASCII_MAPS[self::EXTRA_MSWORD_CHARS_LANGUAGE_CODE] ?? [];
 598  
 599              $MSWORD_CACHE = [
 600                  'orig'    => \array_keys($map),
 601                  'replace' => \array_values($map),
 602              ];
 603          }
 604  
 605          return \str_replace($MSWORD_CACHE['orig'], $MSWORD_CACHE['replace'], $str);
 606      }
 607  
 608      /**
 609       * Normalize the whitespace.
 610       *
 611       * EXAMPLE: <code>
 612       * ASCII::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"
 613       * </code>
 614       *
 615       * @param string $str                          <p>The string to be normalized.</p>
 616       * @param bool   $keepNonBreakingSpace         [optional] <p>Set to true, to keep non-breaking-spaces.</p>
 617       * @param bool   $keepBidiUnicodeControls      [optional] <p>Set to true, to keep non-printable (for the web)
 618       *                                             bidirectional text chars.</p>
 619       * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
 620       *
 621       * @psalm-pure
 622       *
 623       * @return string
 624       *                <p>A string with normalized whitespace.</p>
 625       */
 626      public static function normalize_whitespace(
 627          string $str,
 628          bool $keepNonBreakingSpace = false,
 629          bool $keepBidiUnicodeControls = false,
 630          bool $normalize_control_characters = false
 631      ): string {
 632          if ($str === '') {
 633              return '';
 634          }
 635  
 636          /**
 637           * @var array<int,array<string,string>>
 638           */
 639          static $WHITESPACE_CACHE = [];
 640          $cacheKey = (int) $keepNonBreakingSpace;
 641  
 642          if ($normalize_control_characters) {
 643              $str = \str_replace(
 644                  [
 645                      "\x0d\x0c",     // 'END OF LINE'
 646                      "\xe2\x80\xa8", // 'LINE SEPARATOR'
 647                      "\xe2\x80\xa9", // 'PARAGRAPH SEPARATOR'
 648                      "\x0c",         // 'FORM FEED'
 649                      "\x0d",         // 'CARRIAGE RETURN'
 650                      "\x0b",         // 'VERTICAL TAB'
 651                  ],
 652                  [
 653                      "\n",
 654                      "\n",
 655                      "\n",
 656                      "\n",
 657                      "\n",
 658                      "\t",
 659                  ],
 660                  $str
 661              );
 662          }
 663  
 664          if (!isset($WHITESPACE_CACHE[$cacheKey])) {
 665              self::prepareAsciiMaps();
 666  
 667              $WHITESPACE_CACHE[$cacheKey] = self::$ASCII_MAPS[self::EXTRA_WHITESPACE_CHARS_LANGUAGE_CODE] ?? [];
 668  
 669              if ($keepNonBreakingSpace) {
 670                  unset($WHITESPACE_CACHE[$cacheKey]["\xc2\xa0"]);
 671              }
 672  
 673              $WHITESPACE_CACHE[$cacheKey] = \array_keys($WHITESPACE_CACHE[$cacheKey]);
 674          }
 675  
 676          if (!$keepBidiUnicodeControls) {
 677              /**
 678               * @var array<int,string>|null
 679               */
 680              static $BIDI_UNICODE_CONTROLS_CACHE = null;
 681  
 682              if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
 683                  $BIDI_UNICODE_CONTROLS_CACHE = self::$BIDI_UNI_CODE_CONTROLS_TABLE;
 684              }
 685  
 686              $str = \str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
 687          }
 688  
 689          return \str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
 690      }
 691  
 692      /**
 693       * Remove invisible characters from a string.
 694       *
 695       * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
 696       *
 697       * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
 698       *
 699       * @param string $str
 700       * @param bool   $url_encoded
 701       * @param string $replacement
 702       * @param bool   $keep_basic_control_characters
 703       *
 704       * @psalm-pure
 705       *
 706       * @return string
 707       */
 708      public static function remove_invisible_characters(
 709          string $str,
 710          bool $url_encoded = false,
 711          string $replacement = '',
 712          bool $keep_basic_control_characters = true
 713      ): string {
 714          // init
 715          $non_displayables = [];
 716  
 717          // every control character except:
 718          // - newline (dec 10),
 719          // - carriage return (dec 13),
 720          // - horizontal tab (dec 09)
 721          if ($url_encoded) {
 722              $non_displayables[] = '/%0[0-8bcefBCEF]/'; // url encoded 00-08, 11, 12, 14, 15
 723              $non_displayables[] = '/%1[0-9a-fA-F]/'; // url encoded 16-31
 724          }
 725  
 726          if ($keep_basic_control_characters) {
 727              $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127
 728          } else {
 729              $str = self::normalize_whitespace($str, false, false, true);
 730              $non_displayables[] = '/[^\P{C}\s]/u';
 731          }
 732  
 733          do {
 734              $str = (string) \preg_replace($non_displayables, $replacement, $str, -1, $count);
 735          } while ($count !== 0);
 736  
 737          return $str;
 738      }
 739  
 740      /**
 741       * Returns an ASCII version of the string. A set of non-ASCII characters are
 742       * replaced with their closest ASCII counterparts, and the rest are removed
 743       * by default. The language or locale of the source string can be supplied
 744       * for language-specific transliteration in any of the following formats:
 745       * en, en_GB, or en-GB. For example, passing "de" results in "äöü" mapping
 746       * to "aeoeue" rather than "aou" as in other languages.
 747       *
 748       * EXAMPLE: <code>
 749       * ASCII::to_ascii('�Düsseldorf�', 'en'); // Dusseldorf
 750       * </code>
 751       *
 752       * @param string    $str                       <p>The input string.</p>
 753       * @param string    $language                  [optional] <p>Language of the source string.
 754       *                                             (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
 755       * @param bool      $remove_unsupported_chars  [optional] <p>Whether or not to remove the
 756       *                                             unsupported characters.</p>
 757       * @param bool      $replace_extra_symbols     [optional]  <p>Add some more replacements e.g. "£" with " pound
 758       *                                             ".</p>
 759       * @param bool      $use_transliterate         [optional]  <p>Use ASCII::to_transliterate() for unknown chars.</p>
 760       * @param bool|null $replace_single_chars_only [optional]  <p>Single char replacement is better for the
 761       *                                             performance, but some languages need to replace more then one char
 762       *                                             at the same time. | NULL === auto-setting, depended on the
 763       *                                             language</p>
 764       *
 765       * @psalm-pure
 766       *
 767       * @return string
 768       *                <p>A string that contains only ASCII characters.</p>
 769       */
 770      public static function to_ascii(
 771          string $str,
 772          string $language = self::ENGLISH_LANGUAGE_CODE,
 773          bool $remove_unsupported_chars = true,
 774          bool $replace_extra_symbols = false,
 775          bool $use_transliterate = false,
 776          bool $replace_single_chars_only = null
 777      ): string {
 778          if ($str === '') {
 779              return '';
 780          }
 781  
 782          $language = self::get_language($language);
 783  
 784          static $EXTRA_SYMBOLS_CACHE = null;
 785  
 786          /**
 787           * @var array<string,array<string,string>>
 788           */
 789          static $REPLACE_HELPER_CACHE = [];
 790          $cacheKey = $language . '-' . $replace_extra_symbols;
 791  
 792          if (!isset($REPLACE_HELPER_CACHE[$cacheKey])) {
 793              $langAll = self::charsArrayWithSingleLanguageValues($replace_extra_symbols, false);
 794  
 795              $langSpecific = self::charsArrayWithOneLanguage($language, $replace_extra_symbols, false);
 796  
 797              if ($langSpecific === []) {
 798                  $REPLACE_HELPER_CACHE[$cacheKey] = $langAll;
 799              } else {
 800                  $REPLACE_HELPER_CACHE[$cacheKey] = \array_merge([], $langAll, $langSpecific);
 801              }
 802          }
 803  
 804          if (
 805              $replace_extra_symbols
 806              &&
 807              $EXTRA_SYMBOLS_CACHE === null
 808          ) {
 809              $EXTRA_SYMBOLS_CACHE = [];
 810              foreach (self::$ASCII_EXTRAS ?? [] as $extrasLanguageTmp => $extrasDataTmp) {
 811                  foreach ($extrasDataTmp as $extrasDataKeyTmp => $extrasDataValueTmp) {
 812                      $EXTRA_SYMBOLS_CACHE[$extrasDataKeyTmp] = $extrasDataKeyTmp;
 813                  }
 814              }
 815              $EXTRA_SYMBOLS_CACHE = \implode('', $EXTRA_SYMBOLS_CACHE);
 816          }
 817  
 818          $charDone = [];
 819          if (\preg_match_all('/' . self::$REGEX_ASCII . ($replace_extra_symbols ? '|[' . $EXTRA_SYMBOLS_CACHE . ']' : '') . '/u', $str, $matches)) {
 820              if (!$replace_single_chars_only) {
 821                  if (self::$LANGUAGE_MAX_KEY === null) {
 822                      self::$LANGUAGE_MAX_KEY = self::getData('ascii_language_max_key');
 823                  }
 824  
 825                  $maxKeyLength = self::$LANGUAGE_MAX_KEY[$language] ?? 0;
 826  
 827                  if ($maxKeyLength >= 5) {
 828                      foreach ($matches[0] as $keyTmp => $char) {
 829                          if (isset($matches[0][$keyTmp + 4])) {
 830                              $fiveChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2] . $matches[0][$keyTmp + 3] . $matches[0][$keyTmp + 4];
 831                          } else {
 832                              $fiveChars = null;
 833                          }
 834                          if (
 835                              $fiveChars
 836                              &&
 837                              !isset($charDone[$fiveChars])
 838                              &&
 839                              isset($REPLACE_HELPER_CACHE[$cacheKey][$fiveChars])
 840                              &&
 841                              \strpos($str, $fiveChars) !== false
 842                          ) {
 843                              // DEBUG
 844                              //\var_dump($str, $fiveChars, $REPLACE_HELPER_CACHE[$cacheKey][$fiveChars]);
 845  
 846                              $charDone[$fiveChars] = true;
 847                              $str = \str_replace($fiveChars, $REPLACE_HELPER_CACHE[$cacheKey][$fiveChars], $str);
 848  
 849                              // DEBUG
 850                              //\var_dump($str, "\n");
 851                          }
 852                      }
 853                  }
 854  
 855                  if ($maxKeyLength >= 4) {
 856                      foreach ($matches[0] as $keyTmp => $char) {
 857                          if (isset($matches[0][$keyTmp + 3])) {
 858                              $fourChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2] . $matches[0][$keyTmp + 3];
 859                          } else {
 860                              $fourChars = null;
 861                          }
 862                          if (
 863                              $fourChars
 864                              &&
 865                              !isset($charDone[$fourChars])
 866                              &&
 867                              isset($REPLACE_HELPER_CACHE[$cacheKey][$fourChars])
 868                              &&
 869                              \strpos($str, $fourChars) !== false
 870                          ) {
 871                              // DEBUG
 872                              //\var_dump($str, $fourChars, $REPLACE_HELPER_CACHE[$cacheKey][$fourChars]);
 873  
 874                              $charDone[$fourChars] = true;
 875                              $str = \str_replace($fourChars, $REPLACE_HELPER_CACHE[$cacheKey][$fourChars], $str);
 876  
 877                              // DEBUG
 878                              //\var_dump($str, "\n");
 879                          }
 880                      }
 881                  }
 882  
 883                  foreach ($matches[0] as $keyTmp => $char) {
 884                      if (isset($matches[0][$keyTmp + 2])) {
 885                          $threeChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1] . $matches[0][$keyTmp + 2];
 886                      } else {
 887                          $threeChars = null;
 888                      }
 889                      if (
 890                          $threeChars
 891                          &&
 892                          !isset($charDone[$threeChars])
 893                          &&
 894                          isset($REPLACE_HELPER_CACHE[$cacheKey][$threeChars])
 895                          &&
 896                          \strpos($str, $threeChars) !== false
 897                      ) {
 898                          // DEBUG
 899                          //\var_dump($str, $threeChars, $REPLACE_HELPER_CACHE[$cacheKey][$threeChars]);
 900  
 901                          $charDone[$threeChars] = true;
 902                          $str = \str_replace($threeChars, $REPLACE_HELPER_CACHE[$cacheKey][$threeChars], $str);
 903  
 904                          // DEBUG
 905                          //\var_dump($str, "\n");
 906                      }
 907                  }
 908  
 909                  foreach ($matches[0] as $keyTmp => $char) {
 910                      if (isset($matches[0][$keyTmp + 1])) {
 911                          $twoChars = $matches[0][$keyTmp + 0] . $matches[0][$keyTmp + 1];
 912                      } else {
 913                          $twoChars = null;
 914                      }
 915                      if (
 916                          $twoChars
 917                          &&
 918                          !isset($charDone[$twoChars])
 919                          &&
 920                          isset($REPLACE_HELPER_CACHE[$cacheKey][$twoChars])
 921                          &&
 922                          \strpos($str, $twoChars) !== false
 923                      ) {
 924                          // DEBUG
 925                          //\var_dump($str, $twoChars, $REPLACE_HELPER_CACHE[$cacheKey][$twoChars]);
 926  
 927                          $charDone[$twoChars] = true;
 928                          $str = \str_replace($twoChars, $REPLACE_HELPER_CACHE[$cacheKey][$twoChars], $str);
 929  
 930                          // DEBUG
 931                          //\var_dump($str, "\n");
 932                      }
 933                  }
 934              }
 935  
 936              foreach ($matches[0] as $keyTmp => $char) {
 937                  if (
 938                      !isset($charDone[$char])
 939                      &&
 940                      isset($REPLACE_HELPER_CACHE[$cacheKey][$char])
 941                      &&
 942                      \strpos($str, $char) !== false
 943                  ) {
 944                      // DEBUG
 945                      //\var_dump($str, $char, $REPLACE_HELPER_CACHE[$cacheKey][$char]);
 946  
 947                      $charDone[$char] = true;
 948                      $str = \str_replace($char, $REPLACE_HELPER_CACHE[$cacheKey][$char], $str);
 949  
 950                      // DEBUG
 951                      //\var_dump($str, "\n");
 952                  }
 953              }
 954          }
 955  
 956          /** @psalm-suppress PossiblyNullOperand - we use the prepare* methods here, so we don't get NULL here */
 957          if (!isset(self::$ASCII_MAPS[$language])) {
 958              $use_transliterate = true;
 959          }
 960  
 961          if ($use_transliterate) {
 962              /** @noinspection ArgumentEqualsDefaultValueInspection */
 963              $str = self::to_transliterate($str, null, false);
 964          }
 965  
 966          if ($remove_unsupported_chars) {
 967              $str = (string) \str_replace(["\n\r", "\n", "\r", "\t"], ' ', $str);
 968              $str = (string) \preg_replace('/' . self::$REGEX_ASCII . '/', '', $str);
 969          }
 970  
 971          return $str;
 972      }
 973  
 974      /**
 975       * Convert given string to safe filename (and keep string case).
 976       *
 977       * EXAMPLE: <code>
 978       * ASCII::to_filename('שדגשדג.png', true)); // 'shdgshdg.png'
 979       * </code>
 980       *
 981       * @param string $str
 982       * @param bool   $use_transliterate <p>ASCII::to_transliterate() is used by default - unsafe characters are
 983       *                                  simply replaced with hyphen otherwise.</p>
 984       * @param string $fallback_char
 985       *
 986       * @psalm-pure
 987       *
 988       * @return string
 989       *                <p>A string that contains only safe characters for a filename.</p>
 990       */
 991      public static function to_filename(
 992          string $str,
 993          bool $use_transliterate = true,
 994          string $fallback_char = '-'
 995      ): string {
 996          if ($use_transliterate) {
 997              $str = self::to_transliterate($str, $fallback_char);
 998          }
 999  
1000          $fallback_char_escaped = \preg_quote($fallback_char, '/');
1001  
1002          $str = (string) \preg_replace(
1003              [
1004                  '/[^' . $fallback_char_escaped . '.\\-a-zA-Z0-9\\s]/', // 1) remove un-needed chars
1005                  '/[\\s]+/u',                                           // 2) convert spaces to $fallback_char
1006                  '/[' . $fallback_char_escaped . ']+/u',                // 3) remove double $fallback_char's
1007              ],
1008              [
1009                  '',
1010                  $fallback_char,
1011                  $fallback_char,
1012              ],
1013              $str
1014          );
1015  
1016          return \trim($str, $fallback_char);
1017      }
1018  
1019      /**
1020       * Converts the string into an URL slug. This includes replacing non-ASCII
1021       * characters with their closest ASCII equivalents, removing remaining
1022       * non-ASCII and non-alphanumeric characters, and replacing whitespace with
1023       * $separator. The separator defaults to a single dash, and the string
1024       * is also converted to lowercase. The language of the source string can
1025       * also be supplied for language-specific transliteration.
1026       *
1027       * @param string                $str
1028       * @param string                $separator             [optional] <p>The string used to replace whitespace.</p>
1029       * @param string                $language              [optional] <p>Language of the source string.
1030       *                                                     (default is 'en') | ASCII::*_LANGUAGE_CODE</p>
1031       * @param array<string, string> $replacements          [optional] <p>A map of replaceable strings.</p>
1032       * @param bool                  $replace_extra_symbols [optional]  <p>Add some more replacements e.g. "£" with "
1033       *                                                     pound ".</p>
1034       * @param bool                  $use_str_to_lower      [optional] <p>Use "string to lower" for the input.</p>
1035       * @param bool                  $use_transliterate     [optional]  <p>Use ASCII::to_transliterate() for unknown
1036       *                                                     chars.</p>
1037       * @psalm-pure
1038       *
1039       * @return string
1040       *                <p>A string that has been converted to an URL slug.</p>
1041       */
1042      public static function to_slugify(
1043          string $str,
1044          string $separator = '-',
1045          string $language = self::ENGLISH_LANGUAGE_CODE,
1046          array $replacements = [],
1047          bool $replace_extra_symbols = false,
1048          bool $use_str_to_lower = true,
1049          bool $use_transliterate = false
1050      ): string {
1051          if ($str === '') {
1052              return '';
1053          }
1054  
1055          foreach ($replacements as $from => $to) {
1056              $str = \str_replace($from, $to, $str);
1057          }
1058  
1059          $str = self::to_ascii(
1060              $str,
1061              $language,
1062              false,
1063              $replace_extra_symbols,
1064              $use_transliterate
1065          );
1066  
1067          $str = \str_replace('@', $separator, $str);
1068  
1069          $str = (string) \preg_replace(
1070              '/[^a-zA-Z\\d\\s\\-_' . \preg_quote($separator, '/') . ']/',
1071              '',
1072              $str
1073          );
1074  
1075          if ($use_str_to_lower) {
1076              $str = \strtolower($str);
1077          }
1078  
1079          $str = (string) \preg_replace('/^[\'\\s]+|[\'\\s]+$/', '', $str);
1080          $str = (string) \preg_replace('/\\B([A-Z])/', '-\1', $str);
1081          $str = (string) \preg_replace('/[\\-_\\s]+/', $separator, $str);
1082  
1083          $l = \strlen($separator);
1084          if ($l && \strpos($str, $separator) === 0) {
1085              $str = (string) \substr($str, $l);
1086          }
1087  
1088          if (\substr($str, -$l) === $separator) {
1089              $str = (string) \substr($str, 0, \strlen($str) - $l);
1090          }
1091  
1092          return $str;
1093      }
1094  
1095      /**
1096       * Returns an ASCII version of the string. A set of non-ASCII characters are
1097       * replaced with their closest ASCII counterparts, and the rest are removed
1098       * unless instructed otherwise.
1099       *
1100       * EXAMPLE: <code>
1101       * ASCII::to_transliterate('déjà σσς iıii'); // 'deja sss iiii'
1102       * </code>
1103       *
1104       * @param string      $str     <p>The input string.</p>
1105       * @param string|null $unknown [optional] <p>Character use if character unknown. (default is '?')
1106       *                             But you can also use NULL to keep the unknown chars.</p>
1107       * @param bool        $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl
1108       *
1109       * @psalm-pure
1110       *
1111       * @return string
1112       *                <p>A String that contains only ASCII characters.</p>
1113       *
1114       * @noinspection ParameterDefaultValueIsNotNullInspection
1115       */
1116      public static function to_transliterate(
1117          string $str,
1118          $unknown = '?',
1119          bool $strict = false
1120      ): string {
1121          /**
1122           * @var array<int,string>|null
1123           */
1124          static $UTF8_TO_TRANSLIT = null;
1125  
1126          /**
1127           * null|\Transliterator
1128           */
1129          static $TRANSLITERATOR = null;
1130  
1131          /**
1132           * @var bool|null
1133           */
1134          static $SUPPORT_INTL = null;
1135  
1136          if ($str === '') {
1137              return '';
1138          }
1139  
1140          if ($SUPPORT_INTL === null) {
1141              $SUPPORT_INTL = \extension_loaded('intl');
1142          }
1143  
1144          // check if we only have ASCII, first (better performance)
1145          $str_tmp = $str;
1146          if (self::is_ascii($str)) {
1147              return $str;
1148          }
1149  
1150          $str = self::clean($str);
1151  
1152          // check again, if we only have ASCII, now ...
1153          if (
1154              $str_tmp !== $str
1155              &&
1156              self::is_ascii($str)
1157          ) {
1158              return $str;
1159          }
1160  
1161          if (
1162              $strict
1163              &&
1164              $SUPPORT_INTL === true
1165          ) {
1166              if (!isset($TRANSLITERATOR)) {
1167                  // INFO: see "*-Latin" rules via "transliterator_list_ids()"
1168                  /**
1169                   * @var \Transliterator
1170                   */
1171                  $TRANSLITERATOR = \transliterator_create('NFKC; [:Nonspacing Mark:] Remove; NFKC; Any-Latin; Latin-ASCII;');
1172              }
1173  
1174              // INFO: https://unicode.org/cldr/utility/character.jsp
1175              $str_tmp = \transliterator_transliterate($TRANSLITERATOR, $str);
1176  
1177              if ($str_tmp !== false) {
1178  
1179                  // check again, if we only have ASCII, now ...
1180                  if (
1181                      $str_tmp !== $str
1182                      &&
1183                      self::is_ascii($str_tmp)
1184                  ) {
1185                      return $str_tmp;
1186                  }
1187  
1188                  $str = $str_tmp;
1189              }
1190          }
1191  
1192          if (self::$ORD === null) {
1193              self::$ORD = self::getData('ascii_ord');
1194          }
1195  
1196          \preg_match_all('/.|[^\x00]$/us', $str, $array_tmp);
1197          $chars = $array_tmp[0];
1198          $ord = null;
1199          $str_tmp = '';
1200          foreach ($chars as &$c) {
1201              $ordC0 = self::$ORD[$c[0]];
1202  
1203              if ($ordC0 >= 0 && $ordC0 <= 127) {
1204                  $str_tmp .= $c;
1205  
1206                  continue;
1207              }
1208  
1209              $ordC1 = self::$ORD[$c[1]];
1210  
1211              // ASCII - next please
1212              if ($ordC0 >= 192 && $ordC0 <= 223) {
1213                  $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
1214              }
1215  
1216              if ($ordC0 >= 224) {
1217                  $ordC2 = self::$ORD[$c[2]];
1218  
1219                  if ($ordC0 <= 239) {
1220                      $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
1221                  }
1222  
1223                  if ($ordC0 >= 240) {
1224                      $ordC3 = self::$ORD[$c[3]];
1225  
1226                      if ($ordC0 <= 247) {
1227                          $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
1228                      }
1229  
1230                      // We only process valid UTF-8 chars (<= 4 byte), so we don't need this code here ...
1231                      /*
1232                      if ($ordC0 >= 248) {
1233                          $ordC4 = self::$ORD[$c[4]];
1234  
1235                          if ($ordC0 <= 251) {
1236                              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
1237                          }
1238  
1239                          if ($ordC0 >= 252) {
1240                              $ordC5 = self::$ORD[$c[5]];
1241  
1242                              if ($ordC0 <= 253) {
1243                                  $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
1244                              }
1245                          }
1246                      }
1247                       */
1248                  }
1249              }
1250  
1251              if (
1252                  $ordC0 === 254
1253                  ||
1254                  $ordC0 === 255
1255                  ||
1256                  $ord === null
1257              ) {
1258                  $str_tmp .= $unknown ?? $c;
1259  
1260                  continue;
1261              }
1262  
1263              $bank = $ord >> 8;
1264              if (!isset($UTF8_TO_TRANSLIT[$bank])) {
1265                  $UTF8_TO_TRANSLIT[$bank] = self::getDataIfExists(\sprintf('x%03x', $bank));
1266              }
1267  
1268              $new_char = $ord & 255;
1269  
1270              if (isset($UTF8_TO_TRANSLIT[$bank][$new_char])) {
1271  
1272                  // keep for debugging
1273                  /*
1274                  echo "file: " . sprintf('x%02x', $bank) . "\n";
1275                  echo "char: " . $c . "\n";
1276                  echo "ord: " . $ord . "\n";
1277                  echo "new_char: " . $new_char . "\n";
1278                  echo "new_char: " . mb_chr($new_char) . "\n";
1279                  echo "ascii: " . $UTF8_TO_TRANSLIT[$bank][$new_char] . "\n";
1280                  echo "bank:" . $bank . "\n\n";
1281                   */
1282  
1283                  $new_char = $UTF8_TO_TRANSLIT[$bank][$new_char];
1284  
1285                  /** @noinspection MissingOrEmptyGroupStatementInspection */
1286                  /** @noinspection PhpStatementHasEmptyBodyInspection */
1287                  if ($unknown === null && $new_char === '') {
1288                      // nothing
1289                  } elseif (
1290                      $new_char === '[?]'
1291                      ||
1292                      $new_char === '[?] '
1293                  ) {
1294                      $c = $unknown ?? $c;
1295                  } else {
1296                      $c = $new_char;
1297                  }
1298              } else {
1299  
1300                  // keep for debugging missing chars
1301                  /*
1302                  echo "file: " . sprintf('x%02x', $bank) . "\n";
1303                  echo "char: " . $c . "\n";
1304                  echo "ord: " . $ord . "\n";
1305                  echo "new_char: " . $new_char . "\n";
1306                  echo "new_char: " . mb_chr($new_char) . "\n";
1307                  echo "bank:" . $bank . "\n\n";
1308                   */
1309  
1310                  $c = $unknown ?? $c;
1311              }
1312  
1313              $str_tmp .= $c;
1314          }
1315  
1316          return $str_tmp;
1317      }
1318  
1319      /**
1320       * Get the language from a string.
1321       *
1322       * e.g.: de_at -> de_at
1323       *       de_DE -> de
1324       *       DE_DE -> de
1325       *       de-de -> de
1326       *
1327       * @noinspection ReturnTypeCanBeDeclaredInspection
1328       *
1329       * @param string $language
1330       *
1331       * @psalm-pure
1332       *
1333       * @return string
1334       */
1335      private static function get_language(string $language)
1336      {
1337          if ($language === '') {
1338              return '';
1339          }
1340  
1341          if (
1342              \strpos($language, '_') === false
1343              &&
1344              \strpos($language, '-') === false
1345          ) {
1346              return \strtolower($language);
1347          }
1348  
1349          $language = \str_replace('-', '_', \strtolower($language));
1350  
1351          $regex = '/(?<first>[a-z]+)_\g{first}/';
1352  
1353          return (string) \preg_replace($regex, '$1', $language);
1354      }
1355  
1356      /**
1357       * Get data from "/data/*.php".
1358       *
1359       * @noinspection ReturnTypeCanBeDeclaredInspection
1360       *
1361       * @param string $file
1362       *
1363       * @psalm-pure
1364       *
1365       * @return array<mixed>
1366       */
1367      private static function getData(string $file)
1368      {
1369          /** @noinspection PhpIncludeInspection */
1370          /** @noinspection UsingInclusionReturnValueInspection */
1371          /** @psalm-suppress UnresolvableInclude */
1372          return include __DIR__ . '/data/' . $file . '.php';
1373      }
1374  
1375      /**
1376       * Get data from "/data/*.php".
1377       *
1378       * @param string $file
1379       *
1380       * @psalm-pure
1381       *
1382       * @return array<mixed>
1383       */
1384      private static function getDataIfExists(string $file): array
1385      {
1386          $file = __DIR__ . '/data/' . $file . '.php';
1387          /** @psalm-suppress ImpureFunctionCall */
1388          if (\is_file($file)) {
1389              /** @noinspection PhpIncludeInspection */
1390              /** @noinspection UsingInclusionReturnValueInspection */
1391              /** @psalm-suppress UnresolvableInclude */
1392              return include $file;
1393          }
1394  
1395          return [];
1396      }
1397  
1398      /**
1399       * @psalm-pure
1400       *
1401       * @return void
1402       */
1403      private static function prepareAsciiAndExtrasMaps()
1404      {
1405          if (self::$ASCII_MAPS_AND_EXTRAS === null) {
1406              self::prepareAsciiMaps();
1407              self::prepareAsciiExtras();
1408  
1409              /** @psalm-suppress PossiblyNullArgument - we use the prepare* methods here, so we don't get NULL here */
1410              self::$ASCII_MAPS_AND_EXTRAS = \array_merge_recursive(
1411                  self::$ASCII_MAPS ?? [],
1412                  self::$ASCII_EXTRAS ?? []
1413              );
1414          }
1415      }
1416  
1417      /**
1418       * @psalm-pure
1419       *
1420       * @return void
1421       */
1422      private static function prepareAsciiMaps()
1423      {
1424          if (self::$ASCII_MAPS === null) {
1425              self::$ASCII_MAPS = self::getData('ascii_by_languages');
1426          }
1427      }
1428  
1429      /**
1430       * @psalm-pure
1431       *
1432       * @return void
1433       */
1434      private static function prepareAsciiExtras()
1435      {
1436          if (self::$ASCII_EXTRAS === null) {
1437              self::$ASCII_EXTRAS = self::getData('ascii_extras_by_languages');
1438          }
1439      }
1440  }
PHP Cross Reference of Joomla 4.2.2 documentation

/libraries/vendor/voku/portable-ascii/src/voku/helper/ -> ASCII.php (source)