PHPXRef 0.7.1 : Joomla 4.2.2 documentation : /libraries/vendor/symfony/string/AbstractUnicodeString.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  /*
   4   * This file is part of the Symfony package.
   5   *
   6   * (c) Fabien Potencier <fabien@symfony.com>
   7   *
   8   * For the full copyright and license information, please view the LICENSE
   9   * file that was distributed with this source code.
  10   */
  11  
  12  namespace Symfony\Component\String;
  13  
  14  use Symfony\Component\String\Exception\ExceptionInterface;
  15  use Symfony\Component\String\Exception\InvalidArgumentException;
  16  use Symfony\Component\String\Exception\RuntimeException;
  17  
  18  /**
  19   * Represents a string of abstract Unicode characters.
  20   *
  21   * Unicode defines 3 types of "characters" (bytes, code points and grapheme clusters).
  22   * This class is the abstract type to use as a type-hint when the logic you want to
  23   * implement is Unicode-aware but doesn't care about code points vs grapheme clusters.
  24   *
  25   * @author Nicolas Grekas <p@tchwork.com>
  26   *
  27   * @throws ExceptionInterface
  28   */
  29  abstract class AbstractUnicodeString extends AbstractString
  30  {
  31      public const NFC = \Normalizer::NFC;
  32      public const NFD = \Normalizer::NFD;
  33      public const NFKC = \Normalizer::NFKC;
  34      public const NFKD = \Normalizer::NFKD;
  35  
  36      // all ASCII characters (including control characters) sorted by typical frequency of occurrence
  37      private const ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";
  38  
  39      // the subset of folded case mappings that is not in lower case mappings
  40      private const FOLD_FROM = ['İ', 'µ', 'ſ', "\xCD\x85", 'ς', 'ϐ', 'ϑ', 'ϕ', 'ϖ', 'ϰ', 'ϱ', 'ϵ', 'ẛ', "\xE1\xBE\xBE", 'ß', 'İ', 'ŉ', 'ǰ', 'ΐ', 'ΰ', 'և', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ẞ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾀ', 'ᾁ', 'ᾂ', 'ᾃ', 'ᾄ', 'ᾅ', 'ᾆ', 'ᾇ', 'ᾈ', 'ᾉ', 'ᾊ', 'ᾋ', 'ᾌ', 'ᾍ', 'ᾎ', 'ᾏ', 'ᾐ', 'ᾑ', 'ᾒ', 'ᾓ', 'ᾔ', 'ᾕ', 'ᾖ', 'ᾗ', 'ᾘ', 'ᾙ', 'ᾚ', 'ᾛ', 'ᾜ', 'ᾝ', 'ᾞ', 'ᾟ', 'ᾠ', 'ᾡ', 'ᾢ', 'ᾣ', 'ᾤ', 'ᾥ', 'ᾦ', 'ᾧ', 'ᾨ', 'ᾩ', 'ᾪ', 'ᾫ', 'ᾬ', 'ᾭ', 'ᾮ', 'ᾯ', 'ᾲ', 'ᾳ', 'ᾴ', 'ᾶ', 'ᾷ', 'ᾼ', 'ῂ', 'ῃ', 'ῄ', 'ῆ', 'ῇ', 'ῌ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῲ', 'ῳ', 'ῴ', 'ῶ', 'ῷ', 'ῼ', 'ﬀ', 'ﬁ', 'ﬂ', 'ﬃ', 'ﬄ', 'ﬅ', 'ﬆ', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ'];
  41      private const FOLD_TO = ['i̇', 'μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', 'ṡ', 'ι', 'ss', 'i̇', 'ʼn', 'ǰ', 'ΐ', 'ΰ', 'եւ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'aʾ', 'ss', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἀι', 'ἁι', 'ἂι', 'ἃι', 'ἄι', 'ἅι', 'ἆι', 'ἇι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ἠι', 'ἡι', 'ἢι', 'ἣι', 'ἤι', 'ἥι', 'ἦι', 'ἧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὠι', 'ὡι', 'ὢι', 'ὣι', 'ὤι', 'ὥι', 'ὦι', 'ὧι', 'ὰι', 'αι', 'άι', 'ᾶ', 'ᾶι', 'αι', 'ὴι', 'ηι', 'ήι', 'ῆ', 'ῆι', 'ηι', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ὼι', 'ωι', 'ώι', 'ῶ', 'ῶι', 'ωι', 'ff', 'fi', 'fl', 'ffi', 'ffl', 'st', 'st', 'մն', 'մե', 'մի', 'վն', 'մխ'];
  42  
  43      // the subset of upper case mappings that map one code point to many code points
  44      private const UPPER_FROM = ['ß', 'ﬀ', 'ﬁ', 'ﬂ', 'ﬃ', 'ﬄ', 'ﬅ', 'ﬆ', 'և', 'ﬓ', 'ﬔ', 'ﬕ', 'ﬖ', 'ﬗ', 'ŉ', 'ΐ', 'ΰ', 'ǰ', 'ẖ', 'ẗ', 'ẘ', 'ẙ', 'ẚ', 'ὐ', 'ὒ', 'ὔ', 'ὖ', 'ᾶ', 'ῆ', 'ῒ', 'ΐ', 'ῖ', 'ῗ', 'ῢ', 'ΰ', 'ῤ', 'ῦ', 'ῧ', 'ῶ'];
  45      private const UPPER_TO = ['SS', 'FF', 'FI', 'FL', 'FFI', 'FFL', 'ST', 'ST', 'ԵՒ', 'ՄՆ', 'ՄԵ', 'ՄԻ', 'ՎՆ', 'ՄԽ', 'ʼN', 'Ϊ́', 'Ϋ́', 'J̌', 'H̱', 'T̈', 'W̊', 'Y̊', 'Aʾ', 'Υ̓', 'Υ̓̀', 'Υ̓́', 'Υ̓͂', 'Α͂', 'Η͂', 'Ϊ̀', 'Ϊ́', 'Ι͂', 'Ϊ͂', 'Ϋ̀', 'Ϋ́', 'Ρ̓', 'Υ͂', 'Ϋ͂', 'Ω͂'];
  46  
  47      // the subset of https://github.com/unicode-org/cldr/blob/master/common/transforms/Latin-ASCII.xml that is not in NFKD
  48      private const TRANSLIT_FROM = ['Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ', 'ħ', 'ı', 'ĸ', 'Ŀ', 'ŀ', 'Ł', 'ł', 'ŉ', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ', 'ƀ', 'Ɓ', 'Ƃ', 'ƃ', 'Ƈ', 'ƈ', 'Ɖ', 'Ɗ', 'Ƌ', 'ƌ', 'Ɛ', 'Ƒ', 'ƒ', 'Ɠ', 'ƕ', 'Ɩ', 'Ɨ', 'Ƙ', 'ƙ', 'ƚ', 'Ɲ', 'ƞ', 'Ƣ', 'ƣ', 'Ƥ', 'ƥ', 'ƫ', 'Ƭ', 'ƭ', 'Ʈ', 'Ʋ', 'Ƴ', 'ƴ', 'Ƶ', 'ƶ', 'Ǆ', 'ǅ', 'ǆ', 'Ǥ', 'ǥ', 'ȡ', 'Ȥ', 'ȥ', 'ȴ', 'ȵ', 'ȶ', 'ȷ', 'ȸ', 'ȹ', 'Ⱥ', 'Ȼ', 'ȼ', 'Ƚ', 'Ⱦ', 'ȿ', 'ɀ', 'Ƀ', 'Ʉ', 'Ɇ', 'ɇ', 'Ɉ', 'ɉ', 'Ɍ', 'ɍ', 'Ɏ', 'ɏ', 'ɓ', 'ɕ', 'ɖ', 'ɗ', 'ɛ', 'ɟ', 'ɠ', 'ɡ', 'ɢ', 'ɦ', 'ɧ', 'ɨ', 'ɪ', 'ɫ', 'ɬ', 'ɭ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɶ', 'ɼ', 'ɽ', 'ɾ', 'ʀ', 'ʂ', 'ʈ', 'ʉ', 'ʋ', 'ʏ', 'ʐ', 'ʑ', 'ʙ', 'ʛ', 'ʜ', 'ʝ', 'ʟ', 'ʠ', 'ʣ', 'ʥ', 'ʦ', 'ʪ', 'ʫ', 'ᴀ', 'ᴁ', 'ᴃ', 'ᴄ', 'ᴅ', 'ᴆ', 'ᴇ', 'ᴊ', 'ᴋ', 'ᴌ', 'ᴍ', 'ᴏ', 'ᴘ', 'ᴛ', 'ᴜ', 'ᴠ', 'ᴡ', 'ᴢ', 'ᵫ', 'ᵬ', 'ᵭ', 'ᵮ', 'ᵯ', 'ᵰ', 'ᵱ', 'ᵲ', 'ᵳ', 'ᵴ', 'ᵵ', 'ᵶ', 'ᵺ', 'ᵻ', 'ᵽ', 'ᵾ', 'ᶀ', 'ᶁ', 'ᶂ', 'ᶃ', 'ᶄ', 'ᶅ', 'ᶆ', 'ᶇ', 'ᶈ', 'ᶉ', 'ᶊ', 'ᶌ', 'ᶍ', 'ᶎ', 'ᶏ', 'ᶑ', 'ᶒ', 'ᶓ', 'ᶖ', 'ᶙ', 'ẚ', 'ẜ', 'ẝ', 'ẞ', 'Ỻ', 'ỻ', 'Ỽ', 'ỽ', 'Ỿ', 'ỿ', '©', '®', '₠', '₢', '₣', '₤', '₧', '₺', '₹', 'ℌ', '℞', '㎧', '㎮', '㏆', '㏗', '㏞', '㏟', '¼', '½', '¾', '⅓', '⅔', '⅕', '⅖', '⅗', '⅘', '⅙', '⅚', '⅛', '⅜', '⅝', '⅞', '⅟', '〇', '‘', '’', '‚', '‛', '“', '”', '„', '‟', '′', '″', '〝', '〞', '«', '»', '‹', '›', '‐', '‑', '‒', '–', '—', '―', '︱', '︲', '﹘', '‖', '⁄', '⁅', '⁆', '⁎', '、', '。', '〈', '〉', '《', '》', '〔', '〕', '〘', '〙', '〚', '〛', '︑', '︒', '︹', '︺', '︽', '︾', '︿', '﹀', '﹑', '﹝', '﹞', '｟', '｠', '｡', '､', '×', '÷', '−', '∕', '∖', '∣', '∥', '≪', '≫', '⦅', '⦆'];
  49      private const TRANSLIT_TO = ['AE', 'D', 'O', 'TH', 'ss', 'ae', 'd', 'o', 'th', 'D', 'd', 'H', 'h', 'i', 'q', 'L', 'l', 'L', 'l', '\'n', 'N', 'n', 'OE', 'oe', 'T', 't', 'b', 'B', 'B', 'b', 'C', 'c', 'D', 'D', 'D', 'd', 'E', 'F', 'f', 'G', 'hv', 'I', 'I', 'K', 'k', 'l', 'N', 'n', 'OI', 'oi', 'P', 'p', 't', 'T', 't', 'T', 'V', 'Y', 'y', 'Z', 'z', 'DZ', 'Dz', 'dz', 'G', 'g', 'd', 'Z', 'z', 'l', 'n', 't', 'j', 'db', 'qp', 'A', 'C', 'c', 'L', 'T', 's', 'z', 'B', 'U', 'E', 'e', 'J', 'j', 'R', 'r', 'Y', 'y', 'b', 'c', 'd', 'd', 'e', 'j', 'g', 'g', 'G', 'h', 'h', 'i', 'I', 'l', 'l', 'l', 'm', 'n', 'n', 'N', 'OE', 'r', 'r', 'r', 'R', 's', 't', 'u', 'v', 'Y', 'z', 'z', 'B', 'G', 'H', 'j', 'L', 'q', 'dz', 'dz', 'ts', 'ls', 'lz', 'A', 'AE', 'B', 'C', 'D', 'D', 'E', 'J', 'K', 'L', 'M', 'O', 'P', 'T', 'U', 'V', 'W', 'Z', 'ue', 'b', 'd', 'f', 'm', 'n', 'p', 'r', 'r', 's', 't', 'z', 'th', 'I', 'p', 'U', 'b', 'd', 'f', 'g', 'k', 'l', 'm', 'n', 'p', 'r', 's', 'v', 'x', 'z', 'a', 'd', 'e', 'e', 'i', 'u', 'a', 's', 's', 'SS', 'LL', 'll', 'V', 'v', 'Y', 'y', '(C)', '(R)', 'CE', 'Cr', 'Fr.', 'L.', 'Pts', 'TL', 'Rs', 'x', 'Rx', 'm/s', 'rad/s', 'C/kg', 'pH', 'V/m', 'A/m', ' 1/4', ' 1/2', ' 3/4', ' 1/3', ' 2/3', ' 1/5', ' 2/5', ' 3/5', ' 4/5', ' 1/6', ' 5/6', ' 1/8', ' 3/8', ' 5/8', ' 7/8', ' 1/', '0', '\'', '\'', ',', '\'', '"', '"', ',,', '"', '\'', '"', '"', '"', '<<', '>>', '<', '>', '-', '-', '-', '-', '-', '-', '-', '-', '-', '||', '/', '[', ']', '*', ',', '.', '<', '>', '<<', '>>', '[', ']', '[', ']', '[', ']', ',', '.', '[', ']', '<<', '>>', '<', '>', ',', '[', ']', '((', '))', '.', ',', '*', '/', '-', '/', '\\', '|', '||', '<<', '>>', '((', '))'];
  50  
  51      private static $transliterators = [];
  52      private static $tableZero;
  53      private static $tableWide;
  54  
  55      /**
  56       * @return static
  57       */
  58      public static function fromCodePoints(int ...$codes): self
  59      {
  60          $string = '';
  61  
  62          foreach ($codes as $code) {
  63              if (0x80 > $code %= 0x200000) {
  64                  $string .= \chr($code);
  65              } elseif (0x800 > $code) {
  66                  $string .= \chr(0xC0 | $code >> 6).\chr(0x80 | $code & 0x3F);
  67              } elseif (0x10000 > $code) {
  68                  $string .= \chr(0xE0 | $code >> 12).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
  69              } else {
  70                  $string .= \chr(0xF0 | $code >> 18).\chr(0x80 | $code >> 12 & 0x3F).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
  71              }
  72          }
  73  
  74          return new static($string);
  75      }
  76  
  77      /**
  78       * Generic UTF-8 to ASCII transliteration.
  79       *
  80       * Install the intl extension for best results.
  81       *
  82       * @param string[]|\Transliterator[]|\Closure[] $rules See "*-Latin" rules from Transliterator::listIDs()
  83       */
  84      public function ascii(array $rules = []): self
  85      {
  86          $str = clone $this;
  87          $s = $str->string;
  88          $str->string = '';
  89  
  90          array_unshift($rules, 'nfd');
  91          $rules[] = 'latin-ascii';
  92  
  93          if (\function_exists('transliterator_transliterate')) {
  94              $rules[] = 'any-latin/bgn';
  95          }
  96  
  97          $rules[] = 'nfkd';
  98          $rules[] = '[:nonspacing mark:] remove';
  99  
 100          while (\strlen($s) - 1 > $i = strspn($s, self::ASCII)) {
 101              if (0 < --$i) {
 102                  $str->string .= substr($s, 0, $i);
 103                  $s = substr($s, $i);
 104              }
 105  
 106              if (!$rule = array_shift($rules)) {
 107                  $rules = []; // An empty rule interrupts the next ones
 108              }
 109  
 110              if ($rule instanceof \Transliterator) {
 111                  $s = $rule->transliterate($s);
 112              } elseif ($rule instanceof \Closure) {
 113                  $s = $rule($s);
 114              } elseif ($rule) {
 115                  if ('nfd' === $rule = strtolower($rule)) {
 116                      normalizer_is_normalized($s, self::NFD) ?: $s = normalizer_normalize($s, self::NFD);
 117                  } elseif ('nfkd' === $rule) {
 118                      normalizer_is_normalized($s, self::NFKD) ?: $s = normalizer_normalize($s, self::NFKD);
 119                  } elseif ('[:nonspacing mark:] remove' === $rule) {
 120                      $s = preg_replace('/\p{Mn}++/u', '', $s);
 121                  } elseif ('latin-ascii' === $rule) {
 122                      $s = str_replace(self::TRANSLIT_FROM, self::TRANSLIT_TO, $s);
 123                  } elseif ('de-ascii' === $rule) {
 124                      $s = preg_replace("/([AUO])\u{0308}(?=\p{Ll})/u", '$1e', $s);
 125                      $s = str_replace(["a\u{0308}", "o\u{0308}", "u\u{0308}", "A\u{0308}", "O\u{0308}", "U\u{0308}"], ['ae', 'oe', 'ue', 'AE', 'OE', 'UE'], $s);
 126                  } elseif (\function_exists('transliterator_transliterate')) {
 127                      if (null === $transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule)) {
 128                          if ('any-latin/bgn' === $rule) {
 129                              $rule = 'any-latin';
 130                              $transliterator = self::$transliterators[$rule] ?? self::$transliterators[$rule] = \Transliterator::create($rule);
 131                          }
 132  
 133                          if (null === $transliterator) {
 134                              throw new InvalidArgumentException(sprintf('Unknown transliteration rule "%s".', $rule));
 135                          }
 136  
 137                          self::$transliterators['any-latin/bgn'] = $transliterator;
 138                      }
 139  
 140                      $s = $transliterator->transliterate($s);
 141                  }
 142              } elseif (!\function_exists('iconv')) {
 143                  $s = preg_replace('/[^\x00-\x7F]/u', '?', $s);
 144              } else {
 145                  $s = @preg_replace_callback('/[^\x00-\x7F]/u', static function ($c) {
 146                      $c = (string) iconv('UTF-8', 'ASCII//TRANSLIT', $c[0]);
 147  
 148                      if ('' === $c && '' === iconv('UTF-8', 'ASCII//TRANSLIT', '²')) {
 149                          throw new \LogicException(sprintf('"%s" requires a translit-able iconv implementation, try installing "gnu-libiconv" if you\'re using Alpine Linux.', static::class));
 150                      }
 151  
 152                      return 1 < \strlen($c) ? ltrim($c, '\'`"^~') : ('' !== $c ? $c : '?');
 153                  }, $s);
 154              }
 155          }
 156  
 157          $str->string .= $s;
 158  
 159          return $str;
 160      }
 161  
 162      public function camel(): parent
 163      {
 164          $str = clone $this;
 165          $str->string = str_replace(' ', '', preg_replace_callback('/\b./u', static function ($m) use (&$i) {
 166              return 1 === ++$i ? ('İ' === $m[0] ? 'i̇' : mb_strtolower($m[0], 'UTF-8')) : mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
 167          }, preg_replace('/[^\pL0-9]++/u', ' ', $this->string)));
 168  
 169          return $str;
 170      }
 171  
 172      /**
 173       * @return int[]
 174       */
 175      public function codePointsAt(int $offset): array
 176      {
 177          $str = $this->slice($offset, 1);
 178  
 179          if ('' === $str->string) {
 180              return [];
 181          }
 182  
 183          $codePoints = [];
 184  
 185          foreach (preg_split('//u', $str->string, -1, \PREG_SPLIT_NO_EMPTY) as $c) {
 186              $codePoints[] = mb_ord($c, 'UTF-8');
 187          }
 188  
 189          return $codePoints;
 190      }
 191  
 192      public function folded(bool $compat = true): parent
 193      {
 194          $str = clone $this;
 195  
 196          if (!$compat || \PHP_VERSION_ID < 70300 || !\defined('Normalizer::NFKC_CF')) {
 197              $str->string = normalizer_normalize($str->string, $compat ? \Normalizer::NFKC : \Normalizer::NFC);
 198              $str->string = mb_strtolower(str_replace(self::FOLD_FROM, self::FOLD_TO, $this->string), 'UTF-8');
 199          } else {
 200              $str->string = normalizer_normalize($str->string, \Normalizer::NFKC_CF);
 201          }
 202  
 203          return $str;
 204      }
 205  
 206      public function join(array $strings, string $lastGlue = null): parent
 207      {
 208          $str = clone $this;
 209  
 210          $tail = null !== $lastGlue && 1 < \count($strings) ? $lastGlue.array_pop($strings) : '';
 211          $str->string = implode($this->string, $strings).$tail;
 212  
 213          if (!preg_match('//u', $str->string)) {
 214              throw new InvalidArgumentException('Invalid UTF-8 string.');
 215          }
 216  
 217          return $str;
 218      }
 219  
 220      public function lower(): parent
 221      {
 222          $str = clone $this;
 223          $str->string = mb_strtolower(str_replace('İ', 'i̇', $str->string), 'UTF-8');
 224  
 225          return $str;
 226      }
 227  
 228      public function match(string $regexp, int $flags = 0, int $offset = 0): array
 229      {
 230          $match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
 231  
 232          if ($this->ignoreCase) {
 233              $regexp .= 'i';
 234          }
 235  
 236          set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });
 237  
 238          try {
 239              if (false === $match($regexp.'u', $this->string, $matches, $flags | \PREG_UNMATCHED_AS_NULL, $offset)) {
 240                  $lastError = preg_last_error();
 241  
 242                  foreach (get_defined_constants(true)['pcre'] as $k => $v) {
 243                      if ($lastError === $v && '_ERROR' === substr($k, -6)) {
 244                          throw new RuntimeException('Matching failed with '.$k.'.');
 245                      }
 246                  }
 247  
 248                  throw new RuntimeException('Matching failed with unknown error code.');
 249              }
 250          } finally {
 251              restore_error_handler();
 252          }
 253  
 254          return $matches;
 255      }
 256  
 257      /**
 258       * @return static
 259       */
 260      public function normalize(int $form = self::NFC): self
 261      {
 262          if (!\in_array($form, [self::NFC, self::NFD, self::NFKC, self::NFKD])) {
 263              throw new InvalidArgumentException('Unsupported normalization form.');
 264          }
 265  
 266          $str = clone $this;
 267          normalizer_is_normalized($str->string, $form) ?: $str->string = normalizer_normalize($str->string, $form);
 268  
 269          return $str;
 270      }
 271  
 272      public function padBoth(int $length, string $padStr = ' '): parent
 273      {
 274          if ('' === $padStr || !preg_match('//u', $padStr)) {
 275              throw new InvalidArgumentException('Invalid UTF-8 string.');
 276          }
 277  
 278          $pad = clone $this;
 279          $pad->string = $padStr;
 280  
 281          return $this->pad($length, $pad, \STR_PAD_BOTH);
 282      }
 283  
 284      public function padEnd(int $length, string $padStr = ' '): parent
 285      {
 286          if ('' === $padStr || !preg_match('//u', $padStr)) {
 287              throw new InvalidArgumentException('Invalid UTF-8 string.');
 288          }
 289  
 290          $pad = clone $this;
 291          $pad->string = $padStr;
 292  
 293          return $this->pad($length, $pad, \STR_PAD_RIGHT);
 294      }
 295  
 296      public function padStart(int $length, string $padStr = ' '): parent
 297      {
 298          if ('' === $padStr || !preg_match('//u', $padStr)) {
 299              throw new InvalidArgumentException('Invalid UTF-8 string.');
 300          }
 301  
 302          $pad = clone $this;
 303          $pad->string = $padStr;
 304  
 305          return $this->pad($length, $pad, \STR_PAD_LEFT);
 306      }
 307  
 308      public function replaceMatches(string $fromRegexp, $to): parent
 309      {
 310          if ($this->ignoreCase) {
 311              $fromRegexp .= 'i';
 312          }
 313  
 314          if (\is_array($to) || $to instanceof \Closure) {
 315              if (!\is_callable($to)) {
 316                  throw new \TypeError(sprintf('Argument 2 passed to "%s::replaceMatches()" must be callable, array given.', static::class));
 317              }
 318  
 319              $replace = 'preg_replace_callback';
 320              $to = static function (array $m) use ($to): string {
 321                  $to = $to($m);
 322  
 323                  if ('' !== $to && (!\is_string($to) || !preg_match('//u', $to))) {
 324                      throw new InvalidArgumentException('Replace callback must return a valid UTF-8 string.');
 325                  }
 326  
 327                  return $to;
 328              };
 329          } elseif ('' !== $to && !preg_match('//u', $to)) {
 330              throw new InvalidArgumentException('Invalid UTF-8 string.');
 331          } else {
 332              $replace = 'preg_replace';
 333          }
 334  
 335          set_error_handler(static function ($t, $m) { throw new InvalidArgumentException($m); });
 336  
 337          try {
 338              if (null === $string = $replace($fromRegexp.'u', $to, $this->string)) {
 339                  $lastError = preg_last_error();
 340  
 341                  foreach (get_defined_constants(true)['pcre'] as $k => $v) {
 342                      if ($lastError === $v && '_ERROR' === substr($k, -6)) {
 343                          throw new RuntimeException('Matching failed with '.$k.'.');
 344                      }
 345                  }
 346  
 347                  throw new RuntimeException('Matching failed with unknown error code.');
 348              }
 349          } finally {
 350              restore_error_handler();
 351          }
 352  
 353          $str = clone $this;
 354          $str->string = $string;
 355  
 356          return $str;
 357      }
 358  
 359      public function reverse(): parent
 360      {
 361          $str = clone $this;
 362          $str->string = implode('', array_reverse(preg_split('/(\X)/u', $str->string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY)));
 363  
 364          return $str;
 365      }
 366  
 367      public function snake(): parent
 368      {
 369          $str = $this->camel()->title();
 370          $str->string = mb_strtolower(preg_replace(['/(\p{Lu}+)(\p{Lu}\p{Ll})/u', '/([\p{Ll}0-9])(\p{Lu})/u'], '\1_\2', $str->string), 'UTF-8');
 371  
 372          return $str;
 373      }
 374  
 375      public function title(bool $allWords = false): parent
 376      {
 377          $str = clone $this;
 378  
 379          $limit = $allWords ? -1 : 1;
 380  
 381          $str->string = preg_replace_callback('/\b./u', static function (array $m): string {
 382              return mb_convert_case($m[0], \MB_CASE_TITLE, 'UTF-8');
 383          }, $str->string, $limit);
 384  
 385          return $str;
 386      }
 387  
 388      public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
 389      {
 390          if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
 391              throw new InvalidArgumentException('Invalid UTF-8 chars.');
 392          }
 393          $chars = preg_quote($chars);
 394  
 395          $str = clone $this;
 396          $str->string = preg_replace("{^[$chars]++|[$chars]++$}uD", '', $str->string);
 397  
 398          return $str;
 399      }
 400  
 401      public function trimEnd(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
 402      {
 403          if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
 404              throw new InvalidArgumentException('Invalid UTF-8 chars.');
 405          }
 406          $chars = preg_quote($chars);
 407  
 408          $str = clone $this;
 409          $str->string = preg_replace("{[$chars]++$}uD", '', $str->string);
 410  
 411          return $str;
 412      }
 413  
 414      public function trimPrefix($prefix): parent
 415      {
 416          if (!$this->ignoreCase) {
 417              return parent::trimPrefix($prefix);
 418          }
 419  
 420          $str = clone $this;
 421  
 422          if ($prefix instanceof \Traversable) {
 423              $prefix = iterator_to_array($prefix, false);
 424          } elseif ($prefix instanceof parent) {
 425              $prefix = $prefix->string;
 426          }
 427  
 428          $prefix = implode('|', array_map('preg_quote', (array) $prefix));
 429          $str->string = preg_replace("{^(?:$prefix)}iuD", '', $this->string);
 430  
 431          return $str;
 432      }
 433  
 434      public function trimStart(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): parent
 435      {
 436          if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
 437              throw new InvalidArgumentException('Invalid UTF-8 chars.');
 438          }
 439          $chars = preg_quote($chars);
 440  
 441          $str = clone $this;
 442          $str->string = preg_replace("{^[$chars]++}uD", '', $str->string);
 443  
 444          return $str;
 445      }
 446  
 447      public function trimSuffix($suffix): parent
 448      {
 449          if (!$this->ignoreCase) {
 450              return parent::trimSuffix($suffix);
 451          }
 452  
 453          $str = clone $this;
 454  
 455          if ($suffix instanceof \Traversable) {
 456              $suffix = iterator_to_array($suffix, false);
 457          } elseif ($suffix instanceof parent) {
 458              $suffix = $suffix->string;
 459          }
 460  
 461          $suffix = implode('|', array_map('preg_quote', (array) $suffix));
 462          $str->string = preg_replace("{(?:$suffix)$}iuD", '', $this->string);
 463  
 464          return $str;
 465      }
 466  
 467      public function upper(): parent
 468      {
 469          $str = clone $this;
 470          $str->string = mb_strtoupper($str->string, 'UTF-8');
 471  
 472          if (\PHP_VERSION_ID < 70300) {
 473              $str->string = str_replace(self::UPPER_FROM, self::UPPER_TO, $str->string);
 474          }
 475  
 476          return $str;
 477      }
 478  
 479      public function width(bool $ignoreAnsiDecoration = true): int
 480      {
 481          $width = 0;
 482          $s = str_replace(["\x00", "\x05", "\x07"], '', $this->string);
 483  
 484          if (false !== strpos($s, "\r")) {
 485              $s = str_replace(["\r\n", "\r"], "\n", $s);
 486          }
 487  
 488          if (!$ignoreAnsiDecoration) {
 489              $s = preg_replace('/[\p{Cc}\x7F]++/u', '', $s);
 490          }
 491  
 492          foreach (explode("\n", $s) as $s) {
 493              if ($ignoreAnsiDecoration) {
 494                  $s = preg_replace('/(?:\x1B(?:
 495                      \[ [\x30-\x3F]*+ [\x20-\x2F]*+ [0x40-\x7E]
 496                      | [P\]X^_] .*? \x1B\\\\
 497                      | [\x41-\x7E]
 498                  )|[\p{Cc}\x7F]++)/xu', '', $s);
 499              }
 500  
 501              // Non printable characters have been dropped, so wcswidth cannot logically return -1.
 502              $width += $this->wcswidth($s);
 503          }
 504  
 505          return $width;
 506      }
 507  
 508      /**
 509       * @return static
 510       */
 511      private function pad(int $len, self $pad, int $type): parent
 512      {
 513          $sLen = $this->length();
 514  
 515          if ($len <= $sLen) {
 516              return clone $this;
 517          }
 518  
 519          $padLen = $pad->length();
 520          $freeLen = $len - $sLen;
 521          $len = $freeLen % $padLen;
 522  
 523          switch ($type) {
 524              case \STR_PAD_RIGHT:
 525                  return $this->append(str_repeat($pad->string, intdiv($freeLen, $padLen)).($len ? $pad->slice(0, $len) : ''));
 526  
 527              case \STR_PAD_LEFT:
 528                  return $this->prepend(str_repeat($pad->string, intdiv($freeLen, $padLen)).($len ? $pad->slice(0, $len) : ''));
 529  
 530              case \STR_PAD_BOTH:
 531                  $freeLen /= 2;
 532  
 533                  $rightLen = ceil($freeLen);
 534                  $len = $rightLen % $padLen;
 535                  $str = $this->append(str_repeat($pad->string, intdiv($rightLen, $padLen)).($len ? $pad->slice(0, $len) : ''));
 536  
 537                  $leftLen = floor($freeLen);
 538                  $len = $leftLen % $padLen;
 539  
 540                  return $str->prepend(str_repeat($pad->string, intdiv($leftLen, $padLen)).($len ? $pad->slice(0, $len) : ''));
 541  
 542              default:
 543                  throw new InvalidArgumentException('Invalid padding type.');
 544          }
 545      }
 546  
 547      /**
 548       * Based on https://github.com/jquast/wcwidth, a Python implementation of https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c.
 549       */
 550      private function wcswidth(string $string): int
 551      {
 552          $width = 0;
 553  
 554          foreach (preg_split('//u', $string, -1, \PREG_SPLIT_NO_EMPTY) as $c) {
 555              $codePoint = mb_ord($c, 'UTF-8');
 556  
 557              if (0 === $codePoint // NULL
 558                  || 0x034F === $codePoint // COMBINING GRAPHEME JOINER
 559                  || (0x200B <= $codePoint && 0x200F >= $codePoint) // ZERO WIDTH SPACE to RIGHT-TO-LEFT MARK
 560                  || 0x2028 === $codePoint // LINE SEPARATOR
 561                  || 0x2029 === $codePoint // PARAGRAPH SEPARATOR
 562                  || (0x202A <= $codePoint && 0x202E >= $codePoint) // LEFT-TO-RIGHT EMBEDDING to RIGHT-TO-LEFT OVERRIDE
 563                  || (0x2060 <= $codePoint && 0x2063 >= $codePoint) // WORD JOINER to INVISIBLE SEPARATOR
 564              ) {
 565                  continue;
 566              }
 567  
 568              // Non printable characters
 569              if (32 > $codePoint // C0 control characters
 570                  || (0x07F <= $codePoint && 0x0A0 > $codePoint) // C1 control characters and DEL
 571              ) {
 572                  return -1;
 573              }
 574  
 575              if (null === self::$tableZero) {
 576                  self::$tableZero = require  __DIR__.'/Resources/data/wcswidth_table_zero.php';
 577              }
 578  
 579              if ($codePoint >= self::$tableZero[0][0] && $codePoint <= self::$tableZero[$ubound = \count(self::$tableZero) - 1][1]) {
 580                  $lbound = 0;
 581                  while ($ubound >= $lbound) {
 582                      $mid = floor(($lbound + $ubound) / 2);
 583  
 584                      if ($codePoint > self::$tableZero[$mid][1]) {
 585                          $lbound = $mid + 1;
 586                      } elseif ($codePoint < self::$tableZero[$mid][0]) {
 587                          $ubound = $mid - 1;
 588                      } else {
 589                          continue 2;
 590                      }
 591                  }
 592              }
 593  
 594              if (null === self::$tableWide) {
 595                  self::$tableWide = require  __DIR__.'/Resources/data/wcswidth_table_wide.php';
 596              }
 597  
 598              if ($codePoint >= self::$tableWide[0][0] && $codePoint <= self::$tableWide[$ubound = \count(self::$tableWide) - 1][1]) {
 599                  $lbound = 0;
 600                  while ($ubound >= $lbound) {
 601                      $mid = floor(($lbound + $ubound) / 2);
 602  
 603                      if ($codePoint > self::$tableWide[$mid][1]) {
 604                          $lbound = $mid + 1;
 605                      } elseif ($codePoint < self::$tableWide[$mid][0]) {
 606                          $ubound = $mid - 1;
 607                      } else {
 608                          $width += 2;
 609  
 610                          continue 2;
 611                      }
 612                  }
 613              }
 614  
 615              ++$width;
 616          }
 617  
 618          return $width;
 619      }
 620  }
PHP Cross Reference of Joomla 4.2.2 documentation

/libraries/vendor/symfony/string/ -> AbstractUnicodeString.php (source)