[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/algo26-matthias/idna-convert/src/NamePrep/ -> NamePrep.php (source)

   1  <?php
   2  
   3  namespace Algo26\IdnaConvert\NamePrep;
   4  
   5  use Algo26\IdnaConvert\Exception\InvalidCharacterException;
   6  use Algo26\IdnaConvert\Exception\InvalidIdnVersionException;
   7  
   8  class NamePrep implements NamePrepInterface
   9  {
  10      const sBase = 0xAC00;
  11      const lBase = 0x1100;
  12      const vBase = 0x1161;
  13      const tBase = 0x11A7;
  14      const lCount = 19;
  15      const vCount = 21;
  16      const tCount = 28;
  17      const nCount = 588;   // vCount * tCount
  18      const sCount = 11172; // lCount * tCount * vCount
  19      const sLast = self::sBase + self::lCount * self::vCount * self::tCount;
  20  
  21      /** @var NamePrepDataInterface */
  22      private $namePrepData;
  23  
  24      /**
  25       * @param string|null $idnVersion
  26       *
  27       * @throws InvalidIdnVersionException
  28       */
  29      public function __construct(?string $idnVersion = null)
  30      {
  31          if ($idnVersion === null || $idnVersion == 2008) {
  32              $this->namePrepData = new NamePrepData2008();
  33  
  34              return;
  35          }
  36  
  37          if ($idnVersion == 2003) {
  38              $this->namePrepData = new NamePrepData2003();
  39  
  40              return;
  41          }
  42  
  43          throw new InvalidIdnVersionException('IDN version must bei either 2003 or 2008');
  44      }
  45  
  46      /**
  47       * @param array $inputArray
  48       *
  49       * @return array
  50       * @throws InvalidCharacterException
  51       */
  52      public function do(array $inputArray): array
  53      {
  54          $outputArray = $this->applyCharacterMaps($inputArray);
  55          $outputArray = $this->hangulCompose($outputArray);
  56          $outputArray = $this->combineCodePoints($outputArray);
  57  
  58          return $outputArray;
  59      }
  60  
  61      /**
  62       * @param array $inputArray
  63       *
  64       * @return array
  65       * @throws InvalidCharacterException
  66       */
  67      private function applyCharacterMaps(array $inputArray): array
  68      {
  69          $outputArray = [];
  70          foreach ($inputArray as $codePoint) {
  71              // Map to nothing == skip that code point
  72              if (in_array($codePoint, $this->namePrepData->mapToNothing)) {
  73                  continue;
  74              }
  75              // Try to find prohibited input
  76              if (in_array($codePoint, $this->namePrepData->prohibit)
  77                  || in_array($codePoint, $this->namePrepData->generalProhibited)
  78              ) {
  79                  throw new InvalidCharacterException(sprintf('Prohibited input U+%08X', $codePoint), 101);
  80              }
  81              foreach ($this->namePrepData->prohibitRanges as $range) {
  82                  if ($range[0] <= $codePoint && $codePoint <= $range[1]) {
  83                      throw new InvalidCharacterException(sprintf('Prohibited input U+%08X', $codePoint), 102);
  84                  }
  85              }
  86  
  87              if (0xAC00 <= $codePoint && $codePoint <= 0xD7AF) {
  88                  // Hangul syllable decomposition
  89                  foreach ($this->hangulDecompose($codePoint) as $decomposed) {
  90                      $outputArray[] = (int) $decomposed;
  91                  }
  92              } elseif (isset($this->namePrepData->replaceMaps[$codePoint])) {
  93                  foreach ($this->applyCanonicalOrdering($this->namePrepData->replaceMaps[$codePoint]) as $reordered) {
  94                      $outputArray[] = (int) $reordered;
  95                  }
  96              } else {
  97                  $outputArray[] = (int) $codePoint;
  98              }
  99          }
 100  
 101          return $outputArray;
 102      }
 103  
 104      private function combineCodePoints(array $codePoints): array
 105      {
 106          $previousClass = 0;
 107          $previousStarter = 0;
 108          $outputLength = count($codePoints);
 109          for ($outerIndex = 0; $outerIndex < $outputLength; ++$outerIndex) {
 110              $combiningClass = $this->getCombiningClass($codePoints[$outerIndex]);
 111              if (
 112                  ($previousClass === 0 || $previousClass > $combiningClass)
 113                  && $combiningClass !== 0
 114              ) {
 115                  // Try to match
 116                  $sequenceLength = $outerIndex - $previousStarter;
 117                  $combined = $this->combine(array_slice($codePoints, $previousStarter, $sequenceLength));
 118                  // On match: Replace the last starter with the composed character and remove
 119                  // the now redundant non-starter(s)
 120                  if (false !== $combined) {
 121                      $codePoints[$previousStarter] = $combined;
 122                      if ($sequenceLength > 1) {
 123                          for ($innerIndex = $outerIndex + 1; $innerIndex < $outputLength; ++$innerIndex) {
 124                              $codePoints[$innerIndex - 1] = $codePoints[$innerIndex];
 125                          }
 126                          unset($codePoints[$outputLength]);
 127                      }
 128                      // Rewind the for loop by one, since there can be more possible compositions
 129                      $outerIndex--;
 130                      $outputLength--;
 131                      $previousClass = 0;
 132                      if ($outerIndex !== $previousStarter) {
 133                          $this->getCombiningClass($codePoints[$outerIndex - 1]);
 134                      }
 135  
 136                      continue;
 137                  }
 138              }
 139  
 140              if ($combiningClass === 0) {
 141                  $previousStarter = $outerIndex;
 142              }
 143              $previousClass = $combiningClass;
 144          }
 145  
 146          return $codePoints;
 147      }
 148  
 149      /**
 150       * Decomposes a Hangul syllable
 151       * (see http://www.unicode.org/unicode/reports/tr15/#Hangul
 152       * @param    integer  32bit UCS4 code point
 153       * @return   array    Either Hangul Syllable decomposed or original 32bit value as one value array
 154       */
 155      private function hangulDecompose(int $codePoint): array
 156      {
 157          $sIndex = (int) $codePoint - self::sBase;
 158          if ($sIndex < 0 || $sIndex >= self::sCount) {
 159              return [$codePoint];
 160          }
 161  
 162          $result = [
 163              (int) self::lBase + $sIndex / self::nCount,
 164              (int) self::vBase + ($sIndex % self::nCount) / self::tCount,
 165          ];
 166          $T = intval(self::tBase + $sIndex % self::tCount);
 167          if ($T != self::tBase) {
 168              $result[] = $T;
 169          }
 170  
 171          return $result;
 172      }
 173  
 174      /**
 175       * Compose a Hangul syllable
 176       * (see http://www.unicode.org/unicode/reports/tr15/#Hangul
 177       *
 178       * @param  array $input   Decomposed UCS4 sequence
 179       * @return array UCS4 sequence with syllables composed
 180       */
 181      private function hangulCompose(array $input): array
 182      {
 183          $inputLength = count($input);
 184          if ($inputLength === 0) {
 185              return [];
 186          }
 187  
 188          $previousCharCode = (int) $input[0];
 189  
 190          // copy first codepoint from input to output
 191          $result = [
 192              $previousCharCode,
 193          ];
 194  
 195          for ($i = 1; $i < $inputLength; ++$i) {
 196              $charCode = (int) $input[$i];
 197              $sIndex = $previousCharCode - self::sBase;
 198              $lIndex = $previousCharCode - self::lBase;
 199              $vIndex = $charCode - self::vBase;
 200              $tIndex = $charCode - self::tBase;
 201  
 202              // Find out, whether two current characters are LV and T
 203              if (0 <= $sIndex
 204                  && $sIndex < self::sCount
 205                  && ($sIndex % self::tCount == 0)
 206                  && 0 <= $tIndex
 207                  && $tIndex <= self::tCount
 208              ) {
 209                  // create syllable of form LVT
 210                  $previousCharCode += $tIndex;
 211                  $result[(count($result) - 1)] = $previousCharCode; // reset last
 212  
 213                  continue; // discard char
 214              }
 215  
 216              // Find out, whether two current characters form L and V
 217              if (0 <= $lIndex
 218                  && $lIndex < self::lCount
 219                  && 0 <= $vIndex
 220                  && $vIndex < self::vCount
 221              ) {
 222                  // create syllable of form LV
 223                  $previousCharCode = (int) self::sBase + ($lIndex * self::vCount + $vIndex) * self::tCount;
 224                  $result[(count($result) - 1)] = $previousCharCode; // reset last
 225  
 226                  continue; // discard char
 227              }
 228              // if neither case was true, just add the character
 229              $previousCharCode = $charCode;
 230              $result[] = $charCode;
 231          }
 232  
 233          return $result;
 234      }
 235  
 236      /**
 237       * Returns the combining class of a certain wide char
 238       * @param integer  $char  Wide char to check (32bit integer)
 239       * @return integer Combining class if found, else 0
 240       */
 241      private function getCombiningClass(int $char): int
 242      {
 243          return isset($this->namePrepData->normalizeCombiningClasses[$char])
 244              ? $this->namePrepData->normalizeCombiningClasses[$char]
 245              : 0;
 246      }
 247  
 248      /**
 249       * Applies the canonical ordering of a decomposed UCS4 sequence
 250       * @param array  $input Decomposed UCS4 sequence
 251       * @return array Ordered USC4 sequence
 252       */
 253      private function applyCanonicalOrdering(array $input): array
 254      {
 255          $needsSwapping = true;
 256          $inputLength = count($input);
 257          while ($needsSwapping) {
 258              $needsSwapping = false;
 259              $previousClass = $this->getCombiningClass(intval($input[0]));
 260              for ($outerIndex = 0; $outerIndex < $inputLength - 1; ++$outerIndex) {
 261                  $nextClass = $this->getCombiningClass(intval($input[$outerIndex + 1]));
 262                  if ($nextClass !== 0 && $previousClass > $nextClass) {
 263                      // Move item leftward until it fits
 264                      for ($innerIndex = $outerIndex + 1; $innerIndex > 0; --$innerIndex) {
 265                          if ($this->getCombiningClass(intval($input[$innerIndex - 1])) <= $nextClass) {
 266                              break;
 267                          }
 268                          $charToMove = intval($input[$innerIndex]);
 269                          $input[$innerIndex] = intval($input[$innerIndex - 1]);
 270                          $input[$innerIndex - 1] = $charToMove;
 271                          $needsSwapping = true;
 272                      }
 273                      // Reentering the loop looking at the old character again
 274                      $nextClass = $previousClass;
 275                  }
 276                  $previousClass = $nextClass;
 277              }
 278          }
 279  
 280          return $input;
 281      }
 282  
 283      /**
 284       * Do composition of a sequence of starter and non-starter
 285       * @param   array $input UCS4 Decomposed sequence
 286       * @return  array|false  Ordered USC4 sequence
 287       */
 288      private function combine(array $input)
 289      {
 290          $inputLength = count($input);
 291          if (0 === $inputLength) {
 292              return false;
 293          }
 294  
 295          foreach ($this->namePrepData->replaceMaps as $namePrepSource => $namePrepTarget) {
 296              if ($namePrepTarget[0] !== $input[0]) {
 297                  continue;
 298              }
 299              if (count($namePrepTarget) !== $inputLength) {
 300                  continue;
 301              }
 302              $hit = false;
 303              foreach ($input as $k2 => $v2) {
 304                  if ($v2 === $namePrepTarget[$k2]) {
 305                      $hit = true;
 306                  } else {
 307                      $hit = false;
 308                      break;
 309                  }
 310              }
 311              if ($hit) {
 312                  return $namePrepSource;
 313              }
 314          }
 315  
 316          return false;
 317      }
 318  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer