[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

namespace Algo26\IdnaConvert\NamePrep;

use Algo26\IdnaConvert\Exception\InvalidCharacterException;
use Algo26\IdnaConvert\Exception\InvalidIdnVersionException;

/**
 * Prepares a sequence of Unicode code points for IDN conversion.
 *
 * The preparation runs three passes (see {@see NamePrep::do()}):
 *  1. character mapping, prohibition checks and decomposition,
 *  2. re-composition of Hangul syllables,
 *  3. composition of starter / non-starter sequences.
 *
 * All mapping tables come from the version specific data object that is chosen
 * in the constructor.
 */
class NamePrep implements NamePrepInterface
{
    // Constants of the arithmetic Hangul syllable (de)composition.
    const sBase = 0xAC00; // first precomposed Hangul syllable
    const lBase = 0x1100; // first leading consonant
    const vBase = 0x1161; // first vowel
    const tBase = 0x11A7; // one BEFORE the first trailing consonant
    const lCount = 19;
    const vCount = 21;
    const tCount = 28;
    const nCount = 588; // vCount * tCount
    const sCount = 11172; // lCount * tCount * vCount
    const sLast = self::sBase + self::lCount * self::vCount * self::tCount;

    /** @var NamePrepDataInterface */
    private $namePrepData;

    /**
     * Selects the data tables for the requested IDN version.
     *
     * @param string|null $idnVersion '2003' or '2008'; null selects 2008
     *
     * @throws InvalidIdnVersionException when any other version is requested
     */
    public function __construct(?string $idnVersion = null)
    {
        // The comparisons are intentionally loose: the version arrives as a string.
        if ($idnVersion === null || $idnVersion == 2008) {
            $this->namePrepData = new NamePrepData2008();

            return;
        }

        if ($idnVersion == 2003) {
            $this->namePrepData = new NamePrepData2003();

            return;
        }

        throw new InvalidIdnVersionException('IDN version must be either 2003 or 2008');
    }

    /**
     * Runs the complete preparation on a list of code points.
     *
     * @param array $inputArray Code points to prepare
     *
     * @return array Prepared code points
     * @throws InvalidCharacterException when the input contains a prohibited code point
     */
    public function do(array $inputArray): array
    {
        $outputArray = $this->applyCharacterMaps($inputArray);
        $outputArray = $this->hangulCompose($outputArray);

        return $this->combineCodePoints($outputArray);
    }

    /**
     * Drops, rejects, decomposes or replaces every single input code point.
     *
     * @param array $inputArray Code points to map
     *
     * @return array Mapped code points, every element cast to int
     * @throws InvalidCharacterException code 101 for a code point listed as prohibited,
     *                                   code 102 for a code point inside a prohibited range
     */
    private function applyCharacterMaps(array $inputArray): array
    {
        $outputArray = [];
        foreach ($inputArray as $codePoint) {
            // The in_array() calls stay non-strict: elements are only cast to int
            // further down, so they are not guaranteed to be integers at this point.

            // Map to nothing == skip that code point
            if (in_array($codePoint, $this->namePrepData->mapToNothing)) {
                continue;
            }

            // Try to find prohibited input
            if (in_array($codePoint, $this->namePrepData->prohibit)
                || in_array($codePoint, $this->namePrepData->generalProhibited)
            ) {
                throw new InvalidCharacterException(sprintf('Prohibited input U+%08X', $codePoint), 101);
            }
            foreach ($this->namePrepData->prohibitRanges as $range) {
                if ($range[0] <= $codePoint && $codePoint <= $range[1]) {
                    throw new InvalidCharacterException(sprintf('Prohibited input U+%08X', $codePoint), 102);
                }
            }

            if (0xAC00 <= $codePoint && $codePoint <= 0xD7AF) {
                // Hangul syllable decomposition
                foreach ($this->hangulDecompose($codePoint) as $decomposed) {
                    $outputArray[] = (int) $decomposed;
                }
            } elseif (isset($this->namePrepData->replaceMaps[$codePoint])) {
                // Replace by the mapped sequence, brought into canonical order first
                foreach ($this->applyCanonicalOrdering($this->namePrepData->replaceMaps[$codePoint]) as $reordered) {
                    $outputArray[] = (int) $reordered;
                }
            } else {
                $outputArray[] = (int) $codePoint;
            }
        }

        return $outputArray;
    }

    /**
     * Tries to compose sequences that start at the last seen starter
     * (a code point with combining class 0).
     *
     * @param array $codePoints Code points, expected to be a list (keys 0..n-1)
     *
     * @return array Code points after composition
     */
    private function combineCodePoints(array $codePoints): array
    {
        $previousClass = 0;
        $previousStarter = 0;
        $outputLength = count($codePoints);
        for ($outerIndex = 0; $outerIndex < $outputLength; ++$outerIndex) {
            $combiningClass = $this->getCombiningClass($codePoints[$outerIndex]);
            if (
                ($previousClass === 0 || $previousClass > $combiningClass)
                && $combiningClass !== 0
            ) {
                // Try to match.
                // NOTE(review): the slice covers $previousStarter .. $outerIndex - 1, i.e. it
                // does not contain the code point at $outerIndex, and on a match the code point
                // at $outerIndex is the one that gets removed below. Confirm this against the
                // intended composition algorithm; it is kept as found.
                $sequenceLength = $outerIndex - $previousStarter;
                $combined = $this->combine(array_slice($codePoints, $previousStarter, $sequenceLength));

                // On match: replace the last starter with the composed character
                if (false !== $combined) {
                    $codePoints[$previousStarter] = $combined;
                    if ($sequenceLength > 1) {
                        // Remove the element at $outerIndex and close the gap. A manual shift
                        // followed by unset($codePoints[$outputLength]) left a stale duplicate
                        // at the end, because that index does not exist.
                        array_splice($codePoints, $outerIndex, 1);
                    }

                    // Rewind the for loop by one, since there can be more possible compositions
                    $outerIndex--;
                    $outputLength--;
                    // Keep the looked-up class; it used to be computed and then thrown away,
                    // which left $previousClass at 0 unconditionally.
                    $previousClass = $outerIndex === $previousStarter
                        ? 0
                        : $this->getCombiningClass($codePoints[$outerIndex - 1]);

                    continue;
                }
            }

            if ($combiningClass === 0) {
                $previousStarter = $outerIndex;
            }
            $previousClass = $combiningClass;
        }

        return $codePoints;
    }

    /**
     * Decomposes a Hangul syllable
     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
     *
     * @param int $codePoint Code point to decompose
     *
     * @return int[] Leading consonant, vowel and - if present - trailing consonant;
     *               the unchanged code point as single element when it is no Hangul syllable
     */
    private function hangulDecompose(int $codePoint): array
    {
        $sIndex = $codePoint - self::sBase;
        if ($sIndex < 0 || $sIndex >= self::sCount) {
            return [$codePoint];
        }

        // intdiv() keeps the results integral. A leading (int) cast would only apply
        // to the base constant and leave a float after the division.
        $result = [
            self::lBase + intdiv($sIndex, self::nCount),
            self::vBase + intdiv($sIndex % self::nCount, self::tCount),
        ];

        // tBase itself stands for "no trailing consonant"
        $trailing = self::tBase + $sIndex % self::tCount;
        if ($trailing !== self::tBase) {
            $result[] = $trailing;
        }

        return $result;
    }

    /**
     * Composes Hangul syllables
     * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
     *
     * @param array $input Decomposed sequence of code points
     *
     * @return array Sequence with L+V and LV+T pairs composed into syllables
     */
    private function hangulCompose(array $input): array
    {
        $inputLength = count($input);
        if ($inputLength === 0) {
            return [];
        }

        $previousCharCode = (int) $input[0];

        // copy first code point from input to output
        $result = [
            $previousCharCode,
        ];

        for ($i = 1; $i < $inputLength; ++$i) {
            $charCode = (int) $input[$i];
            $sIndex = $previousCharCode - self::sBase;
            $lIndex = $previousCharCode - self::lBase;
            $vIndex = $charCode - self::vBase;
            $tIndex = $charCode - self::tBase;

            // Find out, whether the two current characters are LV and T.
            // Valid trailing consonants are tBase + 1 .. tBase + tCount - 1, so both bounds
            // are exclusive: index 0 is no consonant at all and index tCount would spill
            // over into the next LV syllable.
            if (0 <= $sIndex
                && $sIndex < self::sCount
                && ($sIndex % self::tCount === 0)
                && 0 < $tIndex
                && $tIndex < self::tCount
            ) {
                // create syllable of form LVT
                $previousCharCode += $tIndex;
                $result[count($result) - 1] = $previousCharCode; // reset last

                continue; // discard char
            }

            // Find out, whether the two current characters form L and V
            if (0 <= $lIndex
                && $lIndex < self::lCount
                && 0 <= $vIndex
                && $vIndex < self::vCount
            ) {
                // create syllable of form LV
                $previousCharCode = self::sBase + ($lIndex * self::vCount + $vIndex) * self::tCount;
                $result[count($result) - 1] = $previousCharCode; // reset last

                continue; // discard char
            }

            // if neither case was true, just add the character
            $previousCharCode = $charCode;
            $result[] = $charCode;
        }

        return $result;
    }

    /**
     * Returns the combining class of a code point
     *
     * @param int $char Code point to look up
     *
     * @return int Combining class from the data tables, 0 when none is listed
     */
    private function getCombiningClass(int $char): int
    {
        return $this->namePrepData->normalizeCombiningClasses[$char] ?? 0;
    }

    /**
     * Applies the canonical ordering to a decomposed sequence
     *
     * @param array $input Decomposed sequence of code points
     *
     * @return array Sequence with non-starters sorted by ascending combining class
     */
    private function applyCanonicalOrdering(array $input): array
    {
        $inputLength = count($input);
        if ($inputLength < 2) {
            // Nothing to reorder; also avoids reading $input[0] of an empty sequence
            return $input;
        }

        $needsSwapping = true;
        while ($needsSwapping) {
            $needsSwapping = false;
            $previousClass = $this->getCombiningClass(intval($input[0]));
            for ($outerIndex = 0; $outerIndex < $inputLength - 1; ++$outerIndex) {
                $nextClass = $this->getCombiningClass(intval($input[$outerIndex + 1]));
                if ($nextClass !== 0 && $previousClass > $nextClass) {
                    // Move item leftward until it fits
                    for ($innerIndex = $outerIndex + 1; $innerIndex > 0; --$innerIndex) {
                        if ($this->getCombiningClass(intval($input[$innerIndex - 1])) <= $nextClass) {
                            break;
                        }
                        $charToMove = intval($input[$innerIndex]);
                        $input[$innerIndex] = intval($input[$innerIndex - 1]);
                        $input[$innerIndex - 1] = $charToMove;
                        $needsSwapping = true;
                    }
                    // Reentering the loop looking at the old character again
                    $nextClass = $previousClass;
                }
                $previousClass = $nextClass;
            }
        }

        return $input;
    }

    /**
     * Looks up the code point whose replacement sequence equals the given sequence
     *
     * @param array $input Sequence of code points, expected to be a list (keys 0..n-1)
     *
     * @return int|string|false Key of the first matching entry of the replace maps,
     *                          false when the input is empty or nothing matches
     */
    private function combine(array $input)
    {
        $inputLength = count($input);
        if (0 === $inputLength) {
            return false;
        }

        foreach ($this->namePrepData->replaceMaps as $namePrepSource => $namePrepTarget) {
            // Compare the length first, so that element 0 of an empty target is never read
            if (count($namePrepTarget) !== $inputLength) {
                continue;
            }
            if ($namePrepTarget[0] !== $input[0]) {
                continue;
            }

            $matches = true;
            foreach ($input as $position => $value) {
                if ($value !== $namePrepTarget[$position]) {
                    $matches = false;
                    break;
                }
            }
            if ($matches) {
                return $namePrepSource;
            }
        }

        return false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |