[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Polyfill\Iconv;

/**
 * iconv implementation in pure PHP, UTF-8 centric.
 *
 * Implemented:
 * - iconv                     - Convert string to requested character encoding
 * - iconv_mime_decode         - Decodes a MIME header field
 * - iconv_mime_decode_headers - Decodes multiple MIME header fields at once
 * - iconv_get_encoding        - Retrieve internal configuration variables of iconv extension
 * - iconv_set_encoding        - Set current setting for character encoding conversion
 * - iconv_mime_encode         - Composes a MIME header field
 * - iconv_strlen              - Returns the character count of string
 * - iconv_strpos              - Finds position of first occurrence of a needle within a haystack
 * - iconv_strrpos             - Finds the last occurrence of a needle within a haystack
 * - iconv_substr              - Cut out part of a string
 *
 * Charsets available for conversion are defined by files
 * in the charset/ directory and by Iconv::$alias below.
 * You're welcome to send back any addition you make.
 *
 * @author Nicolas Grekas <p@tchwork.com>
 *
 * @internal
 */
final class Iconv
{
    public const ERROR_ILLEGAL_CHARACTER = 'iconv(): Detected an illegal character in input string';
    public const ERROR_WRONG_CHARSET = 'iconv(): Wrong charset, conversion from `%s\' to `%s\' is not allowed';

    public static $inputEncoding = 'utf-8';
    public static $outputEncoding = 'utf-8';
    public static $internalEncoding = 'utf-8';

    // Lower-cased charset alias => canonical charset name used to locate map files.
    private static $alias = [
        'utf8' => 'utf-8',
        'ascii' => 'us-ascii',
        'tis-620' => 'iso-8859-11',
        'cp1250' => 'windows-1250',
        'cp1251' => 'windows-1251',
        'cp1252' => 'windows-1252',
        'cp1253' => 'windows-1253',
        'cp1254' => 'windows-1254',
        'cp1255' => 'windows-1255',
        'cp1256' => 'windows-1256',
        'cp1257' => 'windows-1257',
        'cp1258' => 'windows-1258',
        'shift-jis' => 'cp932',
        'shift_jis' => 'cp932',
        'latin1' => 'iso-8859-1',
        'latin2' => 'iso-8859-2',
        'latin3' => 'iso-8859-3',
        'latin4' => 'iso-8859-4',
        'latin5' => 'iso-8859-9',
        'latin6' => 'iso-8859-10',
        'latin7' => 'iso-8859-13',
        'latin8' => 'iso-8859-14',
        'latin9' => 'iso-8859-15',
        'latin10' => 'iso-8859-16',
        'iso8859-1' => 'iso-8859-1',
        'iso8859-2' => 'iso-8859-2',
        'iso8859-3' => 'iso-8859-3',
        'iso8859-4' => 'iso-8859-4',
        'iso8859-5' => 'iso-8859-5',
        'iso8859-6' => 'iso-8859-6',
        'iso8859-7' => 'iso-8859-7',
        'iso8859-8' => 'iso-8859-8',
        'iso8859-9' => 'iso-8859-9',
        'iso8859-10' => 'iso-8859-10',
        'iso8859-11' => 'iso-8859-11',
        'iso8859-12' => 'iso-8859-12',
        'iso8859-13' => 'iso-8859-13',
        'iso8859-14' => 'iso-8859-14',
        'iso8859-15' => 'iso-8859-15',
        'iso8859-16' => 'iso-8859-16',
        'iso_8859-1' => 'iso-8859-1',
        'iso_8859-2' => 'iso-8859-2',
        'iso_8859-3' => 'iso-8859-3',
        'iso_8859-4' => 'iso-8859-4',
        'iso_8859-5' => 'iso-8859-5',
        'iso_8859-6' => 'iso-8859-6',
        'iso_8859-7' => 'iso-8859-7',
        'iso_8859-8' => 'iso-8859-8',
        'iso_8859-9' => 'iso-8859-9',
        'iso_8859-10' => 'iso-8859-10',
        'iso_8859-11' => 'iso-8859-11',
        'iso_8859-12' => 'iso-8859-12',
        'iso_8859-13' => 'iso-8859-13',
        'iso_8859-14' => 'iso-8859-14',
        'iso_8859-15' => 'iso-8859-15',
        'iso_8859-16' => 'iso-8859-16',
        'iso88591' => 'iso-8859-1',
        'iso88592' => 'iso-8859-2',
        'iso88593' => 'iso-8859-3',
        'iso88594' => 'iso-8859-4',
        'iso88595' => 'iso-8859-5',
        'iso88596' => 'iso-8859-6',
        'iso88597' => 'iso-8859-7',
        'iso88598' => 'iso-8859-8',
        'iso88599' => 'iso-8859-9',
        'iso885910' => 'iso-8859-10',
        'iso885911' => 'iso-8859-11',
        'iso885912' => 'iso-8859-12',
        'iso885913' => 'iso-8859-13',
        'iso885914' => 'iso-8859-14',
        'iso885915' => 'iso-8859-15',
        'iso885916' => 'iso-8859-16',
    ];
    // Lazily loaded by mapFromUtf8() from the "translit" data file.
    private static $translitMap = [];
    // Cache of loaded charset maps, keyed by "<type><charset>" (e.g. "from.iso-8859-1").
    private static $convertMap = [];
    private static $errorHandler;
    private static $lastError;

    // High nibble of a UTF-8 lead byte => length in bytes of the sequence it starts.
    private static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];
    // Set by iconv(); read by utf8ToUtf8() and mapFromUtf8() to skip per-character validation.
    private static $isValidUtf8;

    /**
     * Converts $str from $inCharset to $outCharset, going through UTF-8.
     *
     * The "//TRANSLIT" and "//IGNORE" suffixes are honoured on $outCharset and
     * stripped (without effect) from $inCharset. An empty charset name is
     * treated as "iso-8859-1".
     *
     * @return string|false the converted string ('' for empty input), or false
     *                      when a charset map cannot be loaded or the
     *                      conversion fails
     */
    public static function iconv($inCharset, $outCharset, $str)
    {
        $str = (string) $str;
        if ('' === $str) {
            return '';
        }

        // Prepare for //IGNORE and //TRANSLIT

        $translit = $ignore = '';

        $outCharset = strtolower($outCharset);
        $inCharset = strtolower($inCharset);

        if ('' === $outCharset) {
            $outCharset = 'iso-8859-1';
        }
        if ('' === $inCharset) {
            $inCharset = 'iso-8859-1';
        }

        // Suffixes may be stacked in any order, hence the loop.
        do {
            $loop = false;

            if ('//translit' === substr($outCharset, -10)) {
                $loop = $translit = true;
                $outCharset = substr($outCharset, 0, -10);
            }

            if ('//ignore' === substr($outCharset, -8)) {
                $loop = $ignore = true;
                $outCharset = substr($outCharset, 0, -8);
            }
        } while ($loop);

        // On the input charset the suffixes are only removed; they set no flag.
        do {
            $loop = false;

            if ('//translit' === substr($inCharset, -10)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -10);
            }

            if ('//ignore' === substr($inCharset, -8)) {
                $loop = true;
                $inCharset = substr($inCharset, 0, -8);
            }
        } while ($loop);

        if (isset(self::$alias[$inCharset])) {
            $inCharset = self::$alias[$inCharset];
        }
        if (isset(self::$alias[$outCharset])) {
            $outCharset = self::$alias[$outCharset];
        }

        // Load charset maps

        if (('utf-8' !== $inCharset && !self::loadMap('from.', $inCharset, $inMap))
            || ('utf-8' !== $outCharset && !self::loadMap('to.', $outCharset, $outMap))) {
            trigger_error(sprintf(self::ERROR_WRONG_CHARSET, $inCharset, $outCharset));

            return false;
        }

        if ('utf-8' !== $inCharset) {
            // Convert input to UTF-8
            $result = '';
            if (self::mapToUtf8($result, $inMap, $str, $ignore)) {
                $str = $result;
            } else {
                $str = false;
            }
            self::$isValidUtf8 = true;
        } else {
            self::$isValidUtf8 = preg_match('//u', $str);

            if (!self::$isValidUtf8 && !$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }

            if ('utf-8' === $outCharset) {
                // UTF-8 validation
                $str = self::utf8ToUtf8($str, $ignore);
            }
        }

        if ('utf-8' !== $outCharset && false !== $str) {
            // Convert from UTF-8 to the output charset
            $result = '';
            if (self::mapFromUtf8($result, $outMap, $str, $ignore, $translit)) {
                return $result;
            }

            return false;
        }

        return $str;
    }

    /**
     * Decodes every header of a MIME header block.
     *
     * Only the part of $str before the first blank line is considered.
     * Folded lines (continuations starting with a space or a tab) belong to
     * the preceding header. Lines without a colon are skipped.
     *
     * @return array|false header name => value, or list of values when the
     *                     same name occurs more than once; false as soon as
     *                     one header fails to decode
     */
    public static function iconv_mime_decode_headers($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }

        // Normalize CRLF and lone CR to LF.
        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        $sections = explode("\n\n", $str, 2);

        $headers = [];

        $lines = preg_split('/\n(?![ \t])/', $sections[0]);
        foreach ($lines as $line) {
            $decoded = self::iconv_mime_decode($line, $mode, $charset);
            if (false === $decoded) {
                return false;
            }
            $parts = explode(':', $decoded, 2);

            if (2 === \count($parts)) {
                $name = $parts[0];
                $value = ltrim($parts[1]);

                if (isset($headers[$name])) {
                    // Repeated header: promote the stored value to a list.
                    if (!\is_array($headers[$name])) {
                        $headers[$name] = [$headers[$name]];
                    }
                    $headers[$name][] = $value;
                } else {
                    $headers[$name] = $value;
                }
            }
        }

        return $headers;
    }

    /**
     * Decodes the first (possibly folded) MIME header field found in $str.
     *
     * "=?charset?B|Q?data?=" encoded words are decoded and converted to
     * $charset; the plain text around them is converted from UTF-8 to
     * $charset. With ICONV_MIME_DECODE_CONTINUE_ON_ERROR set in $mode,
     * "//IGNORE" is appended to $charset and encoded words whose charset is
     * unknown are copied verbatim instead of aborting.
     *
     * @return string|false
     */
    public static function iconv_mime_decode($str, $mode = 0, $charset = null)
    {
        if (null === $charset) {
            $charset = self::$internalEncoding;
        }
        if (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
            $charset .= '//IGNORE';
        }

        if (false !== strpos($str, "\r")) {
            $str = strtr(str_replace("\r\n", "\n", $str), "\r", "\n");
        }
        // Keep the first logical line only, then unfold it.
        $str = preg_split('/\n(?![ \t])/', rtrim($str), 2);
        $str = preg_replace('/[ \t]*\n[ \t]+/', ' ', rtrim($str[0]));
        // Yields [text, charset, scheme, data, text, charset, scheme, data, text, ...].
        $str = preg_split('/=\?([^?]+)\?([bqBQ])\?(.*?)\?=/', $str, -1, \PREG_SPLIT_DELIM_CAPTURE);

        $result = self::iconv('utf-8', $charset, $str[0]);
        if (false === $result) {
            return false;
        }

        $i = 1;
        $len = \count($str);

        // Each iteration consumes one (charset, scheme, data, trailing text) group.
        while ($i < $len) {
            $c = strtolower($str[$i]);
            if ((\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode)
                && 'utf-8' !== $c
                && !isset(self::$alias[$c])
                && !self::loadMap('from.', $c, $d)) {
                $d = false;
            } elseif ('B' === strtoupper($str[$i + 1])) {
                $d = base64_decode($str[$i + 2]);
            } else {
                // Q scheme: "_" stands for a space and "=XX" for a byte; literal
                // "%" is protected first so that rawurldecode() can do the work.
                $d = rawurldecode(strtr(str_replace('%', '%25', $str[$i + 2]), '=_', '% '));
            }

            if (false !== $d) {
                if ('' !== $d) {
                    if ('' === $d = self::iconv($c, $charset, $d)) {
                        $str[$i + 3] = substr($str[$i + 3], 1);
                    } else {
                        $result .= $d;
                    }
                }
                // Whitespace-only text following an encoded word is dropped.
                $d = self::iconv('utf-8', $charset, $str[$i + 3]);
                if ('' !== trim($d)) {
                    $result .= $d;
                }
            } elseif (\ICONV_MIME_DECODE_CONTINUE_ON_ERROR & $mode) {
                $result .= "=?{$str[$i]}?{$str[$i + 1]}?{$str[$i + 2]}?={$str[$i + 3]}";
            } else {
                $result = false;
                break;
            }

            $i += 4;
        }

        return $result;
    }

    /**
     * Returns one of the three encoding settings, or all of them as an array
     * when $type is not one of the known setting names.
     *
     * @return string|array
     */
    public static function iconv_get_encoding($type = 'all')
    {
        switch ($type) {
            case 'input_encoding': return self::$inputEncoding;
            case 'output_encoding': return self::$outputEncoding;
            case 'internal_encoding': return self::$internalEncoding;
        }

        return [
            'input_encoding' => self::$inputEncoding,
            'output_encoding' => self::$outputEncoding,
            'internal_encoding' => self::$internalEncoding,
        ];
    }

    /**
     * Stores $charset as the given encoding setting.
     *
     * @return bool false when $type is not a known setting name
     */
    public static function iconv_set_encoding($type, $charset)
    {
        switch ($type) {
            case 'input_encoding': self::$inputEncoding = $charset; break;
            case 'output_encoding': self::$outputEncoding = $charset; break;
            case 'internal_encoding': self::$internalEncoding = $charset; break;
            default: return false;
        }

        return true;
    }

    /**
     * Composes a "Name: =?charset?scheme?data?=" MIME header field.
     *
     * Recognised $pref keys: "scheme" ("B" or "Q"), "input-charset",
     * "output-charset", "line-length" and "line-break-chars". A field name
     * containing bytes in the 0x80-0xFF range is replaced by an empty name.
     *
     * @return string|false false when a charset conversion fails
     */
    public static function iconv_mime_encode($fieldName, $fieldValue, $pref = null)
    {
        if (!\is_array($pref)) {
            $pref = [];
        }

        $pref += [
            'scheme' => 'B',
            'input-charset' => self::$internalEncoding,
            'output-charset' => self::$internalEncoding,
            'line-length' => 76,
            'line-break-chars' => "\r\n",
        ];

        if (preg_match('/[\x80-\xFF]/', $fieldName)) {
            $fieldName = '';
        }

        $scheme = strtoupper(substr($pref['scheme'], 0, 1));
        $in = strtolower($pref['input-charset']);
        $out = strtolower($pref['output-charset']);

        if ('utf-8' !== $in && false === $fieldValue = self::iconv($in, 'utf-8', $fieldValue)) {
            return false;
        }

        // Split into UTF-8 characters so that a line break never lands inside one.
        preg_match_all('/./us', $fieldValue, $chars);

        $chars = $chars[0] ?? [];

        $lineBreak = (int) $pref['line-length'];
        $lineStart = "=?{$pref['output-charset']}?{$scheme}?";
        $lineLength = \strlen($fieldName) + 2 + \strlen($lineStart) + 2;
        $lineOffset = \strlen($lineStart) + 3;
        $lineData = '';

        $fieldValue = [];

        $Q = 'Q' === $scheme;

        foreach ($chars as $c) {
            if ('utf-8' !== $out && false === $c = self::iconv('utf-8', $out, $c)) {
                return false;
            }

            // In Q mode $c itself becomes the escaped character; in B mode $o is
            // the would-be encoded line and $c stays raw until the line is flushed.
            $o = $Q
                ? $c = preg_replace_callback(
                    '/[=_\?\x00-\x1F\x80-\xFF]/',
                    [__CLASS__, 'qpByteCallback'],
                    $c
                )
                : base64_encode($lineData.$c);

            // Flush the current line when adding $c would exceed the line length.
            if (isset($o[$lineBreak - $lineLength])) {
                if (!$Q) {
                    $lineData = base64_encode($lineData);
                }
                $fieldValue[] = $lineStart.$lineData.'?=';
                $lineLength = $lineOffset;
                $lineData = '';
            }

            $lineData .= $c;
            $Q && $lineLength += \strlen($c);
        }

        if ('' !== $lineData) {
            if (!$Q) {
                $lineData = base64_encode($lineData);
            }
            $fieldValue[] = $lineStart.$lineData.'?=';
        }

        return $fieldName.': '.implode($pref['line-break-chars'].' ', $fieldValue);
    }

    /**
     * Counts the characters of $s, dispatching to strlen1() or strlen2().
     *
     * @return int|false false when $s cannot be converted from $encoding
     */
    public static function iconv_strlen($s, $encoding = null)
    {
        static $useUtf8Decode = null;
        if (null === $useUtf8Decode) {
            // utf8_decode() is deprecated as of PHP 8.2: use the pure PHP
            // counter there so that no deprecation notice is emitted.
            $useUtf8Decode = \PHP_VERSION_ID < 80200 && \extension_loaded('xml');
        }

        if ($useUtf8Decode) {
            return self::strlen1($s, $encoding);
        }

        return self::strlen2($s, $encoding);
    }

    /**
     * Counts characters using utf8_decode(), which yields one byte per character.
     *
     * @return int|false false when $s cannot be converted from $encoding
     */
    public static function strlen1($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        return \strlen(utf8_decode($s));
    }

    /**
     * Counts characters by hopping from lead byte to lead byte.
     *
     * @return int|false false when $s cannot be converted from $encoding
     */
    public static function strlen2($s, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 !== stripos($encoding, 'utf-8') && false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $ulenMask = self::$ulenMask;

        $i = 0;
        $j = 0;
        $len = \strlen($s);

        while ($i < $len) {
            // The high nibble of the lead byte gives the sequence length;
            // anything not listed in the mask advances by a single byte.
            $u = $s[$i] & "\xF0";
            $i += $ulenMask[$u] ?? 1;
            ++$j;
        }

        return $j;
    }

    /**
     * Finds the character position of the first occurrence of $needle in
     * $haystack, starting the search $offset characters in.
     *
     * @return int|false
     */
    public static function iconv_strpos($haystack, $needle, $offset = 0, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        if ($offset = (int) $offset) {
            $haystack = self::iconv_substr($haystack, $offset, 2147483647, 'utf-8');
        }
        $pos = strpos($haystack, $needle);

        // Turn the byte offset into a character offset.
        return false === $pos ? false : ($offset + ($pos ? self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8') : 0));
    }

    /**
     * Finds the character position of the last occurrence of $needle in $haystack.
     *
     * @return int|false false when $needle is empty or not found
     */
    public static function iconv_strrpos($haystack, $needle, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }

        if (0 !== stripos($encoding, 'utf-8')) {
            if (false === $haystack = self::iconv($encoding, 'utf-8', $haystack)) {
                return false;
            }
            if (false === $needle = self::iconv($encoding, 'utf-8', $needle)) {
                return false;
            }
        }

        $pos = isset($needle[0]) ? strrpos($haystack, $needle) : false;

        if (false === $pos) {
            return false;
        }
        if (0 === $pos) {
            // A match at the very beginning is character position 0.
            return 0;
        }

        // Turn the byte offset into a character offset.
        return self::iconv_strlen(substr($haystack, 0, $pos), 'utf-8');
    }

    /**
     * Extracts $length characters of $s starting at character $start.
     *
     * Negative $start and $length count from the end of the string. Out of
     * range values return false before PHP 8.0 and '' (or a clamped start)
     * from PHP 8.0 on.
     *
     * @return string|false
     */
    public static function iconv_substr($s, $start, $length = 2147483647, $encoding = null)
    {
        if (null === $encoding) {
            $encoding = self::$internalEncoding;
        }
        if (0 === stripos($encoding, 'utf-8')) {
            // Already UTF-8: null means "no conversion back" at the end.
            $encoding = null;
        } elseif (false === $s = self::iconv($encoding, 'utf-8', $s)) {
            return false;
        }

        $s = (string) $s;
        $slen = self::iconv_strlen($s, 'utf-8');
        $start = (int) $start;

        if (0 > $start) {
            $start += $slen;
        }
        if (0 > $start) {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            $start = 0;
        }
        if ($start >= $slen) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        // Number of characters available from $start to the end.
        $rx = $slen - $start;

        if (0 > $length) {
            $length += $rx;
        }
        if (0 === $length) {
            return '';
        }
        if (0 > $length) {
            return \PHP_VERSION_ID >= 80000 ? '' : false;
        }

        if ($length > $rx) {
            $length = $rx;
        }

        // Skip $start characters, then capture $length characters.
        $rx = '/^'.($start ? self::pregOffset($start) : '').'('.self::pregOffset($length).')/u';

        $s = preg_match($rx, $s, $s) ? $s[1] : '';

        if (null === $encoding) {
            return $s;
        }

        return self::iconv('utf-8', $encoding, $s);
    }

    /**
     * Loads the conversion map for $charset into $map, caching it.
     *
     * $type is "from." or "to.". When no "to." data file exists, the flipped
     * "from." map is used instead.
     *
     * @return bool false when no map could be found
     */
    private static function loadMap($type, $charset, &$map)
    {
        if (!isset(self::$convertMap[$type.$charset])) {
            if (false === $map = self::getData($type.$charset)) {
                if ('to.' === $type && self::loadMap('from.', $charset, $map)) {
                    $map = array_flip($map);
                } else {
                    return false;
                }
            }

            self::$convertMap[$type.$charset] = $map;
        } else {
            $map = self::$convertMap[$type.$charset];
        }

        return true;
    }

    /**
     * Copies $str, dropping (when $ignore) or rejecting invalid UTF-8 sequences.
     *
     * @return string|false false, after triggering an error, on an invalid
     *                      sequence when $ignore is off
     */
    private static function utf8ToUtf8($str, $ignore)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        // $u is overwritten in place and truncated to $j bytes at the end.
        $u = $str;
        $i = $j = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $u[$j++] = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                // A non-ASCII byte with a length of 1 is not a valid lead byte.
                if (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr))) {
                    if ($ignore) {
                        ++$i;
                        continue;
                    }

                    trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                    return false;
                }

                $i += $ulen;

                $u[$j++] = $uchr[0];

                // Copy the continuation bytes that exist (up to three).
                isset($uchr[1]) && 0 !== ($u[$j++] = $uchr[1])
                    && isset($uchr[2]) && 0 !== ($u[$j++] = $uchr[2])
                    && isset($uchr[3]) && 0 !== ($u[$j++] = $uchr[3]);
            }
        }

        return substr($u, 0, $j);
    }

    /**
     * Appends to $result the UTF-8 conversion of $str, using $map.
     *
     * Two-byte keys of $map are tried before single-byte keys. Unmapped bytes
     * are skipped when $ignore is on.
     *
     * @return bool false, after triggering an error, on an unmapped byte when
     *              $ignore is off
     */
    private static function mapToUtf8(&$result, array $map, $str, $ignore)
    {
        $len = \strlen($str);
        for ($i = 0; $i < $len; ++$i) {
            if (isset($str[$i + 1], $map[$str[$i].$str[$i + 1]])) {
                $result .= $map[$str[$i].$str[++$i]];
            } elseif (isset($map[$str[$i]])) {
                $result .= $map[$str[$i]];
            } elseif (!$ignore) {
                trigger_error(self::ERROR_ILLEGAL_CHARACTER);

                return false;
            }
        }

        return true;
    }

    /**
     * Appends to $result the conversion of the UTF-8 string $str, using $map.
     *
     * With $translit on, a character missing from $map is replaced using the
     * transliteration table or, failing that, the first byte of its NFD form
     * when that byte is ASCII. With $ignore on, characters that cannot be
     * converted are skipped.
     *
     * @return bool false when a character cannot be converted and $ignore is off
     */
    private static function mapFromUtf8(&$result, array $map, $str, $ignore, $translit)
    {
        $ulenMask = self::$ulenMask;
        $valid = self::$isValidUtf8;

        if ($translit && !self::$translitMap) {
            self::$translitMap = self::getData('translit');
        }

        $i = 0;
        $len = \strlen($str);

        while ($i < $len) {
            if ($str[$i] < "\x80") {
                $uchr = $str[$i++];
            } else {
                $ulen = $str[$i] & "\xF0";
                $ulen = $ulenMask[$ulen] ?? 1;
                $uchr = substr($str, $i, $ulen);

                if ($ignore && (1 === $ulen || !($valid || preg_match('/^.$/us', $uchr)))) {
                    ++$i;
                    continue;
                }

                $i += $ulen;
            }

            if (isset($map[$uchr])) {
                $result .= $map[$uchr];
            } elseif ($translit) {
                if (isset(self::$translitMap[$uchr])) {
                    $uchr = self::$translitMap[$uchr];
                } elseif ($uchr >= "\xC3\x80") {
                    $uchr = \Normalizer::normalize($uchr, \Normalizer::NFD);

                    if ($uchr[0] < "\x80") {
                        $uchr = $uchr[0];
                    } elseif ($ignore) {
                        continue;
                    } else {
                        return false;
                    }
                } elseif ($ignore) {
                    continue;
                } else {
                    return false;
                }

                // Put the replacement in front of the unread part and restart,
                // so that the replacement goes through the map as well.
                $str = $uchr.substr($str, $i);
                $len = \strlen($str);
                $i = 0;
            } elseif (!$ignore) {
                return false;
            }
        }

        return true;
    }

    /**
     * Encodes the matched byte as "=XX" with two upper-case hexadecimal digits.
     */
    private static function qpByteCallback(array $m)
    {
        // Zero-padding matters: bytes below 0x10 must give "=0A", not "=A".
        return sprintf('=%02X', \ord($m[0]));
    }

    /**
     * Builds a regexp fragment matching exactly $offset characters, in chunks
     * of at most 65535 per quantifier.
     */
    private static function pregOffset($offset)
    {
        $rx = [];
        $offset = (int) $offset;

        while ($offset > 65535) {
            $rx[] = '.{65535}';
            $offset -= 65535;
        }

        return implode('', $rx).'.{'.$offset.'}';
    }

    /**
     * Returns the value of Resources/charset/<file>.php, or false when that
     * file does not exist.
     */
    private static function getData($file)
    {
        if (file_exists($file = __DIR__.'/Resources/charset/'.$file.'.php')) {
            return require $file;
        }

        return false;
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |