[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
1 <?php 2 3 declare(strict_types=1); 4 5 namespace voku\helper; 6 7 /** 8 * @psalm-immutable 9 */ 10 final class UTF8 11 { 12 /** 13 * (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control]) 14 * This regular expression is a work around for http://bugs.exim.org/1279 15 * 16 * @deprecated <p>please don't use it anymore</p> 17 */ 18 const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])"; 19 20 /** 21 * Bom => Byte-Length 22 * 23 * INFO: https://en.wikipedia.org/wiki/Byte_order_mark 24 * 25 * @var array<string, int> 26 */ 27 private static $BOM = [ 28 "\xef\xbb\xbf" => 3, // UTF-8 BOM 29 '' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...) 
30 "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM 31 ' þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252" 32 "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM 33 'ÿþ ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252" 34 "\xfe\xff" => 2, // UTF-16 (BE) BOM 35 'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" 36 "\xff\xfe" => 2, // UTF-16 (LE) BOM 37 'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" 38 ]; 39 40 /** 41 * Numeric code point => UTF-8 Character 42 * 43 * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp 44 * 45 * @var array<int, string> 46 */ 47 private static $WHITESPACE = [ 48 // NULL Byte 49 0 => "\x0", 50 // Tab 51 9 => "\x9", 52 // New Line 53 10 => "\xa", 54 // Vertical Tab 55 11 => "\xb", 56 // Carriage Return 57 13 => "\xd", 58 // Ordinary Space 59 32 => "\x20", 60 // NO-BREAK SPACE 61 160 => "\xc2\xa0", 62 // OGHAM SPACE MARK 63 5760 => "\xe1\x9a\x80", 64 // MONGOLIAN VOWEL SEPARATOR 65 6158 => "\xe1\xa0\x8e", 66 // EN QUAD 67 8192 => "\xe2\x80\x80", 68 // EM QUAD 69 8193 => "\xe2\x80\x81", 70 // EN SPACE 71 8194 => "\xe2\x80\x82", 72 // EM SPACE 73 8195 => "\xe2\x80\x83", 74 // THREE-PER-EM SPACE 75 8196 => "\xe2\x80\x84", 76 // FOUR-PER-EM SPACE 77 8197 => "\xe2\x80\x85", 78 // SIX-PER-EM SPACE 79 8198 => "\xe2\x80\x86", 80 // FIGURE SPACE 81 8199 => "\xe2\x80\x87", 82 // PUNCTUATION SPACE 83 8200 => "\xe2\x80\x88", 84 // THIN SPACE 85 8201 => "\xe2\x80\x89", 86 // HAIR SPACE 87 8202 => "\xe2\x80\x8a", 88 // LINE SEPARATOR 89 8232 => "\xe2\x80\xa8", 90 // PARAGRAPH SEPARATOR 91 8233 => "\xe2\x80\xa9", 92 // NARROW NO-BREAK SPACE 93 8239 => "\xe2\x80\xaf", 94 // MEDIUM MATHEMATICAL SPACE 95 8287 => "\xe2\x81\x9f", 96 // HALFWIDTH HANGUL FILLER 97 65440 => "\xef\xbe\xa0", 98 // IDEOGRAPHIC SPACE 99 12288 => "\xe3\x80\x80", 100 ]; 101 102 /** 103 * @var array<string, string> 104 */ 105 private static $WHITESPACE_TABLE = [ 106 'SPACE' => "\x20", 107 'NO-BREAK SPACE' => "\xc2\xa0", 108 'OGHAM SPACE MARK' => "\xe1\x9a\x80", 109 'EN QUAD' => "\xe2\x80\x80", 
110 'EM QUAD' => "\xe2\x80\x81", 111 'EN SPACE' => "\xe2\x80\x82", 112 'EM SPACE' => "\xe2\x80\x83", 113 'THREE-PER-EM SPACE' => "\xe2\x80\x84", 114 'FOUR-PER-EM SPACE' => "\xe2\x80\x85", 115 'SIX-PER-EM SPACE' => "\xe2\x80\x86", 116 'FIGURE SPACE' => "\xe2\x80\x87", 117 'PUNCTUATION SPACE' => "\xe2\x80\x88", 118 'THIN SPACE' => "\xe2\x80\x89", 119 'HAIR SPACE' => "\xe2\x80\x8a", 120 'LINE SEPARATOR' => "\xe2\x80\xa8", 121 'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", 122 'ZERO WIDTH SPACE' => "\xe2\x80\x8b", 123 'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", 124 'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", 125 'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", 126 'HALFWIDTH HANGUL FILLER' => "\xef\xbe\xa0", 127 ]; 128 129 /** 130 * @var array 131 * 132 * @phpstan-var array{upper: string[], lower: string[]} 133 */ 134 private static $COMMON_CASE_FOLD = [ 135 'upper' => [ 136 'µ', 137 'ſ', 138 "\xCD\x85", 139 'ς', 140 'ẞ', 141 "\xCF\x90", 142 "\xCF\x91", 143 "\xCF\x95", 144 "\xCF\x96", 145 "\xCF\xB0", 146 "\xCF\xB1", 147 "\xCF\xB5", 148 "\xE1\xBA\x9B", 149 "\xE1\xBE\xBE", 150 ], 151 'lower' => [ 152 'μ', 153 's', 154 'ι', 155 'σ', 156 'ß', 157 'β', 158 'θ', 159 'φ', 160 'π', 161 'κ', 162 'ρ', 163 'ε', 164 "\xE1\xB9\xA1", 165 'ι', 166 ], 167 ]; 168 169 /** 170 * @var array 171 * 172 * @phpstan-var array<string, mixed> 173 */ 174 private static $SUPPORT = []; 175 176 /** 177 * @var string[]|null 178 * 179 * @phpstan-var array<string, string>|null 180 */ 181 private static $BROKEN_UTF8_FIX; 182 183 /** 184 * @var string[]|null 185 * 186 * @phpstan-var array<int, string>|null 187 */ 188 private static $WIN1252_TO_UTF8; 189 190 /** 191 * @var string[]|null 192 * 193 * @phpstan-var array<int ,string>|null 194 */ 195 private static $INTL_TRANSLITERATOR_LIST; 196 197 /** 198 * @var string[]|null 199 * 200 * @phpstan-var array<string>|null 201 */ 202 private static $ENCODINGS; 203 204 /** 205 * @var int[]|null 206 * 207 * @phpstan-var array<string ,int>|null 208 */ 209 private static $ORD; 
/**
 * Emoji lookup table; null until it has been loaded.
 * NOTE(review): presumably filled by initEmojiData() - the loader is not part of this excerpt, confirm.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string, string>|null
 */
private static $EMOJI;

/**
 * Replacement list that emoji_decode() hands to str_replace().
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_VALUES_CACHE;

/**
 * Search list that emoji_decode() uses for the default (non-reversible) mapping.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_KEYS_CACHE;

/**
 * Search list that emoji_decode() uses when the reversible mapping is requested.
 *
 * @var string[]|null
 *
 * @phpstan-var array<string>|null
 */
private static $EMOJI_KEYS_REVERSIBLE_CACHE;

/**
 * Table indexed by integer value; chr() loads it on demand via getData('chr').
 *
 * @var string[]|null
 *
 * @phpstan-var array<int, string>|null
 */
private static $CHR;

/**
 * Empty constructor - it performs no work.
 */
public function __construct()
{
}

/**
 * Picks a single character out of a string, like $str[1] but multi-byte aware.
 *
 * EXAMPLE: <code>UTF8::access('fòô', 1); // 'ò'</code>
 *
 * @param string $str      <p>A UTF-8 string.</p>
 * @param int    $pos      <p>Zero-based position of the character; negative positions yield ''.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Single multi-byte character, or '' for an empty string / negative position.</p>
 */
public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
{
    $nothing_to_read = $pos < 0 || $str === '';
    if ($nothing_to_read) {
        return '';
    }

    // UTF-8 takes the direct mbstring route, everything else goes through self::substr()
    return $encoding === 'UTF-8'
        ? (string) \mb_substr($str, $pos, 1)
        : (string) self::substr($str, $pos, 1, $encoding);
}

/**
 * Prepends UTF-8 BOM character to the string and returns the whole string.
 *
 * INFO: If BOM already existed there, the Input string is returned.
 *
 * EXAMPLE: <code>UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" .
'fòô'</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The input unchanged when string_has_bom() reports a BOM, otherwise bom() followed by the input.</p>
 */
public static function add_bom_to_string(string $str): string
{
    return self::string_has_bom($str) ? $str : self::bom() . $str;
}

/**
 * Returns a copy of the array whose keys are lower- or upper-cased (multi-byte aware).
 *
 * INFO: Every key is cast to string before conversion. If two keys convert to the
 * same string, the later element overwrites the earlier one.
 *
 * @param array<string, mixed> $array    <p>The array to work on</p>
 * @param int                  $case     [optional] <p>Either <strong>CASE_UPPER</strong><br>
 *                                       or <strong>CASE_LOWER</strong> (default); any other value is treated
 *                                       as CASE_LOWER.</p>
 * @param string               $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return array<int|string, mixed>
 *                                  <p>An array with its keys lower- or uppercased; the values are copied as-is.</p>
 */
public static function array_change_key_case(
    array $array,
    int $case = \CASE_LOWER,
    string $encoding = 'UTF-8'
): array {
    // anything that is not CASE_UPPER (including invalid values) means lower-casing
    $to_upper = $case === \CASE_UPPER;

    $return = [];
    // iterate by value: the elements are only copied, a reference would just linger after the loop
    foreach ($array as $key => $value) {
        $key = $to_upper
            ? self::strtoupper((string) $key, $encoding)
            : self::strtolower((string) $key, $encoding);

        $return[$key] = $value;
    }

    return $return;
}

/**
 * Returns the substring between $start and $end, if found, or an empty
 * string. An optional offset may be supplied from which to begin the
 * search for the start string.
 *
 * INFO: An empty string is also returned when $end follows $start immediately.
 *
 * @param string $str
 * @param string $start    <p>Delimiter marking the start of the substring.</p>
 * @param string $end      <p>Delimiter marking the end of the substring.</p>
 * @param int    $offset   [optional] <p>Index from which to begin the search. Default: 0</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function between(
    string $str,
    string $start,
    string $end,
    int $offset = 0,
    string $encoding = 'UTF-8'
): string {
    if ($encoding === 'UTF-8') {
        // fast path: call mbstring directly
        $start_position = \mb_strpos($str, $start, $offset);
        if ($start_position === false) {
            return '';
        }

        // first character after the start delimiter
        $substr_index = $start_position + (int) \mb_strlen($start);
        $end_position = \mb_strpos($str, $end, $substr_index);
        if ($end_position === false || $end_position === $substr_index) {
            return '';
        }

        return (string) \mb_substr($str, $substr_index, $end_position - $substr_index);
    }

    // same algorithm for every other charset, routed through the class' own wrappers
    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    $start_position = self::strpos($str, $start, $offset, $encoding);
    if ($start_position === false) {
        return '';
    }

    $substr_index = $start_position + (int) self::strlen($start, $encoding);
    $end_position = self::strpos($str, $end, $substr_index, $encoding);
    if ($end_position === false || $end_position === $substr_index) {
        return '';
    }

    return (string) self::substr(
        $str,
        $substr_index,
        $end_position - $substr_index,
        $encoding
    );
}

/**
 * Convert binary into a string.
 *
 * The input is read as one big binary number: leading zero bits carry no value and
 * are dropped, the remaining bits are left-padded to whole bytes and every group of
 * eight bits becomes one output byte. Characters other than "0" and "1" are ignored.
 *
 * The conversion works byte by byte, so inputs of any length stay exact and values
 * whose first byte is below 0x10 keep their alignment (e.g. '00001010' gives "\n").
 *
 * INFO: opposite to UTF8::str_to_binary()
 *
 * EXAMPLE: <code>UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'</code>
 *
 * @param string $bin 1|0
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The decoded bytes, or '' when the input is empty or has no set bit.</p>
 */
public static function binary_to_str($bin): string
{
    if (!isset($bin[0])) {
        return '';
    }

    // keep binary digits only, then drop the leading zeros of the number
    $bits = (string) \preg_replace('/[^01]/', '', (string) $bin);
    $bits = \ltrim($bits, '0');
    if ($bits === '') {
        return '';
    }

    // left-pad so the bit string splits into complete bytes
    $padded_length = (int) (\ceil(\strlen($bits) / 8) * 8);
    $bits = \str_pad($bits, $padded_length, '0', \STR_PAD_LEFT);

    $str = '';
    foreach (\str_split($bits, 8) as $byte_bits) {
        $str .= \chr((int) \bindec($byte_bits));
    }

    return $str;
}

/**
 * Returns the UTF-8 Byte Order Mark Character.
 *
 * INFO: take a look at UTF8::$bom for e.g.
UTF-16 and UTF-32 BOM values
 *
 * EXAMPLE: <code>UTF8::bom(); // "\xEF\xBB\xBF"</code>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The three bytes of the UTF-8 Byte Order Mark.</p>
 */
public static function bom(): string
{
    $utf8_bom = "\xef\xbb\xbf";

    return $utf8_bom;
}

/**
 * Thin alias: forwards both arguments unchanged to UTF8::chr_map().
 *
 * @alias of UTF8::chr_map()
 *
 * @param callable $callback
 * @param string   $str
 *
 * @psalm-pure
 *
 * @return string[]
 *
 * @see UTF8::chr_map()
 */
public static function callback($callback, string $str): array
{
    $mapped = self::chr_map($callback, $str);

    return $mapped;
}

/**
 * Returns the single character found at $index (indexes start at 0).
 *
 * @param string $str      <p>The input string.</p>
 * @param int    $index    <p>Position of the character.</p>
 * @param string $encoding [optional] <p>Default is UTF-8</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The character at $index.</p>
 */
public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
{
    // UTF-8 is served by mbstring directly, other charsets by self::substr()
    return $encoding === 'UTF-8'
        ? (string) \mb_substr($str, $index, 1)
        : (string) self::substr($str, $index, 1, $encoding);
}

/**
 * Splits the string into its characters by delegating to self::str_split().
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>An array of chars.</p>
 */
public static function chars(string $str): array
{
    /** @var string[] $characters */
    $characters = self::str_split($str);

    return $characters;
}

/**
 * This method will auto-detect your server environment for UTF-8 support.
*
 * The outcome of every probe is stored in self::$SUPPORT. The probes run only on
 * the first call; later calls return immediately.
 *
 * @return true|null
 *                   <p>true when this call performed the detection, null when it had already been done.</p>
 *
 * @internal <p>You don't need to run it manually, it will be triggered if it's needed.</p>
 */
public static function checkForSupport()
{
    // a previous call already filled self::$SUPPORT
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        return null;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // with mbstring present, pin its internal and regex encodings to UTF-8
    if (self::$SUPPORT['mbstring'] === true) {
        \mb_internal_encoding('UTF-8');
        /** @noinspection UnusedFunctionResultInspection */
        /** @noinspection PhpComposerExtensionStubsInspection */
        \mb_regex_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.ctype.php
    self::$SUPPORT['ctype'] = self::ctype_loaded();

    // http://php.net/manual/en/class.finfo.php
    self::$SUPPORT['finfo'] = self::finfo_loaded();

    // http://php.net/manual/en/book.json.php
    self::$SUPPORT['json'] = self::json_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

    // the polyfill provides mb_internal_encoding() as well, so the encoding is pinned here too
    self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
    if (self::$SUPPORT['symfony_polyfill_used'] === true) {
        \mb_internal_encoding('UTF-8');
        self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
    }

    return true;
}

/**
 * Generates a UTF-8 encoded character from the given code point.
*
 * INFO: opposite to UTF8::ord()
 *
 * EXAMPLE: <code>UTF8::chr(0x2603); // '☃'</code>
 *
 * Results are memoized per code point and encoding in a function-local static cache.
 * Three strategies are tried in order: the byte table for one-byte characters
 * (below U+0080), IntlChar when it is available, and a manual UTF-8 encoder.
 *
 * @param int    $code_point <p>The code point for which to generate a character.</p>
 * @param string $encoding   [optional] <p>Default is UTF-8</p>
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>Multi-byte character; null when $code_point is not an int, is
 *                     zero or negative, or lies above U+10FFFF.</p>
 */
public static function chr($code_point, string $encoding = 'UTF-8')
{
    // init
    /**
     * @psalm-suppress ImpureStaticVariable
     *
     * @var array<string,string>
     */
    static $CHAR_CACHE = [];

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /** @noinspection InArrayCanBeUsedInspection */
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'ISO-8859-1'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - is is only a warning
         */
        \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    // reject anything that is not a positive code point inside the Unicode range;
    // larger values would index past the byte table in the manual encoder below
    if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
        return null;
    }

    $cache_key = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cache_key])) {
        return $CHAR_CACHE[$cache_key];
    }

    // only for "simple"-chars: U+0080 is excluded because it already needs two bytes
    // in UTF-8, the table entry for 0x80 is just a bare continuation byte
    if ($code_point < 0x80) {
        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[$code_point];

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }

    //
    // fallback via "IntlChar"
    //

    if (self::$SUPPORT['intlChar'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $chr = \IntlChar::chr($code_point);

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }

    //
    // fallback via vanilla php
    //

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    if ($code_point <= 0x7FF) {
        // two bytes: 110xxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 6) + 0xC0] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
        // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 12) + 0xE0] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
        // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        /**
         * @psalm-suppress PossiblyNullArrayAccess
         */
        $chr = self::$CHR[($code_point >> 18) + 0xF0] .
               self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
               self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
               self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
        $chr = self::encode($encoding, $chr);
    }

    return $CHAR_CACHE[$cache_key] = $chr;
}

/**
 * Applies callback to all characters of a string.
*
 * EXAMPLE: <code>UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']</code>
 *
 * @param callable $callback <p>The callback function, invoked once per character.</p>
 * @param string   $str      <p>UTF-8 string to run callback on.</p>
 *
 * @psalm-pure
 *
 * @return string[]
 *                  <p>The outcome of the callback, as array.</p>
 */
public static function chr_map($callback, string $str): array
{
    $characters = self::str_split($str);

    return \array_map($callback, $characters);
}

/**
 * Lists, character by character, how many bytes each character of the string occupies.
 *
 * 1 byte => U+0000  - U+007F
 * 2 byte => U+0080  - U+07FF
 * 3 byte => U+0800  - U+FFFF
 * 4 byte => U+10000 - U+10FFFF
 *
 * EXAMPLE: <code>UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]</code>
 *
 * @param string $str <p>The original unicode string.</p>
 *
 * @psalm-pure
 *
 * @return int[]
 *               <p>An array of byte lengths of each character; empty for an empty string.</p>
 */
public static function chr_size_list(string $str): array
{
    if ($str === '') {
        return [];
    }

    // under mbstring function overloading, bytes are counted via mb_strlen() with an
    // 8-bit charset; otherwise the native strlen() is used
    $count_bytes = self::$SUPPORT['mbstring_func_overload'] === true
        ? static function (string $data): int {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strlen($data, 'CP850'); // 8-BIT
        }
        : '\strlen';

    return \array_map($count_bytes, self::str_split($str));
}

/**
 * Get a decimal code representation of a specific character.
*
 * INFO: opposite to UTF8::decimal_to_chr()
 *
 * EXAMPLE: <code>UTF8::chr_to_decimal('§'); // 0xa7</code>
 *
 * When iconv is available the character is converted to UCS-4LE and unpacked;
 * otherwise (or when iconv fails) the UTF-8 bytes are decoded by hand.
 *
 * @param string $char <p>The input character.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <p>The code point of the (first) character, 0 for an empty string.</p>
 */
public static function chr_to_decimal(string $char): int
{
    // nothing to decode; without this guard both code paths below would fail
    if ($char === '') {
        return 0;
    }

    if (self::$SUPPORT['iconv'] === true) {
        $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
        if ($chr_tmp !== false) {
            /** @noinspection OffsetOperationsInspection */
            return \unpack('V', $chr_tmp)[1];
        }
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
        // 0xxxxxxx
        return $code;
    }

    // the lead byte announces the sequence length; strip its marker bits
    if (($code & 0xe0) === 0xc0) {
        // 110xxxxx
        $bytes = 2;
        $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
        // 1110xxxx
        $bytes = 3;
        $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
        // 11110xxx
        $bytes = 4;
        $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; ++$i) {
        // truncated sequence: stop instead of reading past the end of the string
        if (!isset($char[$i - 1])) {
            break;
        }

        // 10xxxxxx
        $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
}

/**
 * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
*
 * EXAMPLE: <code>UTF8::chr_to_hex('§'); // U+00a7</code>
 *
 * @param int|string $char   <p>The input character</p>
 * @param string     $prefix [optional] <p>Text placed in front of the hex digits.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The code point encoded as U+xxxx, or '' for an empty input.</p>
 */
public static function chr_to_hex($char, string $prefix = 'U+'): string
{
    if ($char === '') {
        return '';
    }

    // this one literal is mapped to the empty string before the lookup
    // NOTE(review): the literal may have been altered when this listing was extracted - confirm against the repository
    $lookup = $char === '�' ? '' : $char;

    $code_point = self::ord((string) $lookup);

    return self::int_to_hex($code_point, $prefix);
}

/**
 * alias for "UTF8::chr_to_decimal()"
 *
 * @param string $chr
 *
 * @psalm-pure
 *
 * @return int
 *
 * @see        UTF8::chr_to_decimal()
 * @deprecated <p>please use "UTF8::chr_to_decimal()"</p>
 */
public static function chr_to_int(string $chr): int
{
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
}

/**
 * Cuts a string into chunks of $chunk_length characters and glues them together with $end.
 *
 * INFO: $end is placed between the chunks only, nothing is appended after the last one.
 *
 * EXAMPLE: <code>UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"</code>
 *
 * @param string $body         <p>The original string to be split.</p>
 * @param int    $chunk_length [optional] <p>The maximum character length of a chunk.</p>
 * @param string $end          [optional] <p>The character(s) to be inserted between the chunks.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The chunked string.</p>
 */
public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
{
    $chunks = self::str_split($body, $chunk_length);

    return \implode($end, $chunks);
}

/**
 * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
*
 * EXAMPLE: <code>UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf", true, true); // '„Abcdef …” — 😃 - Düsseldorf'</code>
 *
 * The byte-level cleanup always runs; the optional steps are then applied in this
 * order: diamond question mark, invisible characters, whitespace, MS Word
 * characters, BOM.
 *
 * @param string $str                                     <p>The string to be sanitized.</p>
 * @param bool   $remove_bom                              [optional] <p>Set to true, if you need to remove
 *                                                        UTF-BOM.</p>
 * @param bool   $normalize_whitespace                    [optional] <p>Set to true, if you need to normalize the
 *                                                        whitespace.</p>
 * @param bool   $normalize_msword                        [optional] <p>Set to true, if you need to normalize MS
 *                                                        Word chars e.g.: "…" => "..."</p>
 * @param bool   $keep_non_breaking_space                 [optional] <p>Set to true, to keep non-breaking-spaces,
 *                                                        in combination with $normalize_whitespace</p>
 * @param bool   $replace_diamond_question_mark           [optional] <p>Set to true, if you need to remove diamond
 *                                                        question mark e.g.: "�"</p>
 * @param bool   $remove_invisible_characters             [optional] <p>Set to false, if you do not want to remove
 *                                                        invisible characters e.g.: "\0"</p>
 * @param bool   $remove_invisible_characters_url_encoded [optional] <p>Set to true, if you also want to remove
 *                                                        invisible url encoded characters e.g.: "%0B"<br>
 *                                                        Only used together with $remove_invisible_characters.<br>
 *                                                        WARNING: maybe contains false-positives e.g.
 *                                                        aa%0Baa -> aaaa.
 *                                                        </p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>An clean UTF-8 encoded string.</p>
 *
 * @noinspection PhpTooManyParametersInspection
 */
public static function clean(
    string $str,
    bool $remove_bom = false,
    bool $normalize_whitespace = false,
    bool $normalize_msword = false,
    bool $keep_non_breaking_space = false,
    bool $replace_diamond_question_mark = false,
    bool $remove_invisible_characters = true,
    bool $remove_invisible_characters_url_encoded = false
): string {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Extended (x) pattern: each "#" comment runs to the end of its line, so every
    // alternative has to stay on a line of its own. Group 1 captures runs of
    // well-formed sequences; stray bytes match groups 2/3 and are dropped because
    // only "$1" is written back.
    $regex = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    /** @noinspection NotOptimalRegularExpressionsInspection */
    $str = (string) \preg_replace($regex, '$1', $str);

    if ($replace_diamond_question_mark) {
        $str = self::replace_diamond_question_mark($str);
    }

    if ($remove_invisible_characters) {
        $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
    }

    if ($normalize_whitespace) {
        $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword) {
        $str = self::normalize_msword($str);
    }

    if ($remove_bom) {
        $str = self::remove_bom($str);
    }

    return $str;
}

/**
 * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
*
 * EXAMPLE: <code>UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - Düsseldorf"); // '„Abcdef …” — 😃 - Düsseldorf'</code>
 *
 * The input is first passed through fix_simple_utf8() and then through clean()
 * with a fixed set of options (see the call below).
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The cleaned string, '' for an empty input.</p>
 */
public static function cleanup($str): string
{
    $input = (string) $str;
    if ($input === '') {
        return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($input);

    // remove all none UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0") - clean() does this by default
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    return self::clean(
        $repaired,
        true,  // $remove_bom
        true,  // $normalize_whitespace
        false, // $normalize_msword
        true,  // $keep_non_breaking_space
        true   // $replace_diamond_question_mark
    );
}

/**
 * Accepts a string or a array of strings and returns an array of Unicode code points.
 *
 * INFO: opposite to UTF8::string()
 *
 * EXAMPLE: <code>
 * UTF8::codepoints('κöñ'); // array(954, 246, 241)
 * // ... OR ...
* UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
 * </code>
 *
 * @param string|string[] $arg         <p>A UTF-8 encoded string or an array of such strings.</p>
 * @param bool            $use_u_style <p>If True, will return code points in U+xxxx format,
 *                                     default, code points will be returned as integers.</p>
 *
 * @psalm-pure
 *
 * @return int[]|string[]
 *                        <p>
 *                        The array of code points:<br>
 *                        int[] for $use_u_style === false<br>
 *                        string[] for $use_u_style === true<br>
 *                        An empty array when $arg is neither a string nor an array, or has no elements.
 *                        </p>
 */
public static function codepoints($arg, bool $use_u_style = false): array
{
    // a string is split into characters first, an array is taken as given
    $characters = \is_string($arg) ? self::str_split($arg) : $arg;

    /**
     * @psalm-suppress DocblockTypeContradiction
     */
    if (!\is_array($characters) || $characters === []) {
        return [];
    }

    $code_points = \array_map([self::class, 'ord'], $characters);
    if (!$use_u_style) {
        return $code_points;
    }

    return \array_map([self::class, 'int_to_hex'], $code_points);
}

/**
 * Replaces every run of whitespace with a single space and trims the result.
 * Uses mb_ereg_replace() when mbstring is available and self::regex_replace()
 * otherwise; both are given the "[[:space:]]+" pattern.
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with trimmed $str and condensed whitespace.</p>
 */
public static function collapse_whitespace(string $str): string
{
    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);
    } else {
        $collapsed = self::regex_replace($str, '[[:space:]]+', ' ');
    }

    return \trim($collapsed);
}

/**
 * Returns count of characters used in a string.
*
 * EXAMPLE: <code>UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)</code>
 *
 * @param string $str                     <p>The input string.</p>
 * @param bool   $clean_utf8              [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param bool   $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use "mb_*" functions;
 *                                        the flag is forwarded to str_split().</p>
 *
 * @psalm-pure
 *
 * @return int[]
 *               <p>An associative array of Character as keys and
 *               their count as values.</p>
 */
public static function count_chars(
    string $str,
    bool $clean_utf8 = false,
    bool $try_to_use_mb_functions = true
): array {
    return \array_count_values(
        self::str_split(
            $str,
            1,
            $clean_utf8,
            $try_to_use_mb_functions
        )
    );
}

/**
 * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
 *
 * EXAMPLE: <code>UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar</code>
 *
 * copy&past from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
 *
 * Steps: clean the input (or generate a unique fallback), optionally strip tags and
 * lower-case, apply the $filter replacements, drop every character that is not
 * allowed in an identifier, fix an invalid first character and trim hyphens.
 *
 * @param string   $str        <p>INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically</p>
 * @param string[] $filter     <p>Map of "search => replacement" applied via str_replace().</p>
 * @param bool     $strip_tags <p>Run strip_tags() on the input first.</p>
 * @param bool     $strtolower <p>Lower-case the identifier (multi-byte aware).</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @phpstan-param array<string,string> $filter
 */
public static function css_identifier(
    string $str = '',
    array $filter = [
        ' ' => '-',
        '/' => '-',
        '[' => '',
        ']' => '',
    ],
    bool $strip_tags = false,
    bool $strtolower = true
): string {
    // We could also use strtr() here but its much slower than str_replace(). In
    // order to keep '__' to stay '__' we first replace it with a different
    // placeholder after checking that it is not defined as a filter.
    $double_underscore_replacements = 0;

    // Fallback ...
    if (\trim($str) === '') {
        $str = \uniqid('auto-generated-css-class', true);
    } else {
        $str = self::clean($str);
    }

    if ($strip_tags) {
        $str = \strip_tags($str);
    }

    if ($strtolower) {
        // identifiers may keep characters >= U+00A1 (see the pattern below), so the
        // lower-casing has to be multi-byte aware; strtolower() works on single bytes
        $str = \mb_strtolower($str, 'UTF-8');
    }

    if (!isset($filter['__'])) {
        $str = \str_replace('__', '##', $str, $double_underscore_replacements);
    }

    /* @noinspection ArrayValuesMissUseInspection */
    $str = \str_replace(\array_keys($filter), \array_values($filter), $str);
    // Replace temporary placeholder '##' with '__' only if the original
    // $identifier contained '__'.
    if ($double_underscore_replacements > 0) {
        $str = \str_replace('##', '__', $str);
    }

    // Valid characters in a CSS identifier are:
    // - the hyphen (U+002D)
    // - 0-9 (U+0030 - U+0039)
    // - A-Z (U+0041 - U+005A)
    // - the underscore (U+005F)
    // - a-z (U+0061 - U+007A)
    // - ISO 10646 characters U+00A1 and higher
    // We strip out any character not in the above list.
    $str = (string) \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);
    // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
    $str = (string) \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);

    return \trim($str, '-');
}

/**
 * Remove css media-queries.
 *
 * @param string $str <p>CSS source.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The input with every "@media ... { ... }" block matched by the pattern removed.</p>
 */
public static function css_stripe_media_queries(string $str): string
{
    return (string) \preg_replace(
        '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU',
        '',
        $str
    );
}

/**
 * Checks whether ctype is available on the server.
*
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function ctype_loaded(): bool
    {
        $ctype_is_loaded = \extension_loaded('ctype');

        return $ctype_is_loaded;
    }

    /**
     * Turn a decimal code point into the matching UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity and decoded through
     * UTF8::html_entity_decode().
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE: <code>UTF8::decimal_to_chr(931); // 'Σ'</code>
     *
     * @param int|string $int <p>The decimal code point.</p>
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $numeric_entity = '&#' . $int . ';';

        return self::html_entity_decode($numeric_entity, \ENT_QUOTES | \ENT_HTML5);
    }

    /**
     * Decode a MIME header field via iconv_mime_decode().
     *
     * Decoding runs with ICONV_MIME_DECODE_CONTINUE_ON_ERROR.
     *
     * @param string $str      <p>The encoded header field.</p>
     * @param string $encoding [optional] <p>Target charset; anything except "UTF-8" and "CP850"
     *                         is passed through normalize_encoding() first.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A decoded MIME field on success,
     *                      or false if an error occurs during the decoding.</p>
     */
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1 <p>e.g.
DE</p>
     *
     * @return string
     *                <p>Emoji or empty string on error. Anything that is not
     *                exactly two ASCII letters (any case) counts as an error.</p>
     */
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        // Only the letters A-Z map onto regional indicator symbols. The code
        // below works on single bytes, so digits, punctuation or multi-byte
        // characters would end up as unrelated code points: reject them.
        if (\preg_match('/\A[A-Za-z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        $flagOffset = 0x1F1E6; // U+1F1E6 REGIONAL INDICATOR SYMBOL LETTER A
        $asciiOffset = 0x41; // "A"

        // Each letter is shifted from the ASCII range into the regional
        // indicator range; the two symbols together form the flag.
        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }

    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     * </code>
     *
     * @param string $str                            <p>The input string.</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               When <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode".</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Fills the emoji lookup caches used below.
        self::initEmojiData();

        if ($use_reversible_string_mappings) {
            return (string) \str_replace(
                (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE,
                (array) self::$EMOJI_VALUES_CACHE,
                $str
            );
        }

        return (string) \str_replace(
            (array) self::$EMOJI_KEYS_CACHE,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }

    /**
     * Encode a string with emoji chars into a non-emoji string.
*
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE: <code>
     * UTF8::emoji_encode('foo 👹', false); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     * </code>
     *
     * @param string $str                            <p>The input string</p>
     * @param bool   $use_reversible_string_mappings [optional] <p>
     *                                               when <b>TRUE</b>, we use a reversible string mapping
     *                                               between "emoji_encode" and "emoji_decode"</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        // Fills the emoji lookup caches used below.
        self::initEmojiData();

        // The emoji values are always the search side; only the replacement
        // keys depend on the requested mapping.
        $replacement_keys = $use_reversible_string_mappings
            ? (array) self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : (array) self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            $replacement_keys,
            $str
        );
    }

    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO:  This function will also try to fix broken / double encoding,
     *        so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE: <code>
     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     * </code>
     *
     * @param string $to_encoding                   <p>e.g.
'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
     * @param string $str                           <p>The input string</p>
     * @param bool   $auto_detect_the_from_encoding [optional] <p>When true, the source encoding is
     *                                              auto-detected and a detected value replaces
     *                                              $from_encoding; when false, a non-empty
     *                                              $from_encoding is used as given.</p>
     * @param string $from_encoding                 [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                              A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if the target is "JSON" and json_encode() fails
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // These two names are used as they are, everything else gets normalized.
        $names_used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($to_encoding, $names_used_as_is, true)) {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && !\in_array($from_encoding, $names_used_as_is, true)) {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Nothing to convert when source and target are the same known encoding.
        if ($to_encoding && $from_encoding && $from_encoding === $to_encoding) {
            return $str;
        }

        // --- pseudo encodings: JSON, BASE64, HTML-ENTITIES ---

        if ($to_encoding === 'JSON') {
            $json = self::json_encode($str);
            if ($json === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $json;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict base64_decode() yields false for invalid
            // input and that value is passed on unchecked - confirm intended.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // --- source encoding detection ---

        $detected_encoding = false;
        if ($auto_detect_the_from_encoding || !$from_encoding) {
            $detected_encoding = self::str_detect_encoding($str);
        }

        if ($detected_encoding !== false) {
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $detected_encoding;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        if (!$from_encoding || $from_encoding === $to_encoding) {
            return $str;
        }

        // --- conversions handled by the library itself ---

        if (
            $to_encoding === 'UTF-8'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'ISO-8859-1'], true)
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            \in_array($from_encoding, ['WINDOWS-1252', 'UTF-8'], true)
        ) {
            return self::to_iso8859($str);
        }

        // --- generic conversion: mbstring first, iconv afterwards ---

        if (
            !\in_array($to_encoding, ['UTF-8', 'ISO-8859-1', 'WINDOWS-1252'], true)
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $converted = \mb_convert_encoding($str, $to_encoding, $from_encoding);

            // A falsy result falls through to iconv below.
            if ($converted) {
                \assert(\is_string($converted));

                return $converted;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $iconv_result = @\iconv($from_encoding, $to_encoding, $str);

        // Last resort: hand back the unconverted input.
        return $iconv_result !== false ? $iconv_result : $str;
    }

    /**
     * Encode a MIME header field via iconv_mime_encode(), using an empty field name.
     *
     * @param string $str               <p>The header value to encode.</p>
     * @param string $from_charset      [optional] <p>Set the input charset.</p>
     * @param string $to_charset        [optional] <p>Set the output charset.</p>
     * @param string $transfer_encoding [optional] <p>Set the transfer encoding.</p>
     * @param string $linefeed          [optional] <p>Set the used linefeed.</p>
     * @param int    $indent            [optional] <p>Set the max length indent.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>An encoded MIME field on success,
     *                      or false if an error occurs during the encoding.</p>
     */
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        $names_used_as_is = ['UTF-8', 'CP850'];

        if (!\in_array($from_charset, $names_used_as_is, true)) {
            $from_charset = self::normalize_encoding($from_charset, 'UTF-8');
        }

        if (!\in_array($to_charset, $names_used_as_is, true)) {
            $to_charset = self::normalize_encoding($to_charset, 'UTF-8');
        }

        $mime_options = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $from_charset,
            'output-charset'   => $to_charset,
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $mime_options);
    }

    /**
     * Create an extract from a sentence, so if the search-string was found, it is tried to be centered in the output.
     *
     * Cut positions are looked up at the next " " or "." character; skipped text
     * is marked with $replacer_for_skipped_text.
     *
     * @param string   $str                       <p>The input string.</p>
     * @param string   $search                    <p>The searched string.</p>
     * @param int|null $length                    [optional] <p>Default: null === text->length / 2</p>
     * @param string   $replacer_for_skipped_text [optional] <p>Default: …</p>
     * @param string   $encoding                  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (!\in_array($encoding, ['UTF-8', 'CP850'], true)) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // UTF-8 goes straight to the mb_* functions, every other encoding
        // through the helper methods of this class.
        $is_utf8 = $encoding === 'UTF-8';

        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // ---- no search term: cut after roughly $length characters ----
        if ($search === '') {
            $end = 0;
            if ($length > 0) {
                $string_length = $is_utf8
                    ? (int) \mb_strlen($str)
                    : (int) self::strlen($str, $encoding);
                $end = \min($length - 1, $string_length);
            }

            $pos = $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $end), \mb_strpos($str, '.', $end))
                : (int) \min(self::strpos($str, ' ', $end, $encoding), self::strpos($str, '.', $end, $encoding));

            if (!$pos) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos)
                : self::substr($str, 0, $pos, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        // ---- search term given: try to center the extract around it ----
        if ($is_utf8) {
            $word_position = (int) \mb_stripos($str, $search);
            $search_length = (int) \mb_strlen($search);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $search_length = (int) self::strlen($search, $encoding);
        }
        $half_side = (int) ($word_position - $length / 2 + $search_length / 2);

        // Start at the last " " or "." in front of the centered window.
        $pos_start = 0;
        if ($half_side > 0) {
            $half_text = $is_utf8
                ? \mb_substr($str, 0, $half_side)
                : self::substr($str, 0, $half_side, $encoding);

            if ($half_text !== false) {
                $pos_start = $is_utf8
                    ? (int) \max(\mb_strrpos($half_text, ' '), \mb_strrpos($half_text, '.'))
                    : (int) \max(self::strrpos($half_text, ' ', 0, $encoding), self::strrpos($half_text, '.', 0, $encoding));
            }
        }

        $text_length = (int) self::strlen($str, $encoding);

        // Search term at position 0 / not found, or window not past the
        // beginning: extract from the start of the text.
        if (!$word_position || $half_side <= 0) {
            $offset = \min($length - 1, $text_length);

            $pos_end = $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding));

            if (!$pos_end) {
                return $str;
            }

            $str_sub = $is_utf8
                ? \mb_substr($str, 0, $pos_end)
                : self::substr($str, 0, $pos_end, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
        }

        $offset = \min($pos_start + $length - 1, $text_length);

        // Length of the extract, measured from $pos_start.
        $pos_end = (
            $is_utf8
                ? (int) \min(\mb_strpos($str, ' ', $offset), \mb_strpos($str, '.', $offset))
                : (int) \min(self::strpos($str, ' ', $offset, $encoding), self::strpos($str, '.', $offset, $encoding))
        ) - $pos_start;

        // No usable end position: take everything from $pos_start to the end.
        if ($pos_end <= 0) {
            $str_sub = $is_utf8
                ? \mb_substr($str, $pos_start, (int) \mb_strlen($str))
                : self::substr($str, $pos_start, $text_length, $encoding);

            if ($str_sub === false) {
                return '';
            }

            return $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
        }

        $str_sub = $is_utf8
            ? \mb_substr($str, $pos_start, $pos_end)
            : self::substr($str, $pos_start, $pos_end, $encoding);

        if ($str_sub === false) {
            return '';
        }

        return $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
    }

    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: <code>UTF8::file_get_contents('utf16le.txt'); // ...</code>
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
*
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string        $filename         <p>
     *                                        Name of the file to read. NUL bytes and tags are removed and
     *                                        quotes are entity-encoded before the name is used.
     *                                        </p>
     * @param bool          $use_include_path [optional] <p>
     *                                        Prior to PHP 5, this parameter is called
     *                                        use_include_path and is a bool.
     *                                        As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
     *                                        to trigger include path
     *                                        search.
     *                                        </p>
     * @param resource|null $context          [optional] <p>
     *                                        A valid context resource created with
     *                                        stream_context_create. If you don't need to use a
     *                                        custom context, you can skip this parameter by null.
     *                                        </p>
     * @param int|null      $offset           [optional] <p>
     *                                        The offset where the reading starts.
     *                                        </p>
     * @param int|null      $max_length       [optional] <p>
     *                                        Maximum length of data read. The default is to read until end
     *                                        of file is reached.
     *                                        </p>
     * @param int           $timeout          <p>The time in seconds for the timeout. Only applied (as "http"
     *                                        context option) when no $context is given.</p>
     * @param bool          $convert_to_utf8  <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
     *                                        some files, because they used non default utf-8 chars. Binary files
     *                                        like images or pdf will not be converted.</p>
     * @param string        $from_encoding    [optional] <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.<br>
     *                                        A empty string will trigger the autodetect anyway.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The function returns the read data as string or <b>false</b> on failure.</p>
     *
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // FILTER_SANITIZE_STRING is deprecated as of PHP 8.1, so the filename
        // is sanitized by hand with the same intent: drop NUL bytes and tags,
        // then entity-encode single and double quotes.
        $sanitized_filename = \preg_replace('/\x00|<[^>]*>?/', '', $filename);
        if ($sanitized_filename === null) {
            return false;
        }
        $filename = \str_replace(["'", '"'], ['&#39;', '&#34;'], $sanitized_filename);

        // Without a caller supplied context the timeout is set for http streams.
        if ($timeout && $context === null) {
            $context = \stream_context_create(
                [
                    'http' => [
                        'timeout' => $timeout,
                    ],
                ]
            );
        }

        if ($offset === null) {
            $offset = 0;
        }

        // The length argument is only passed on when the caller limited the read.
        if (\is_int($max_length)) {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset, $max_length);
        } else {
            $data = \file_get_contents($filename, $use_include_path, $context, $offset);
        }

        // return false on error
        if ($data === false) {
            return false;
        }

        if ($convert_to_utf8) {
            // Convert non-binary data, and binary-looking data that is
            // recognized as UTF-16 / UTF-32.
            if (
                !self::is_binary($data, true)
                ||
                self::is_utf16($data, false) !== false
                ||
                self::is_utf32($data, false) !== false
            ) {
                $data = self::encode('UTF-8', $data, false, $from_encoding);
                $data = self::cleanup($data);
            }
        }

        return $data;
    }

    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
*
     * EXAMPLE: <code>UTF8::file_has_bom('utf8_with_bom.txt'); // true</code>
     *
     * @param string $file_path <p>Path to a valid file.</p>
     *
     * @throws \RuntimeException if file_get_contents() returned false
     *
     * @return bool
     *              <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise</p>
     *
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $content = \file_get_contents($file_path);

        if ($content !== false) {
            return self::string_has_bom($content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }

    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Arrays and objects are walked and every element is filtered recursively.
     * For strings: line endings are normalized when a "\r" is present and
     * non-ASCII content is brought into the requested normalization form (if
     * that yields nothing, the string is re-encoded through UTF8::encode()).
     * A string that starts with a combining mark gets $leading_combining
     * prepended. All other types are returned untouched.
     *
     * EXAMPLE: <code>UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')</code>
     *
     * @param array|object|string $var                <p>The value to filter.</p>
     * @param int                 $normalization_form <p>One of the \Normalizer form constants.</p>
     * @param string              $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $type = \gettype($var);

        if ($type === 'object' || $type === 'array') {
            // Elements are taken by reference so they are replaced in place.
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);
        } elseif ($type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            if (!ASCII::is_ascii($var)) {
                // '-' is just a non-empty marker for "was already normalized".
                $normalized = '-';

                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);

                    // An unusable normalizer result triggers a re-encode.
                    $var = isset($normalized[0])
                        ? $normalized
                        : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));
                if (
                    $var[0] >= "\x80"
                    &&
                    isset($normalized[0], $leading_combining[0])
                    &&
                    \preg_match('/^\\p{Mn}/u', $var)
                ) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }

    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING)); // 'bar'
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input.php
     *
     * @param int            $type          <p>
     *                                      One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                                      <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                                      <b>INPUT_ENV</b>.
     *                                      </p>
     * @param string         $variable_name <p>
     *                                      Name of a variable to get.
     *                                      </p>
     * @param int            $filter        [optional] <p>
     *                                      The ID of the filter to apply. The
     *                                      manual page lists the available filters.
     *                                      </p>
     * @param int|int[]|null $options       [optional] <p>
     *                                      Associative array of options or bitwise disjunction of flags. If filter
     *                                      accepts options, flags can be provided in "flags" field of array.
     *                                      </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               Value of the requested variable on success, <b>FALSE</b> if the filter fails, or <b>NULL</b> if the
     *               <i>variable_name</i> variable is not set. If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
     *               returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
* </p>
     */
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $options_given = $options !== null && \func_num_args() >= 4;

        // Options are only forwarded when the caller really supplied them.
        $raw_value = $options_given
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw_value);
    }

    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_SANITIZE_STRING')); // array('bar')
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-input-array.php
     *
     * @param int        $type       <p>
     *                               One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
     *                               <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
     *                               <b>INPUT_ENV</b>.
     *                               </p>
     * @param array|null $definition [optional] <p>
     *                               An array defining the arguments. A valid key is a string
     *                               containing a variable name and a valid value is either a filter type, or an array
     *                               optionally specifying the filter, flags and options. If the value is an
     *                               array, valid keys are filter which specifies the
     *                               filter type,
     *                               flags which specifies any flags that apply to the
     *                               filter, and options which specifies any options that
     *                               apply to the filter. See the example below for a better understanding.
     *                               </p>
     *                               <p>
     *                               This parameter can be also an integer holding a filter constant. Then all values in the
     *                               input array are filtered by this filter.
     *                               </p>
     * @param bool       $add_empty  [optional] <p>
     *                               Add missing keys as <b>NULL</b> to the return value.
* </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it returns <b>FALSE</b> if the variable
     *               is not set and <b>NULL</b> if the filter fails.
     *               </p>
     */
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $definition_given = $definition !== null && \func_num_args() >= 2;

        // Without a definition the plain one-argument call is used.
        $raw_values = $definition_given
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($raw_values);
    }

    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: <code>UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false</code>
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable <p>
     *                                        Value to filter.
     *                                        </p>
     * @param int                   $filter   [optional] <p>
     *                                        The ID of the filter to apply. The
     *                                        manual page lists the available filters.
     *                                        </p>
     * @param int|int[]|null        $options  [optional] <p>
     *                                        Associative array of options or bitwise disjunction of flags. If filter
     *                                        accepts options, flags can be provided in "flags" field of array. For
     *                                        the "callback" filter, callable type should be passed. The
     *                                        callback must accept one argument, the value to be filtered, and return
     *                                        the value after filtering/sanitizing it.
* </p>
     *                                        <p>
     *                                        <code>
     *                                        // for filters that accept options, use this format
     *                                        $options = array(
     *                                        'options' => array(
     *                                        'default' => 3, // value to return if the filter fails
     *                                        // other options here
     *                                        'min_range' => 0
     *                                        ),
     *                                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
     *                                        );
     *                                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
     *                                        // for filter that only accept flags, you can pass them directly
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
     *                                        // for filter that only accept flags, you can also pass as an array
     *                                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
     *                                        array('flags' => FILTER_NULL_ON_FAILURE));
     *                                        // callback validate filter
     *                                        function foo($value)
     *                                        {
     *                                        // Expected format: Surname, GivenNames
     *                                        if (strpos($value, ", ") === false) return false;
     *                                        list($surname, $givennames) = explode(", ", $value, 2);
     *                                        $empty = (empty($surname) || empty($givennames));
     *                                        $notstrings = (!is_string($surname) || !is_string($givennames));
     *                                        if ($empty || $notstrings) {
     *                                        return false;
     *                                        } else {
     *                                        return $value;
     *                                        }
     *                                        }
     *                                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
     *                                        </code>
     *                                        </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>The filtered data, or <b>FALSE</b> if the filter fails.</p>
     */
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $options_passed = \func_num_args() >= 3;

        // The options argument is forwarded whenever it was passed, even as null.
        $filtered = $options_passed
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }

    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting
from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE: <code>
     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'foo@example.com'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@example.com']
     * </code>
     *
     * @see http://php.net/manual/en/function.filter-var-array.php
     *
     * @param array<mixed>   $data       <p>
     *                                   An array with string keys containing the data to filter.
     *                                   </p>
     * @param array|int|null $definition [optional] <p>
     *                                   An array defining the arguments. A valid key is a string
     *                                   containing a variable name and a valid value is either a
     *                                   filter type, or an
     *                                   array optionally specifying the filter, flags and options.
     *                                   If the value is an array, valid keys are filter
     *                                   which specifies the filter type,
     *                                   flags which specifies any flags that apply to the
     *                                   filter, and options which specifies any options that
     *                                   apply to the filter. See the example above for a better understanding.
     *                                   </p>
     *                                   <p>
     *                                   This parameter can be also an integer holding a filter constant. Then all values
     *                                   in the input array are filtered by this filter.
     *                                   </p>
     * @param bool           $add_empty  [optional] <p>
     *                                   Add missing keys as <b>NULL</b> to the return value.
     *                                   </p>
     *
     * @psalm-pure
     *
     * @return mixed
     *               <p>
     *               An array containing the values of the requested variables on success, or <b>FALSE</b> on failure.
     *               An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if the variable is not
     *               set.
     *               </p>
     */
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = \func_num_args() >= 2;

        // Forward the optional arguments only when at least $definition was passed.
        $filtered = $has_definition
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        // Post-process the native result through the class' own filter() helper.
        return self::filter($filtered);
    }

    /**
     * Checks whether the "finfo" class exists on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
     */
    public static function finfo_loaded(): bool
    {
        return \class_exists('finfo');
    }

    /**
     * Returns the first $n characters of the string.
     *
     * An empty input or a non-positive $n yields an empty string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the start.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mb_substr(); every other charset is delegated to self::substr().
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }

    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
*
     * EXAMPLE: <code>UTF8::fits_inside('κόσμε', 4); // false</code>
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>TRUE</strong> if string is less than or equal to $box_size, <strong>FALSE</strong> otherwise.</p>
     */
    public static function fits_inside(string $str, int $box_size): bool
    {
        $length = (int) self::strlen($str);

        return $length <= $box_size;
    }

    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: <code>UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'</code>
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * The replacement map is loaded once via self::getData('utf8_fix') and then kept
     * in function-level static caches.
     *
     * @param string $str <p>The input string</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $search_cache = null;

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<mixed>|null
         */
        static $replace_cache = null;

        if ($search_cache === null) {
            // lazy-load the shared mapping the first time it is needed
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $search_cache = \array_keys(self::$BROKEN_UTF8_FIX);
            $replace_cache = self::$BROKEN_UTF8_FIX;
        }

        \assert(\is_array($replace_cache));

        return \str_replace($search_cache, $replace_cache, $str);
    }

    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: <code>UTF8::fix_utf8('FÃÂ©dÃ©ration'); // 'Fédération'</code>
     *
     * Arrays are processed element by element (recursively); strings are decoded and
     * re-encoded until the result no longer changes.
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            // break the reference so later writes cannot touch the last element
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $current = (string) $str;
        $previous = '';
        // repeat until one more decode/encode round-trip leaves the string unchanged
        while ($previous !== $current) {
            $previous = $current;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $current = self::to_utf8(
                self::utf8_decode($current, true)
            );
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $current;
    }

    /**
     * Get character of a specific character.
*
     * EXAMPLE: <code>UTF8::getCharDirection('ا'); // 'RTL'</code>
     *
     * When "IntlChar" support is flagged in self::$SUPPORT, its direction class is used first;
     * if that class is neither in the LTR nor in the RTL list below, or IntlChar is not
     * available, the code point is looked up in a built-in table of right-to-left ranges.
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *                <p>'RTL' or 'LTR'.</p>
     */
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $char_direction = [
                'RTL' => [1, 13, 14, 15, 21],
                'LTR' => [0, 11, 12, 20],
            ];

            if (\in_array($intl_direction, $char_direction['LTR'], true)) {
                return 'LTR';
            }

            if (\in_array($intl_direction, $char_direction['RTL'], true)) {
                return 'RTL';
            }
        }

        // the class is final, so "self::" is used like everywhere else in this file
        $c = self::chr_to_decimal($char);

        // everything outside of the overall span of the table is left-to-right
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        // inclusive [first, last] code point ranges that are right-to-left
        $rtl_ranges = [
            [0x5be, 0x5be],
            [0x5c0, 0x5c0],
            [0x5c3, 0x5c3],
            [0x5c6, 0x5c6],
            [0x5d0, 0x5ea],
            [0x5f0, 0x5f4],
            [0x608, 0x608],
            [0x60b, 0x60b],
            [0x60d, 0x60d],
            [0x61b, 0x61b],
            [0x61e, 0x64a],
            [0x66d, 0x66f],
            [0x671, 0x6d5],
            [0x6e5, 0x6e6],
            [0x6ee, 0x6ef],
            [0x6fa, 0x70d],
            [0x710, 0x710],
            [0x712, 0x72f],
            [0x74d, 0x7a5],
            [0x7b1, 0x7b1],
            [0x7c0, 0x7ea],
            [0x7f4, 0x7f5],
            [0x7fa, 0x7fa],
            [0x800, 0x815],
            [0x81a, 0x81a],
            [0x824, 0x824],
            [0x828, 0x828],
            [0x830, 0x83e],
            [0x840, 0x858],
            [0x85e, 0x85e],
            [0x200f, 0x200f],
            [0xfb1d, 0xfb1d],
            [0xfb1f, 0xfb28],
            [0xfb2a, 0xfb36],
            [0xfb38, 0xfb3c],
            [0xfb3e, 0xfb3e],
            [0xfb40, 0xfb41],
            [0xfb43, 0xfb44],
            [0xfb46, 0xfbc1],
            [0xfbd3, 0xfd3d],
            [0xfd50, 0xfd8f],
            [0xfd92, 0xfdc7],
            [0xfdf0, 0xfdfc],
            [0xfe70, 0xfe74],
            [0xfe76, 0xfefc],
            [0x10800, 0x10805],
            [0x10808, 0x10808],
            [0x1080a, 0x10835],
            [0x10837, 0x10838],
            [0x1083c, 0x1083c],
            [0x1083f, 0x10855],
            [0x10857, 0x1085f],
            [0x10900, 0x1091b],
            [0x10920, 0x10939],
            [0x1093f, 0x1093f],
            [0x10a00, 0x10a00],
            [0x10a10, 0x10a13],
            [0x10a15, 0x10a17],
            [0x10a19, 0x10a33],
            [0x10a40, 0x10a47],
            [0x10a50, 0x10a58],
            [0x10a60, 0x10a7f],
            [0x10b00, 0x10b35],
            [0x10b40, 0x10b55],
            [0x10b58, 0x10b72],
            [0x10b78, 0x10b7f],
        ];

        foreach ($rtl_ranges as $range) {
            if ($c >= $range[0] && $c <= $range[1]) {
                return 'RTL';
            }
        }

        return 'LTR';
    }

    /**
     * Check for php-support.
*
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null<br>
     *               return bool-value, if $key is used and available<br>
     *               otherwise return <strong>null</strong>
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key === null) {
            return self::$SUPPORT;
        }

        // compatibility fix for old versions: expose the (lazy-loaded) transliterator list
        // under its historical key before the lookup happens
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }
        self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

        if (isset(self::$SUPPORT[$key])) {
            return self::$SUPPORT[$key];
        }

        return null;
    }

    /**
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * The decision is based on the first two bytes of $str only; whenever they cannot
     * be read or are not recognised, $fallback is returned unchanged.
     *
     * @param string $str
     * @param array  $fallback <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @psalm-pure
     *
     * @return null[]|string[]
     *                         <p>with this keys: 'ext', 'mime', 'type'</p>
     *
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        if ($str === '') {
            return $fallback;
        }

        /** @var false|string $magic - needed for PhpStan (stubs error) */
        $magic = \substr($str, 0, 2);
        if ($magic === false || \strlen($magic) !== 2) {
            return $fallback;
        }

        /** @var array|false $bytes - needed for PhpStan (stubs error) */
        $bytes = \unpack('C2chars', $magic);
        if ($bytes === false) {
            return $fallback;
        }

        // both byte values are glued together as decimal text, e.g. 255 and 216 => 255216
        /** @noinspection OffsetOperationsInspection */
        $type_code = (int) ($bytes['chars1'] . $bytes['chars2']);

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_types = [
            255216 => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            13780 => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        return $known_types[$type_code] ?? $fallback;
    }

    /**
     * Build a string of $length characters, each picked at random from $possible_chars.
     *
     * An empty $possible_chars yields an empty string.
     *
     * @param int    $length         <p>Length of the random string.</p>
     * @param string $possible_chars [optional] <p>Characters string for the random selection.</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // the fast path (plain "mb_*" functions) is chosen by the encoding exactly as it was passed in
        $use_mb_functions = $encoding === 'UTF-8';

        if ($use_mb_functions) {
            $pool_size = (int) \mb_strlen($possible_chars);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $pool_size = (int) self::strlen($possible_chars, $encoding);
        }

        if ($pool_size === 0) {
            return '';
        }

        $result = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $offset = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                // fall back to mt_rand() when random_int() throws
                /** @noinspection RandomApiMigrationInspection */
                $offset = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mb_functions
                ? \mb_substr($possible_chars, $offset, 1)
                : self::substr($possible_chars, $offset, 1, $encoding);

            // a failed extraction does not count towards the requested length
            if ($char !== false) {
                $result .= $char;
                ++$picked;
            }
        }

        return $result;
    }

    /**
     * @param int|string $extra_entropy [optional] <p>Extra entropy via a string or int value.</p>
     * @param bool       $use_md5       [optional] <p>Return the unique identifier as md5-hash?
Default: true</p>
     *
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_part = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            // fall back to mt_rand() when random_int() throws
            /** @noinspection RandomApiMigrationInspection */
            $random_part = \mt_rand(0, \mt_getrandmax());
        }

        // random number + session id + request addresses (when present) + caller supplied entropy
        $remote_addr = $_SERVER['REMOTE_ADDR'] ?? '';
        $server_addr = $_SERVER['SERVER_ADDR'] ?? '';
        $seed = $random_part . \session_id() . $remote_addr . $server_addr . $extra_entropy;

        $unique = \uniqid($seed, true);

        return $use_md5 ? \md5($unique . $seed) : $unique;
    }

    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::string_has_bom()
     * @deprecated <p>please use "UTF8::string_has_bom()"</p>
     */
    public static function hasBom(string $str): bool
    {
        return self::string_has_bom($str);
    }

    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * Uses mb_ereg_match() when mbstring support is flagged, otherwise
     * self::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains a lower case character.</p>
     */
    public static function has_lowercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:lower:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:lower:]]', $str);
    }

    /**
     * Returns true if the string contains whitespace, false otherwise.
*
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains whitespace.</p>
     */
    public static function has_whitespace(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:space:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:space:]]', $str);
    }

    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * Uses mb_ereg_match() when mbstring support is flagged, otherwise
     * self::str_matches_pattern() with the same pattern.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the string contains an upper case character.</p>
     */
    public static function has_uppercase(string $str): bool
    {
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '.*[[:upper:]]');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('.*[[:upper:]]', $str);
    }

    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_chr('U+00a7'); // '§'</code>
     *
     * @param string $hexdec <p>The hexadecimal value.</p>
     *
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        // "@": hexdec() complains about non-hex characters (e.g. the "U+" prefix) but still ignores them
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }

    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
*
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: <code>UTF8::hex_to_int('U+00f1'); // 241</code>
     *
     * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed by "\u" or "U+".
     * Anything else, including letters outside of a-f, is rejected with false.
     *
     * @param string $hexdec <p>The hexadecimal code point representation.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The code point, or false on failure.</p>
     */
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Only real hex digits are allowed: with the former "[a-zA-Z0-9]" class an input like
        // "U+00g1" matched and intval() silently stopped at the first invalid digit.
        if (\preg_match('/^(?:\\\u|U\+|)([0-9a-fA-F]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }

    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @param string   $str
     * @param int|null $flags
     * @param string   $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated <p>please use "UTF8::html_entity_decode()"</p>
     */
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        return self::html_entity_decode($str, $flags, $encoding);
    }

    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * With mbstring support the work is done by mb_encode_numericentity(); if that yields
     * no usable result (or mbstring is missing) every character is encoded on its own
     * via self::single_chr_html_encode().
     *
     * @param string $str              <p>The Unicode string to be encoded as numbered entities.</p>
     * @param bool   $keep_ascii_chars [optional] <p>Keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // keeping ASCII means: start converting at the first non-ASCII code point
            $start_code = $keep_ascii_chars ? 0x80 : 0x00;
            $convmap = [$start_code, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $return - needed for PhpStan (stubs error) */
                $return = \mb_encode_numericentity($str, $convmap);
                if ($return !== null && $return !== false) {
                    return $return;
                }
            }

            /** @var false|string|null $return - needed for PhpStan (stubs error) */
            $return = \mb_encode_numericentity($str, $convmap, $encoding);
            if ($return !== null && $return !== false) {
                return $return;
            }
        }

        //
        // fallback via vanilla php
        //

        return \implode(
            '',
            \array_map(
                static function (string $chr) use ($keep_ascii_chars, $encoding): string {
                    return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
                },
                self::str_split($str)
            )
        );
    }

    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly.
html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * Decoding is repeated until the string no longer changes, so multiply encoded
     * entities are resolved as well.
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string   $str      <p>
     *                           The input string.
     *                           </p>
     * @param int|null $flags    [optional] <p>
     *                           A bitmask of one or more of the following flags, which specify how to handle quotes
     *                           and which document type to use. If null is given, this method uses
     *                           ENT_QUOTES | ENT_HTML5.
     *                           <table>
     *                           Available <i>flags</i> constants
     *                           <tr valign="top"><td>Constant Name</td><td>Description</td></tr>
     *                           <tr valign="top"><td><b>ENT_COMPAT</b></td><td>Will convert double-quotes and leave single-quotes alone.</td></tr>
     *                           <tr valign="top"><td><b>ENT_QUOTES</b></td><td>Will convert both double and single quotes.</td></tr>
     *                           <tr valign="top"><td><b>ENT_NOQUOTES</b></td><td>Will leave both double and single quotes unconverted.</td></tr>
     *                           <tr valign="top"><td><b>ENT_HTML401</b></td><td>Handle code as HTML 4.01.</td></tr>
     *                           <tr valign="top"><td><b>ENT_XML1</b></td><td>Handle code as XML 1.</td></tr>
     *                           <tr valign="top"><td><b>ENT_XHTML</b></td><td>Handle code as XHTML.</td></tr>
     *                           <tr valign="top"><td><b>ENT_HTML5</b></td><td>Handle code as HTML 5.</td></tr>
     *                           </table>
     *                           </p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // nothing to decode: shorter than four bytes (examples: &; || &x;) or no "&" at all
        if (!isset($str[3]) || \strpos($str, '&') === false) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        /** @noinspection InArrayCanBeUsedInspection */
        $is_common_encoding = $encoding === 'UTF-8'
                              ||
                              $encoding === 'ISO-8859-1'
                              ||
                              $encoding === 'WINDOWS-1252';

        if (!$is_common_encoding && self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        do {
            $before = $str;

            if (\strpos($str, '&') === false) {
                // no entity left, so the string cannot change anymore
                break;
            }

            if (\strpos($str, '&#') !== false) {
                // decode also numeric & UTF16 two byte entities:
                // append the missing ";" so that the native function accepts them
                $str = (string) \preg_replace(
                    '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                    '$1;',
                    $str
                );
            }

            $str = \html_entity_decode($str, $flags, $encoding);
        } while ($before !== $str);

        return $str;
    }

    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * The flags used are ENT_QUOTES | ENT_SUBSTITUTE.
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }

    /**
     * Remove empty html-tag.
     *
     * e.g.: <pre><tag></tag></pre>
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // an opening tag, optional whitespace and a closing tag
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        return (string) \preg_replace($empty_tag_pattern, '', $str);
    }

    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
     *
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str           <p>
     *                              The input string.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT | ENT_HTML401.
     *                              <table>
     *                              Available <i>flags</i> constants
     *                              <tr valign="top">
     *                              <td>Constant Name</td>
     *                              <td>Description</td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_COMPAT</b></td>
     *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_QUOTES</b></td>
     *                              <td>Will convert both double and single quotes.</td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_NOQUOTES</b></td>
     *                              <td>Will leave both double and single quotes unconverted.</td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_IGNORE</b></td>
     *                              <td>
     *                              Silently discard invalid code unit sequences instead of returning
     *                              an empty string.
Using this flag is discouraged as it
     *                              may have security implications.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_SUBSTITUTE</b></td>
     *                              <td>
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
     *                              string.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_DISALLOWED</b></td>
     *                              <td>
     *                              Replace invalid code points for the given document type with a
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for
     *                              instance, to ensure the well-formedness of XML documents with
     *                              embedded external content.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top"><td><b>ENT_HTML401</b></td><td>Handle code as HTML 4.01.</td></tr>
     *                              <tr valign="top"><td><b>ENT_XML1</b></td><td>Handle code as XML 1.</td></tr>
     *                              <tr valign="top"><td><b>ENT_XHTML</b></td><td>Handle code as XHTML.</td></tr>
     *                              <tr valign="top"><td><b>ENT_HTML5</b></td><td>Handle code as HTML 5.</td></tr>
     *                              </table>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Like <b>htmlspecialchars</b>,
     *                              <b>htmlentities</b> takes an optional third argument
     *                              <i>encoding</i> which defines encoding used in
     *                              conversion.
     *                              Although this argument is technically optional, you are highly
     *                              encouraged to specify the correct value for your code.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities. The default is to convert everything.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The encoded string.
     *                <br><br>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set.
     *                </p>
     */
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = \htmlentities(
            $str,
            $flags,
            $encoding,
            $double_encode
        );

        /**
         * PHP doesn't replace a backslash to its html entity since this is something
         * that's mostly used to escape characters when inserting in a database. Since
         * we're using a decent database layer, we don't need this and we're replacing
         * the backslashes by their html entity equivalent.
         *
         * The replacement has to be the literal entity text "&#92;": a bare backslash
         * here would escape the closing quote and break the string literal.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        $str = \str_replace('\\', '&#92;', $str);

        // finally turn every remaining non-ASCII character into a numeric entity
        return self::html_encode($str, true, $encoding);
    }

    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str           <p>
     *                              The string being converted.
     *                              </p>
     * @param int    $flags         [optional] <p>
     *                              A bitmask of one or more of the following flags, which specify how to handle
     *                              quotes, invalid code unit sequences and the used document type. The default is
     *                              ENT_COMPAT | ENT_HTML401.
* <table>
     *                              Available <i>flags</i> constants
     *                              <tr valign="top"><td>Constant Name</td><td>Description</td></tr>
     *                              <tr valign="top"><td><b>ENT_COMPAT</b></td><td>Will convert double-quotes and leave single-quotes alone.</td></tr>
     *                              <tr valign="top"><td><b>ENT_QUOTES</b></td><td>Will convert both double and single quotes.</td></tr>
     *                              <tr valign="top"><td><b>ENT_NOQUOTES</b></td><td>Will leave both double and single quotes unconverted.</td></tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_IGNORE</b></td>
     *                              <td>
     *                              Silently discard invalid code unit sequences instead of returning
     *                              an empty string. Using this flag is discouraged as it
     *                              may have security implications.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_SUBSTITUTE</b></td>
     *                              <td>
     *                              Replace invalid code unit sequences with a Unicode Replacement Character
     *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty
     *                              string.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top">
     *                              <td><b>ENT_DISALLOWED</b></td>
     *                              <td>
     *                              Replace invalid code points for the given document type with a
     *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
     *                              (otherwise) instead of leaving them as is. This may be useful, for
     *                              instance, to ensure the well-formedness of XML documents with
     *                              embedded external content.
     *                              </td>
     *                              </tr>
     *                              <tr valign="top"><td><b>ENT_HTML401</b></td><td>Handle code as HTML 4.01.</td></tr>
     *                              <tr valign="top"><td><b>ENT_XML1</b></td><td>Handle code as XML 1.</td></tr>
     *                              <tr valign="top"><td><b>ENT_XHTML</b></td><td>Handle code as XHTML.</td></tr>
     *                              <tr valign="top"><td><b>ENT_HTML5</b></td><td>Handle code as HTML 5.</td></tr>
     *                              </table>
     *                              </p>
     * @param string $encoding      [optional] <p>
     *                              Defines encoding used in conversion.
     *                              </p>
     *                              <p>
     *                              For the purposes of this function, the encodings
     *                              ISO-8859-1, ISO-8859-15,
     *                              UTF-8, cp866,
     *                              cp1251, cp1252, and
     *                              KOI8-R are effectively equivalent, provided the
     *                              <i>string</i> itself is valid for the encoding, as
     *                              the characters affected by <b>htmlspecialchars</b> occupy
     *                              the same positions in all of these encodings.
     *                              </p>
     * @param bool   $double_encode [optional] <p>
     *                              When <i>double_encode</i> is turned off PHP will not
     *                              encode existing html entities, the default is to convert everything.
     *                              </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>
     *                The converted string.
     *                </p>
     *                <p>
     *                If the input <i>string</i> contains an invalid code unit
     *                sequence within the given <i>encoding</i> an empty string
     *                will be returned, unless either the <b>ENT_IGNORE</b> or
     *                <b>ENT_SUBSTITUTE</b> flags are set
     *                </p>
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as they are, everything else is normalized first
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }

    /**
     * Checks whether iconv is available on the server.
*
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function iconv_loaded(): bool
    {
        return \extension_loaded('iconv');
    }

    /**
     * Deprecated alias that forwards to "UTF8::decimal_to_chr()".
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated <p>please use "UTF8::decimal_to_chr()"</p>
     */
    public static function int_to_chr($int): string
    {
        return self::decimal_to_chr($int);
    }

    /**
     * Formats an integer as a prefixed hexadecimal code point, e.g. "U+00f1".
     *
     * The hexadecimal digits are left-padded with zeros to a minimum of four
     * digits; longer values are emitted unchanged.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int    $int    <p>The integer to be converted to hexadecimal code point.</p>
     * @param string $prefix [optional] <p>Text put in front of the hex digits.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The prefix followed by the (zero-padded) hex digits.</p>
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        return $prefix . \str_pad(\dechex($int), 4, '0', \STR_PAD_LEFT);
    }

    /**
     * Checks whether intl-char is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intlChar_loaded(): bool
    {
        return \class_exists('IntlChar');
    }

    /**
     * Checks whether intl is available on the server.
*
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in next major version.</p>
     */
    public static function intl_loaded(): bool
    {
        return \extension_loaded('intl');
    }

    /**
     * Deprecated alias for "UTF8::is_ascii()"; forwards directly to ASCII::is_ascii().
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_ascii()
     * @deprecated <p>please use "UTF8::is_ascii()"</p>
     */
    public static function isAscii(string $str): bool
    {
        return ASCII::is_ascii($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_base64()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_base64()
     * @deprecated <p>please use "UTF8::is_base64()"</p>
     */
    public static function isBase64($str): bool
    {
        return self::is_base64($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_binary()".
     *
     * @param int|string $str
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_binary()
     * @deprecated <p>please use "UTF8::is_binary()"</p>
     */
    public static function isBinary($str, bool $strict = false): bool
    {
        return self::is_binary($str, $strict);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_bom()".
     *
     * @param string $utf8_chr
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_bom()
     * @deprecated <p>please use "UTF8::is_bom()"</p>
     */
    public static function isBom(string $utf8_chr): bool
    {
        return self::is_bom($utf8_chr);
    }

    /**
     * alias for "UTF8::is_html()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_html()
     * @deprecated <p>please use "UTF8::is_html()"</p>
*/
    public static function isHtml(string $str): bool
    {
        return self::is_html($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_json()".
     *
     * @param string $str
     *
     * @return bool
     *
     * @see        UTF8::is_json()
     * @deprecated <p>please use "UTF8::is_json()"</p>
     */
    public static function isJson(string $str): bool
    {
        return self::is_json($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_utf16()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     *
     * @see        UTF8::is_utf16()
     * @deprecated <p>please use "UTF8::is_utf16()"</p>
     */
    public static function isUtf16($str)
    {
        return self::is_utf16($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_utf32()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated <p>please use "UTF8::is_utf32()"</p>
     */
    public static function isUtf32($str)
    {
        return self::is_utf32($str);
    }

    /**
     * Deprecated alias that forwards to "UTF8::is_utf8()".
     *
     * @param string $str
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_utf8()
     * @deprecated <p>please use "UTF8::is_utf8()"</p>
     */
    public static function isUtf8($str, bool $strict = false): bool
    {
        return self::is_utf8($str, $strict);
    }

    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
*
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphabetic chars.</p>
     */
    public static function is_alpha(string $str): bool
    {
        $pattern = '^[[:alpha:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Tells whether the string consists of alphabetic and numeric chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only alphanumeric chars.</p>
     */
    public static function is_alphanumeric(string $str): bool
    {
        $pattern = '^[[:alnum:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Tells whether the string consists of punctuation chars only.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only punctuation chars.</p>
     */
    public static function is_punctuation(string $str): bool
    {
        $pattern = '^[[:punct:]]*$';

        return self::str_matches_pattern($str, $pattern);
    }

    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
*
     * @param string $str                       <p>The input string.</p>
     * @param bool   $ignore_control_characters [optional] <p>Ignore control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only printable (non-invisible) chars.</p>
     */
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters changes nothing.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }

    /**
     * Checks if a string is 7 bit ASCII (delegates to ASCII::is_ascii()).
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str <p>The string to check.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if it is ASCII<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function is_ascii(string $str): bool
    {
        return ASCII::is_ascii($str);
    }

    /**
     * Tells whether the input is a base64 encoded string.
     *
     * The input is decoded in strict mode and encoded again; it only counts
     * as base64 when that round trip reproduces the input exactly.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str                   <p>The input string.</p>
     * @param bool        $empty_string_is_valid [optional] <p>Is an empty string valid base64 or not?</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is base64 encoded.</p>
     */
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Anything that is not a string (e.g. null) is never base64.
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }

    /**
     * Check if the input is binary... (is look like a hack).
*
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made up of nothing but "0" and "1" counts as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than 25% NULL bytes is taken as binary content.
        $length = \strlen($input);
        $null_bytes = \substr_count($input, "\x0", 0, $length);
        if (($null_bytes / $length) > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        // Strict mode additionally asks fileinfo for the buffer's MIME encoding.
        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $mime_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $mime_encoding === 'binary';
    }

    /**
     * Check if the file is binary.
     *
     * Reads up to the first 512 bytes of the file and passes them to
     * UTF8::is_binary() in strict mode. A file that cannot be opened, or
     * that yields no data, is reported as not binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            return false;
        }

        $block = \fread($handle, 512);
        \fclose($handle);

        if ($block === '' || $block === false) {
            return false;
        }

        return self::is_binary($block, true);
    }

    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
*
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only whitespace characters.</p>
     */
    public static function is_blank(string $str): bool
    {
        $pattern = '^[[:space:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * The input has to be exactly one of the keys of the internal BOM table;
     * a string that merely starts with a BOM does not match.
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
     */
    public static function is_bom($str): bool
    {
        // Direct key lookup instead of iterating the shared static table by reference.
        // The is_string() guard keeps the comparison strict for non-string input.
        return \is_string($str) && isset(self::$BOM[$str]);
    }

    /**
     * Determine whether the value is considered to be empty.
     *
     * This is a thin wrapper around PHP's empty(), i.e. every value that
     * equals FALSE (e.g. "", "0", 0, [] ...) is reported as empty.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is empty().</p>
     */
    public static function is_empty($str): bool
    {
        return empty($str);
    }

    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
*
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only hexadecimal chars.</p>
     */
    public static function is_hexadecimal(string $str): bool
    {
        $pattern = '^[[:xdigit:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains html elements.</p>
     */
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        $str = self::emoji_encode($str); // hack for emoji support :/

        // One opening or closing tag is enough; a failed match (0 or false) means "no html".
        return \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $str) === 1;
    }

    /**
     * Check if $url is a correct url.
     *
     * Only "http://" and "https://" urls are accepted. With $disallow_localhost
     * enabled, urls that start with a loopback host ("localhost", "127.0.0.1",
     * "::1" / "[::1]") or that contain ".localhost" are rejected as well.
     *
     * @param string $url
     * @param bool   $disallow_localhost
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                    // IPv6 literals are written in brackets inside a URL, so the
                    // bracketed loopback form must be blocked as well.
                    'http://[::1]',
                    'https://[::1]',
                ]
            )) {
                return false;
            }

            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Everything else is left to PHP's own url validation.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }

    /**
     * Try to check if "$str" is a JSON-string.
*
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str                                    <p>The input string.</p>
     * @param bool   $only_array_or_object_results_are_valid [optional] <p>Only array and objects are valid json
     *                                                       results.</p>
     *
     * @return bool
     *              <p>Whether or not the $str is in JSON format.</p>
     */
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A NULL result is only acceptable when the input itself is the literal "null" (any case).
        $is_null_literal = \strtoupper($str) === 'NULL';
        if ($decoded === null && !$is_null_literal) {
            return false;
        }

        if (
            $only_array_or_object_results_are_valid
            &&
            !(\is_object($decoded) || \is_array($decoded))
        ) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }

    /**
     * Returns true if the string contains only lowercase chars, false otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only lowercase chars.</p>
     */
    public static function is_lowercase(string $str): bool
    {
        $pattern = '^[[:lower:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Returns true if the string is serialized, false otherwise.
*
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str is serialized.</p>
     */
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // A serialized "false" cannot be told apart from a failed unserialize() call below.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: the input may be untrusted, so no classes are allowed to be
        // instantiated while probing; only the success of the call matters here.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }

    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains only upper case characters.</p>
     */
    public static function is_uppercase(string $str): bool
    {
        $pattern = '^[[:upper:]]*$';

        // Without mbstring support the class' own pattern matcher is used.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Check if the string is UTF-16.
*
     * EXAMPLE: <code>
     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-16,<br>
     *                   <strong>1</strong> for UTF-16LE,<br>
     *                   <strong>2</strong> for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        $str = (string) $str;

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Filled lazily, only once a candidate encoding survives the round trip.
        $str_chars = [];
        $hits = ['UTF-16LE' => 0, 'UTF-16BE' => 0];

        foreach (['UTF-16LE', 'UTF-16BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Round trip: the decoded text must survive encoding and decoding again.
            $re_encoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
            $re_decoded = \mb_convert_encoding($re_encoded, 'UTF-8', $candidate);
            if ($re_decoded !== $decoded) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Count the decoded chars that are also found in the char statistic of the input.
            foreach (\array_keys(self::count_chars($re_decoded)) as $char) {
                if (\in_array($char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        // A tie (including "no hits at all") gives no decision.
        if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
            return false;
        }

        return $hits['UTF-16LE'] > $hits['UTF-16BE'] ? 1 : 2;
    }

    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     * </code>
     *
     * @param string $str                       <p>The input string.</p>
     * @param bool   $check_if_string_is_binary
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <strong>false</strong> if it is not UTF-32,<br>
     *                   <strong>1</strong> for UTF-32LE,<br>
     *                   <strong>2</strong> for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        $str = (string) $str;

        if ($check_if_string_is_binary && !self::is_binary($str, true)) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Filled lazily, only once a candidate encoding survives the round trip.
        $str_chars = [];
        $hits = ['UTF-32LE' => 0, 'UTF-32BE' => 0];

        foreach (['UTF-32LE', 'UTF-32BE'] as $candidate) {
            $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
            if (!$decoded) {
                continue;
            }

            // Round trip: the decoded text must survive encoding and decoding again.
            $re_encoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
            $re_decoded = \mb_convert_encoding($re_encoded, 'UTF-8', $candidate);
            if ($re_decoded !== $decoded) {
                continue;
            }

            if ($str_chars === []) {
                $str_chars = self::count_chars($str, true, false);
            }

            // Count the decoded chars that are also found in the char statistic of the input.
            foreach (\array_keys(self::count_chars($re_decoded)) as $char) {
                if (\in_array($char, $str_chars, true)) {
                    ++$hits[$candidate];
                }
            }
        }

        // A tie (including "no hits at all") gives no decision.
        if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
            return false;
        }

        return $hits['UTF-32LE'] > $hits['UTF-32BE'] ? 1 : 2;
    }

    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
*
     * EXAMPLE: <code>
     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     * </code>
     *
     * @param int|string|string[]|null $str    <p>The input to be checked.</p>
     * @param bool                     $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        if (!\is_array($str)) {
            return self::is_utf8_string((string) $str, $strict);
        }

        // An array is valid only if every element is valid (checked recursively).
        foreach ($str as $item) {
            if (!self::is_utf8($item, $strict)) {
                return false;
            }
        }

        return true;
    }

    /**
     * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/>
     * Decodes a JSON string
     *
     * The input is passed through UTF8::filter() before it is handed to the
     * native json_decode().
     *
     * EXAMPLE: <code>UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä')</code>
     *
     * @see http://php.net/manual/en/function.json-decode.php
     *
     * @param string $json    <p>
     *                        The <i>json</i> string being decoded.
     *                        </p>
     *                        <p>
     *                        This function only works with UTF-8 encoded strings.
     *                        </p>
     *                        <p>PHP implements a superset of
     *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                        only supports these values when they are nested inside an array or an object.
     *                        </p>
     * @param bool   $assoc   [optional] <p>
     *                        When <b>TRUE</b>, returned objects will be converted into
     *                        associative arrays.
     *                        </p>
     * @param int    $depth   [optional] <p>
     *                        User specified recursion depth.
     *                        </p>
     * @param int    $options [optional] <p>
     *                        Bitmask of JSON decode options. Currently only
     *                        <b>JSON_BIGINT_AS_STRING</b>
     *                        is supported (default is to cast large integers as floats)
     *                        </p>
     *
     * @psalm-pure
     *
     * @throws \RuntimeException if the json extension is flagged as unavailable
     *
     * @return mixed
     *               <p>The value encoded in <i>json</i> in appropriate PHP type. Values true, false and
     *               null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b> and <b>NULL</b> respectively.
     *               <b>NULL</b> is returned if the <i>json</i> cannot be decoded or if the encoded data
     *               is deeper than the recursion limit.</p>
     */
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filtered_json, $assoc, $depth, $options);
    }

    /**
     * (PHP 5 >= 5.2.0, PECL json >= 1.2.0)<br/>
     * Returns the JSON representation of a value.
     *
     * EXAMPLE: <code>UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]'</code>
     *
     * @see http://php.net/manual/en/function.json-encode.php
     *
     * @param mixed $value   <p>
     *                       The <i>value</i> being encoded. Can be any type except
     *                       a resource.
     *                       </p>
     *                       <p>
     *                       All string data must be UTF-8 encoded.
     *                       </p>
     *                       <p>PHP implements a superset of
     *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
     *                       only supports these values when they are nested inside an array or an object.
     *                       </p>
     * @param int   $options [optional] <p>
     *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
     *                       <b>JSON_HEX_TAG</b>,
     *                       <b>JSON_HEX_AMP</b>,
     *                       <b>JSON_HEX_APOS</b>,
     *                       <b>JSON_NUMERIC_CHECK</b>,
     *                       <b>JSON_PRETTY_PRINT</b>,
     *                       <b>JSON_UNESCAPED_SLASHES</b>,
     *                       <b>JSON_FORCE_OBJECT</b>,
     *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
     *                       constants is described on
     *                       the JSON constants page.
     *                       </p>
     * @param int   $depth   [optional] <p>
     *                       Set the maximum depth. Must be greater than zero.
4415 * </p> 4416 * 4417 * @psalm-pure 4418 * 4419 * @return false|string 4420 * A JSON encoded <strong>string</strong> on success or<br> 4421 * <strong>FALSE</strong> on failure 4422 */ 4423 public static function json_encode($value, int $options = 0, int $depth = 512) 4424 { 4425 $value = self::filter($value); 4426 4427 if (self::$SUPPORT['json'] === false) { 4428 throw new \RuntimeException('ext-json: is not installed'); 4429 } 4430 4431 /** @noinspection PhpComposerExtensionStubsInspection */ 4432 return \json_encode($value, $options, $depth); 4433 } 4434 4435 /** 4436 * Checks whether JSON is available on the server. 4437 * 4438 * @psalm-pure 4439 * 4440 * @return bool 4441 * <p><strong>true</strong> if available, <strong>false</strong> otherwise</p> 4442 * 4443 * @internal <p>Please do not use it anymore, we will make is private in next major version.</p> 4444 */ 4445 public static function json_loaded(): bool 4446 { 4447 return \function_exists('json_decode'); 4448 } 4449 4450 /** 4451 * Makes string's first char lowercase. 4452 * 4453 * EXAMPLE: <code>UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN</code> 4454 * 4455 * @param string $str <p>The input string</p> 4456 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> 4457 * @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p> 4458 * @param string|null $lang [optional] <p>Set the language for special cases: az, el, lt, 4459 * tr</p> 4460 * @param bool $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. 
ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Any non-UTF-8 charset goes through the encoding-aware helpers of this class.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = (string) self::substr($str, 0, 1, $encoding);

            return self::strtolower($head, $encoding, false, $lang, $try_to_keep_the_string_length) . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $head = (string) \mb_substr($str, 0, 1);

        // Without a language hint and without the length constraint the plain mbstring call is used.
        $plain_lowercase_is_enough = $lang === null && !$try_to_keep_the_string_length;
        if ($plain_lowercase_is_enough) {
            return \mb_strtolower($head) . $tail;
        }

        return self::strtolower($head, $encoding, false, $lang, $try_to_keep_the_string_length) . $tail;
    }

    /**
     * Alias for "UTF8::lcfirst()"; every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
    }

    /**
     * Lowercase for all words in the string.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string[]    $exceptions                    [optional] <p>Exclusion for some words.</p>
     * @param string      $char_list                     [optional] <p>Additional chars that belong to words and do
     *                                                   not start a new word.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g.
ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The rebuilt string; chunks listed in $exceptions are copied through unchanged.</p>
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare with '' explicitly: a truthiness test would also reject the valid input "0".
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: nothing is written back, so no reference is needed (and none is left dangling).
        foreach ($words as $word) {
            // Skip only genuinely empty chunks; "0" is a real word and has to stay in the output.
            if ($word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }

    /**
     * Alias for "UTF8::lcfirst()"; every argument is forwarded unchanged.
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated <p>please use "UTF8::lcfirst()"</p>
     */
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        return self::lcfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
*
     * EXAMPLE: <code>UTF8::ltrim(' 中文空白 '); // '中文空白 '</code>
     *
     * @param string      $str   <p>The string to be trimmed</p>
     * @param string|null $chars <p>Optional characters to be stripped; null strips whitespace, an empty
     *                           string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty list means "strip nothing"; it would otherwise build the invalid character class "[]".
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // Braces delimit the variable; the former "$chars}" leaked a literal "}" into the class.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }

    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::max('abc-äöü-中文空白'); // '空'</code>
     *
     * @param array<string>|string $arg <p>A UTF-8 encoded string or an array of such strings.</p>
     *
     * @psalm-pure
     *
     * @return string|null the character with the highest code point, or null when no code points were found
     */
    public static function max($arg)
    {
        // Arrays are concatenated so that all strings are inspected as one.
        if (\is_array($arg)) {
            $arg = \implode('', $arg);
        }

        $codepoints = self::codepoints($arg);
        if ($codepoints === []) {
            return null;
        }

        return self::chr((int) \max($codepoints));
    }

    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
*
     * EXAMPLE: <code>UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2</code>
     *
     * @param string $str <p>The original Unicode string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Max byte lengths of the given chars.</p>
     */
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        // No characters at all means there is no width to report.
        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }

    /**
     * Reports whether the "mbstring" extension is loaded on this server.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p><strong>true</strong> if available, <strong>false</strong> otherwise</p>
     *
     * @internal <p>Please do not use it anymore, we will make it private in the next major version.</p>
     */
    public static function mbstring_loaded(): bool
    {
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }

    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: <code>UTF8::min('abc-äöü-中文空白'); // '-'</code>
     *
     * @param string|string[] $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
     *
     * @psalm-pure
     *
     * @return string|null
     *                     <p>The character with the lowest code point, or null when no code points were found.</p>
     */
    public static function min($arg)
    {
        // Arrays are concatenated so that all strings are inspected as one.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $codepoints = self::codepoints($haystack);
        if ($codepoints === []) {
            return null;
        }

        $lowest = \min($codepoints);

        return self::chr((int) $lowest);
    }

    /**
     * Alias for "UTF8::normalize_encoding()"; both arguments are forwarded unchanged.
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated <p>please use "UTF8::normalize_encoding()"</p>
     */
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        return self::normalize_encoding($encoding, $fallback);
    }

    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: <code>UTF8::normalize_encoding('UTF8'); // 'UTF-8'</code>
     *
     * @param mixed $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
     * @param mixed $fallback <p>e.g.: UTF-8</p>
     *
     * @psalm-pure
     *
     * @return mixed|string
     *                      <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.<br>Will return an empty string as fallback (by default)</p>
     *
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,string>
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        // Both '' and '0' are falsy strings, so either one yields the fallback right here.
        if (!$encoding) {
            return $fallback;
        }

        // Frequent spellings are answered without touching the cache or the encodings list.
        $well_known = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($well_known[$encoding])) {
            return $well_known[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if ($encoding === '1') {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // A name that is already part of the encodings list is returned (and cached) as given.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // Lookup key for the alias table: the upper-cased name reduced to ASCII letters and digits.
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'   => 'ISO-8859-1',
            'ISO88591'  => 'ISO-8859-1',
            'ISO'       => 'ISO-8859-1',
            'LATIN'     => 'ISO-8859-1',
            'LATIN1'    => 'ISO-8859-1', // Western European
            'ISO88592'  => 'ISO-8859-2',
            'LATIN2'    => 'ISO-8859-2', // Central European
            'ISO88593'  => 'ISO-8859-3',
            'LATIN3'    => 'ISO-8859-3', // Southern European
            'ISO88594'  => 'ISO-8859-4',
            'LATIN4'    => 'ISO-8859-4', // Northern European
            'ISO88595'  => 'ISO-8859-5', // Cyrillic
            'ISO88596'  => 'ISO-8859-6', // Arabic
            'ISO88597'  => 'ISO-8859-7', // Greek
            'ISO88598'  => 'ISO-8859-8', // Hebrew
            'ISO88599'  => 'ISO-8859-9',
            'LATIN5'    => 'ISO-8859-9', // Turkish
            'ISO885911' => 'ISO-8859-11',
            'TIS620'    => 'ISO-8859-11', // Thai
            'ISO885910' => 'ISO-8859-10',
            'LATIN6'    => 'ISO-8859-10', // Nordic
            'ISO885913' => 'ISO-8859-13',
            'LATIN7'    => 'ISO-8859-13', // Baltic
            'ISO885914' => 'ISO-8859-14',
            'LATIN8'    => 'ISO-8859-14', // Celtic
            'ISO885915' => 'ISO-8859-15',
            'LATIN9'    => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916' => 'ISO-8859-16',
            'LATIN10'   => 'ISO-8859-16', // Southeast European
            'UTF16'     => 'UTF-16',
            'UTF32'     => 'UTF-32',
            'UTF8'      => 'UTF-8',
            'UTF'       => 'UTF-8',
            'UTF7'      => 'UTF-7',
            '8BIT'      => 'CP850',
            'BINARY'    => 'CP850',
        ];

        // CP125x / WIN125x / WINDOWS125x all name the same Windows code page (1250 ... 1258).
        foreach (\range(1250, 1258) as $code_page) {
            $windows_name = 'WINDOWS-' . $code_page;

            $equivalences['CP' . $code_page] = $windows_name;
            $equivalences['WIN' . $code_page] = $windows_name;
            $equivalences['WINDOWS' . $code_page] = $windows_name;
        }

        // Unknown names fall through as their upper-cased spelling.
        if (!empty($equivalences[$encoding_upper_helper])) {
            $encoding = $equivalences[$encoding_upper_helper];
        }

        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }

    /**
     * Standardize line ending to unix-like.
     *
     * @param string          $str      <p>The input string.</p>
     * @param string|string[] $replacer <p>The replacer char e.g. "\n" (Linux) or "\r\n" (Windows).
You can also use \PHP_EOL
     *                                  here. An array is applied like in \str_replace(): its first, second
     *                                  and third entry replace "\r\n", "\r" and "\n" respectively.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized line ending.</p>
     */
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        $line_endings = ["\r\n", "\r", "\n"];

        if (\is_array($replacer)) {
            // Positional mapping as in \str_replace(): a missing replacement becomes ''.
            $replacements = \array_values($replacer);

            $pairs = [];
            foreach ($line_endings as $index => $line_ending) {
                $pairs[$line_ending] = (string) ($replacements[$index] ?? '');
            }
        } else {
            $pairs = \array_fill_keys($line_endings, (string) $replacer);
        }

        // \strtr() replaces in a single pass (longest match first) and never re-scans text it inserted.
        // The former sequential \str_replace() turned "\r\n" into "\r\r\n\r\n" for the replacer "\r\n".
        return \strtr($str, $pairs);
    }

    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: <code>UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'</code>
     *
     * @param string $str <p>The string to be normalized.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized characters for commonly used chars in Word documents.</p>
     */
    public static function normalize_msword(string $str): string
    {
        // The work is delegated to the ASCII helper class.
        return ASCII::normalize_msword($str);
    }

    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: <code>UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"</code>
     *
     * @param string $str                          <p>The string to be normalized.</p>
     * @param bool   $keep_non_breaking_space      [optional] <p>Set to true, to keep non-breaking-spaces.</p>
     * @param bool   $keep_bidi_unicode_controls   [optional] <p>Set to true, to keep non-printable (for the web)
     *                                             bidirectional text chars.</p>
     * @param bool   $normalize_control_characters [optional] <p>Set to true, to convert e.g.
LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with normalized whitespace.</p>
     */
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // The work is delegated to the ASCII helper class.
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }

    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: <code>UTF8::ord('☃'); // 0x2603</code>
     *
     * @param string $chr      <p>The character of which to calculate code point.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>Unicode code point of the given character,<br>
     *             0 on invalid UTF-8 byte sequence</p>
     */
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array<string,int>
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Results are memoized per "<input>_<encoding>" pair.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // First choice: the lazily loaded lookup table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php: decode the leading (at most four) bytes by hand
        //

        /** @noinspection OffsetOperationsInspection */
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        if ($lead >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code_point = (int) ((($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code_point = (int) ((($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code_point = (int) ((($lead - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing that could be decoded further)
            $code_point = $lead;
        }

        return $CHAR_CACHE[$cache_key] = $code_point;
    }

    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE: <code>
     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     * </code>
     *
     * @see http://php.net/manual/en/function.parse-str.php
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $result     <p>The result will be returned into this reference parameter.</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
     */
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }

    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>
     *              <strong>true</strong> if support is available,<br>
     *              <strong>false</strong> otherwise
     *              </p>
     */
    public static function pcre_utf8_support(): bool
    {
        // An empty pattern with the "u" modifier only matches when PCRE accepts that modifier.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $match_result = @\preg_match('//u', '');

        return (bool) $match_result;
    }

    /**
     * Create an array containing a range of UTF-8 characters.
*
     * EXAMPLE: <code>UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)</code>
     *
     * @param int|string $var1      <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
     * @param int|string $var2      <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
     * @param bool       $use_ctype <p>use ctype to detect numeric and hexadecimal, otherwise we will use a simple
     *                              "is_numeric"</p>
     * @param string     $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param float|int  $step      [optional] <p>
     *                              If a step value is given, it will be used as the
     *                              increment between elements in the sequence. step
     *                              should be given as a positive number. If not specified,
     *                              step will default to 1.
     *                              </p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if $step is not numeric or not positive
     * @throws \RuntimeException         if $use_ctype is requested but the ctype support flag is false
     *
     * @return string[]
     *                  <p>An empty array when a boundary is empty or resolves to code point 0.</p>
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // ctype_*() must always receive strings: an int argument is interpreted as an ASCII code
        // (and is deprecated since PHP 8.1), which made the hex branch disagree with the digit branch.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Anything else is treated as a character whose code point starts the range.
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // The end boundary is interpreted the same way the start boundary was.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }

    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
*
     * EXAMPLE: <code>UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'</code>
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str          <p>The input string.</p>
     * @param bool   $multi_decode <p>Decode as often as possible.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The decoded URL, as a string.</p>
     */
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass always runs; further passes only happen in multi-decode mode and
        // stop as soon as a pass leaves the string unchanged.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }

    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $pattern     <p>The regular expression pattern.</p>
     * @param string $replacement <p>The string to replace with.</p>
     * @param string $options     [optional] <p>Matching conditions to be used.</p>
     * @param string $delimiter   [optional] <p>Delimiter of the regex.
Default: '/'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is rewritten to "ms" before it is handed to PCRE.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $boundary = $delimiter ?: '/';

        // The "u" (UTF-8) modifier is always part of the final expression.
        $expression = $boundary . $pattern . $boundary . 'u' . $modifiers;

        return (string) \preg_replace($expression, $replacement, $str);
    }

    /**
     * Alias for "UTF8::remove_bom()".
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated <p>please use "UTF8::remove_bom()"</p>
     */
    public static function removeBOM(string $str): string
    {
        return self::remove_bom($str);
    }

    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: <code>UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'</code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without UTF-BOM.</p>
     */
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        // Every known BOM is tried in table order against the current start of the string.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            if (\strncmp($str, $bom_string, $bom_byte_length) !== 0) {
                continue;
            }

            /** @var false|string $without_bom - needed for PhpStan (stubs error) */
            $without_bom = \substr($str, $bom_byte_length, $remaining_length);
            if ($without_bom === false) {
                return '';
            }

            $remaining_length -= (int) $bom_byte_length;

            $str = (string) $without_bom;
        }

        return $str;
    }

    /**
     * Removes duplicate occurrences of a string in another string.
*
     * EXAMPLE: <code>UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'</code>
     *
     * @param string          $str  <p>The base string.</p>
     * @param string|string[] $what <p>String to search for in the base string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with removed duplicates.</p>
     */
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                // Every run of one or more consecutive copies collapses into a single copy.
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', $item, $str);
            }
        }

        return $str;
    }

    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $allowable_tags [optional] <p>You can use the optional second parameter to specify tags which
     *                               should not be stripped. Default: '' (strip everything)</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without html tags.</p>
     */
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        return \strip_tags($str, $allowable_tags);
    }

    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $replacement [optional] <p>Default is an empty string.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without breaks.</p>
     */
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // "\r\n" has to be the first alternative so that a Windows line break counts as ONE break.
        // The former pattern started with a stray "/" ("#/\r\n|..."): CRLF was then replaced twice
        // (once per character) and a slash directly in front of a CRLF was swallowed.
        return (string) \preg_replace("#\r\n|\r|\n|<br.*/?>#isU", $replacement, $str);
    }

    /**
     * Remove invisible characters from a string.
*
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: <code>UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'</code>
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str                           <p>The input string.</p>
     * @param bool   $url_encoded                   [optional] <p>
     *                                              Try to remove url encoded control character.
     *                                              WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa.
     *                                              <br>
     *                                              Default: false
     *                                              </p>
     * @param string $replacement                   [optional] <p>The replacement character.</p>
     * @param bool   $keep_basic_control_characters [optional] <p>Keep control characters like [LRM] or [LSEP].</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without invisible chars.</p>
     */
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // The work is delegated to the ASCII helper class; all arguments are passed through as given.
        $cleaned = ASCII::remove_invisible_characters($str, $url_encoded, $replacement, $keep_basic_control_characters);

        return $cleaned;
    }

    /**
     * Returns a new string with the prefix $substring removed, if present.
*
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The prefix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without the prefix $substring.</p>
     */
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Test for '' explicitly: a truthiness check would also skip the valid prefix "0".
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The suffix to remove.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string having a $str without the suffix $substring.</p>
     */
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Test for '' explicitly: a truthiness check would also skip the valid suffix "0".
        if ($substring === '' || \substr($str, -\strlen($substring)) !== $substring) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }

    /**
     * Replaces all occurrences of $search in $str by $replacement.
*
     * @param string $str            <p>The input string.</p>
     * @param string $search         <p>The needle to search for.</p>
     * @param string $replacement    <p>The string to replace with.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive work is done by the native function, everything else by the class helper.
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }

    /**
     * Replaces all occurrences of the $search elements in $str by $replacement.
     *
     * @param string       $str            <p>The input string.</p>
     * @param array        $search         <p>The elements to search for.</p>
     * @param array|string $replacement    <p>The string to replace with.</p>
     * @param bool         $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with replaced parts.</p>
     */
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-sensitive work is done by the native function, everything else by the class helper.
        return $case_sensitive
            ? \str_replace($search, $replacement, $str)
            : self::str_ireplace($search, $replacement, $str);
    }

    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
*
     * EXAMPLE: <code>UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'</code>
     *
     * @param string $str                        <p>The input string</p>
     * @param string $replacement_char           <p>The replacement character.</p>
     * @param bool   $process_invalid_utf8_chars <p>Convert invalid UTF-8 chars </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string without diamond question marks (�).</p>
     */
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // An empty replacement is passed on as the keyword "none",
            // anything else as the byte value of its first character.
            $substitute = $replacement_char === '' ? 'none' : \ord($replacement_char);

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            // Remember the current substitute character so it can be restored below.
            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $previous_substitute = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            \mb_substitute_character($previous_substitute);
        }

        $needles = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacement_char,
            $replacement_char,
        ];

        return \str_replace($needles, $replacements, $str);
    }

    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
*
     * EXAMPLE: <code>UTF8::rtrim('-ABC-中文空白- '); // '-ABC-中文空白-'</code>
     *
     * @param string      $str   <p>The string to be trimmed.</p>
     * @param string|null $chars <p>Optional characters to be stripped. null strips whitespace,
     *                           an empty string strips nothing.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with unwanted characters stripped from the right.</p>
     */
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would produce the invalid class "[]";
        // there is nothing to strip in that case.
        if ($chars === '') {
            return $str;
        }

        $has_mbstring = self::$SUPPORT['mbstring'] === true;

        if ($chars !== null) {
            // The fallback path quotes "/" as well, presumably because
            // self::regex_replace() wraps the pattern in "/" delimiters.
            /** @noinspection PregQuoteUsageInspection */
            $chars = $has_mbstring ? \preg_quote($chars) : \preg_quote($chars, '/');
            // Braces delimit the variable only. The previous "[$chars}]+$"
            // put a literal "}" into the class and stripped it as well.
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        return self::regex_replace($str, $pattern, '');
    }

    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * @param bool $useEcho [optional] <p>Echo the report in addition to returning it. Default: true</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The report as HTML, one "key - value" line per support flag.</p>
     */
    public static function showSupport(bool $useEcho = true)
    {
        $html = '<pre>';
        // Read-only iteration: no reference into the static array is needed.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }

    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
*
     * EXAMPLE: <code>UTF8::single_chr_html_encode('κ'); // '&#954;'</code>
     *
     * @param string $char             <p>The Unicode character to be encoded as numbered entity.</p>
     * @param bool   $keep_ascii_chars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
     * @param string $encoding         [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The HTML numbered entity for the given character.</p>
     */
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        if ($keep_ascii_chars && ASCII::is_ascii($char)) {
            return $char;
        }

        return '&#' . self::ord($char, $encoding) . ';';
    }

    /**
     * Replaces every run of $tab_length consecutive spaces with one tab character.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length [optional] <p>Number of spaces that make up one tab. Default: 4</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with the space runs converted to tabs.</p>
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A non-positive width describes no spaces at all, so nothing can be replaced.
        if ($tab_length <= 0) {
            return $str;
        }

        // Build the run from the requested width. The former hard-coded
        // literals for 4 and 2 were a single space each.
        return \str_replace(\str_repeat(' ', $tab_length), "\t", $str);
    }

    /**
     * alias for "UTF8::str_split()"
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated <p>please use "UTF8::str_split()"</p>
     */
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        /** @var string[] */
        return self::str_split($str, $length, $clean_utf8);
    }

    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
*
     * @see        UTF8::str_starts_with()
     * @deprecated <p>please use "UTF8::str_starts_with()"</p>
     */
    public static function str_begins(string $haystack, string $needle): bool
    {
        return self::str_starts_with($haystack, $needle);
    }

    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string      $str                           <p>The input string.</p>
     * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
     *                                                   -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = self::lcfirst(\trim($str), $encoding, false, $lang, $try_to_keep_the_string_length);
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The native mb_* functions are only used when neither a language
        // nor the length-preserving mode was requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        /**
         * Drops a run of separators and upper-cases the character after it, if any.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_separator = static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
            if (!isset($match[1])) {
                return '';
            }

            if (!$use_mb_functions) {
                return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[1]);
            }

            return \mb_strtoupper($match[1], $encoding);
        };

        /**
         * Upper-cases a run of digits together with the character after it.
         *
         * @param array $match
         *
         * @psalm-pure
         *
         * @return string
         */
        $upper_after_digits = static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
            if (!$use_mb_functions) {
                return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            }

            if ($encoding === 'UTF-8') {
                return \mb_strtoupper($match[0]);
            }

            return \mb_strtoupper($match[0], $encoding);
        };

        $str = (string) \preg_replace_callback('/[-_\\s]+(.)?/u', $upper_after_separator, $str);

        return (string) \preg_replace_callback('/[\\p{N}]+(.)?/u', $upper_after_digits, $str);
    }

    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with $str capitalized.</p>
     */
    public static function str_capitalize_name(string $str): string
    {
        // First pass works on space-separated parts, second pass on dash-separated parts.
        $collapsed = self::collapse_whitespace($str);
        $capitalized_by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($capitalized_by_space, '-');
    }

    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param string $needle         <p>Substring to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity.
*                               Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        if (\PHP_VERSION_ID >= 80000) {
            /** @phpstan-ignore-next-line - only for PHP8 */
            return \str_contains($haystack, $needle);
        }

        return \strpos($haystack, $needle) !== false;
    }

    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * An empty haystack, an empty needle list or an empty needle yields false.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $haystack contains $needle.</p>
     */
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of relying on truthiness, so that
            // the needle "0" is searched for like any other string.
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // Exactly one scan per needle: the case-sensitive mode no longer
            // falls through into the case-insensitive search.
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
*
     * Empty needles are ignored.
     *
     * @param string $haystack       <p>The input string.</p>
     * @param array  $needles        <p>SubStrings to look for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str contains $needle.</p>
     */
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against '' instead of relying on truthiness, so that
            // the needle "0" is not silently skipped.
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        return self::str_delimit($str, '-', $encoding);
    }

    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
*
     * @param string      $str                           <p>The input string.</p>
     * @param string      $delimiter                     <p>Sequence used to separate parts of the string.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ ->
     *                                                   ß</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Decided once: either the mbstring regex functions or PCRE handle both regex steps.
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every uppercase letter that is not at a word boundary.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lowercase, natively when no special handling was requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // Step 3: collapse runs of dashes, underscores and whitespace into the delimiter.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }

    /**
     * Optimized "mb_detect_encoding()"-function ->
* with support for UTF-16 and UTF-32.
     *
     * EXAMPLE: <code>
     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     * </code>
     *
     * @param string $str <p>The input string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>
     *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
     *                      otherwise it will return false e.g. for BINARY or not detected encoding.
     *                      </p>
     */
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            $utf32_type = self::is_utf32($str, false);
            if ($utf32_type === 1) {
                return 'UTF-32LE';
            }
            if ($utf32_type === 2) {
                return 'UTF-32BE';
            }

            $utf16_type = self::is_utf16($str, false);
            if ($utf16_type === 1) {
                return 'UTF-16LE';
            }
            if ($utf16_type === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        if (self::$SUPPORT['mbstring'] === true) {
            // candidates are tried in exactly this order
            $candidates = [
                'ISO-8859-1',
                'ISO-8859-2',
                'ISO-8859-3',
                'ISO-8859-4',
                'ISO-8859-5',
                'ISO-8859-6',
                'ISO-8859-7',
                'ISO-8859-8',
                'ISO-8859-9',
                'ISO-8859-10',
                'ISO-8859-13',
                'ISO-8859-14',
                'ISO-8859-15',
                'ISO-8859-16',
                'WINDOWS-1251',
                'WINDOWS-1252',
                'WINDOWS-1254',
                'CP932',
                'CP936',
                'CP950',
                'CP866',
                'CP850',
                'CP51932',
                'CP50220',
                'CP50221',
                'CP50222',
                'ISO-2022-JP',
                'ISO-2022-KR',
                'JIS',
                'JIS-ms',
                'EUC-CN',
                'EUC-JP',
            ];

            // info: do not use the symfony polyfill here
            $detected = \mb_detect_encoding($str, $candidates, true);
            if ($detected) {
                return $detected;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        foreach (self::$ENCODINGS as $candidate) {
            // A candidate is accepted when converting the string to the very
            // same encoding returns it unchanged.
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            $round_trip = (string) @\iconv($candidate, $candidate . '//IGNORE', $str);
            if ($round_trip === $str) {
                return $candidate;
            }
        }

        return false;
    }

    /**
     * alias for "UTF8::str_ends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_ends_with()
     * @deprecated <p>please use "UTF8::str_ends_with()"</p>
     */
    public static function str_ends(string $haystack, string $needle): bool
    {
        return self::str_ends_with($haystack, $needle);
    }

    /**
     * Check if the string ends with the given substring.
*
     * EXAMPLE: <code>
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        if (\PHP_VERSION_ID < 80000) {
            // manual byte-wise comparison of the tail
            $tail = \substr($haystack, -\strlen($needle));

            return $tail === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }

    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }

    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
*
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        $already_prefixed = $substring !== '' && \strpos($str, $substring) === 0;

        return $already_prefixed ? $str : $substring . $str;
    }

    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>The substring to add if not present.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Only a non-empty string that already carries the non-empty suffix is left alone.
        $already_suffixed = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        if ($already_suffixed) {
            return $str;
        }

        return $str . $substring;
    }

    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
*
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // '_id' is removed first, the remaining underscores become spaces
        $search = ['_id', '_'];
        $replace = ['', ' '];
        $humanized = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($humanized));
    }

    /**
     * alias for "UTF8::str_istarts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_istarts_with()
     * @deprecated <p>please use "UTF8::str_istarts_with()"</p>
     */
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        return self::str_istarts_with($haystack, $needle);
    }

    /**
     * alias for "UTF8::str_iends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::str_iends_with()
     * @deprecated <p>please use "UTF8::str_iends_with()"</p>
     */
    public static function str_iends(string $haystack, string $needle): bool
    {
        return self::str_iends_with($haystack, $needle);
    }

    /**
     * Check if the string ends with the given substring, case-insensitive.
*
     * EXAMPLE: <code>
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        // every string ends with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // The tail is cut by the byte length of the needle and compared
        // with the class' own case-insensitive comparison.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }

    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string   $str        <p>The input string.</p>
     * @param string[] $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str ends with $substring.</p>
     */
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        // also reached for an empty list
        return false;
    }

    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::stripos()
     * @deprecated <p>please use "UTF8::stripos()"</p>
     */
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::stripos($str, $needle, $offset, $encoding);
    }

    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strripos()
     * @deprecated <p>please use "UTF8::strripos()"</p>
     */
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strripos($str, $needle, $offset, $encoding);
    }

    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search.
*                          Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strpos()
     * @deprecated <p>please use "UTF8::strpos()"</p>
     */
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strpos($str, $needle, $offset, $encoding);
    }

    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param string $needle   <p>Substring to look for.</p>
     * @param int    $offset   [optional] <p>Offset from which to search. Default: 0</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The last occurrence's <strong>index</strong> if found, otherwise <strong>false</strong>.</p>
     *
     * @see        UTF8::strrpos()
     * @deprecated <p>please use "UTF8::strrpos()"</p>
     */
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        return self::strrpos($str, $needle, $offset, $encoding);
    }

    /**
     * Inserts $substring into the string at the $index provided.
     *
     * The string is returned unchanged when $index lies beyond its length.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $substring <p>String to be inserted.</p>
     * @param int    $index     <p>The index at which to insert the substring.</p>
     * @param string $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            $length = (int) \mb_strlen($str);
            if ($index > $length) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $length);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $length = (int) self::strlen($str, $encoding);
        if ($index > $length) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $length, $encoding);

        return $head . $substring . $tail;
    }

    /**
     * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
     *
     * EXAMPLE: <code>
     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     * </code>
     *
     * @see http://php.net/manual/en/function.str-ireplace.php
     *
     * @param string|string[] $search      <p>
     *                                     Every replacement with search array is
     *                                     performed on the result of previous replacement.
     *                                     </p>
     * @param string|string[] $replacement <p>The replacement.</p>
     * @param string|string[] $subject     <p>
     *                                     If subject is an array, then the search and
     *                                     replace is performed with every entry of
     *                                     subject, and the return value is an array as
     *                                     well.
     *                                     </p>
     * @param int             $count       [optional] <p>
     *                                     The number of matched and replaced needles will
     *                                     be returned in count which is passed by
     *                                     reference.
*                                     </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>A string or an array of replacements.</p>
     *
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $search = (array) $search;

        // Turn every needle into a case-insensitive, fully quoted pattern.
        /** @noinspection AlterInForeachInspection */
        foreach ($search as &$s) {
            $s = (string) $s;
            if ($s === '') {
                // pattern that can never match, so empty needles replace nothing
                $s = '/^(?<=.)$/';
            } else {
                $s = '/' . \preg_quote($s, '/') . '/ui';
            }
        }
        // break the reference so later code cannot overwrite the last pattern
        unset($s);

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // The replacement must be inserted literally, as str_replace() does.
        // Unescaped, "$1" or "\1" would act as back-references in preg_replace().
        $escape_replacement = static function ($value): string {
            return \addcslashes((string) $value, '\\$');
        };
        $replacement = \is_array($replacement)
            ? \array_map($escape_replacement, $replacement)
            : $escape_replacement($replacement);

        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($search, $replacement, $subject, -1, $count);

        return $subject;
    }

    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            // NOTE(review): an empty search appends the replacement even though
            // this is the "beginning" variant - kept as is, confirm the intent.
            return $str . $replacement;
        }

        // NOTE(review): \strncasecmp() compares bytes, so non-ASCII prefixes that
        // differ only in case are presumably not matched - confirm whether intended.
        $searchLength = \strlen($search);
        if (\strncasecmp($str, $search, $searchLength) === 0) {
            return $replacement . \substr($str, $searchLength);
        }

        return $str;
    }

    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string after the replacement.</p>
     */
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        if ($search === '') {
            return $str . $replacement;
        }

        $searchLength = \strlen($search);
        $offset = \strlen($str) - $searchLength;

        // A search longer than the string can never be its ending. Bail out
        // before \stripos() gets a negative offset outside the string, which
        // is a ValueError on PHP 8.
        if ($offset < 0) {
            return $str;
        }

        if (\stripos($str, $search, $offset) !== false) {
            $str = \substr($str, 0, -$searchLength) . $replacement;
        }

        return $str;
    }

    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // every string starts with the empty string
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        return self::stripos($haystack, $needle) === 0;
    }

    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
*
     * - case-insensitive
     *
     * @param string $str        <p>The input string.</p>
     * @param array  $substrings <p>Substrings to look for.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str starts with $substring.</p>
     */
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        if ($str === '' || $substrings === []) {
            return false;
        }

        // iterate by value: the candidates are only read, never modified
        foreach ($substrings as $substring) {
            if (self::str_istarts_with($str, $substring)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * INFO: the separator is searched case-insensitive, an empty string is
     * returned if it is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        // Search in the same encoding that is used for slicing below, otherwise
        // the character offset would not line up with the substring position.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Gets the substring after the last occurrence of a separator.
*
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::strripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        // Search in the same encoding that is used for slicing below, otherwise
        // the character offset would not line up with the substring position.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * INFO: the separator is searched case-insensitive, an empty string is
     * returned if it is not found.
     *
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = self::stripos($str, $separator);
            if ($offset === false) {
                return '';
            }

            return (string) \mb_substr($str, 0, $offset);
        }

        // search and slice must use the same encoding
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }

    /**
     * Gets the substring before the last occurrence of a separator.
*
     * @param string $str       <p>The input string.</p>
     * @param string $separator <p>The string separator.</p>
     * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        // fast path: plain "mb_*" calls for UTF-8, the wrappers for everything else
        $is_utf8 = $encoding === 'UTF-8';

        $position = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        // separator not found
        if ($position === false) {
            return '';
        }

        return $is_utf8
            ? (string) \mb_substr($str, 0, $position)
            : (string) self::substr($str, 0, $position, $encoding);
    }

    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * INFO: an empty string is returned if one of the inputs is empty or if
     * the "$needle" was not found.
     *
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::stristr($str, $needle, $before_needle, $encoding);

        // "false" (nothing found) is mapped to an empty string
        return $found === false ? '' : $found;
    }

    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
*
     * @param string $str           <p>The input string.</p>
     * @param string $needle        <p>The string to look for.</p>
     * @param bool   $before_needle [optional] <p>Default: false</p>
     * @param string $encoding      [optional] <p>Default: 'UTF-8'</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $found = self::strrichr($str, $needle, $before_needle, $encoding);

        // "false" (nothing found) is mapped to an empty string
        return $found === false ? '' : $found;
    }

    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $n        <p>Number of characters to retrieve from the end.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        return (string) \mb_substr($str, -$n);
    }

    /**
     * Limit the number of characters in a string.
     *
     * INFO: the "$str_add_on" counts towards "$length"; if the add-on alone is
     * already longer than "$length", only the add-on is returned.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Clamp at zero: a negative length would make mb_substr() cut from the
            // end of the string instead of limiting it.
            $keep = \max(0, $length - (int) self::strlen($str_add_on));

            return ((string) \mb_substr($str, 0, $keep)) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // measure the add-on in the same encoding as the string itself
        $keep = \max(0, $length - (int) self::strlen($str_add_on, $encoding));

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }

    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: <code>UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $length     [optional] <p>Default: 100</p>
     * @param string $str_add_on [optional] <p>Default: …</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // the last character inside the limit is a space: cut right before it
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);

            // drop the last (possibly truncated) word
            $words = \explode(' ', $str, -1);
            $new_str = \implode(' ', $words);

            // there was only one word: fall back to a hard cut
            if ($new_str === '') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            return $new_str . $str_add_on;
        }

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // the last character inside the limit is a space: cut right before it
        if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $str = self::substr($str, 0, $length, $encoding);
        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        if ($str === false) {
            return '' . $str_add_on;
        }

        // drop the last (possibly truncated) word
        $words = \explode(' ', $str, -1);
        $new_str = \implode(' ', $words);

        // there was only one word: fall back to a hard cut
        if ($new_str === '') {
            return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
        }

        return $new_str . $str_add_on;
    }

    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1     <p>The input sting.</p>
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // init
        $prefix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            for ($i = 0; $i < $limit; ++$i) {
                $char = \mb_substr($str1, $i, 1);

                // stop at the first position where both strings differ
                if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                    break;
                }

                $prefix .= $char;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        for ($i = 0; $i < $limit; ++$i) {
            $char = self::substr($str1, $i, 1, $encoding);

            // stop at the first position where both strings differ
            if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
        }

        return $prefix;
    }

    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string with its $str being the longest common substring.</p>
     */
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem

        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // checked after the normalization, so that e.g. "utf8" also takes the fast path
        $use_mb = $encoding === 'UTF-8';

        // Extract the characters of both strings once, instead of calling the
        // substring function again for every cell of the table.
        $str_chars = [];
        for ($i = 0; $i < $str_length; ++$i) {
            $str_chars[] = $use_mb
                ? \mb_substr($str1, $i, 1)
                : self::substr($str1, $i, 1, $encoding);
        }

        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[] = $use_mb
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the longest common run found so far
        $end = 0; // (1-based) end position of that run inside of $str1

        // Every cell only depends on the row above it, so two rows are enough.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $current_row = [0];

            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_chars[$i - 1] === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // strictly greater: in the case of ties the first run wins
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($use_mb) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }

    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1
     * @param string $str2     <p>Second string for comparison.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // init
        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // walk backwards from the end of both strings
            for ($i = 1; $i <= $limit; ++$i) {
                $char = \mb_substr($str1, -$i, 1);

                if ($char === false || $char !== \mb_substr($str2, -$i, 1)) {
                    break;
                }

                $suffix = $char . $suffix;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // walk backwards from the end of both strings
        for ($i = 1; $i <= $limit; ++$i) {
            $char = self::substr($str1, -$i, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, -$i, 1, $encoding)) {
                break;
            }

            $suffix = $char . $suffix;
        }

        return $suffix;
    }

    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
*
     * @param string $str     <p>The input string.</p>
     * @param string $pattern <p>Regex pattern to match against.</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not $str matches the pattern.</p>
     */
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // the pattern is wrapped into "/" delimiters as it is, with the "u" modifier
        $regex = '/' . $pattern . '/u';

        return (bool) \preg_match($regex, $str);
    }

    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $offset   <p>The index to check.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return bool
     *              <p>Whether or not the index exists.</p>
     */
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        // init
        $length = (int) self::strlen($str, $encoding);

        // positive offsets are zero-based, negative offsets count from -1 (last character)
        return $offset >= 0
            ? $offset < $length
            : \abs($offset) <= $length;
    }

    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str      <p>The input string.</p>
     * @param int    $index    <p>The <strong>index</strong> from which to retrieve the char.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @throws \OutOfBoundsException if the positive or negative offset does not exist
     *
     * @return string
     *                <p>The character at the specified index.</p>
     *
     * @psalm-pure
     */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure the string in the requested encoding, the same way as
        // "str_offset_exists()" does, so that both methods agree on the bounds.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }

    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: <code>UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'</code>
     *
     * @param string     $str        <p>The input string.</p>
     * @param int        $pad_length <p>The length of return string.</p>
     * @param string     $pad_string [optional] <p>String to use for padding the input string.</p>
     * @param int|string $pad_type   [optional] <p>
     *                               Can be <strong>STR_PAD_RIGHT</strong> (default), [or string "right"]<br>
     *                               <strong>STR_PAD_LEFT</strong> [or string "left"] or<br>
     *                               <strong>STR_PAD_BOTH</strong> [or string "both"]
     *                               </p>
     * @param string     $encoding   [optional] <p>Default: 'UTF-8'</p>
     *
     * @throws \InvalidArgumentException if $pad_type is neither an integer nor one of 'left', 'right' or 'both'
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Returns the padded string.</p>
     */
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // translate the string aliases into the "STR_PAD_*" constants
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or ' . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Pick the length / cut helpers once: plain "mb_*" calls for UTF-8,
        // the encoding aware wrappers for everything else.
        if ($encoding === 'UTF-8') {
            $length_of = static function (string $value): int {
                return (int) \mb_strlen($value);
            };
            $cut = static function (string $value, int $chars): string {
                return (string) \mb_substr($value, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length_of = static function (string $value) use ($encoding): int {
                return (int) self::strlen($value, $encoding);
            };
            $cut = static function (string $value, int $chars) use ($encoding): string {
                return (string) self::substr($value, 0, $chars, $encoding);
            };
        }

        $str_length = $length_of($str);

        // the string is already longer than requested: nothing to pad
        if ($pad_length < $str_length) {
            return $str;
        }

        // number of characters that must be added
        $diff = $pad_length - $str_length;

        $pre = '';
        $post = '';

        switch ($pad_type) {
            case \STR_PAD_LEFT:
                // repeat the pad string often enough, then cut it to the exact size
                $pre = $cut(
                    \str_repeat($pad_string, (int) \ceil($diff / $length_of($pad_string))),
                    $diff
                );

                break;

            case \STR_PAD_BOTH:
                // an odd difference puts the extra character on the right side
                $chars_left = (int) \floor($diff / 2);
                $chars_right = (int) \ceil($diff / 2);

                $pre = $cut(\str_repeat($pad_string, $chars_left), $chars_left);
                $post = $cut(\str_repeat($pad_string, $chars_right), $chars_right);

                break;

            case \STR_PAD_RIGHT:
            default:
                $post = $cut(
                    \str_repeat($pad_string, (int) \ceil($diff / $length_of($pad_string))),
                    $diff
                );
        }

        return $pre . $str . $post;
    }

    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with padding applied.</p>
     */
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_BOTH, $encoding);
    }

    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with left padding.</p>
     */
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_LEFT, $encoding);
    }

    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str
     * @param int    $length   <p>Desired string length after padding.</p>
     * @param string $pad_str  [optional] <p>String used to pad, defaults to space. Default: ' '</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with right padding.</p>
     */
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        return self::str_pad($str, $length, $pad_str, \STR_PAD_RIGHT, $encoding);
    }

    /**
     * Repeat a string.
     *
     * EXAMPLE: <code>UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'</code>
     *
     * @param string $str        <p>
     *                           The string to be repeated.
     *                           </p>
     * @param int    $multiplier <p>
     *                           Number of time the input string should be
     *                           repeated.
     *                           </p>
     *                           <p>
     *                           multiplier has to be greater than or equal to 0.
     *                           If the multiplier is set to 0, the function
     *                           will return an empty string.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The repeated string.</p>
     */
    public static function str_repeat(string $str, int $multiplier): string
    {
        // the input is passed through "UTF8::filter()" before it is repeated
        return \str_repeat(self::filter($str), $multiplier);
    }

    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original functions is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search  <p>
     *                                 The value being searched for, otherwise known as the needle.
     *                                 An array may be used to designate multiple needles.
     *                                 </p>
     * @param string|string[] $replace <p>
     *                                 The replacement value that replaces found search
     *                                 values. An array may be used to designate multiple replacements.
     *                                 </p>
     * @param string|string[] $subject <p>
     *                                 The string or array of strings being searched and replaced on,
     *                                 otherwise known as the haystack.
* </p>
     *                                 <p>
     *                                 If subject is an array, then the search and
     *                                 replace is performed with every entry of
     *                                 subject, and the return value is an array as
     *                                 well.
     *                                 </p>
     * @param int|null        $count   [optional] <p>
     *                                 If passed, this will hold the number of matched and replaced needles.
     *                                 </p>
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         <p>This function returns a string or an array with the replaced values.</p>
     *
     * @template TStrReplaceSubject
     * @phpstan-param TStrReplaceSubject $subject
     * @phpstan-return TStrReplaceSubject
     *
     * @deprecated please use \str_replace() instead
     */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $return;
         */
        $return = \str_replace($search, $replace, $subject, $count);

        return $return;
    }

    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty search term never matches: the replacement is simply appended
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);
        if (\strncmp($str, $search, $search_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $search_length);
    }

    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * @param string $str         <p>The input string.</p>
     * @param string $search      <p>The string to search for.</p>
     * @param string $replacement <p>The replacement.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string after the replacements.</p>
     */
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // an empty search term never matches: the replacement is simply appended
        if ($search === '') {
            return $str . $replacement;
        }

        // A search term longer than the string cannot be its ending. Bail out here,
        // otherwise the offset below would point outside of the string.
        $offset = \strlen($str) - \strlen($search);
        if ($offset < 0) {
            return $str;
        }

        // only the last strlen($search) bytes are inspected
        if (\strpos($str, $search, $offset) !== false) {
            return \substr($str, 0, $offset) . $replacement;
        }

        return $str;
    }

    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strpos($subject, $search);

        // nothing found: the subject is returned untouched
        if ($pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );
    }

    /**
     * Replace the last "$search"-term with the "$replace"-term.
*
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strrpos($subject, $search);

        // nothing found: the subject is returned untouched
        if ($pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );
    }

    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: <code>UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '</code>
     *
     * @param string $str      <p>The input string</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @return string
     *                <p>The shuffled string.</p>
     */
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        // init
        $shuffled_str = '';

        if ($encoding === 'UTF-8') {
            // shuffle the character positions, then rebuild the string in that order
            $positions = \range(0, (int) \mb_strlen($str) - 1);
            /** @noinspection NonSecureShuffleUsageInspection */
            \shuffle($positions);

            foreach ($positions as $position) {
                $char = \mb_substr($str, $position, 1);
                if ($char !== false) {
                    $shuffled_str .= $char;
                }
            }

            return $shuffled_str;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // shuffle the character positions, then rebuild the string in that order
        $positions = \range(0, (int) self::strlen($str, $encoding) - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($positions);

        foreach ($positions as $position) {
            $char = self::substr($str, $position, 1, $encoding);
            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }

    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $start or $end is negative, it is computed from
     * the end of the string. An empty string is returned if the resolved $end
     * does not lie behind the resolved $start.
     *
     * @param string   $str
     * @param int      $start    <p>Initial index from which to begin extraction.</p>
     * @param int|null $end      [optional] <p>Index at which to end extraction. Default: null</p>
     * @param string   $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The extracted substring.</p><p>If <i>str</i> is shorter than <i>start</i>
     *                      characters long, <b>FALSE</b> will be returned.
     */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        // fast path: plain "mb_*" calls for UTF-8, the wrappers for everything else
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str_length = $is_utf8
            ? (int) \mb_strlen($str)
            : (int) self::strlen($str, $encoding);

        if ($end === null) {
            // no end given: take everything from $start on
            $length = $str_length;
        } else {
            // Resolve negative indexes into absolute positions first, so that the
            // length below is always the real distance between both positions.
            $from = $start < 0 ? \max(0, $str_length + $start) : $start;
            $to = $end < 0 ? $str_length + $end : $end;

            // An empty or reversed range yields nothing. A non-positive length must
            // never reach the substring call, where it would mean "cut from the end".
            if ($to <= $from) {
                return '';
            }

            $start = $from;
            $length = $to - $from;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }

    /**
     * Convert a string to e.g.: "snake_case"
     *
     * @param string $str
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>A string in snake_case.</p>
     */
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Prefix every number / upper-case letter with "_". The character class must
        // not contain a "|": inside of "[...]" it is a literal pipe, not an alternation.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];

                // a digit that survives the int round-trip is wrapped on both sides
                if ((string) (int) $match === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',          // convert spaces to "_"
                '/^\\s+|\\s+$/u',   // trim leading & trailing spaces
                '/_+/',             // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }

    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: <code>UTF8::str_sort(' -ABC-中文空白- '); // ' ---ABC中文白空'</code>
     *
     * @param string $str    <p>A UTF-8 string.</p>
     * @param bool   $unique <p>Sort unique.
If <strong>true</strong>, repeated characters are ignored.</p> 8258 * @param bool $desc <p>If <strong>true</strong>, will sort characters in reverse code point order.</p> 8259 * 8260 * @psalm-pure 8261 * 8262 * @return string 8263 * <p>A string of sorted characters.</p> 8264 */ 8265 public static function str_sort(string $str, bool $unique = false, bool $desc = false): string 8266 { 8267 $array = self::codepoints($str); 8268 8269 if ($unique) { 8270 $array = \array_flip(\array_flip($array)); 8271 } 8272 8273 if ($desc) { 8274 \arsort($array); 8275 } else { 8276 \asort($array); 8277 } 8278 8279 return self::string($array); 8280 } 8281 8282 /** 8283 * Convert a string to an array of Unicode characters. 8284 * 8285 * EXAMPLE: <code> 8286 * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']] 8287 * </code> 8288 * 8289 * @param int[]|string[] $input <p>The string[] or int[] to split into array.</p> 8290 * @param int $length [optional] <p>Max character length of each array 8291 * lement.</p> 8292 * @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the 8293 * string.</p> 8294 * @param bool $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use 8295 * "mb_substr"</p> 8296 * 8297 * @psalm-pure 8298 * 8299 * @return string[][] 8300 * <p>An array containing chunks of the input.</p> 8301 */ 8302 public static function str_split_array( 8303 array $input, 8304 int $length = 1, 8305 bool $clean_utf8 = false, 8306 bool $try_to_use_mb_functions = true 8307 ): array { 8308 foreach ($input as $k => &$v) { 8309 $v = self::str_split( 8310 $v, 8311 $length, 8312 $clean_utf8, 8313 $try_to_use_mb_functions 8314 ); 8315 } 8316 8317 /** @var string[][] $input */ 8318 return $input; 8319 } 8320 8321 /** 8322 * Convert a string to an array of unicode characters. 
8323 * 8324 * EXAMPLE: <code>UTF8::str_split('中文空白'); // array('中', '文', '空', '白')</code> 8325 * 8326 * @param int|string $input <p>The string or int to split into array.</p> 8327 * @param int $length [optional] <p>Max character length of each array 8328 * element.</p> 8329 * @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the 8330 * string.</p> 8331 * @param bool $try_to_use_mb_functions [optional] <p>Set to false, if you don't want to use 8332 * "mb_substr"</p> 8333 * 8334 * @psalm-pure 8335 * 8336 * @return string[] 8337 * <p>An array containing chunks of chars from the input.</p> 8338 * 8339 * @noinspection SuspiciousBinaryOperationInspection 8340 * @noinspection OffsetOperationsInspection 8341 */ 8342 public static function str_split( 8343 $input, 8344 int $length = 1, 8345 bool $clean_utf8 = false, 8346 bool $try_to_use_mb_functions = true 8347 ): array { 8348 if ($length <= 0) { 8349 return []; 8350 } 8351 8352 // this is only an old fallback 8353 /** @noinspection PhpSillyAssignmentInspection - hack for phpstan */ 8354 /** @var int|int[]|string|string[] $input */ 8355 $input = $input; 8356 if (\is_array($input)) { 8357 /** 8358 * @psalm-suppress InvalidReturnStatement 8359 */ 8360 return self::str_split_array( 8361 $input, 8362 $length, 8363 $clean_utf8, 8364 $try_to_use_mb_functions 8365 ); 8366 } 8367 8368 // init 8369 $input = (string) $input; 8370 8371 if ($input === '') { 8372 return []; 8373 } 8374 8375 if ($clean_utf8) { 8376 $input = self::clean($input); 8377 } 8378 8379 if ( 8380 $try_to_use_mb_functions 8381 && 8382 self::$SUPPORT['mbstring'] === true 8383 ) { 8384 if (\function_exists('mb_str_split')) { 8385 /** 8386 * @psalm-suppress ImpureFunctionCall - why? 
8387 */ 8388 $return = \mb_str_split($input, $length); 8389 if ($return !== false) { 8390 return $return; 8391 } 8392 } 8393 8394 $i_max = \mb_strlen($input); 8395 if ($i_max <= 127) { 8396 $ret = []; 8397 for ($i = 0; $i < $i_max; ++$i) { 8398 $ret[] = \mb_substr($input, $i, 1); 8399 } 8400 } else { 8401 $return_array = []; 8402 \preg_match_all('/./us', $input, $return_array); 8403 $ret = $return_array[0] ?? []; 8404 } 8405 } elseif (self::$SUPPORT['pcre_utf8'] === true) { 8406 $return_array = []; 8407 \preg_match_all('/./us', $input, $return_array); 8408 $ret = $return_array[0] ?? []; 8409 } else { 8410 8411 // fallback 8412 8413 $ret = []; 8414 $len = \strlen($input); 8415 8416 /** @noinspection ForeachInvariantsInspection */ 8417 for ($i = 0; $i < $len; ++$i) { 8418 if (($input[$i] & "\x80") === "\x00") { 8419 $ret[] = $input[$i]; 8420 } elseif ( 8421 isset($input[$i + 1]) 8422 && 8423 ($input[$i] & "\xE0") === "\xC0" 8424 ) { 8425 if (($input[$i + 1] & "\xC0") === "\x80") { 8426 $ret[] = $input[$i] . $input[$i + 1]; 8427 8428 ++$i; 8429 } 8430 } elseif ( 8431 isset($input[$i + 2]) 8432 && 8433 ($input[$i] & "\xF0") === "\xE0" 8434 ) { 8435 if ( 8436 ($input[$i + 1] & "\xC0") === "\x80" 8437 && 8438 ($input[$i + 2] & "\xC0") === "\x80" 8439 ) { 8440 $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2]; 8441 8442 $i += 2; 8443 } 8444 } elseif ( 8445 isset($input[$i + 3]) 8446 && 8447 ($input[$i] & "\xF8") === "\xF0" 8448 ) { 8449 if ( 8450 ($input[$i + 1] & "\xC0") === "\x80" 8451 && 8452 ($input[$i + 2] & "\xC0") === "\x80" 8453 && 8454 ($input[$i + 3] & "\xC0") === "\x80" 8455 ) { 8456 $ret[] = $input[$i] . $input[$i + 1] . $input[$i + 2] . 
$input[$i + 3]; 8457 8458 $i += 3; 8459 } 8460 } 8461 } 8462 } 8463 8464 if ($length > 1) { 8465 $ret = \array_chunk($ret, $length); 8466 8467 return \array_map( 8468 static function (array $item): string { 8469 return \implode('', $item); 8470 }, 8471 $ret 8472 ); 8473 } 8474 8475 if (isset($ret[0]) && $ret[0] === '') { 8476 return []; 8477 } 8478 8479 return $ret; 8480 } 8481 8482 /** 8483 * Splits the string with the provided regular expression, returning an 8484 * array of strings. An optional integer $limit will truncate the 8485 * results. 8486 * 8487 * @param string $str 8488 * @param string $pattern <p>The regex with which to split the string.</p> 8489 * @param int $limit [optional] <p>Maximum number of results to return. Default: -1 === no limit</p> 8490 * 8491 * @psalm-pure 8492 * 8493 * @return string[] 8494 * <p>An array of strings.</p> 8495 */ 8496 public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array 8497 { 8498 if ($limit === 0) { 8499 return []; 8500 } 8501 8502 if ($pattern === '') { 8503 return [$str]; 8504 } 8505 8506 if (self::$SUPPORT['mbstring'] === true) { 8507 if ($limit >= 0) { 8508 /** @noinspection PhpComposerExtensionStubsInspection */ 8509 $result_tmp = \mb_split($pattern, $str); 8510 8511 $result = []; 8512 foreach ($result_tmp as $item_tmp) { 8513 if ($limit === 0) { 8514 break; 8515 } 8516 --$limit; 8517 8518 $result[] = $item_tmp; 8519 } 8520 8521 return $result; 8522 } 8523 8524 /** @noinspection PhpComposerExtensionStubsInspection */ 8525 return \mb_split($pattern, $str); 8526 } 8527 8528 if ($limit > 0) { 8529 ++$limit; 8530 } else { 8531 $limit = -1; 8532 } 8533 8534 $array = \preg_split('/' . \preg_quote($pattern, '/') . 
'/u', $str, $limit); 8535 8536 if ($array === false) { 8537 return []; 8538 } 8539 8540 if ($limit > 0 && \count($array) === $limit) { 8541 \array_pop($array); 8542 } 8543 8544 return $array; 8545 } 8546 8547 /** 8548 * Check if the string starts with the given substring. 8549 * 8550 * EXAMPLE: <code> 8551 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true 8552 * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false 8553 * </code> 8554 * 8555 * @param string $haystack <p>The string to search in.</p> 8556 * @param string $needle <p>The substring to search for.</p> 8557 * 8558 * @psalm-pure 8559 * 8560 * @return bool 8561 */ 8562 public static function str_starts_with(string $haystack, string $needle): bool 8563 { 8564 if ($needle === '') { 8565 return true; 8566 } 8567 8568 if ($haystack === '') { 8569 return false; 8570 } 8571 8572 if (\PHP_VERSION_ID >= 80000) { 8573 /** @phpstan-ignore-next-line - only for PHP8 */ 8574 return \str_starts_with($haystack, $needle); 8575 } 8576 8577 return \strncmp($haystack, $needle, \strlen($needle)) === 0; 8578 } 8579 8580 /** 8581 * Returns true if the string begins with any of $substrings, false otherwise. 8582 * 8583 * - case-sensitive 8584 * 8585 * @param string $str <p>The input string.</p> 8586 * @param array $substrings <p>Substrings to look for.</p> 8587 * 8588 * @psalm-pure 8589 * 8590 * @return bool 8591 * <p>Whether or not $str starts with $substring.</p> 8592 */ 8593 public static function str_starts_with_any(string $str, array $substrings): bool 8594 { 8595 if ($str === '') { 8596 return false; 8597 } 8598 8599 if ($substrings === []) { 8600 return false; 8601 } 8602 8603 foreach ($substrings as &$substring) { 8604 if (self::str_starts_with($str, $substring)) { 8605 return true; 8606 } 8607 } 8608 8609 return false; 8610 } 8611 8612 /** 8613 * Gets the substring after the first occurrence of a separator. 
8614 * 8615 * @param string $str <p>The input string.</p> 8616 * @param string $separator <p>The string separator.</p> 8617 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8618 * 8619 * @psalm-pure 8620 * 8621 * @return string 8622 */ 8623 public static function str_substr_after_first_separator(string $str, string $separator, string $encoding = 'UTF-8'): string 8624 { 8625 if ($separator === '' || $str === '') { 8626 return ''; 8627 } 8628 8629 if ($encoding === 'UTF-8') { 8630 $offset = \mb_strpos($str, $separator); 8631 if ($offset === false) { 8632 return ''; 8633 } 8634 8635 return (string) \mb_substr( 8636 $str, 8637 $offset + (int) \mb_strlen($separator) 8638 ); 8639 } 8640 8641 $offset = self::strpos($str, $separator, 0, $encoding); 8642 if ($offset === false) { 8643 return ''; 8644 } 8645 8646 return (string) \mb_substr( 8647 $str, 8648 $offset + (int) self::strlen($separator, $encoding), 8649 null, 8650 $encoding 8651 ); 8652 } 8653 8654 /** 8655 * Gets the substring after the last occurrence of a separator. 
8656 * 8657 * @param string $str <p>The input string.</p> 8658 * @param string $separator <p>The string separator.</p> 8659 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8660 * 8661 * @psalm-pure 8662 * 8663 * @return string 8664 */ 8665 public static function str_substr_after_last_separator( 8666 string $str, 8667 string $separator, 8668 string $encoding = 'UTF-8' 8669 ): string { 8670 if ($separator === '' || $str === '') { 8671 return ''; 8672 } 8673 8674 if ($encoding === 'UTF-8') { 8675 $offset = \mb_strrpos($str, $separator); 8676 if ($offset === false) { 8677 return ''; 8678 } 8679 8680 return (string) \mb_substr( 8681 $str, 8682 $offset + (int) \mb_strlen($separator) 8683 ); 8684 } 8685 8686 $offset = self::strrpos($str, $separator, 0, $encoding); 8687 if ($offset === false) { 8688 return ''; 8689 } 8690 8691 return (string) self::substr( 8692 $str, 8693 $offset + (int) self::strlen($separator, $encoding), 8694 null, 8695 $encoding 8696 ); 8697 } 8698 8699 /** 8700 * Gets the substring before the first occurrence of a separator. 
8701 * 8702 * @param string $str <p>The input string.</p> 8703 * @param string $separator <p>The string separator.</p> 8704 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8705 * 8706 * @psalm-pure 8707 * 8708 * @return string 8709 */ 8710 public static function str_substr_before_first_separator( 8711 string $str, 8712 string $separator, 8713 string $encoding = 'UTF-8' 8714 ): string { 8715 if ($separator === '' || $str === '') { 8716 return ''; 8717 } 8718 8719 if ($encoding === 'UTF-8') { 8720 $offset = \mb_strpos($str, $separator); 8721 if ($offset === false) { 8722 return ''; 8723 } 8724 8725 return (string) \mb_substr( 8726 $str, 8727 0, 8728 $offset 8729 ); 8730 } 8731 8732 $offset = self::strpos($str, $separator, 0, $encoding); 8733 if ($offset === false) { 8734 return ''; 8735 } 8736 8737 return (string) self::substr( 8738 $str, 8739 0, 8740 $offset, 8741 $encoding 8742 ); 8743 } 8744 8745 /** 8746 * Gets the substring before the last occurrence of a separator. 8747 * 8748 * @param string $str <p>The input string.</p> 8749 * @param string $separator <p>The string separator.</p> 8750 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8751 * 8752 * @psalm-pure 8753 * 8754 * @return string 8755 */ 8756 public static function str_substr_before_last_separator(string $str, string $separator, string $encoding = 'UTF-8'): string 8757 { 8758 if ($separator === '' || $str === '') { 8759 return ''; 8760 } 8761 8762 if ($encoding === 'UTF-8') { 8763 $offset = \mb_strrpos($str, $separator); 8764 if ($offset === false) { 8765 return ''; 8766 } 8767 8768 return (string) \mb_substr( 8769 $str, 8770 0, 8771 $offset 8772 ); 8773 } 8774 8775 $offset = self::strrpos($str, $separator, 0, $encoding); 8776 if ($offset === false) { 8777 return ''; 8778 } 8779 8780 $encoding = self::normalize_encoding($encoding, 'UTF-8'); 8781 8782 return (string) self::substr( 8783 $str, 8784 0, 8785 $offset, 8786 $encoding 8787 ); 8788 } 8789 8790 /** 8791 * Gets the 
substring after (or before via "$before_needle") the first occurrence of the "$needle". 8792 * 8793 * @param string $str <p>The input string.</p> 8794 * @param string $needle <p>The string to look for.</p> 8795 * @param bool $before_needle [optional] <p>Default: false</p> 8796 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8797 * 8798 * @psalm-pure 8799 * 8800 * @return string 8801 */ 8802 public static function str_substr_first( 8803 string $str, 8804 string $needle, 8805 bool $before_needle = false, 8806 string $encoding = 'UTF-8' 8807 ): string { 8808 if ($str === '' || $needle === '') { 8809 return ''; 8810 } 8811 8812 if ($encoding === 'UTF-8') { 8813 if ($before_needle) { 8814 $part = \mb_strstr( 8815 $str, 8816 $needle, 8817 $before_needle 8818 ); 8819 } else { 8820 $part = \mb_strstr( 8821 $str, 8822 $needle 8823 ); 8824 } 8825 } else { 8826 $part = self::strstr( 8827 $str, 8828 $needle, 8829 $before_needle, 8830 $encoding 8831 ); 8832 } 8833 8834 return $part === false ? '' : $part; 8835 } 8836 8837 /** 8838 * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle". 
8839 * 8840 * @param string $str <p>The input string.</p> 8841 * @param string $needle <p>The string to look for.</p> 8842 * @param bool $before_needle [optional] <p>Default: false</p> 8843 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8844 * 8845 * @psalm-pure 8846 * 8847 * @return string 8848 */ 8849 public static function str_substr_last( 8850 string $str, 8851 string $needle, 8852 bool $before_needle = false, 8853 string $encoding = 'UTF-8' 8854 ): string { 8855 if ($str === '' || $needle === '') { 8856 return ''; 8857 } 8858 8859 if ($encoding === 'UTF-8') { 8860 if ($before_needle) { 8861 $part = \mb_strrchr( 8862 $str, 8863 $needle, 8864 $before_needle 8865 ); 8866 } else { 8867 $part = \mb_strrchr( 8868 $str, 8869 $needle 8870 ); 8871 } 8872 } else { 8873 $part = self::strrchr( 8874 $str, 8875 $needle, 8876 $before_needle, 8877 $encoding 8878 ); 8879 } 8880 8881 return $part === false ? '' : $part; 8882 } 8883 8884 /** 8885 * Surrounds $str with the given substring. 8886 * 8887 * @param string $str 8888 * @param string $substring <p>The substring to add to both sides.</p> 8889 * 8890 * @psalm-pure 8891 * 8892 * @return string 8893 * <p>A string with the substring both prepended and appended.</p> 8894 */ 8895 public static function str_surround(string $str, string $substring): string 8896 { 8897 return $substring . $str . $substring; 8898 } 8899 8900 /** 8901 * Returns a trimmed string with the first letter of each word capitalized. 8902 * Also accepts an array, $ignore, allowing you to list words not to be 8903 * capitalized. 8904 * 8905 * @param string $str 8906 * @param array|string[]|null $ignore [optional] <p>An array of words not to capitalize or 8907 * null. 
Default: null</p> 8908 * @param string $encoding [optional] <p>Default: 'UTF-8'</p> 8909 * @param bool $clean_utf8 [optional] <p>Remove non UTF-8 chars from the 8910 * string.</p> 8911 * @param string|null $lang [optional] <p>Set the language for special cases: az, 8912 * el, lt, tr</p> 8913 * @param bool $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: 8914 * e.g. ẞ -> ß</p> 8915 * @param bool $use_trim_first [optional] <p>true === trim the input string, 8916 * first</p> 8917 * @param string|null $word_define_chars [optional] <p>An string of chars that will be used as 8918 * whitespace separator === words.</p> 8919 * 8920 * @psalm-pure 8921 * 8922 * @return string 8923 * <p>The titleized string.</p> 8924 * 8925 * @noinspection PhpTooManyParametersInspection 8926 */ 8927 public static function str_titleize( 8928 string $str, 8929 array $ignore = null, 8930 string $encoding = 'UTF-8', 8931 bool $clean_utf8 = false, 8932 string $lang = null, 8933 bool $try_to_keep_the_string_length = false, 8934 bool $use_trim_first = true, 8935 string $word_define_chars = null 8936 ): string { 8937 if ($str === '') { 8938 return ''; 8939 } 8940 8941 if ($encoding !== 'UTF-8' && $encoding !== 'CP850') { 8942 $encoding = self::normalize_encoding($encoding, 'UTF-8'); 8943 } 8944 8945 if ($use_trim_first) { 8946 $str = \trim($str); 8947 } 8948 8949 if ($clean_utf8) { 8950 $str = self::clean($str); 8951 } 8952 8953 $use_mb_functions = $lang === null && !$try_to_keep_the_string_length; 8954 8955 if ($word_define_chars) { 8956 $word_define_chars = \preg_quote($word_define_chars, '/'); 8957 } else { 8958 $word_define_chars = ''; 8959 } 8960 8961 $str = (string) \preg_replace_callback( 8962 '/([^\\s' . $word_define_chars . 
']+)/u', 8963 static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string { 8964 if ($ignore !== null && \in_array($match[0], $ignore, true)) { 8965 return $match[0]; 8966 } 8967 8968 if ($use_mb_functions) { 8969 if ($encoding === 'UTF-8') { 8970 return \mb_strtoupper(\mb_substr($match[0], 0, 1)) 8971 . \mb_strtolower(\mb_substr($match[0], 1)); 8972 } 8973 8974 return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding) 8975 . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding); 8976 } 8977 8978 return self::ucfirst( 8979 self::strtolower( 8980 $match[0], 8981 $encoding, 8982 false, 8983 $lang, 8984 $try_to_keep_the_string_length 8985 ), 8986 $encoding, 8987 false, 8988 $lang, 8989 $try_to_keep_the_string_length 8990 ); 8991 }, 8992 $str 8993 ); 8994 8995 return $str; 8996 } 8997 8998 /** 8999 * Convert a string into a obfuscate string. 9000 * 9001 * EXAMPLE: <code> 9002 * 9003 * UTF8::str_obfuscate('[email protected]', 0.5, '*', ['@', '.']); // e.g. 
"l***@m**lleke*.*r*" 9004 * </code> 9005 * 9006 * @param string $str 9007 * @param float $percent 9008 * @param string $obfuscateChar 9009 * @param string[] $keepChars 9010 * 9011 * @psalm-pure 9012 * 9013 * @return string 9014 * <p>The obfuscate string.</p> 9015 */ 9016 public static function str_obfuscate( 9017 string $str, 9018 float $percent = 0.5, 9019 string $obfuscateChar = '*', 9020 array $keepChars = [] 9021 ): string { 9022 $obfuscateCharHelper = "\u{2603}"; 9023 $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str); 9024 9025 $chars = self::chars($str); 9026 $charsMax = \count($chars); 9027 $charsMaxChange = \round($charsMax * $percent); 9028 $charsCounter = 0; 9029 $charKeyDone = []; 9030 9031 while ($charsCounter < $charsMaxChange) { 9032 foreach ($chars as $charKey => $char) { 9033 if (isset($charKeyDone[$charKey])) { 9034 continue; 9035 } 9036 9037 if (\random_int(0, 100) > 50) { 9038 continue; 9039 } 9040 9041 if ($char === $obfuscateChar) { 9042 continue; 9043 } 9044 9045 ++$charsCounter; 9046 $charKeyDone[$charKey] = true; 9047 9048 if ($charsCounter > $charsMaxChange) { 9049 break; 9050 } 9051 9052 if (\in_array($char, $keepChars, true)) { 9053 continue; 9054 } 9055 9056 $chars[$charKey] = $obfuscateChar; 9057 } 9058 } 9059 9060 $str = \implode('', $chars); 9061 9062 return \str_replace($obfuscateCharHelper, $obfuscateChar, $str); 9063 } 9064 9065 /** 9066 * Returns a trimmed string in proper title case. 9067 * 9068 * Also accepts an array, $ignore, allowing you to list words not to be 9069 * capitalized. 9070 * 9071 * Adapted from John Gruber's script. 9072 * 9073 * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78 9074 * 9075 * @param string $str 9076 * @param array $ignore <p>An array of words not to capitalize.</p> 9077 * @param string $encoding [optional] <p>Set the charset for e.g. 
"mb_" function</p> 9078 * 9079 * @psalm-pure 9080 * 9081 * @return string 9082 * <p>The titleized string.</p> 9083 */ 9084 public static function str_titleize_for_humans( 9085 string $str, 9086 array $ignore = [], 9087 string $encoding = 'UTF-8' 9088 ): string { 9089 if ($str === '') { 9090 return ''; 9091 } 9092 9093 $small_words = [ 9094 '(?<!q&)a', 9095 'an', 9096 'and', 9097 'as', 9098 'at(?!&t)', 9099 'but', 9100 'by', 9101 'en', 9102 'for', 9103 'if', 9104 'in', 9105 'of', 9106 'on', 9107 'or', 9108 'the', 9109 'to', 9110 'v[.]?', 9111 'via', 9112 'vs[.]?', 9113 ]; 9114 9115 if ($ignore !== []) { 9116 $small_words = \array_merge($small_words, $ignore); 9117 } 9118 9119 $small_words_rx = \implode('|', $small_words); 9120 $apostrophe_rx = '(?x: [\'’] [[:lower:]]* )?'; 9121 9122 $str = \trim($str); 9123 9124 if (!self::has_lowercase($str)) { 9125 $str = self::strtolower($str, $encoding); 9126 } 9127 9128 // the main substitutions 9129 /** @noinspection RegExpDuplicateAlternationBranch - false-positive - https://youtrack.jetbrains.com/issue/WI-51002 */ 9130 $str = (string) \preg_replace_callback( 9131 '~\\b (_*) (?: # 1. Leading underscore and 9132 ( (?<=[ ][/\\\\]) [[:alpha:]]+ [-_[:alpha:]/\\\\]+ | # 2. file path or 9133 [-_[:alpha:]]+ [@.:] [-_[:alpha:]@.:/]+ ' . $apostrophe_rx . ' ) # URL, domain, or email 9134 | 9135 ( (?i: ' . $small_words_rx . ' ) ' . $apostrophe_rx . ' ) # 3. or small word (case-insensitive) 9136 | 9137 ( [[:alpha:]] [[:lower:]\'’()\[\]{}]* ' . $apostrophe_rx . ' ) # 4. or word w/o internal caps 9138 | 9139 ( [[:alpha:]] [[:alpha:]\'’()\[\]{}]* ' . $apostrophe_rx . ' ) # 5. or some other word 9140 ) (_*) \\b # 6. 
With trailing underscore 9141 ~ux', 9142 /** 9143 * @param string[] $matches 9144 * 9145 * @psalm-pure 9146 * 9147 * @return string 9148 */ 9149 static function (array $matches) use ($encoding): string { 9150 // preserve leading underscore 9151 $str = $matches[1]; 9152 if ($matches[2]) { 9153 // preserve URLs, domains, emails and file paths 9154 $str .= $matches[2]; 9155 } elseif ($matches[3]) { 9156 // lower-case small words 9157 $str .= self::strtolower($matches[3], $encoding); 9158 } elseif ($matches[4]) { 9159 // capitalize word w/o internal caps 9160 $str .= static::ucfirst($matches[4], $encoding); 9161 } else { 9162 // preserve other kinds of word (iPhone) 9163 $str .= $matches[5]; 9164 } 9165 // preserve trailing underscore 9166 $str .= $matches[6]; 9167 9168 return $str; 9169 }, 9170 $str 9171 ); 9172 9173 // Exceptions for small words: capitalize at start of title... 9174 $str = (string) \preg_replace_callback( 9175 '~( \\A [[:punct:]]* # start of title... 9176 | [:.;?!][ ]+ # or of subsentence... 9177 | [ ][\'"“‘(\[][ ]* ) # or of inserted subphrase... 9178 ( ' . $small_words_rx . ' ) \\b # ...followed by small word 9179 ~uxi', 9180 /** 9181 * @param string[] $matches 9182 * 9183 * @psalm-pure 9184 * 9185 * @return string 9186 */ 9187 static function (array $matches) use ($encoding): string { 9188 return $matches[1] . static::ucfirst($matches[2], $encoding); 9189 }, 9190 $str 9191 ); 9192 9193 // ...and end of title 9194 $str = (string) \preg_replace_callback( 9195 '~\\b ( ' . $small_words_rx . ' ) # small word... 9196 (?= [[:punct:]]* \Z # ...at the end of the title... 9197 | [\'"’”)\]] [ ] ) # ...or of an inserted subphrase? 9198 ~uxi', 9199 /** 9200 * @param string[] $matches 9201 * 9202 * @psalm-pure 9203 * 9204 * @return string 9205 */ 9206 static function (array $matches) use ($encoding): string { 9207 return static::ucfirst($matches[1], $encoding); 9208 }, 9209 $str 9210 ); 9211 9212 // Exceptions for small words in hyphenated compound words. 
9213 // e.g. "in-flight" -> In-Flight 9214 $str = (string) \preg_replace_callback( 9215 '~\\b 9216 (?<! -) # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (in-flight) 9217 ( ' . $small_words_rx . ' ) 9218 (?= -[[:alpha:]]+) # lookahead for "-someword" 9219 ~uxi', 9220 /** 9221 * @param string[] $matches 9222 * 9223 * @psalm-pure 9224 * 9225 * @return string 9226 */ 9227 static function (array $matches) use ($encoding): string { 9228 return static::ucfirst($matches[1], $encoding); 9229 }, 9230 $str 9231 ); 9232 9233 // e.g. "Stand-in" -> "Stand-In" (Stand is already capped at this point) 9234 $str = (string) \preg_replace_callback( 9235 '~\\b 9236 (?<!…) # Negative lookbehind for a hyphen; we do not want to match man-in-the-middle but do want (stand-in) 9237 ( [[:alpha:]]+- ) # $1 = first word and hyphen, should already be properly capped 9238 ( ' . $small_words_rx . ' ) # ...followed by small word 9239 (?! - ) # Negative lookahead for another - 9240 ~uxi', 9241 /** 9242 * @param string[] $matches 9243 * 9244 * @psalm-pure 9245 * 9246 * @return string 9247 */ 9248 static function (array $matches) use ($encoding): string { 9249 return $matches[1] . static::ucfirst($matches[2], $encoding); 9250 }, 9251 $str 9252 ); 9253 9254 return $str; 9255 } 9256 9257 /** 9258 * Get a binary representation of a specific string. 
9259 * 9260 * EXAPLE: <code>UTF8::str_to_binary('😃'); // '11110000100111111001100010000011'</code> 9261 * 9262 * @param string $str <p>The input string.</p> 9263 * 9264 * @psalm-pure 9265 * 9266 * @return false|string 9267 * <p>false on error</p> 9268 */ 9269 public static function str_to_binary(string $str) 9270 { 9271 /** @var array|false $value - needed for PhpStan (stubs error) */ 9272 $value = \unpack('H*', $str); 9273 if ($value === false) { 9274 return false; 9275 } 9276 9277 /** @noinspection OffsetOperationsInspection */ 9278 return \base_convert($value[1], 16, 2); 9279 } 9280 9281 /** 9282 * @param string $str 9283 * @param bool $remove_empty_values <p>Remove empty values.</p> 9284 * @param int|null $remove_short_values <p>The min. string length or null to disable</p> 9285 * 9286 * @psalm-pure 9287 * 9288 * @return string[] 9289 */ 9290 public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array 9291 { 9292 if ($str === '') { 9293 return $remove_empty_values ? [] : ['']; 9294 } 9295 9296 if (self::$SUPPORT['mbstring'] === true) { 9297 /** @noinspection PhpComposerExtensionStubsInspection */ 9298 $return = \mb_split("[\r\n]{1,2}", $str); 9299 } else { 9300 $return = \preg_split("/[\r\n]{1,2}/u", $str); 9301 } 9302 9303 if ($return === false) { 9304 return $remove_empty_values ? [] : ['']; 9305 } 9306 9307 if ( 9308 $remove_short_values === null 9309 && 9310 !$remove_empty_values 9311 ) { 9312 return $return; 9313 } 9314 9315 return self::reduce_string_array( 9316 $return, 9317 $remove_empty_values, 9318 $remove_short_values 9319 ); 9320 } 9321 9322 /** 9323 * Convert a string into an array of words. 
9324 * 9325 * EXAMPLE: <code>UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')</code> 9326 * 9327 * @param string $str 9328 * @param string $char_list <p>Additional chars for the definition of "words".</p> 9329 * @param bool $remove_empty_values <p>Remove empty values.</p> 9330 * @param int|null $remove_short_values <p>The min. string length or null to disable</p> 9331 * 9332 * @psalm-pure 9333 * 9334 * @return string[] 9335 */ 9336 public static function str_to_words( 9337 string $str, 9338 string $char_list = '', 9339 bool $remove_empty_values = false, 9340 int $remove_short_values = null 9341 ): array { 9342 if ($str === '') { 9343 return $remove_empty_values ? [] : ['']; 9344 } 9345 9346 $char_list = self::rxClass($char_list, '\pL'); 9347 9348 $return = \preg_split("/({$char_list}+(?:[\p{Pd}’']{$char_list}+)*)/u", $str, -1, \PREG_SPLIT_DELIM_CAPTURE); 9349 if ($return === false) { 9350 return $remove_empty_values ? [] : ['']; 9351 } 9352 9353 if ( 9354 $remove_short_values === null 9355 && 9356 !$remove_empty_values 9357 ) { 9358 return $return; 9359 } 9360 9361 $tmp_return = self::reduce_string_array( 9362 $return, 9363 $remove_empty_values, 9364 $remove_short_values 9365 ); 9366 9367 foreach ($tmp_return as &$item) { 9368 $item = (string) $item; 9369 } 9370 9371 return $tmp_return; 9372 } 9373 9374 /** 9375 * alias for "UTF8::to_ascii()" 9376 * 9377 * @param string $str 9378 * @param string $unknown 9379 * @param bool $strict 9380 * 9381 * @psalm-pure 9382 * 9383 * @return string 9384 * 9385 * @see UTF8::to_ascii() 9386 * @deprecated <p>please use "UTF8::to_ascii()"</p> 9387 */ 9388 public static function str_transliterate( 9389 string $str, 9390 string $unknown = '?', 9391 bool $strict = false 9392 ): string { 9393 return self::to_ascii($str, $unknown, $strict); 9394 } 9395 9396 /** 9397 * Truncates the string to a given length. 
* If $substring is provided, and
 * truncating occurs, the string is further truncated so that the substring
 * may be appended without exceeding the desired length.
 *
 * NOTE(review): there is no guard for a $substring that is longer than $length, the reduced
 *               length then becomes negative before it is handed to the substr call.
 *               Confirm whether that is intended.
 *
 * @param string $str
 * @param int    $length    <p>Desired length of the truncated string.</p>
 * @param string $substring [optional] <p>The substring to append if it can fit. Default: ''</p>
 * @param string $encoding  [optional] <p>Default: 'UTF-8'</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8'
): string {
    if ($str === '') {
        return '';
    }

    //
    // other encodings: go through the encoding aware helpers of this class
    //

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // the string is short enough already
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve room for the substring
        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
        }

        $head = (string) self::substr($str, 0, $length, $encoding);

        return $head . $substring;
    }

    //
    // UTF-8: use "mb_*" directly
    //

    // the string is short enough already
    if ((int) \mb_strlen($str) <= $length) {
        return $str;
    }

    if ($substring === '') {
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \mb_substr($str, 0, $length);
    }

    // reserve room for the substring
    $length -= (int) \mb_strlen($substring);

    /** @noinspection UnnecessaryCastingInspection */
    return (string) \mb_substr($str, 0, $length) . $substring;
}

/**
 * Truncates the string to a given length, while ensuring that it does not
 * split words. If $substring is provided, and truncating occurs, the
 * string is further truncated so that the substring may be appended without
 * exceeding the desired length.
*
 * @param string $str
 * @param int    $length                                 <p>Desired length of the truncated string.</p>
 * @param string $substring                              [optional] <p>The substring to append if it can fit.
 *                                                       Default: ''</p>
 * @param string $encoding                               [optional] <p>Default: 'UTF-8'</p>
 * @param bool   $ignore_do_not_split_words_for_one_word [optional] <p>Default: false</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string after truncating.</p>
 */
public static function str_truncate_safe(
    string $str,
    int $length,
    string $substring = '',
    string $encoding = 'UTF-8',
    bool $ignore_do_not_split_words_for_one_word = false
): string {
    if ($str === '' || $length <= 0) {
        return $substring;
    }

    //
    // other encodings: go through the encoding aware helpers of this class
    //

    if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // the string is short enough already
        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        // reserve room for the substring, give up if nothing of the string would be left
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $truncated = self::substr($str, 0, $length, $encoding);
        if ($truncated === false) {
            return '';
        }

        // the cut only ended on a word boundary if the next space sits exactly at $length
        $space_position = self::strpos($str, ' ', $length - 1, $encoding);
        if ($space_position !== $length) {
            // the last word was cut: go back to the last space inside the truncated part
            $last_position = self::strrpos($truncated, ' ', 0, $encoding);

            $cut_back = $last_position !== false
                        ||
                        ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

            if ($cut_back) {
                $truncated = (string) self::substr($truncated, 0, (int) $last_position, $encoding);
            }
        }

        return $truncated . $substring;
    }

    //
    // UTF-8: use "mb_*" directly
    //

    // the string is short enough already
    if ((int) \mb_strlen($str) <= $length) {
        return $str;
    }

    // reserve room for the substring, give up if nothing of the string would be left
    $length -= (int) \mb_strlen($substring);
    if ($length <= 0) {
        return $substring;
    }

    /** @var false|string $truncated - needed for PhpStan (stubs error) */
    $truncated = \mb_substr($str, 0, $length);
    if ($truncated === false) {
        return '';
    }

    // the cut only ended on a word boundary if the next space sits exactly at $length
    $space_position = \mb_strpos($str, ' ', $length - 1);
    if ($space_position !== $length) {
        // the last word was cut: go back to the last space inside the truncated part
        $last_position = \mb_strrpos($truncated, ' ', 0);

        $cut_back = $last_position !== false
                    ||
                    ($space_position !== false && !$ignore_do_not_split_words_for_one_word);

        if ($cut_back) {
            $truncated = (string) \mb_substr($truncated, 0, (int) $last_position);
        }
    }

    return $truncated . $substring;
}

/**
 * Returns a lowercase and trimmed string separated by underscores.
 * Underscores are inserted before uppercase characters (with the exception
 * of the first character of the string), and in place of spaces as well as
 * dashes.
 *
 * INFO: This is "UTF8::str_delimit()" with "_" as delimiter.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The underscored string.</p>
 */
public static function str_underscored(string $str): string
{
    $underscored = self::str_delimit($str, '_');

    return $underscored;
}

/**
 * Returns an UpperCamelCase version of the supplied string. It trims
 * surrounding spaces, capitalizes letters following digits, spaces, dashes
 * and underscores, and removes spaces, dashes, underscores.
*
 * INFO: The string is passed through "UTF8::str_camelize()" first, afterwards "UTF8::ucfirst()"
 *       is applied to the result.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Default: 'UTF-8'</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string in UpperCamelCase.</p>
 */
public static function str_upper_camelize(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $camelized = self::str_camelize($str, $encoding);

    return self::ucfirst(
        $camelized,
        $encoding,
        $clean_utf8,
        $lang,
        $try_to_keep_the_string_length
    );
}

/**
 * Deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()".
 *
 * @param string      $str
 * @param string      $encoding
 * @param bool        $clean_utf8
 * @param string|null $lang
 * @param bool        $try_to_keep_the_string_length
 *
 * @psalm-pure
 *
 * @return string
 *
 * @see        UTF8::ucfirst()
 * @deprecated <p>please use "UTF8::ucfirst()"</p>
 */
public static function str_upper_first(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    $result = self::ucfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

    return $result;
}

/**
 * Get the number of words in a specific string.
*
 * EXAMPLES: <code>
 * // format: 0 -> return only word count (int)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
 * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
 *
 * // format: 1 -> return words (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
 *
 * // format: 2 -> return words with offset (array)
 * //
 * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
 * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
 * </code>
 *
 * @param string $str       <p>The input string.</p>
 * @param int    $format    [optional] <p>
 *                          <strong>0</strong> => return a number of words (default)<br>
 *                          <strong>1</strong> => return an array of words<br>
 *                          <strong>2</strong> => return an array of words with word-offset as key
 *                          </p>
 * @param string $char_list [optional] <p>Additional chars that belong to words and do not start a new word.</p>
 *
 * @psalm-pure
 *
 * @return int|string[]
 *                      <p>The number of words in the string.</p>
 */
public static function str_word_count(string $str, int $format = 0, string $char_list = '')
{
    // the words are located at the odd indexes, the text between them at the even indexes
    $parts = self::str_to_words($str, $char_list);
    $parts_count = \count($parts);

    if ($format === 1) {
        $words = [];
        for ($i = 1; $i < $parts_count; $i += 2) {
            $words[] = $parts[$i];
        }

        return $words;
    }

    if ($format === 2) {
        $words = [];

        // the first word starts after the leading "non-word" part
        $offset = (int) self::strlen($parts[0]);
        for ($i = 1; $i < $parts_count; $i += 2) {
            $words[$offset] = $parts[$i];

            // skip the word itself and the "non-word" part that follows it
            $offset += (int) self::strlen($parts[$i]) + (int) self::strlen($parts[$i + 1]);
        }

        return $words;
    }

    return (int) (($parts_count - 1) / 2);
}

/**
 * Case-insensitive string comparison.
 *
 * INFO: Case-insensitive version of UTF8::strcmp(), both strings are passed through
 *       UTF8::strtocasefold() before they are compared.
 *
 * EXAMPLE: <code>UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if str1 is less than str2;<br>
 *             <strong>> 0</strong> if str1 is greater than str2,<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcasecmp(
    string $str1,
    string $str2,
    string $encoding = 'UTF-8'
): int {
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strcmp($folded1, $folded2);
}

/**
 * Deprecated alias: forwards all arguments unchanged to "UTF8::strstr()".
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $clean_utf8
 *
 * @psalm-pure
 *
 * @return false|string
 *
 * @see        UTF8::strstr()
 * @deprecated <p>please use "UTF8::strstr()"</p>
 */
public static function strchr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

    return $result;
}

/**
 * Case-sensitive string comparison.
*
 * EXAMPLE: <code>UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0</code>
 *
 * INFO: Both strings are normalized (NFD) before the byte-wise comparison. A string that
 *       can not be normalized is compared as it is.
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if str1 is less than str2<br>
 *             <strong>> 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strcmp(string $str1, string $str2): int
{
    if ($str1 === $str2) {
        return 0;
    }

    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // "Normalizer::normalize()" does not return a string on failure and with
    // "strict_types" that would end in a TypeError inside "\strcmp()",
    // so fall back to the unchanged input of the affected string.
    return \strcmp(
        \is_string($normalized1) ? $normalized1 : $str1,
        \is_string($normalized2) ? $normalized2 : $str2
    );
}

/**
 * Find length of initial segment not matching mask.
 *
 * @param string   $str
 * @param string   $char_list
 * @param int      $offset
 * @param int|null $length
 * @param string   $encoding  [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strcspn(
    string $str,
    string $char_list,
    int $offset = 0,
    int $length = null,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // without a mask nothing can match, so the whole string is the segment
    if ($char_list === '') {
        return (int) self::strlen($str, $encoding);
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
        if ($encoding !== 'UTF-8') {
            $str_tmp = self::substr($str, $offset, $length, $encoding);
        } elseif ($length === null) {
            $str_tmp = \mb_substr($str, $offset);
        } else {
            $str_tmp = \mb_substr($str, $offset, $length);
        }

        if ($str_tmp === false) {
            return 0;
        }

        /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
        $str = $str_tmp;
    }

    if ($str === '') {
        return 0;
    }

    // capture everything in front of the first char from the mask
    $matches = [];
    if (!\preg_match('/^(.*?)' . self::rxClass($char_list) . '/us', $str, $matches)) {
        // no char of the mask was found
        return (int) self::strlen($str, $encoding);
    }

    $segment_length = self::strlen($matches[1], $encoding);

    return $segment_length === false ? 0 : $segment_length;
}

/**
 * Deprecated alias: forwards all arguments unchanged to "UTF8::stristr()".
 *
 * @param string $haystack
 * @param string $needle
 * @param bool   $before_needle
 * @param string $encoding
 * @param bool   $clean_utf8
 *
 * @psalm-pure
 *
 * @return false|string
 *
 * @see        UTF8::stristr()
 * @deprecated <p>please use "UTF8::stristr()"</p>
 */
public static function strichr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $clean_utf8);

    return $result;
}

/**
 * Create a UTF-8 string from code points.
 *
 * INFO: opposite to UTF8::codepoints()
 *
 * EXAMPLE: <code>UTF8::string(array(246, 228, 252)); // 'öäü'</code>
 *
 * NOTE(review): every value is cast via "(int)" before it is used, verify how
 *               hexadecimal input is expected to be passed by the callers.
 *
 * @param int|int[]|string|string[] $intOrHex <p>Integer or Hexadecimal codepoints.</p>
 *
 * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A UTF-8 encoded string.</p>
 */
public static function string($intOrHex): string
{
    if ($intOrHex === []) {
        return '';
    }

    if (!\is_array($intOrHex)) {
        $intOrHex = [$intOrHex];
    }

    // build one numeric html entity per code point and decode them all at once
    $entities = \array_map(
        static function ($code_point): string {
            return '&#' . (int) $code_point . ';';
        },
        $intOrHex
    );

    return self::html_entity_decode(\implode('', $entities), \ENT_QUOTES | \ENT_HTML5);
}

/**
 * Checks if string starts with "BOM" (Byte Order Mark Character) character.
*
 * EXAMPLE: <code>UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true</code>
 *
 * @param string $str <p>The input string.</p>
 *
 * @psalm-pure
 *
 * @return bool
 *              <p>
 *              <strong>true</strong> if the string has BOM at the start,<br>
 *              <strong>false</strong> otherwise
 *              </p>
 */
public static function string_has_bom(string $str): bool
{
    // Iterate by value: the BOM table is shared static state and is only read here,
    // a by-reference loop would turn its elements into references for no reason.
    foreach (self::$BOM as $bom_string => $bom_byte_length) {
        // compare only the first bytes of the string with the current BOM
        if (\strncmp($str, (string) $bom_string, $bom_byte_length) === 0) {
            return true;
        }
    }

    return false;
}

/**
 * Strip HTML and PHP tags from a string + clean invalid UTF-8.
 *
 * EXAMPLE: <code>UTF8::strip_tags("<span>κόσμε\xa0\xa1</span>"); // 'κόσμε'</code>
 *
 * @see http://php.net/manual/en/function.strip-tags.php
 *
 * @param string      $str            <p>
 *                                    The input string.
 *                                    </p>
 * @param string|null $allowable_tags [optional] <p>
 *                                    You can use the optional second parameter to specify tags which should
 *                                    not be stripped.
 *                                    </p>
 *                                    <p>
 *                                    HTML comments and PHP tags are also stripped. This is hardcoded and
 *                                    can not be changed with allowable_tags.
 *                                    </p>
 * @param bool        $clean_utf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The stripped string.</p>
 */
public static function strip_tags(
    string $str,
    string $allowable_tags = null,
    bool $clean_utf8 = false
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // only pass the second argument if the caller provided one
    return $allowable_tags === null
        ? \strip_tags($str)
        : \strip_tags($str, $allowable_tags);
}

/**
 * Strip all whitespace characters.
* This includes tabs and newline
 * characters, as well as multibyte whitespace such as the thin space
 * and ideographic space.
 *
 * EXAMPLE: <code>UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'</code>
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return string
 */
public static function strip_whitespace(string $str): string
{
    return $str === ''
        ? ''
        : (string) \preg_replace('/[[:space:]]+/u', '', $str);
}

/**
 * Find the position of the first occurrence of a substring in a string, case-insensitive.
 *
 * INFO: use UTF8::stripos_in_byte() for the byte-length
 *
 * EXAMPLE: <code>UTF8::stripos('aσσb', 'ΣΣ'); // 1</code> (σσ == ΣΣ)
 *
 * @see http://php.net/manual/en/function.mb-stripos.php
 *
 * @param string $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
 * @param string $needle     <p>The string to find in haystack.</p>
 * @param int    $offset     [optional] <p>The position in haystack to start searching.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   Return the <strong>(int)</strong> numeric position of the first occurrence of needle in the
 *                   haystack string,<br> or <strong>false</strong> if needle is not found
 */
public static function stripos(
    string $haystack,
    string $needle,
    int $offset = 0,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // an empty needle is only "found" (at position 0) on PHP >= 8.0
    if ($haystack === '') {
        return \PHP_VERSION_ID >= 80000 && $needle === '' ? 0 : false;
    }

    if (\PHP_VERSION_ID < 80000 && $needle === '') {
        return false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $haystack = self::clean($haystack);
        $needle = self::clean($needle);
    }

    //
    // preferred way: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_stripos($haystack, $needle, $offset);
        }

        return \mb_stripos($haystack, $needle, $offset, self::normalize_encoding($encoding, 'UTF-8'));
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
        &&
        $offset >= 0 // grapheme_stripos() can't handle negative offset
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $position = \grapheme_stripos($haystack, $needle, $offset);
        if ($position !== false) {
            return $position;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($haystack . $needle)) {
        return \stripos($haystack, $needle, $offset);
    }

    //
    // fallback via vanilla php: case-fold both strings and search case-sensitive
    //

    $haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
    $needle = self::strtocasefold($needle, true, false, $encoding, null, false);

    return self::strpos($haystack, $needle, $offset, $encoding);
}

/**
 * Returns all of haystack starting from and including the first occurrence of needle to the end.
 *
 * EXAMPLE: <code>
 * $str = 'iñtërnâtiônàlizætiøn';
 * $search = 'NÂT';
 *
 * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
 * UTF8::stristr($str, $search, true)); // 'iñtër'
 * </code>
 *
 * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
 * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
 * @param bool   $before_needle [optional] <p>
 *                              If <b>TRUE</b>, it returns the part of the
 *                              haystack before the first occurrence of the needle (excluding the needle).
 *                              </p>
 * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|string
 *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
 */
public static function stristr(
    string $haystack,
    string $needle,
    bool $before_needle = false,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    // an empty needle is only "found" on PHP >= 8.0
    if ($haystack === '') {
        return \PHP_VERSION_ID >= 80000 && $needle === '' ? '' : false;
    }

    if ($clean_utf8) {
        // "mb_strpos()" and "iconv_strpos()" returns wrong position,
        // if invalid characters are found in $haystack before $needle
        $needle = self::clean($needle);
        $haystack = self::clean($haystack);
    }

    if ($needle === '') {
        return \PHP_VERSION_ID >= 80000 ? $haystack : false;
    }

    //
    // preferred way: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        if ($encoding === 'UTF-8') {
            return \mb_stristr($haystack, $needle, $before_needle);
        }

        return \mb_stristr($haystack, $needle, $before_needle, self::normalize_encoding($encoding, 'UTF-8'));
    }

    $encoding = self::normalize_encoding($encoding, 'UTF-8');

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $found = \grapheme_stristr($haystack, $needle, $before_needle);
        if ($found !== false) {
            return $found;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($needle . $haystack)) {
        return \stristr($haystack, $needle, $before_needle);
    }

    //
    // fallback via vanilla php: capture everything in front of the needle
    //

    \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
        return false;
    }

    if ($before_needle) {
        return $match[1];
    }

    // the rest of the haystack starts right after the captured prefix
    $prefix_length = (int) self::strlen($match[1], $encoding);

    return self::substr($haystack, $prefix_length, null, $encoding);
}

/**
 * Get the string length, not the byte-length!
 *
 * INFO: use UTF8::strwidth() for the char-length
 *
 * EXAMPLE: <code>UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20</code>
 *
 * @see http://php.net/manual/en/function.mb-strlen.php
 *
 * @param string $str        <p>The string being checked for length.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return false|int
 *                   <p>
 *                   The number <strong>(int)</strong> of characters in the string $str having character encoding
 *                   $encoding.
 *                   (One multi-byte character counted as +1).
 *                   <br>
 *                   Can return <strong>false</strong>, if e.g. mbstring is not installed and we process invalid
 *                   chars.
*                   </p>
 */
public static function strlen(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
) {
    if ($str === '') {
        return 0;
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($clean_utf8) {
        // "mb_strlen" and "\iconv_strlen" returns wrong length,
        // if invalid characters are found in $str
        $str = self::clean($str);
    }

    //
    // preferred way: mbstring
    //

    if (self::$SUPPORT['mbstring'] === true) {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
        return $encoding === 'UTF-8'
            ? @\mb_strlen($str)
            : @\mb_strlen($str, $encoding);
    }

    //
    // fallback for binary || ascii only
    //

    if ($encoding === 'CP850' || $encoding === 'ASCII') {
        return \strlen($str);
    }

    // warn if none of the available extensions can handle the requested encoding
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - it is only a warning
         */
        \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
    }

    //
    // fallback via iconv
    //

    if (self::$SUPPORT['iconv'] === true) {
        $iconv_length = \iconv_strlen($str, $encoding);
        if ($iconv_length !== false) {
            return $iconv_length;
        }
    }

    //
    // fallback via intl
    //

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
    ) {
        $grapheme_length = \grapheme_strlen($str);
        if ($grapheme_length !== null) {
            return $grapheme_length;
        }
    }

    //
    // fallback for ascii only
    //

    if (ASCII::is_ascii($str)) {
        return \strlen($str);
    }

    //
    // fallback via vanilla php: count the chars that the regex engine can find
    //

    \preg_match_all('/./us', $str, $parts);

    $char_count = \count($parts[0]);

    // the string was not empty, so "no chars found" means that it could not be processed
    return $char_count === 0 ? false : $char_count;
}

/**
 * Get string length in byte.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return int
 */
public static function strlen_in_byte(string $str): int
{
    if ($str === '') {
        return 0;
    }

    if (self::$SUPPORT['mbstring_func_overload'] !== true) {
        return \strlen($str);
    }

    // "mb_" is available if overload is used, so use it ...
    return \mb_strlen($str, 'CP850'); // 8-BIT
}

/**
 * Case-insensitive string comparisons using a "natural order" algorithm.
*
 * INFO: natural order version of UTF8::strcasecmp()
 *
 * EXAMPLES: <code>
 * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if str1 is less than str2<br>
 *             <strong>> 0</strong> if str1 is greater than str2<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
{
    // case-fold both strings, then compare them in natural order
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strnatcmp($folded1, $folded2);
}

/**
 * String comparisons using a "natural order" algorithm
 *
 * INFO: natural order version of UTF8::strcmp()
 *
 * EXAMPLES: <code>
 * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
 * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
 *
 * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
 * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
 * </code>
 *
 * @see http://php.net/manual/en/function.strnatcmp.php
 *
 * @param string $str1 <p>The first string.</p>
 * @param string $str2 <p>The second string.</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if str1 is less than str2;<br>
 *             <strong>> 0</strong> if str1 is greater than str2;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strnatcmp(string $str1, string $str2): int
{
    if ($str1 === $str2) {
        return 0;
    }

    $nat_folded1 = (string) self::strtonatfold($str1);
    $nat_folded2 = (string) self::strtonatfold($str2);

    return \strnatcmp($nat_folded1, $nat_folded2);
}

/**
 * Case-insensitive string comparison of the first n characters.
 *
 * EXAMPLE: <code>
 * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncasecmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>The length of strings to be used in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>> 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncasecmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    // case-fold both strings, the length limit is applied by "UTF8::strncmp()"
    $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
    $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

    return self::strncmp($folded1, $folded2, $len);
}

/**
 * String comparison of the first n characters.
*
 * EXAMPLE: <code>
 * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
 * </code>
 *
 * @see http://php.net/manual/en/function.strncmp.php
 *
 * @param string $str1     <p>The first string.</p>
 * @param string $str2     <p>The second string.</p>
 * @param int    $len      <p>Number of characters to use in the comparison.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return int
 *             <strong>< 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
 *             <strong>> 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
 *             <strong>0</strong> if they are equal
 */
public static function strncmp(
    string $str1,
    string $str2,
    int $len,
    string $encoding = 'UTF-8'
): int {
    if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // cut both strings down to the first $len characters ...
    if ($encoding !== 'UTF-8') {
        $head1 = (string) self::substr($str1, 0, $len, $encoding);
        $head2 = (string) self::substr($str2, 0, $len, $encoding);
    } else {
        $head1 = (string) \mb_substr($str1, 0, $len);
        $head2 = (string) \mb_substr($str2, 0, $len);
    }

    // ... and compare what is left
    return self::strcmp($head1, $head2);
}

/**
 * Search a string for any of a set of characters.
*
     * EXAMPLE: <code>UTF8::strpbrk('-中文空白-', '白'); // '白-'</code>
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack  <p>The string where char_list is looked for.</p>
     * @param string $char_list <p>This parameter is case-sensitive.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The string starting from the character found, or false if it is not found.</p>
     */
    public static function strpbrk(string $haystack, string $char_list)
    {
        $has_input = $haystack !== '' && $char_list !== '';
        if (!$has_input) {
            return false;
        }

        // NOTE(review): rxClass() presumably turns $char_list into a regex character class - confirm
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $hit)) {
            return false;
        }

        // cut the haystack at the byte position of the matched text
        return \substr($haystack, (int) \strpos($haystack, $hit[0]));
    }

    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8</code>
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string     $haystack   <p>The string from which to get the position of the first occurrence of needle.</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>The search offset. If it is not specified, 0 is used.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   The <strong>(int)</strong> numeric position of the first occurrence of needle in the haystack
     *                   string.<br> If needle is not found it returns false.
*/
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8 never finds anything in an empty haystack
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8: an empty needle is found at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // A negative offset counts from the end of the haystack. Translate it into
        // an absolute start position, so that the result below is still relative
        // to the beginning of the whole haystack (like the native functions do).
        if ($offset < 0) {
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position inside the cut haystack ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position and shifted by the skipped characters
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset;
    }

    /**
     * Find the position of the first occurrence of a substring in a string (byte-wise).
     *
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, or if haystack or needle
     *                   is an empty string, it returns false.</p>
     */
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_strpos($haystack, $needle, $offset, 'CP850'); // 8-BIT
        }

        return \strpos($haystack, $needle, $offset);
    }

    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
*
     * @param string $haystack <p>
     *                         The string being checked.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The search offset. If it is not specified, 0 is used.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the first occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it ... ('CP850' === 8-BIT)
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }

    /**
     * Find the last occurrence of a character in a string within another.
     *
     * INFO: the fallbacks that are used without "mbstring" only look at the
     *       first character of the needle.
     *
     * EXAMPLE: <code>UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or false if needle is not found.</p>
     */
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        $is_single_byte = $encoding === 'CP850' || $encoding === 'ASCII';
        if ($is_single_byte && !$before_needle) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv || vanilla php
        //

        // both remaining fallbacks search for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::$SUPPORT['iconv'] === true
            ? \iconv_strrpos($haystack, $needle, $encoding)
            : self::strrpos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }

    /**
     * Reverses characters order in the string.
     *
     * The string is read back to front, one character at a time: via "grapheme_*"
     * if intl is available, otherwise via "mb_*" (UTF-8) or the UTF8 helpers
     * (any other encoding).
     *
     * EXAMPLE: <code>UTF8::strrev('κ-öäü'); // 'üäö-κ'</code>
     *
     * @param string $str      <p>The input string.</p>
     * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The string with characters in the reverse sequence.</p>
     */
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::emoji_encode($str, true);

        // pick the length + the "character at index" reader for the requested encoding
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $length = (int) self::strlen($str, $encoding);
            $char_at = static function (int $index) use ($str, $encoding) {
                return self::substr($str, $index, 1, $encoding);
            };
        } elseif (self::$SUPPORT['intl'] === true) {
            // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
            $length = (int) \grapheme_strlen($str);
            $char_at = static function (int $index) use ($str) {
                return \grapheme_substr($str, $index, 1);
            };
        } else {
            $length = (int) \mb_strlen($str);
            $char_at = static function (int $index) use ($str) {
                return \mb_substr($str, $index, 1);
            };
        }

        $reversed = '';
        for ($index = $length - 1; $index >= 0; --$index) {
            $char = $char_at($index);
            if ($char !== false) {
                $reversed .= $char;
            }
        }

        return self::emoji_decode($reversed, true);
    }

    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'</code>
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
     * @param string $needle        <p>The string to find in haystack.</p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
* If set to true, it returns all of haystack
     *                              from the beginning to the last occurrence of needle.
     *                              If set to false, it returns all of haystack
     *                              from the last occurrence of needle to the end,
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of haystack or<br>false if needle is not found.</p>
     */
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php
        //

        // the fallback searches for the first character of the needle only
        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $needle = (string) $first_char;

        $last_pos = self::strripos($haystack, $needle, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }

    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: <code>UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @param string     $haystack   <p>The string to look in.</p>
     * @param int|string $needle     <p>The string to look for.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>Number of characters to ignore in the beginning or end.</p>
     * @param string     $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8 never finds anything in an empty haystack
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8: an empty needle is found at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
            && $offset >= 0 // grapheme_strripos() can't handle negative offset
            && self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $intl_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($intl_pos !== false) {
                return $intl_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // case-fold both sides, then run the case-sensitive search
        return self::strrpos(
            self::strtocasefold($haystack, true, false, $encoding),
            self::strtocasefold($needle, true, false, $encoding),
            $offset,
            $encoding,
            $clean_utf8
        );
    }

    /**
     * Finds position of last occurrence of a string within another, case-insensitive.
*
     * @param string $haystack <p>
     *                         The string from which to get the position of the last occurrence
     *                         of needle.
     *                         </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>
     *                         The position in haystack
     *                         to start searching.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>Return the numeric position of the last occurrence of needle in the
     *                   haystack string, or false if needle is not found.</p>
     */
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it ... ('CP850' === 8-BIT)
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }

    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: <code>UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13</code>
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string     $haystack   <p>The string being checked, for the last occurrence of needle</p>
     * @param int|string $needle     <p>The string to find in haystack.<br>Or a code point as int.</p>
     * @param int        $offset     [optional] <p>May be specified to begin searching an arbitrary number of characters
     *                               into the string. Negative values will stop searching at an arbitrary point prior to
     *                               the end of the string.
* </p>
     * @param string     $encoding   [optional] <p>Set the charset.</p>
     * @param bool       $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The <strong>(int)</strong> numeric position of the last occurrence of needle in the haystack
     *                   string.<br>If needle is not found, it returns false.</p>
     */
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8 never finds anything in an empty haystack
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // only reachable on PHP >= 8: an empty needle is found at position 0
            return $needle === '' ? 0 : false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Cut the haystack down to the searched part. The offset is counted in
        // characters of $encoding, so the encoding has to be passed along here
        // (like in the vanilla fallback of UTF8::strpos()).
        $haystack_tmp = null;
        if ($offset > 0) {
            // skip the first $offset characters
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            // drop the last |$offset| characters; nothing is skipped at the beginning
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte position inside the cut haystack ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // ... converted into a character position and shifted by the skipped characters
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }

    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack <p>
     *                         The string being checked, for the last occurrence
     *                         of needle.
* </p>
     * @param string $needle   <p>
     *                         The string to find in haystack.
     *                         </p>
     * @param int    $offset   [optional] <p>May be specified to begin searching an arbitrary number of characters into
     *                         the string. Negative values will stop searching at an arbitrary point
     *                         prior to the end of the string.
     *                         </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The numeric position of the last occurrence of needle in the
     *                   haystack string. If needle is not found, it returns false.</p>
     */
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        $nothing_to_search = $haystack === '' || $needle === '';
        if ($nothing_to_search) {
            return false;
        }

        // "mb_" is available if overload is used, so use it ... ('CP850' === 8-BIT)
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }

    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
*
     * EXAMPLE: <code>UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // 3</code>
     *
     * @param string   $str      <p>The input string.</p>
     * @param string   $mask     <p>The mask of chars</p>
     * @param int      $offset   [optional] <p>Start of the examined part of the string.</p>
     * @param int|null $length   [optional] <p>Length of the examined part of the string.</p>
     * @param string   $encoding [optional] <p>Set the charset.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The length of the matching initial segment, 0 if nothing matches.</p>
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the string to the requested part first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // NOTE(review): rxClass() presumably turns $mask into a regex character class - confirm
        $segment = [];
        $pattern = '/^' . self::rxClass($mask) . '+/u';
        if (!\preg_match($pattern, $str, $segment)) {
            return 0;
        }

        return (int) self::strlen($segment[0], $encoding);
    }

    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE: <code>
     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     * </code>
     *
     * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
     * @param string $needle        <p>The string to look for.
Must be valid UTF-8.</p>
     * @param bool   $before_needle [optional] <p>
     *                              If <b>TRUE</b>, strstr() returns the part of the
     *                              haystack before the first occurrence of the needle (excluding the needle).
     *                              </p>
     * @param string $encoding      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8    [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>A sub-string,<br>or <strong>false</strong> if needle is not found.</p>
     */
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return '';
            }

            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            if (\PHP_VERSION_ID >= 80000) {
                // PHP >= 8 finds the empty needle at position 0: everything is
                // "after" it and nothing is "before" it
                return $before_needle ? '' : $haystack;
            }

            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strstr($haystack, $needle, $before_needle);
            }

            return \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php
        //

        // capture (non-greedy) everything in front of the first occurrence of the needle
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        if ($before_needle) {
            return $match[1];
        }

        // skip the captured prefix, the rest starts with the needle
        return self::substr($haystack, (int) self::strlen($match[1]));
    }

    /**
     * Finds first occurrence of a string within another.
     *
     * @param string $haystack      <p>
     *                              The string from which to get the first occurrence
     *                              of needle.
     *                              </p>
     * @param string $needle        <p>
     *                              The string to find in haystack.
     *                              </p>
     * @param bool   $before_needle [optional] <p>
     *                              Determines which portion of haystack
     *                              this function returns.
     *                              If set to true, it returns all of haystack
     *                              from the beginning to the first occurrence of needle.
* If set to false, it returns all of haystack
     *                               from the first occurrence of needle to the end.
     *                               </p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The requested portion of haystack, or false if the needle is not found
     *                      or if either argument is an empty string.</p>
     */
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // nothing to search in, or nothing to search for
        if ($needle === '' || $haystack === '') {
            return false;
        }

        // "mb_" is available if overload is used, so use it (with the 8-BIT charset) ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }

    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: <code>UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'</code>
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string      $str        <p>The input string.</p>
     * @param bool        $full       [optional] <p>
     *                                <b>true</b>, replace full case folding chars (default)<br>
     *                                <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
     *                                </p>
     * @param bool        $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string      $encoding   [optional] <p>Set the charset.</p>
     * @param string|null $lang       [optional] <p>Set the language for special cases: az, el, lt, tr</p>
     * @param bool        $lower      [optional] <p>Use lowercase string, otherwise use uppercase string.
PS: uppercase
     *                                is for some languages better ...</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>The case-folded string; an empty input yields an empty string.</p>
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before any case mapping is applied
            $str = self::clean($str);
        }

        // pre-processing step, see fixStrCaseHelper()
        $str = self::fixStrCaseHelper($str, $lower, $full);

        // plain UTF-8 without a language hint can go straight to the "mb_" functions,
        // everything else is delegated to strtolower() / strtoupper() of this class
        $use_native = $lang === null && $encoding === 'UTF-8';

        if ($lower) {
            return $use_native
                ? \mb_strtolower($str)
                : self::strtolower($str, $encoding, false, $lang);
        }

        return $use_native
            ? \mb_strtoupper($str)
            : self::strtoupper($str, $encoding, false, $lang);
    }

    /**
     * Make a string lowercase.
     *
     * EXAMPLE: <code>UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string      $str                           <p>The string being lowercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g.
ẞ -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to lowercase.</p>
     */
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language hint: always fallback via symfony polyfill
        if ($lang === null) {
            return \mb_strtolower($str, $encoding);
        }

        // a language hint needs the intl transliterator; warn and fall back otherwise
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            return \mb_strtolower($str, $encoding);
        }

        // the list of transliterator ids is loaded once and then kept in the static cache
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the generic lowercase transliterator instead
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }

    /**
     * Make a string uppercase.
     *
     * EXAMPLE: <code>UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'</code>
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string      $str                           <p>The string being uppercased.</p>
     * @param string      $encoding                      [optional] <p>Set the charset.</p>
     * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
     * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
     *                                                   tr</p>
     * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g.
ẞ -> ß</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>String with all alphabetic characters converted to uppercase.</p>
     */
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // the parameter is untyped, so normalize it to a string first
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // remove non UTF-8 chars before any case mapping is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without a language hint
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded once and then kept in the static cache
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl IS available on this branch, only the requested language is unknown,
                    // so the warning mirrors the one from strtolower()
                    /**
                     * @psalm-suppress ImpureFunctionCall - is is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }

    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     * <code>
     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     * </code>
     *
     * @see http://php.net/manual/en/function.strtr.php
     *
     * @param string          $str  <p>The string being translated.</p>
     * @param string|string[] $from <p>The string replacing from, or (when "to" is omitted) an array of
     *                              "search => replace" pairs.</p>
     * @param string|string[] $to   [optional] <p>The string being translated to. Strings are split into
     *                              single characters; if "from" and "to" differ in size, the longer one
     *                              is cut down to the size of the shorter one.</p>
     *
     * @psalm-pure
     *
     * @throws \InvalidArgumentException if "from" and "to" cannot be combined into replacement pairs
     *
     * @return string
     *                <p>This function returns a copy of str, translating all occurrences of each character in "from"
     *                to the corresponding character in "to".</p>
     */
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // identical search and replacement: nothing would change
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            $count_from = \count($from);
            $count_to = \count($to);

            // only as many pairs as both sides provide
            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the inputs untouched until the combine succeeded, so that the
            // exception message can still show what was passed in
            $replace_pairs = \array_combine($from, $to);
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($replace_pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            $from = $replace_pairs;
        }

        // "from" is still a string only if "to" was the empty string: remove / replace it as a whole
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        return \strtr($str, $from);
    }

    /**
     * Return the width of a string.
     *
     * INFO: use UTF8::strlen() for the byte-length
     *
     * EXAMPLE: <code>UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21</code>
     *
     * @param string $str        <p>The input string.</p>
     * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The width; 0 for an empty string. Without mbstring, every character of the
     *             wide ranges listed in the fallback pattern counts as 2, everything else as 1.</p>
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strwidth($str);
            }

            return \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // strip the wide characters and let preg_replace() count them in $wide ...
        $wide = 0;
        $str = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide);

        // ... each of them counts twice, the remaining characters count once
        return ($wide << 1) + (int) self::strlen($str);
    }

    /**
* Get part of a string.
     *
     * EXAMPLE: <code>UTF8::substr('中文空白', 1, 2); // '文空'</code>
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string   $str        <p>The string being checked.</p>
     * @param int      $offset     <p>The first position used in str.</p>
     * @param int|null $length     [optional] <p>The maximum length of the returned string.</p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters. In the fallback path an offset behind the end of
     *                      <i>str</i> yields an empty string, and <b>FALSE</b> is returned if the
     *                      length of <i>str</i> cannot be determined; otherwise the result of the
     *                      underlying substr function is passed through.</p>
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        // NOTE(review): only UTF-8 takes this path; other encodings fall through even with
        // mbstring, and the intl / iconv / str_split fallbacks below are called without
        // $encoding - confirm that this is intended
        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        // (it stays 0 when a length was given and the offset is 0)
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        if ($offset === $str_length && !$length) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? (int) $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            $return_tmp = \iconv_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }

    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE: <code>
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     * </code>
     *
     * @param string   $str1               <p>The main string being compared.</p>
     * @param string   $str2               <p>The secondary string being compared.</p>
     * @param int      $offset             [optional] <p>The start position for the comparison. If negative, it starts
     *                                     counting from the end of the string.</p>
     * @param int|null $length             [optional] <p>The length of the comparison. The default value is the largest
     *                                     of the length of the str compared to the length of main_str less the
     *                                     offset.</p>
     * @param bool     $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
     *                                     insensitive.</p>
     * @param string   $encoding           [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
     *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
     *             <strong>0</strong> if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // without offset and length both strings are compared as a whole
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            // cut the requested window out of $str1, then cut $str2 down to the size of that window
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);
                $str2 = (string) self::substr($str2, 0, (int) self::strlen($str1), $encoding);
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }

    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: <code>UTF8::substr_count('中文空白', '文空', 1, 2); // 1</code>
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string   $haystack   <p>The string to search in.</p>
     * @param string   $needle     <p>The substring to search for.</p>
     * @param int      $offset     [optional] <p>The offset where to start counting.</p>
     * @param int|null $length     [optional] <p>
     *                             The maximum length after the specified offset to search for the
     *                             substring.
     *                             </p>
     * @param string   $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
     * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of occurrences, or false if the needle is an empty string or the
     *                   length of the haystack cannot be determined.</p>
     */
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($needle === '') {
            return false;
        }

        // an empty haystack or a zero-length window cannot contain the needle
        // (this result does not depend on the PHP version)
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // reduce the haystack to the requested window first
        if ($offset || $length > 0) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack, $encoding);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if ($encoding === 'UTF-8') {
                $haystack = (string) \mb_substr($haystack, $offset, $length);
            } else {
                $haystack = (string) \mb_substr($haystack, $offset, $length, $encoding);
            }
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_substr_count($haystack, $needle);
            }

            return \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via vanilla php: count the non-overlapping matches of the quoted needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }

    /**
     * Count the number of substring occurrences.
     *
     * @param string   $haystack <p>
     *                           The string being checked.
     *                           </p>
     * @param string   $needle   <p>
     *                           The string being found.
     *                           </p>
     * @param int      $offset   [optional] <p>
     *                           The offset where to start counting
     *                           </p>
     * @param int|null $length   [optional] <p>
     *                           The maximum length after the specified offset to search for the
     *                           substring. It outputs a warning if the offset plus the length is
     *                           greater than the haystack length.
     *                           </p>
     *
     * @psalm-pure
     *
     * @return false|int
     *                   <p>The number of times the
     *                   needle substring occurs in the
     *                   haystack string; 0 if the haystack or the needle is an empty string.</p>
     */
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the window is cut out here, because
        // "mb_substr_count()" below accepts neither an offset nor a length
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                \PHP_VERSION_ID < 70100 // output from "substr_count()" have changed in PHP 7.1 (version id 70100)
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }

    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $case_sensitive to false.
     *
     * @param string $str            <p>The input string.</p>
     * @param string $substring      <p>The substring to search for.</p>
     * @param bool   $case_sensitive [optional] <p>Whether or not to enforce case-sensitivity. Default: true</p>
     * @param string $encoding       [optional] <p>Set the charset for e.g. "mb_" function</p>
     *
     * @psalm-pure
     *
     * @return int
     *             <p>The number of occurrences; 0 if the string or the substring is empty.</p>
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if ($case_sensitive) {
                return (int) \mb_substr_count($str, $substring);
            }

            // case-insensitive: compare the uppercased versions of both strings
            return (int) \mb_substr_count(
                \mb_strtoupper($str),
                \mb_strtoupper($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($case_sensitive) {
            return (int) \mb_substr_count($str, $substring, $encoding);
        }

        // case-insensitive: compare the (uppercase) case-folded versions of both strings
        return (int) \mb_substr_count(
            self::strtocasefold($str, true, false, $encoding, null, false),
            self::strtocasefold($substring, true, false, $encoding, null, false),
            $encoding
        );
    }

    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE: <code>
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_ileft(string $haystack, string $needle): string
    {
        if ($haystack === '') {
            return '';
        }

        if ($needle === '') {
            return $haystack;
        }

        // drop as many leading characters as the needle is long
        if (self::str_istarts_with($haystack, $needle)) {
            $haystack = (string) \mb_substr($haystack, (int) self::strlen($needle));
        }

        return $haystack;
    }

    /**
     * Get part of a string process in bytes.
* The offset and the length are handled by the byte-based "substr()" (or by "mb_substr()" with an 8-BIT charset).
     *
     * @param string   $str    <p>The string being checked.</p>
     * @param int      $offset <p>The first position used in str.</p>
     * @param int|null $length [optional] <p>The maximum length of the returned string.</p>
     *
     * @psalm-pure
     *
     * @return false|string
     *                      <p>The portion of <i>str</i> specified by the <i>offset</i> and
     *                      <i>length</i> parameters; an empty string if <i>str</i> is empty or
     *                      <i>length</i> is 0. The result of the underlying substr function is passed
     *                      through otherwise.</p>
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to cut from, or nothing requested
        if ($length === 0 || $str === '') {
            return '';
        }

        // no offset and no limit: the input is already the answer
        if ($length === null && !$offset) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            // without a limit, read "up to the end" by using the 32-bit integer maximum
            $max_bytes = $length ?? 2147483647;

            return \substr($str, $offset, $max_bytes);
        }

        // "mb_" is available if overload is used, so use it (with the 8-BIT charset) ...
        return \mb_substr($str, $offset, $length, 'CP850');
    }

    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
* The haystack is returned unchanged if it does not end with the needle.
     *
     * EXAMPLE: <code>
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything except the last "length of the needle" characters
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }

    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE: <code>
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     * </code>
     *
     * @param string $haystack <p>The string to search in.</p>
     * @param string $needle   <p>The substring to search for.</p>
     *
     * @psalm-pure
     *
     * @return string
     *                <p>Return the sub-string.</p>
     */
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to remove
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // skip as many leading characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }

    /**
     * Replace text within a portion of a string.
12579 * 12580 * EXAMPLE: <code>UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')</code> 12581 * 12582 * source: https://gist.github.com/stemar/8287074 12583 * 12584 * @param string|string[] $str <p>The input string or an array of stings.</p> 12585 * @param string|string[] $replacement <p>The replacement string or an array of stings.</p> 12586 * @param int|int[] $offset <p> 12587 * If start is positive, the replacing will begin at the start'th offset 12588 * into string. 12589 * <br><br> 12590 * If start is negative, the replacing will begin at the start'th character 12591 * from the end of string. 12592 * </p> 12593 * @param int|int[]|null $length [optional] <p>If given and is positive, it represents the length of the 12594 * portion of string which is to be replaced. If it is negative, it 12595 * represents the number of characters from the end of string at which to 12596 * stop replacing. If it is not given, then it will default to strlen( 12597 * string ); i.e. end the replacing at the end of string. Of course, if 12598 * length is zero then this function will have the effect of inserting 12599 * replacement into string at the given start offset.</p> 12600 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p> 12601 * 12602 * @psalm-pure 12603 * 12604 * @return string|string[] 12605 * <p>The result string is returned. 
If string is an array then array is returned.</p> 12606 */ 12607 public static function substr_replace( 12608 $str, 12609 $replacement, 12610 $offset, 12611 $length = null, 12612 string $encoding = 'UTF-8' 12613 ) { 12614 if (\is_array($str)) { 12615 $num = \count($str); 12616 12617 // the replacement 12618 if (\is_array($replacement)) { 12619 $replacement = \array_slice($replacement, 0, $num); 12620 } else { 12621 $replacement = \array_pad([$replacement], $num, $replacement); 12622 } 12623 12624 // the offset 12625 if (\is_array($offset)) { 12626 $offset = \array_slice($offset, 0, $num); 12627 foreach ($offset as &$value_tmp) { 12628 $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0; 12629 } 12630 unset($value_tmp); 12631 } else { 12632 $offset = \array_pad([$offset], $num, $offset); 12633 } 12634 12635 // the length 12636 if ($length === null) { 12637 $length = \array_fill(0, $num, 0); 12638 } elseif (\is_array($length)) { 12639 $length = \array_slice($length, 0, $num); 12640 foreach ($length as &$value_tmp_V2) { 12641 $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? 
$value_tmp_V2 : $num; 12642 } 12643 unset($value_tmp_V2); 12644 } else { 12645 $length = \array_pad([$length], $num, $length); 12646 } 12647 12648 // recursive call 12649 return \array_map([self::class, 'substr_replace'], $str, $replacement, $offset, $length); 12650 } 12651 12652 if (\is_array($replacement)) { 12653 if ($replacement !== []) { 12654 $replacement = $replacement[0]; 12655 } else { 12656 $replacement = ''; 12657 } 12658 } 12659 12660 // init 12661 $str = (string) $str; 12662 $replacement = (string) $replacement; 12663 12664 if (\is_array($length)) { 12665 throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.'); 12666 } 12667 12668 if (\is_array($offset)) { 12669 throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.'); 12670 } 12671 12672 if ($str === '') { 12673 return $replacement; 12674 } 12675 12676 if (self::$SUPPORT['mbstring'] === true) { 12677 $string_length = (int) self::strlen($str, $encoding); 12678 12679 if ($offset < 0) { 12680 $offset = (int) \max(0, $string_length + $offset); 12681 } elseif ($offset > $string_length) { 12682 $offset = $string_length; 12683 } 12684 12685 if ($length !== null && $length < 0) { 12686 $length = (int) \max(0, $string_length - $offset + $length); 12687 } elseif ($length === null || $length > $string_length) { 12688 $length = $string_length; 12689 } 12690 12691 /** @noinspection AdditionOperationOnArraysInspection */ 12692 if (($offset + $length) > $string_length) { 12693 $length = $string_length - $offset; 12694 } 12695 12696 /** @noinspection AdditionOperationOnArraysInspection */ 12697 return ((string) \mb_substr($str, 0, $offset, $encoding)) . 12698 $replacement . 12699 ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding)); 12700 } 12701 12702 // 12703 // fallback for ascii only 12704 // 12705 12706 if (ASCII::is_ascii($str)) { 12707 return ($length === null) ? 
12708 \substr_replace($str, $replacement, $offset) : 12709 \substr_replace($str, $replacement, $offset, $length); 12710 } 12711 12712 // 12713 // fallback via vanilla php 12714 // 12715 12716 \preg_match_all('/./us', $str, $str_matches); 12717 \preg_match_all('/./us', $replacement, $replacement_matches); 12718 12719 if ($length === null) { 12720 $length_tmp = self::strlen($str, $encoding); 12721 if ($length_tmp === false) { 12722 // e.g.: non mbstring support + invalid chars 12723 return ''; 12724 } 12725 $length = (int) $length_tmp; 12726 } 12727 12728 \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]); 12729 12730 return \implode('', $str_matches[0]); 12731 } 12732 12733 /** 12734 * Removes a suffix ($needle) from the end of the string ($haystack). 12735 * 12736 * EXAMPLE: <code> 12737 * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle' 12738 * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε' 12739 * </code> 12740 * 12741 * @param string $haystack <p>The string to search in.</p> 12742 * @param string $needle <p>The substring to search for.</p> 12743 * @param string $encoding [optional] <p>Set the charset for e.g. 
/**
 * Removes the suffix $needle from the end of $haystack (case-sensitive).
 *
 * EXAMPLE: <code>
 * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
 * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
 * </code>
 *
 * @param string $haystack <p>The string to search in.</p>
 * @param string $needle   <p>The suffix to strip.</p>
 * @param string $encoding [optional] <p>Set the charset for e.g. "mb_" function</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>$haystack without the trailing $needle; $haystack unchanged if it does not end with
 *                $needle or if one of both arguments is empty.</p>
 */
public static function substr_right(
    string $haystack,
    string $needle,
    string $encoding = 'UTF-8'
): string {
    // Empty haystack => '' and empty needle => haystack: in both cases "$haystack as it is".
    if ($haystack === '' || $needle === '') {
        return $haystack;
    }

    // The suffix test is done on bytes, independent of $encoding.
    $ends_with_needle = \substr($haystack, -\strlen($needle)) === $needle;
    if (!$ends_with_needle) {
        return $haystack;
    }

    if ($encoding === 'UTF-8') {
        $chars_to_keep = (int) \mb_strlen($haystack) - (int) \mb_strlen($needle);

        return (string) \mb_substr($haystack, 0, $chars_to_keep);
    }

    $chars_to_keep = (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding);

    return (string) self::substr($haystack, 0, $chars_to_keep, $encoding);
}
/**
 * Returns a case swapped version of the string.
 *
 * EXAMPLE: <code>UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'</code>
 *
 * @param string $str        <p>The input string.</p>
 * @param string $encoding   [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool   $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>Each character's case swapped.</p>
 */
public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
{
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // Byte-wise XOR trick: where $str equals its lower-case form, "lower ^ str" cancels out and the
    // upper-case byte remains (and the other way around).
    // NOTE(review): this presumes lower- and upper-case forms have the same byte length as $str - confirm
    // for characters whose case mapping changes the length.
    if ($encoding === 'UTF-8') {
        return (string) (\mb_strtolower($str) ^ \mb_strtoupper($str) ^ $str);
    }

    return (string) (self::strtolower($str, $encoding) ^ self::strtoupper($str, $encoding) ^ $str);
}

/**
 * Checks whether symfony-polyfills are used.
 *
 * A polyfill is assumed when an extension is NOT loaded but one of its functions exists anyway
 * ("mb_strlen" for mbstring, "iconv" for iconv).
 *
 * @psalm-pure
 *
 * @return bool
 *              <p><strong>true</strong> if in use, <strong>false</strong> otherwise</p>
 *
 * @internal <p>Please do not use it anymore, we will make is private in next major version.</p>
 */
public static function symfony_polyfill_used(): bool
{
    $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
    $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

    return $mbstring_polyfilled || $iconv_polyfilled;
}

/**
 * Replaces every tab character with $tab_length space characters.
 *
 * @param string $str        <p>The input string.</p>
 * @param int    $tab_length [optional] <p>Number of spaces per tab, must not be negative.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function tabs_to_spaces(string $str, int $tab_length = 4): string
{
    // Always derive the replacement from $tab_length: hand-written runs of spaces for the common widths
    // are invisible in review and easily end up with the wrong number of characters.
    return \str_replace("\t", \str_repeat(' ', $tab_length), $str);
}

/**
 * Converts the first character of each word in the string to uppercase
 * and all other chars to lowercase.
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>A string with all characters of $str being title-cased.</p>
 */
public static function titlecase(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // Without language specific rules and without the "keep length" option "mb_convert_case()" is used directly.
    if (
        $lang === null
        &&
        !$try_to_keep_the_string_length
    ) {
        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
    }

    return self::str_titleize(
        $str,
        null,
        $encoding,
        false,
        $lang,
        $try_to_keep_the_string_length,
        false
    );
}
/**
 * Deprecated camelCase alias: forwards all arguments to "UTF8::to_ascii()".
 *
 * @param string $str
 * @param string $subst_chr
 * @param bool   $strict
 *
 * @psalm-pure
 *
 * @return string
 *
 * @see        UTF8::to_ascii()
 * @deprecated <p>please use "UTF8::to_ascii()"</p>
 */
public static function toAscii(string $str, string $subst_chr = '?', bool $strict = false): string
{
    return self::to_ascii($str, $subst_chr, $strict);
}

/**
 * Deprecated camelCase alias: forwards to "UTF8::to_iso8859()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated <p>please use "UTF8::to_iso8859()"</p>
 */
public static function toIso8859($str)
{
    return self::to_iso8859($str);
}

/**
 * Deprecated camelCase alias: forwards to "UTF8::to_iso8859()".
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated <p>please use "UTF8::to_iso8859()"</p>
 */
public static function toLatin1($str)
{
    return self::to_iso8859($str);
}

/**
 * Deprecated camelCase alias: forwards to "UTF8::to_utf8()" (html-entity decoding stays at its default).
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_utf8()
 * @deprecated <p>please use "UTF8::to_utf8()"</p>
 */
public static function toUTF8($str)
{
    return self::to_utf8($str);
}
/**
 * Convert a string into ASCII.
 *
 * EXAMPLE: <code>UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'</code>
 *
 * @param string $str     <p>The input string.</p>
 * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
 * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
 *                        performance</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_ascii(string $str, string $unknown = '?', bool $strict = false): string
{
    return ASCII::to_transliterate($str, $unknown, $strict);
}

/**
 * Interprets a scalar as boolean.
 *
 * The value is cast to string first; then, in this order:
 * - '' is false
 * - "true", "1", "on", "yes" are true and "false", "0", "off", "no" are false (case-insensitive)
 * - numeric strings are true only if they are greater than zero
 * - everything else is cast to bool after "trim()"
 *
 * @param bool|float|int|string $str
 *
 * @psalm-pure
 *
 * @return bool
 */
public static function to_boolean($str): bool
{
    $value = (string) $str;
    if ($value === '') {
        return false;
    }

    // Info: http://php.net/manual/en/filter.filters.validate.php
    $known_values = [
        'true'  => true,
        '1'     => true,
        'on'    => true,
        'yes'   => true,
        'false' => false,
        '0'     => false,
        'off'   => false,
        'no'    => false,
    ];

    // All keys are lower-case, so one lookup with the lower-cased input also covers the exact matches.
    $known = $known_values[\strtolower($value)] ?? null;
    if ($known !== null) {
        return $known;
    }

    if (\is_numeric($value)) {
        return (float) $value > 0;
    }

    return (bool) \trim($value);
}

/**
 * Convert given string to safe filename (and keep string case).
 *
 * @param string $str
 * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
 *                                  simply replaced with hyphen.
 * @param string $fallback_char
 *
 * @psalm-pure
 *
 * @return string
 */
public static function to_filename(string $str, bool $use_transliterate = false, string $fallback_char = '-'): string
{
    return ASCII::to_filename($str, $use_transliterate, $fallback_char);
}
/**
 * Convert a string into "ISO-8859"-encoding (Latin-1).
 *
 * Arrays are converted element by element (recursively), the keys are kept.
 *
 * EXAMPLE: <code>UTF8::to_utf8(UTF8::to_iso8859('  -ABC-中文空白-  ')); // '  -ABC-????-  '</code>
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 */
public static function to_iso8859($str)
{
    if (\is_array($str)) {
        // "array_map()" with exactly one array preserves the keys of that array.
        return \array_map([self::class, 'to_iso8859'], $str);
    }

    $str = (string) $str;

    return $str === '' ? '' : self::utf8_decode($str);
}

/**
 * alias for "UTF8::to_iso8859()"
 *
 * @param string|string[] $str
 *
 * @psalm-pure
 *
 * @return string|string[]
 *
 * @see        UTF8::to_iso8859()
 * @deprecated <p>please use "UTF8::to_iso8859()"</p>
 */
public static function to_latin1($str)
{
    return self::to_iso8859($str);
}
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * A string is passed to "UTF8::to_utf8_string()"; for an array every element is passed to
 * "UTF8::to_utf8_string()" and the keys are kept.
 *
 * EXAMPLE: <code>UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat')</code>
 *
 * @param string|string[] $str                        <p>Any string or array of strings.</p>
 * @param bool            $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string|string[]
 *                         <p>The UTF-8 encoded string</p>
 *
 * @template TToUtf8
 * @phpstan-param TToUtf8 $str
 * @phpstan-return TToUtf8
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
{
    if (!\is_array($str)) {
        /** @phpstan-var TToUtf8 $converted */
        $converted = self::to_utf8_string($str, $decode_html_entity_to_utf8);

        return $converted;
    }

    foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
    }

    return $str;
}
/**
 * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
 *
 * <ul>
 * <li>It decode UTF-8 codepoints and Unicode escape sequences.</li>
 * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
 * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
 *     case.</li>
 * </ul>
 *
 * EXAMPLE: <code>UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat'</code>
 *
 * @param string $str                        <p>Any string.</p>
 * @param bool   $decode_html_entity_to_utf8 <p>Set to true, if you need to decode html-entities.</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The UTF-8 encoded string</p>
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
{
    if ($str === '') {
        return $str;
    }

    $max = \strlen($str);
    $buf = '';

    // Walk over the bytes: sequences that already look like UTF-8 are copied, other high bytes are
    // handed to "to_utf8_convert_helper()". A missing follow-up byte is treated as "\x00" (= no continuation byte).
    for ($i = 0; $i < $max; ++$i) {
        $c1 = $str[$i];

        if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

            if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                    $buf .= $c1 . $c2;
                    ++$i;
                } else { // not valid UTF8 - convert it
                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                    $buf .= $c1 . $c2 . $c3;
                    $i += 2;
                } else { // not valid UTF8 - convert it
                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                    $buf .= $c1 . $c2 . $c3 . $c4;
                    $i += 3;
                } else { // not valid UTF8 - convert it
                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } else { // doesn't look like UTF8, but should be converted

                $buf .= self::to_utf8_convert_helper($c1);
            }
        } elseif (($c1 & "\xC0") === "\x80") { // needs conversion (stray continuation byte)

            $buf .= self::to_utf8_convert_helper($c1);
        } else { // it doesn't need conversion

            $buf .= $c1;
        }
    }

    // decode unicode escape sequences + unicode surrogate pairs
    $buf = \preg_replace_callback(
        '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
        /**
         * @param array $matches
         *
         * @psalm-pure
         *
         * @return string
         */
        static function (array $matches): string {
            if (isset($matches[3])) {
                // single "\uXXXX" escape
                $cp = (int) \hexdec($matches[3]);
            } else {
                // surrogate pair, see http://unicode.org/faq/utf_bom.html#utf16-4
                $cp = ((int) \hexdec($matches[1]) << 10)
                      + (int) \hexdec($matches[2])
                      + 0x10000
                      - (0xD800 << 10)
                      - 0xDC00;
            }

            // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
            //
            // php_utf32_utf8(unsigned char *buf, unsigned k)

            if ($cp < 0x80) {
                return (string) self::chr($cp);
            }

            if ($cp < 0xA0) {
                /** @noinspection UnnecessaryCastingInspection */
                return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
            }

            return self::decimal_to_chr($cp);
        },
        $buf
    );

    // "preg_replace_callback()" returns null on error
    if ($buf === null) {
        return '';
    }

    // decode UTF-8 codepoints
    if ($decode_html_entity_to_utf8) {
        $buf = self::html_entity_decode($buf);
    }

    return $buf;
}

/**
 * Returns the given string as an integer, or null if the string isn't numeric.
 *
 * @param string $str
 *
 * @psalm-pure
 *
 * @return int|null
 *                  <p>null if the string isn't numeric</p>
 */
public static function to_int(string $str)
{
    if (\is_numeric($str)) {
        return (int) $str;
    }

    return null;
}

/**
 * Returns the given input as string, or null if the input isn't int|float|string
 * and do not implement the "__toString()" method.
 *
 * @param float|int|object|string|null $input
 *
 * @psalm-pure
 *
 * @return string|null
 *                     <p>null if the input isn't int|float|string and has no "__toString()" method</p>
 */
public static function to_string($input)
{
    if (\is_string($input) || \is_int($input) || \is_float($input)) {
        return (string) $input;
    }

    if (\is_object($input) && \method_exists($input, '__toString')) {
        return (string) $input;
    }

    // null, bool, array, resource and objects without "__toString()"
    return null;
}

/**
 * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
 *
 * INFO: This is slower then "trim()"
 *
 * We can only use the original-function, if we use <= 7-Bit in the string / chars
 * but the check for ASCII (7-Bit) cost more time, then we can safe here.
 *
 * EXAMPLE: <code>UTF8::trim('   -ABC-中文空白-  '); // '-ABC-中文空白-'</code>
 *
 * @param string      $str   <p>The string to be trimmed</p>
 * @param string|null $chars [optional] <p>Optional characters to be stripped, null === whitespace</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The trimmed string.</p>
 */
public static function trim(string $str = '', string $chars = null): string
{
    if ($str === '') {
        return '';
    }

    // An empty character list means "strip nothing" (and would build the invalid character class "[]").
    if ($chars === '') {
        return $str;
    }

    if (self::$SUPPORT['mbstring'] === true) {
        if ($chars !== null) {
            /** @noinspection PregQuoteUsageInspection */
            $chars = \preg_quote($chars);
            // "{$chars}" and not "$chars}": the latter adds a literal "}" to the character class.
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return (string) \mb_ereg_replace($pattern, '', $str);
    }

    if ($chars !== null) {
        $chars = \preg_quote($chars, '/');
        $pattern = "^[{$chars}]+|[{$chars}]+\$";
    } else {
        $pattern = '^[\\s]+|[\\s]+$';
    }

    return self::regex_replace($str, $pattern, '');
}
/**
 * Makes string's first char uppercase.
 *
 * EXAMPLE: <code>UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'</code>
 *
 * @param string      $str                           <p>The input string.</p>
 * @param string      $encoding                      [optional] <p>Set the charset for e.g. "mb_" function</p>
 * @param bool        $clean_utf8                    [optional] <p>Remove non UTF-8 chars from the string.</p>
 * @param string|null $lang                          [optional] <p>Set the language for special cases: az, el, lt,
 *                                                   tr</p>
 * @param bool        $try_to_keep_the_string_length [optional] <p>true === try to keep the string length: e.g. ẞ
 *                                                   -> ß</p>
 *
 * @psalm-pure
 *
 * @return string
 *                <p>The resulting string with the first char in uppercase.</p>
 */
public static function ucfirst(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false,
    string $lang = null,
    bool $try_to_keep_the_string_length = false
): string {
    if ($str === '') {
        return '';
    }

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    // Without language specific rules and without the "keep length" option the "mb_*" functions are used directly.
    $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

    if ($encoding === 'UTF-8') {
        $str_part_two = (string) \mb_substr($str, 1);

        if ($use_mb_functions) {
            $str_part_one = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1)
            );
        } else {
            $str_part_one = self::strtoupper(
                (string) \mb_substr($str, 0, 1),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }
    } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $str_part_two = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $str_part_one = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $str_part_one = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }
    }

    return $str_part_one . $str_part_two;
}

/**
 * alias for "UTF8::ucfirst()"
 *
 * @param string $str
 * @param string $encoding
 * @param bool   $clean_utf8
 *
 * @psalm-pure
 *
 * @return string
 *
 * @see        UTF8::ucfirst()
 * @deprecated <p>please use "UTF8::ucfirst()"</p>
 */
public static function ucword(
    string $str,
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    return self::ucfirst($str, $encoding, $clean_utf8);
}

/**
 * Uppercase for all words in the string.
 *
 * EXAMPLE: <code>UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'</code>
 *
 * @param string   $str        <p>The input string.</p>
 * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
 * @param string   $char_list  [optional] <p>Additional chars that contains to words and do not start a new
 *                             word.</p>
 * @param string   $encoding   [optional] <p>Set the charset.</p>
 * @param bool     $clean_utf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function ucwords(
    string $str,
    array $exceptions = [],
    string $char_list = '',
    string $encoding = 'UTF-8',
    bool $clean_utf8 = false
): string {
    // Strict comparison: a truthiness check would also swallow the string "0".
    if ($str === '') {
        return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($clean_utf8) {
        $str = self::clean($str);
    }

    $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

    if (
        $use_php_default_functions
        &&
        ASCII::is_ascii($str)
    ) {
        return \ucwords($str);
    }

    $words = self::str_to_words($str, $char_list);
    $use_exceptions = $exceptions !== [];

    $words_str = '';
    foreach ($words as $word) {
        // Only skip really empty parts, a word like "0" must stay in the result.
        if ($word === '') {
            continue;
        }

        if (
            !$use_exceptions
            ||
            !\in_array($word, $exceptions, true)
        ) {
            $words_str .= self::ucfirst($word, $encoding);
        } else {
            $words_str .= $word;
        }
    }

    return $words_str;
}

/**
 * Multi decode HTML entity + fix urlencoded-win1252-chars.
 *
 * EXAMPLE: <code>UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'</code>
 *
 * e.g:
 * 'test+test'                     => 'test test'
 * 'D%FCsseldorf'                  => 'Düsseldorf'
 * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
 * 'D%C3%BCsseldorf'               => 'Düsseldorf'
 * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
 * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
 *
 * @param string $str          <p>The input string.</p>
 * @param bool   $multi_decode <p>Decode as often as possible.</p>
 *
 * @psalm-pure
 *
 * @return string
 */
public static function urldecode(string $str, bool $multi_decode = true): string
{
    if ($str === '') {
        return '';
    }

    $str = self::urldecode_unicode_helper($str);

    // Always decode once; with $multi_decode repeat until a pass does not change the string anymore.
    do {
        $str_compare = $str;

        /**
         * @psalm-suppress PossiblyInvalidArgument
         */
        $str = \urldecode(
            self::html_entity_decode(
                self::to_utf8($str),
                \ENT_QUOTES | \ENT_HTML5
            )
        );
    } while ($multi_decode && $str_compare !== $str);

    return self::fix_simple_utf8($str);
}
'g', 13729 '%68' => 'h', 13730 '%69' => 'i', 13731 '%6A' => 'j', 13732 '%6B' => 'k', 13733 '%6C' => 'l', 13734 '%6D' => 'm', 13735 '%6E' => 'n', 13736 '%6F' => 'o', 13737 '%70' => 'p', 13738 '%71' => 'q', 13739 '%72' => 'r', 13740 '%73' => 's', 13741 '%74' => 't', 13742 '%75' => 'u', 13743 '%76' => 'v', 13744 '%77' => 'w', 13745 '%78' => 'x', 13746 '%79' => 'y', 13747 '%7A' => 'z', 13748 '%7B' => '{', 13749 '%7C' => '|', 13750 '%7D' => '}', 13751 '%7E' => '~', 13752 '%7F' => '', 13753 '%80' => '`', 13754 '%81' => '', 13755 '%82' => '‚', 13756 '%83' => 'ƒ', 13757 '%84' => '„', 13758 '%85' => '…', 13759 '%86' => '†', 13760 '%87' => '‡', 13761 '%88' => 'ˆ', 13762 '%89' => '‰', 13763 '%8A' => 'Š', 13764 '%8B' => '‹', 13765 '%8C' => 'Œ', 13766 '%8D' => '', 13767 '%8E' => 'Ž', 13768 '%8F' => '', 13769 '%90' => '', 13770 '%91' => '‘', 13771 '%92' => '’', 13772 '%93' => '“', 13773 '%94' => '”', 13774 '%95' => '•', 13775 '%96' => '–', 13776 '%97' => '—', 13777 '%98' => '˜', 13778 '%99' => '™', 13779 '%9A' => 'š', 13780 '%9B' => '›', 13781 '%9C' => 'œ', 13782 '%9D' => '', 13783 '%9E' => 'ž', 13784 '%9F' => 'Ÿ', 13785 '%A0' => '', 13786 '%A1' => '¡', 13787 '%A2' => '¢', 13788 '%A3' => '£', 13789 '%A4' => '¤', 13790 '%A5' => '¥', 13791 '%A6' => '¦', 13792 '%A7' => '§', 13793 '%A8' => '¨', 13794 '%A9' => '©', 13795 '%AA' => 'ª', 13796 '%AB' => '«', 13797 '%AC' => '¬', 13798 '%AD' => '', 13799 '%AE' => '®', 13800 '%AF' => '¯', 13801 '%B0' => '°', 13802 '%B1' => '±', 13803 '%B2' => '²', 13804 '%B3' => '³', 13805 '%B4' => '´', 13806 '%B5' => 'µ', 13807 '%B6' => '¶', 13808 '%B7' => '·', 13809 '%B8' => '¸', 13810 '%B9' => '¹', 13811 '%BA' => 'º', 13812 '%BB' => '»', 13813 '%BC' => '¼', 13814 '%BD' => '½', 13815 '%BE' => '¾', 13816 '%BF' => '¿', 13817 '%C0' => 'À', 13818 '%C1' => 'Á', 13819 '%C2' => 'Â', 13820 '%C3' => 'Ã', 13821 '%C4' => 'Ä', 13822 '%C5' => 'Å', 13823 '%C6' => 'Æ', 13824 '%C7' => 'Ç', 13825 '%C8' => 'È', 13826 '%C9' => 'É', 13827 '%CA' => 'Ê', 13828 '%CB' => 'Ë', 
13829 '%CC' => 'Ì', 13830 '%CD' => 'Í', 13831 '%CE' => 'Î', 13832 '%CF' => 'Ï', 13833 '%D0' => 'Ð', 13834 '%D1' => 'Ñ', 13835 '%D2' => 'Ò', 13836 '%D3' => 'Ó', 13837 '%D4' => 'Ô', 13838 '%D5' => 'Õ', 13839 '%D6' => 'Ö', 13840 '%D7' => '×', 13841 '%D8' => 'Ø', 13842 '%D9' => 'Ù', 13843 '%DA' => 'Ú', 13844 '%DB' => 'Û', 13845 '%DC' => 'Ü', 13846 '%DD' => 'Ý', 13847 '%DE' => 'Þ', 13848 '%DF' => 'ß', 13849 '%E0' => 'à', 13850 '%E1' => 'á', 13851 '%E2' => 'â', 13852 '%E3' => 'ã', 13853 '%E4' => 'ä', 13854 '%E5' => 'å', 13855 '%E6' => 'æ', 13856 '%E7' => 'ç', 13857 '%E8' => 'è', 13858 '%E9' => 'é', 13859 '%EA' => 'ê', 13860 '%EB' => 'ë', 13861 '%EC' => 'ì', 13862 '%ED' => 'í', 13863 '%EE' => 'î', 13864 '%EF' => 'ï', 13865 '%F0' => 'ð', 13866 '%F1' => 'ñ', 13867 '%F2' => 'ò', 13868 '%F3' => 'ó', 13869 '%F4' => 'ô', 13870 '%F5' => 'õ', 13871 '%F6' => 'ö', 13872 '%F7' => '÷', 13873 '%F8' => 'ø', 13874 '%F9' => 'ù', 13875 '%FA' => 'ú', 13876 '%FB' => 'û', 13877 '%FC' => 'ü', 13878 '%FD' => 'ý', 13879 '%FE' => 'þ', 13880 '%FF' => 'ÿ', 13881 ]; 13882 } 13883 13884 /** 13885 * Decodes a UTF-8 string to ISO-8859-1. 
/**
 * Decodes a UTF-8 string to ISO-8859-1.
 *
 * 2-byte sequences are mapped to their single byte if the code point is below 256, everything else
 * (other 2-byte sequences and all 3- and 4-byte sequences) becomes "?".
 *
 * EXAMPLE: <code>UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-'</code>
 *
 * @param string $str             <p>The input string.</p>
 * @param bool   $keep_utf8_chars <p>Return the input unchanged if the decoded result is not shorter
 *                                (measured with "UTF8::strlen()") than the input.</p>
 *
 * @psalm-pure
 *
 * @return string
 *
 * @noinspection SuspiciousBinaryOperationInspection
 */
public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
{
    if ($str === '') {
        return '';
    }

    // save for later comparision
    $str_backup = $str;
    $len = \strlen($str);

    // lazy-load the lookup tables (indexed by a one-byte string resp. by an integer below)
    if (self::$ORD === null) {
        self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
        self::$CHR = self::getData('chr');
    }

    $no_char_found = '?';
    // $i reads the input bytes, $j writes the (never longer) result in place into $str
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
        switch ($str[$i] & "\xF0") {
            case "\xC0":
            case "\xD0":
                if ($i + 1 >= $len) {
                    // truncated 2-byte sequence at the end of the input: there is no second byte to read
                    $str[$j] = $no_char_found;

                    break;
                }

                $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                break;

            /** @noinspection PhpMissingBreakStatementInspection */
            case "\xF0":
                // 4-byte sequence: skip one byte more than for a 3-byte sequence
                ++$i;

                // no break

            case "\xE0":
                $str[$j] = $no_char_found;
                $i += 2;

                break;

            default:
                $str[$j] = $str[$i];
        }
    }

    // cut off the not overwritten rest of the input
    /** @var false|string $return - needed for PhpStan (stubs error) */
    $return = \substr($str, 0, $j);
    if ($return === false) {
        $return = '';
    }

    if (
        $keep_utf8_chars
        &&
        (int) self::strlen($return) >= (int) self::strlen($str_backup)
    ) {
        return $str_backup;
    }

    return $return;
}
13963 * 13964 * EXAMPLE: <code>UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'</code> 13965 * 13966 * @param string $str <p>The input string.</p> 13967 * 13968 * @psalm-pure 13969 * 13970 * @return string 13971 */ 13972 public static function utf8_encode(string $str): string 13973 { 13974 if ($str === '') { 13975 return ''; 13976 } 13977 13978 /** @var false|string $str - the polyfill maybe return false */ 13979 $str = \utf8_encode($str); 13980 13981 /** @noinspection CallableParameterUseCaseInTypeContextInspection */ 13982 /** @psalm-suppress TypeDoesNotContainType */ 13983 if ($str === false) { 13984 return ''; 13985 } 13986 13987 return $str; 13988 } 13989 13990 /** 13991 * fix -> utf8-win1252 chars 13992 * 13993 * @param string $str <p>The input string.</p> 13994 * 13995 * @psalm-pure 13996 * 13997 * @return string 13998 * 13999 * @deprecated <p>please use "UTF8::fix_simple_utf8()"</p> 14000 */ 14001 public static function utf8_fix_win1252_chars(string $str): string 14002 { 14003 return self::fix_simple_utf8($str); 14004 } 14005 14006 /** 14007 * Returns an array with all utf8 whitespace characters. 14008 * 14009 * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html 14010 * 14011 * @psalm-pure 14012 * 14013 * @return string[] 14014 * An array with all known whitespace characters as values and the type of whitespace as keys 14015 * as defined in above URL 14016 */ 14017 public static function whitespace_table(): array 14018 { 14019 return self::$WHITESPACE_TABLE; 14020 } 14021 14022 /** 14023 * Limit the number of words in a string. 
14024 * 14025 * EXAMPLE: <code>UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'</code> 14026 * 14027 * @param string $str <p>The input string.</p> 14028 * @param int $limit <p>The limit of words as integer.</p> 14029 * @param string $str_add_on <p>Replacement for the striped string.</p> 14030 * 14031 * @psalm-pure 14032 * 14033 * @return string 14034 */ 14035 public static function words_limit( 14036 string $str, 14037 int $limit = 100, 14038 string $str_add_on = '…' 14039 ): string { 14040 if ($str === '' || $limit < 1) { 14041 return ''; 14042 } 14043 14044 \preg_match('/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u', $str, $matches); 14045 14046 if ( 14047 !isset($matches[0]) 14048 || 14049 \mb_strlen($str) === (int) \mb_strlen($matches[0]) 14050 ) { 14051 return $str; 14052 } 14053 14054 return \rtrim($matches[0]) . $str_add_on; 14055 } 14056 14057 /** 14058 * Wraps a string to a given number of characters 14059 * 14060 * EXAMPLE: <code>UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '<br>', true)); // 'Iñ<br>të<br>rn<br>ât<br>iô<br>nà<br>li<br>zæ<br>ti<br>øn'</code> 14061 * 14062 * @see http://php.net/manual/en/function.wordwrap.php 14063 * 14064 * @param string $str <p>The input string.</p> 14065 * @param int $width [optional] <p>The column width.</p> 14066 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p> 14067 * @param bool $cut [optional] <p> 14068 * If the cut is set to true, the string is 14069 * always wrapped at or before the specified width. So if you have 14070 * a word that is larger than the given width, it is broken apart. 
14071 * </p> 14072 * 14073 * @psalm-pure 14074 * 14075 * @return string 14076 * <p>The given string wrapped at the specified column.</p> 14077 */ 14078 public static function wordwrap( 14079 string $str, 14080 int $width = 75, 14081 string $break = "\n", 14082 bool $cut = false 14083 ): string { 14084 if ($str === '' || $break === '') { 14085 return ''; 14086 } 14087 14088 $str_split = \explode($break, $str); 14089 if ($str_split === false) { 14090 return ''; 14091 } 14092 14093 /** @var string[] $charsArray */ 14094 $charsArray = []; 14095 $word_split = ''; 14096 foreach ($str_split as $i => $i_value) { 14097 if ($i) { 14098 $charsArray[] = $break; 14099 $word_split .= '#'; 14100 } 14101 14102 foreach (self::str_split($i_value) as $c) { 14103 $charsArray[] = $c; 14104 if ($c === ' ') { 14105 $word_split .= ' '; 14106 } else { 14107 $word_split .= '?'; 14108 } 14109 } 14110 } 14111 14112 $str_return = ''; 14113 $j = 0; 14114 $b = -1; 14115 $i = -1; 14116 $word_split = \wordwrap($word_split, $width, '#', $cut); 14117 14118 $max = \mb_strlen($word_split); 14119 while (($b = \mb_strpos($word_split, '#', $b + 1)) !== false) { 14120 for (++$i; $i < $b; ++$i) { 14121 if (isset($charsArray[$j])) { 14122 $str_return .= $charsArray[$j]; 14123 unset($charsArray[$j]); 14124 } 14125 ++$j; 14126 14127 // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill 14128 if ($i > $max) { 14129 break 2; 14130 } 14131 } 14132 14133 if ( 14134 $break === $charsArray[$j] 14135 || 14136 $charsArray[$j] === ' ' 14137 ) { 14138 unset($charsArray[$j++]); 14139 } 14140 14141 $str_return .= $break; 14142 14143 // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill 14144 if ($b > $max) { 14145 break; 14146 } 14147 } 14148 14149 return $str_return . \implode('', $charsArray); 14150 } 14151 14152 /** 14153 * Line-Wrap the string after $limit, but split the string by "$delimiter" before ... 14154 * ... so that we wrap the per line. 
14155 * 14156 * @param string $str <p>The input string.</p> 14157 * @param int $width [optional] <p>The column width.</p> 14158 * @param string $break [optional] <p>The line is broken using the optional break parameter.</p> 14159 * @param bool $cut [optional] <p> 14160 * If the cut is set to true, the string is 14161 * always wrapped at or before the specified width. So if you have 14162 * a word that is larger than the given width, it is broken apart. 14163 * </p> 14164 * @param bool $add_final_break [optional] <p> 14165 * If this flag is true, then the method will add a $break at the end 14166 * of the result string. 14167 * </p> 14168 * @param string|null $delimiter [optional] <p> 14169 * You can change the default behavior, where we split the string by newline. 14170 * </p> 14171 * 14172 * @psalm-pure 14173 * 14174 * @return string 14175 */ 14176 public static function wordwrap_per_line( 14177 string $str, 14178 int $width = 75, 14179 string $break = "\n", 14180 bool $cut = false, 14181 bool $add_final_break = true, 14182 string $delimiter = null 14183 ): string { 14184 if ($delimiter === null) { 14185 $strings = \preg_split('/\\r\\n|\\r|\\n/', $str); 14186 } else { 14187 $strings = \explode($delimiter, $str); 14188 } 14189 14190 $string_helper_array = []; 14191 if ($strings !== false) { 14192 foreach ($strings as $value) { 14193 $string_helper_array[] = self::wordwrap($value, $width, $break, $cut); 14194 } 14195 } 14196 14197 if ($add_final_break) { 14198 $final_break = $break; 14199 } else { 14200 $final_break = ''; 14201 } 14202 14203 return \implode($delimiter ?? "\n", $string_helper_array) . $final_break; 14204 } 14205 14206 /** 14207 * Returns an array of Unicode White Space characters. 
14208 * 14209 * @psalm-pure 14210 * 14211 * @return string[] 14212 * <p>An array with numeric code point as key and White Space Character as value.</p> 14213 */ 14214 public static function ws(): array 14215 { 14216 return self::$WHITESPACE; 14217 } 14218 14219 /** 14220 * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters. 14221 * 14222 * EXAMPLE: <code> 14223 * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn']); // true 14224 * // 14225 * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false 14226 * </code> 14227 * 14228 * @see http://hsivonen.iki.fi/php-utf8/ 14229 * 14230 * @param string $str <p>The string to be checked.</p> 14231 * @param bool $strict <p>Check also if the string is not UTF-16 or UTF-32.</p> 14232 * 14233 * @psalm-pure 14234 * 14235 * @return bool 14236 * 14237 * @noinspection ReturnTypeCanBeDeclaredInspection 14238 */ 14239 private static function is_utf8_string(string $str, bool $strict = false) 14240 { 14241 if ($str === '') { 14242 return true; 14243 } 14244 14245 if ($strict) { 14246 $is_binary = self::is_binary($str, true); 14247 14248 if ($is_binary && self::is_utf16($str, false) !== false) { 14249 return false; 14250 } 14251 14252 if ($is_binary && self::is_utf32($str, false) !== false) { 14253 return false; 14254 } 14255 } 14256 14257 if (self::$SUPPORT['pcre_utf8']) { 14258 // If even just the first character can be matched, when the /u 14259 // modifier is used, then it's valid UTF-8. 
If the UTF-8 is somehow 14260 // invalid, nothing at all will match, even if the string contains 14261 // some valid sequences 14262 return \preg_match('/^./us', $str) === 1; 14263 } 14264 14265 $mState = 0; // cached expected number of octets after the current octet 14266 // until the beginning of the next UTF8 character sequence 14267 $mUcs4 = 0; // cached Unicode character 14268 $mBytes = 1; // cached expected number of octets in the current sequence 14269 14270 if (self::$ORD === null) { 14271 self::$ORD = self::getData('ord'); 14272 } 14273 14274 $len = \strlen($str); 14275 /** @noinspection ForeachInvariantsInspection */ 14276 for ($i = 0; $i < $len; ++$i) { 14277 $in = self::$ORD[$str[$i]]; 14278 14279 if ($mState === 0) { 14280 // When mState is zero we expect either a US-ASCII character or a 14281 // multi-octet sequence. 14282 if ((0x80 & $in) === 0) { 14283 // US-ASCII, pass straight through. 14284 $mBytes = 1; 14285 } elseif ((0xE0 & $in) === 0xC0) { 14286 // First octet of 2 octet sequence. 14287 $mUcs4 = $in; 14288 $mUcs4 = ($mUcs4 & 0x1F) << 6; 14289 $mState = 1; 14290 $mBytes = 2; 14291 } elseif ((0xF0 & $in) === 0xE0) { 14292 // First octet of 3 octet sequence. 14293 $mUcs4 = $in; 14294 $mUcs4 = ($mUcs4 & 0x0F) << 12; 14295 $mState = 2; 14296 $mBytes = 3; 14297 } elseif ((0xF8 & $in) === 0xF0) { 14298 // First octet of 4 octet sequence. 14299 $mUcs4 = $in; 14300 $mUcs4 = ($mUcs4 & 0x07) << 18; 14301 $mState = 3; 14302 $mBytes = 4; 14303 } elseif ((0xFC & $in) === 0xF8) { 14304 /* First octet of 5 octet sequence. 14305 * 14306 * This is illegal because the encoded codepoint must be either 14307 * (a) not the shortest form or 14308 * (b) outside the Unicode range of 0-0x10FFFF. 14309 * Rather than trying to resynchronize, we will carry on until the end 14310 * of the sequence and let the later error handling code catch it. 
14311 */ 14312 $mUcs4 = $in; 14313 $mUcs4 = ($mUcs4 & 0x03) << 24; 14314 $mState = 4; 14315 $mBytes = 5; 14316 } elseif ((0xFE & $in) === 0xFC) { 14317 // First octet of 6 octet sequence, see comments for 5 octet sequence. 14318 $mUcs4 = $in; 14319 $mUcs4 = ($mUcs4 & 1) << 30; 14320 $mState = 5; 14321 $mBytes = 6; 14322 } else { 14323 // Current octet is neither in the US-ASCII range nor a legal first 14324 // octet of a multi-octet sequence. 14325 return false; 14326 } 14327 } elseif ((0xC0 & $in) === 0x80) { 14328 14329 // When mState is non-zero, we expect a continuation of the multi-octet 14330 // sequence 14331 14332 // Legal continuation. 14333 $shift = ($mState - 1) * 6; 14334 $tmp = $in; 14335 $tmp = ($tmp & 0x0000003F) << $shift; 14336 $mUcs4 |= $tmp; 14337 // Prefix: End of the multi-octet sequence. mUcs4 now contains the final 14338 // Unicode code point to be output. 14339 if (--$mState === 0) { 14340 // Check for illegal sequences and code points. 14341 // 14342 // From Unicode 3.1, non-shortest form is illegal 14343 if ( 14344 ($mBytes === 2 && $mUcs4 < 0x0080) 14345 || 14346 ($mBytes === 3 && $mUcs4 < 0x0800) 14347 || 14348 ($mBytes === 4 && $mUcs4 < 0x10000) 14349 || 14350 ($mBytes > 4) 14351 || 14352 // From Unicode 3.2, surrogate characters are illegal. 14353 (($mUcs4 & 0xFFFFF800) === 0xD800) 14354 || 14355 // Code points outside the Unicode range are illegal. 14356 ($mUcs4 > 0x10FFFF) 14357 ) { 14358 return false; 14359 } 14360 // initialize UTF8 cache 14361 $mState = 0; 14362 $mUcs4 = 0; 14363 $mBytes = 1; 14364 } 14365 } else { 14366 // ((0xC0 & (*in) != 0x80) && (mState != 0)) 14367 // Incomplete multi-octet sequence. 
14368 return false; 14369 } 14370 } 14371 14372 return $mState === 0; 14373 } 14374 14375 /** 14376 * @param string $str 14377 * @param bool $use_lowercase <p>Use uppercase by default, otherwise use lowercase.</p> 14378 * @param bool $use_full_case_fold <p>Convert not only common cases.</p> 14379 * 14380 * @psalm-pure 14381 * 14382 * @return string 14383 * 14384 * @noinspection ReturnTypeCanBeDeclaredInspection 14385 */ 14386 private static function fixStrCaseHelper( 14387 string $str, 14388 bool $use_lowercase = false, 14389 bool $use_full_case_fold = false 14390 ) { 14391 $upper = self::$COMMON_CASE_FOLD['upper']; 14392 $lower = self::$COMMON_CASE_FOLD['lower']; 14393 14394 if ($use_lowercase) { 14395 $str = \str_replace( 14396 $upper, 14397 $lower, 14398 $str 14399 ); 14400 } else { 14401 $str = \str_replace( 14402 $lower, 14403 $upper, 14404 $str 14405 ); 14406 } 14407 14408 if ($use_full_case_fold) { 14409 /** 14410 * @psalm-suppress ImpureStaticVariable 14411 * 14412 * @var array<mixed>|null 14413 */ 14414 static $FULL_CASE_FOLD = null; 14415 if ($FULL_CASE_FOLD === null) { 14416 $FULL_CASE_FOLD = self::getData('caseFolding_full'); 14417 } 14418 14419 if ($use_lowercase) { 14420 $str = \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str); 14421 } else { 14422 $str = \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str); 14423 } 14424 } 14425 14426 return $str; 14427 } 14428 14429 /** 14430 * get data from "/data/*.php" 14431 * 14432 * @param string $file 14433 * 14434 * @psalm-pure 14435 * 14436 * @return array 14437 * 14438 * @noinspection ReturnTypeCanBeDeclaredInspection 14439 */ 14440 private static function getData(string $file) 14441 { 14442 /** @noinspection PhpIncludeInspection */ 14443 /** @noinspection UsingInclusionReturnValueInspection */ 14444 /** @psalm-suppress UnresolvableInclude */ 14445 return include __DIR__ . '/data/' . $file . 
'.php'; 14446 } 14447 14448 /** 14449 * @psalm-pure 14450 * 14451 * @return true|null 14452 */ 14453 private static function initEmojiData() 14454 { 14455 if (self::$EMOJI_KEYS_CACHE === null) { 14456 if (self::$EMOJI === null) { 14457 self::$EMOJI = self::getData('emoji'); 14458 } 14459 14460 /** 14461 * @psalm-suppress ImpureFunctionCall - static sort function is used 14462 */ 14463 \uksort( 14464 self::$EMOJI, 14465 static function (string $a, string $b): int { 14466 return \strlen($b) <=> \strlen($a); 14467 } 14468 ); 14469 14470 self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI); 14471 self::$EMOJI_VALUES_CACHE = self::$EMOJI; 14472 14473 foreach (self::$EMOJI_KEYS_CACHE as $key) { 14474 $tmp_key = \crc32($key); 14475 self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $tmp_key . '_-_' . \strrev((string) $tmp_key) . '_-_8FTU_ELBATROP_-_'; 14476 } 14477 14478 return true; 14479 } 14480 14481 return null; 14482 } 14483 14484 /** 14485 * Checks whether mbstring "overloaded" is active on the server. 
14486 * 14487 * @psalm-pure 14488 * 14489 * @return bool 14490 * 14491 * @noinspection ReturnTypeCanBeDeclaredInspection 14492 */ 14493 private static function mbstring_overloaded() 14494 { 14495 /** 14496 * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2 14497 */ 14498 14499 /** @noinspection PhpComposerExtensionStubsInspection */ 14500 /** @noinspection PhpUsageOfSilenceOperatorInspection */ 14501 return \defined('MB_OVERLOAD_STRING') 14502 && 14503 ((int) @\ini_get('mbstring.func_overload') & \MB_OVERLOAD_STRING); 14504 } 14505 14506 /** 14507 * @param array $strings 14508 * @param bool $remove_empty_values 14509 * @param int|null $remove_short_values 14510 * 14511 * @psalm-pure 14512 * 14513 * @return array 14514 * 14515 * @noinspection ReturnTypeCanBeDeclaredInspection 14516 */ 14517 private static function reduce_string_array( 14518 array $strings, 14519 bool $remove_empty_values, 14520 int $remove_short_values = null 14521 ) { 14522 // init 14523 $return = []; 14524 14525 foreach ($strings as &$str) { 14526 if ( 14527 $remove_short_values !== null 14528 && 14529 \mb_strlen($str) <= $remove_short_values 14530 ) { 14531 continue; 14532 } 14533 14534 if ( 14535 $remove_empty_values 14536 && 14537 \trim($str) === '' 14538 ) { 14539 continue; 14540 } 14541 14542 $return[] = $str; 14543 } 14544 14545 return $return; 14546 } 14547 14548 /** 14549 * rxClass 14550 * 14551 * @param string $s 14552 * @param string $class 14553 * 14554 * @psalm-pure 14555 * 14556 * @return string 14557 * 14558 * @noinspection ReturnTypeCanBeDeclaredInspection 14559 */ 14560 private static function rxClass(string $s, string $class = '') 14561 { 14562 /** 14563 * @psalm-suppress ImpureStaticVariable 14564 * 14565 * @var array<string,string> 14566 */ 14567 static $RX_CLASS_CACHE = []; 14568 14569 $cache_key = $s . '_' . 
$class; 14570 14571 if (isset($RX_CLASS_CACHE[$cache_key])) { 14572 return $RX_CLASS_CACHE[$cache_key]; 14573 } 14574 14575 /** @var string[] $class_array */ 14576 $class_array[] = $class; 14577 14578 /** @noinspection SuspiciousLoopInspection */ 14579 /** @noinspection AlterInForeachInspection */ 14580 foreach (self::str_split($s) as &$s) { 14581 if ($s === '-') { 14582 $class_array[0] = '-' . $class_array[0]; 14583 } elseif (!isset($s[2])) { 14584 $class_array[0] .= \preg_quote($s, '/'); 14585 } elseif (self::strlen($s) === 1) { 14586 $class_array[0] .= $s; 14587 } else { 14588 $class_array[] = $s; 14589 } 14590 } 14591 14592 if ($class_array[0]) { 14593 $class_array[0] = '[' . $class_array[0] . ']'; 14594 } 14595 14596 if (\count($class_array) === 1) { 14597 $return = $class_array[0]; 14598 } else { 14599 $return = '(?:' . \implode('|', $class_array) . ')'; 14600 } 14601 14602 $RX_CLASS_CACHE[$cache_key] = $return; 14603 14604 return $return; 14605 } 14606 14607 /** 14608 * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius"). 
14609 * 14610 * @param string $names 14611 * @param string $delimiter 14612 * @param string $encoding 14613 * 14614 * @psalm-pure 14615 * 14616 * @return string 14617 * 14618 * @noinspection ReturnTypeCanBeDeclaredInspection 14619 */ 14620 private static function str_capitalize_name_helper( 14621 string $names, 14622 string $delimiter, 14623 string $encoding = 'UTF-8' 14624 ) { 14625 // init 14626 $name_helper_array = \explode($delimiter, $names); 14627 if ($name_helper_array === false) { 14628 return ''; 14629 } 14630 14631 $special_cases = [ 14632 'names' => [ 14633 'ab', 14634 'af', 14635 'al', 14636 'and', 14637 'ap', 14638 'bint', 14639 'binte', 14640 'da', 14641 'de', 14642 'del', 14643 'den', 14644 'der', 14645 'di', 14646 'dit', 14647 'ibn', 14648 'la', 14649 'mac', 14650 'nic', 14651 'of', 14652 'ter', 14653 'the', 14654 'und', 14655 'van', 14656 'von', 14657 'y', 14658 'zu', 14659 ], 14660 'prefixes' => [ 14661 'al-', 14662 "d'", 14663 'ff', 14664 "l'", 14665 'mac', 14666 'mc', 14667 'nic', 14668 ], 14669 ]; 14670 14671 foreach ($name_helper_array as &$name) { 14672 if (\in_array($name, $special_cases['names'], true)) { 14673 continue; 14674 } 14675 14676 $continue = false; 14677 14678 if ($delimiter === '-') { 14679 /** @noinspection AlterInForeachInspection */ 14680 foreach ((array) $special_cases['names'] as &$beginning) { 14681 if (\strncmp($name, $beginning, \strlen($beginning)) === 0) { 14682 $continue = true; 14683 14684 break; 14685 } 14686 } 14687 } 14688 14689 /** @noinspection AlterInForeachInspection */ 14690 foreach ((array) $special_cases['prefixes'] as &$beginning) { 14691 if (\strncmp($name, $beginning, \strlen($beginning)) === 0) { 14692 $continue = true; 14693 14694 break; 14695 } 14696 } 14697 14698 if ($continue) { 14699 continue; 14700 } 14701 14702 $name = self::ucfirst($name); 14703 } 14704 14705 return \implode($delimiter, $name_helper_array); 14706 } 14707 14708 /** 14709 * Generic case-sensitive transformation for collation 
matching. 14710 * 14711 * @param string $str <p>The input string</p> 14712 * 14713 * @psalm-pure 14714 * 14715 * @return string|null 14716 */ 14717 private static function strtonatfold(string $str) 14718 { 14719 $str = \Normalizer::normalize($str, \Normalizer::NFD); 14720 /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */ 14721 if ($str === false) { 14722 return ''; 14723 } 14724 14725 /** @noinspection PhpUndefinedClassInspection */ 14726 return \preg_replace( 14727 '/\p{Mn}+/u', 14728 '', 14729 $str 14730 ); 14731 } 14732 14733 /** 14734 * @param int|string $input 14735 * 14736 * @psalm-pure 14737 * 14738 * @return string 14739 * 14740 * @noinspection ReturnTypeCanBeDeclaredInspection 14741 * @noinspection SuspiciousBinaryOperationInspection 14742 */ 14743 private static function to_utf8_convert_helper($input) 14744 { 14745 // init 14746 $buf = ''; 14747 14748 if (self::$ORD === null) { 14749 self::$ORD = self::getData('ord'); 14750 } 14751 14752 if (self::$CHR === null) { 14753 self::$CHR = self::getData('chr'); 14754 } 14755 14756 if (self::$WIN1252_TO_UTF8 === null) { 14757 self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8'); 14758 } 14759 14760 $ordC1 = self::$ORD[$input]; 14761 if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases 14762 $buf .= self::$WIN1252_TO_UTF8[$ordC1]; 14763 } else { 14764 /** @noinspection OffsetOperationsInspection */ 14765 $cc1 = self::$CHR[$ordC1 / 64] | "\xC0"; 14766 $cc2 = ((string) $input & "\x3F") | "\x80"; 14767 $buf .= $cc1 . 
$cc2; 14768 } 14769 14770 return $buf; 14771 } 14772 14773 /** 14774 * @param string $str 14775 * 14776 * @psalm-pure 14777 * 14778 * @return string 14779 * 14780 * @noinspection ReturnTypeCanBeDeclaredInspection 14781 */ 14782 private static function urldecode_unicode_helper(string $str) 14783 { 14784 if (\strpos($str, '%u') === false) { 14785 return $str; 14786 } 14787 14788 $pattern = '/%u([0-9a-fA-F]{3,4})/'; 14789 if (\preg_match($pattern, $str)) { 14790 $str = (string) \preg_replace($pattern, '&#x\\1;', $str); 14791 } 14792 14793 return $str; 14794 } 14795 }
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |