please don't use it anymore

*/ const GRAPHEME_CLUSTER_RX = "(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])"; /** * Bom => Byte-Length * * INFO: https://en.wikipedia.org/wiki/Byte_order_mark * * @var array */ private static $BOM = [ "\xef\xbb\xbf" => 3, // UTF-8 BOM 'ï»¿' => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...) "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM ' þÿ' => 6, // UTF-32 (BE) BOM as "WINDOWS-1252" "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM 'ÿþ ' => 6, // UTF-32 (LE) BOM as "WINDOWS-1252" "\xfe\xff" => 2, // UTF-16 (BE) BOM 'þÿ' => 4, // UTF-16 (BE) BOM as "WINDOWS-1252" "\xff\xfe" => 2, // UTF-16 (LE) BOM 'ÿþ' => 4, // UTF-16 (LE) BOM as "WINDOWS-1252" ]; /** * Numeric code point => UTF-8 Character * * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp * * @var array */ private static $WHITESPACE = [ // NULL Byte 0 => "\x0", // Tab 9 => "\x9", // New Line 10 => "\xa", // Vertical Tab 11 => "\xb", // Carriage Return 13 => "\xd", // Ordinary Space 32 => "\x20", // NO-BREAK SPACE 160 => "\xc2\xa0", // OGHAM SPACE MARK 5760 => "\xe1\x9a\x80", // MONGOLIAN VOWEL SEPARATOR 6158 => "\xe1\xa0\x8e", // EN QUAD 8192 => "\xe2\x80\x80", // EM QUAD 8193 => "\xe2\x80\x81", // EN SPACE 8194 => "\xe2\x80\x82", // EM SPACE 8195 => "\xe2\x80\x83", // THREE-PER-EM SPACE 8196 => "\xe2\x80\x84", // FOUR-PER-EM SPACE 
8197 => "\xe2\x80\x85", // SIX-PER-EM SPACE 8198 => "\xe2\x80\x86", // FIGURE SPACE 8199 => "\xe2\x80\x87", // PUNCTUATION SPACE 8200 => "\xe2\x80\x88", // THIN SPACE 8201 => "\xe2\x80\x89", // HAIR SPACE 8202 => "\xe2\x80\x8a", // LINE SEPARATOR 8232 => "\xe2\x80\xa8", // PARAGRAPH SEPARATOR 8233 => "\xe2\x80\xa9", // NARROW NO-BREAK SPACE 8239 => "\xe2\x80\xaf", // MEDIUM MATHEMATICAL SPACE 8287 => "\xe2\x81\x9f", // HALFWIDTH HANGUL FILLER 65440 => "\xef\xbe\xa0", // IDEOGRAPHIC SPACE 12288 => "\xe3\x80\x80", ]; /** * @var array */ private static $WHITESPACE_TABLE = [ 'SPACE' => "\x20", 'NO-BREAK SPACE' => "\xc2\xa0", 'OGHAM SPACE MARK' => "\xe1\x9a\x80", 'EN QUAD' => "\xe2\x80\x80", 'EM QUAD' => "\xe2\x80\x81", 'EN SPACE' => "\xe2\x80\x82", 'EM SPACE' => "\xe2\x80\x83", 'THREE-PER-EM SPACE' => "\xe2\x80\x84", 'FOUR-PER-EM SPACE' => "\xe2\x80\x85", 'SIX-PER-EM SPACE' => "\xe2\x80\x86", 'FIGURE SPACE' => "\xe2\x80\x87", 'PUNCTUATION SPACE' => "\xe2\x80\x88", 'THIN SPACE' => "\xe2\x80\x89", 'HAIR SPACE' => "\xe2\x80\x8a", 'LINE SEPARATOR' => "\xe2\x80\xa8", 'PARAGRAPH SEPARATOR' => "\xe2\x80\xa9", 'ZERO WIDTH SPACE' => "\xe2\x80\x8b", 'NARROW NO-BREAK SPACE' => "\xe2\x80\xaf", 'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f", 'IDEOGRAPHIC SPACE' => "\xe3\x80\x80", 'HALFWIDTH HANGUL FILLER' => "\xef\xbe\xa0", ]; /** * @var array * * @phpstan-var array{upper: string[], lower: string[]} */ private static $COMMON_CASE_FOLD = [ 'upper' => [ 'µ', 'ſ', "\xCD\x85", 'ς', 'ẞ', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE", ], 'lower' => [ 'μ', 's', 'ι', 'σ', 'ß', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι', ], ]; /** * @var array * * @phpstan-var array */ private static $SUPPORT = []; /** * @var string[]|null * * @phpstan-var array|null */ private static $BROKEN_UTF8_FIX; /** * @var string[]|null * * @phpstan-var array|null */ private static $WIN1252_TO_UTF8; /** * @var string[]|null * * 
@phpstan-var array|null */ private static $INTL_TRANSLITERATOR_LIST; /** * @var string[]|null * * @phpstan-var array|null */ private static $ENCODINGS; /** * @var int[]|null * * @phpstan-var array|null */ private static $ORD; /** * @var string[]|null * * @phpstan-var array|null */ private static $EMOJI; /** * @var string[]|null * * @phpstan-var array|null */ private static $EMOJI_VALUES_CACHE; /** * @var string[]|null * * @phpstan-var array|null */ private static $EMOJI_KEYS_CACHE; /** * @var string[]|null * * @phpstan-var array|null */ private static $EMOJI_KEYS_REVERSIBLE_CACHE; /** * @var string[]|null * * @phpstan-var array|null */ private static $CHR; /** * __construct() */ public function __construct() { } /** * Return the character at the specified position: $str[1] like functionality. * * EXAMPLE: UTF8::access('fòô', 1); // 'ò' * * @param string $str

A UTF-8 string.

* @param int $pos

The position of character to return.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

Single multi-byte character.

*/
    public static function access(string $str, int $pos, string $encoding = 'UTF-8'): string
    {
        // An empty string has no characters, and negative positions are rejected here.
        if ($pos < 0 || $str === '') {
            return '';
        }

        // Any other encoding goes through the encoding-aware substr() of this class.
        if ($encoding !== 'UTF-8') {
            return (string) self::substr($str, $pos, 1, $encoding);
        }

        // UTF-8: call mb_substr() directly (no explicit encoding argument is passed).
        return (string) \mb_substr($str, $pos, 1);
    }

    /**
     * Prepends the UTF-8 BOM character to the string and returns the whole string.
     *
     * INFO: If a BOM already exists, the input string is returned.
     *
     * EXAMPLE: UTF8::add_bom_to_string('fòô'); // "\xEF\xBB\xBF" . 'fòô'
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return string *

The output string that contains BOM.

*/
    public static function add_bom_to_string(string $str): string
    {
        // A string that already has a BOM is handed back untouched.
        if (self::string_has_bom($str)) {
            return $str;
        }

        return self::bom() . $str;
    }

    /**
     * Changes the case of all keys in an array.
     *
     * @param array $array

The array to work on

* @param int $case [optional]

Either CASE_UPPER
* or CASE_LOWER (default)

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string[] *

An array with its keys lower- or uppercased.

*/
    public static function array_change_key_case(
        array $array,
        int $case = \CASE_LOWER,
        string $encoding = 'UTF-8'
    ): array {
        // Any value other than CASE_LOWER / CASE_UPPER falls back to lower-casing.
        if ($case !== \CASE_LOWER && $case !== \CASE_UPPER) {
            $case = \CASE_LOWER;
        }

        $return = [];

        // Iterate by value: the elements are only copied into $return, so a
        // by-reference loop would just force a copy of the input array and
        // leave a dangling reference behind.
        foreach ($array as $key => $value) {
            $key = $case === \CASE_LOWER
                ? self::strtolower((string) $key, $encoding)
                : self::strtoupper((string) $key, $encoding);

            // If two keys become equal after conversion, the later entry overwrites the earlier one.
            $return[$key] = $value;
        }

        return $return;
    }

    /**
     * Returns the substring between $start and $end, if found, or an empty
     * string. An optional offset may be supplied from which to begin the
     * search for the start string.
     *
     * @param string $str
     * @param string $start

Delimiter marking the start of the substring.

* @param string $end

Delimiter marking the end of the substring.

* @param int $offset [optional]

Index from which to begin the search. Default: 0

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function between(
        string $str,
        string $start,
        string $end,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            // UTF-8: use the mbstring functions directly.
            $start_position = \mb_strpos($str, $start, $offset);
            if ($start_position === false) {
                return '';
            }

            // First character after the start delimiter.
            $substr_index = $start_position + (int) \mb_strlen($start);
            $end_position = \mb_strpos($str, $end, $substr_index);

            // No end delimiter found, or nothing between the two delimiters.
            if ($end_position === false || $end_position === $substr_index) {
                return '';
            }

            return (string) \mb_substr($str, $substr_index, $end_position - $substr_index);
        }

        // Other encodings: same steps, via the encoding-aware helpers of this class.
        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $start_position = self::strpos($str, $start, $offset, $encoding);
        if ($start_position === false) {
            return '';
        }

        $substr_index = $start_position + (int) self::strlen($start, $encoding);
        $end_position = self::strpos($str, $end, $substr_index, $encoding);

        if ($end_position === false || $end_position === $substr_index) {
            return '';
        }

        return (string) self::substr(
            $str,
            $substr_index,
            $end_position - $substr_index,
            $encoding
        );
    }

    /**
     * Convert binary into a string.
     *
     * The bit string is read as one big-endian number: leading zeros are
     * ignored, the remaining bits are left-padded to whole bytes and every
     * group of 8 bits becomes one byte of the result.
     *
     * INFO: opposite to UTF8::str_to_binary()
     *
     * EXAMPLE: UTF8::binary_to_str('11110000100111111001100010000011'); // '😃'
     *
     * @param string $bin 1|0
     *
     * @psalm-pure
     *
     * @return string
     *                The decoded bytes, or an empty string for empty / all-zero input.
     */
    public static function binary_to_str($bin): string
    {
        if (!isset($bin[0])) {
            return '';
        }

        // Leading zeros carry no value; an all-zero input keeps returning ''.
        $bits = \ltrim($bin, '0');
        if ($bits === '') {
            return '';
        }

        // Left-pad to a whole number of bytes. Converting the whole string via
        // base_convert() + pack('H*') broke when the hex digit count was odd
        // (e.g. '1010' became "\xA0" instead of "\n") and lost precision for
        // long bit strings, so every byte is decoded on its own instead.
        $padding = (8 - \strlen($bits) % 8) % 8;
        $bits = \str_repeat('0', $padding) . $bits;

        $str = '';
        foreach (\str_split($bits, 8) as $byte_bits) {
            $str .= \chr((int) \bindec($byte_bits));
        }

        return $str;
    }

    /**
     * Returns the UTF-8 Byte Order Mark Character.
     *
     * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
     *
     * EXAMPLE: UTF8::bom(); // "\xEF\xBB\xBF"
     *
     * @psalm-pure
     *
     * @return string
     *

UTF-8 Byte Order Mark.

*/
    public static function bom(): string
    {
        $utf8_bom = "\xef\xbb\xbf";

        return $utf8_bom;
    }

    /**
     * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
     *
     * @param callable $callback
     * @param string   $str
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see UTF8::chr_map()
     */
    public static function callback($callback, string $str): array
    {
        $mapped = self::chr_map($callback, $str);

        return $mapped;
    }

    /**
     * Returns the character at $index, with indexes starting at 0.
     *
     * @param string $str

The input string.

* @param int $index

Position of the character.

* @param string $encoding [optional]

Default is UTF-8

* * @psalm-pure * * @return string *

The character at $index.

*/
    public static function char_at(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // UTF-8 goes straight to mb_substr(); everything else uses self::substr() with the given encoding.
        $char = $encoding === 'UTF-8'
            ? \mb_substr($str, $index, 1)
            : self::substr($str, $index, 1, $encoding);

        return (string) $char;
    }

    /**
     * Returns an array consisting of the characters in the string.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return string[] *

An array of chars.

*/
    public static function chars(string $str): array
    {
        /** @var string[] $characters */
        $characters = self::str_split($str);

        return $characters;
    }

    /**
     * This method will auto-detect your server environment for UTF-8 support.
     *
     * @return true|null
     *                   true on the call that performs the detection, null on every later call
     *
     * @internal

You don't need to run it manually, it will be triggered if it's needed.

*/
    public static function checkForSupport()
    {
        // The detection runs only once; every later call is a no-op returning null.
        if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
            return null;
        }

        self::$SUPPORT['already_checked_via_portable_utf8'] = true;

        // http://php.net/manual/en/book.mbstring.php
        self::$SUPPORT['mbstring'] = self::mbstring_loaded();

        self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();
        if (self::$SUPPORT['mbstring'] === true) {
            \mb_internal_encoding('UTF-8');
            /** @noinspection UnusedFunctionResultInspection */
            /** @noinspection PhpComposerExtensionStubsInspection */
            \mb_regex_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        // http://php.net/manual/en/book.iconv.php
        self::$SUPPORT['iconv'] = self::iconv_loaded();

        // http://php.net/manual/en/book.intl.php
        self::$SUPPORT['intl'] = self::intl_loaded();

        // http://php.net/manual/en/class.intlchar.php
        self::$SUPPORT['intlChar'] = self::intlChar_loaded();

        // http://php.net/manual/en/book.ctype.php
        self::$SUPPORT['ctype'] = self::ctype_loaded();

        // http://php.net/manual/en/class.finfo.php
        self::$SUPPORT['finfo'] = self::finfo_loaded();

        // http://php.net/manual/en/book.json.php
        self::$SUPPORT['json'] = self::json_loaded();

        // http://php.net/manual/en/book.pcre.php
        self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();

        // When the symfony polyfill is in use, the internal encoding is set through it as well.
        self::$SUPPORT['symfony_polyfill_used'] = self::symfony_polyfill_used();
        if (self::$SUPPORT['symfony_polyfill_used'] === true) {
            \mb_internal_encoding('UTF-8');
            self::$SUPPORT['mbstring_internal_encoding'] = 'UTF-8';
        }

        return true;
    }

    /**
     * Generates a UTF-8 encoded character from the given code point.
     *
     * INFO: opposite to UTF8::ord()
     *
     * EXAMPLE: UTF8::chr(0x2603); // '☃'
     *
     * @param int $code_point

The code point for which to generate a character.

* @param string $encoding [optional]

Default is UTF-8

* * @psalm-pure * * @return string|null *

Multi-byte character, returns null on failure or empty input.

*/
    public static function chr($code_point, string $encoding = 'UTF-8')
    {
        // init
        /**
         * Per-request cache, keyed by "<code point>_<encoding>".
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array
         */
        static $CHAR_CACHE = [];

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $encoding !== 'UTF-8'
            &&
            $encoding !== 'ISO-8859-1'
            &&
            $encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Only positive integers up to U+10FFFF can be encoded; larger values
        // would index past the byte table used by the fallback below.
        if (!\is_int($code_point) || $code_point <= 0 || $code_point > 0x10FFFF) {
            return null;
        }

        $cache_key = $code_point . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // Single-byte (ASCII) characters only: U+0001 - U+007F.
        // U+0080 must NOT take this path: the table entry for 0x80 is the bare
        // byte 0x80 (the fallback below uses "+ 0x80" entries as continuation
        // bytes), which on its own is not valid UTF-8.
        if ($code_point < 0x80) {
            if (self::$CHR === null) {
                self::$CHR = self::getData('chr');
            }

            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[$code_point];

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $chr = \IntlChar::chr($code_point);

            if ($encoding !== 'UTF-8') {
                $chr = self::encode($encoding, $chr);
            }

            return $CHAR_CACHE[$cache_key] = $chr;
        }

        //
        // fallback via vanilla php
        //

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // $code_point is already known to be an int here (see the is_int() check above).
        if ($code_point <= 0x7FF) {
            // 2 bytes: 110xxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 6) + 0xC0] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } elseif ($code_point <= 0xFFFF) {
            // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 12) + 0xE0] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        } else {
            // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            /**
             * @psalm-suppress PossiblyNullArrayAccess
             */
            $chr = self::$CHR[($code_point >> 18) + 0xF0] .
                   self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
                   self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
                   self::$CHR[($code_point & 0x3F) + 0x80];
        }

        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        return $CHAR_CACHE[$cache_key] = $chr;
    }

    /**
     * Applies callback to all characters of a string.
     *
     * EXAMPLE: UTF8::chr_map([UTF8::class, 'strtolower'], 'Κόσμε'); // ['κ','ό', 'σ', 'μ', 'ε']
     *
     * @param callable $callback

The callback function.

* @param string $str

UTF-8 string to run callback on.

* * @psalm-pure * * @return string[] *

The outcome of the callback, as array.

*/
    public static function chr_map($callback, string $str): array
    {
        // Split first, then run the callback once per resulting element.
        $characters = self::str_split($str);

        return \array_map($callback, $characters);
    }

    /**
     * Generates an array of byte length of each character of a Unicode string.
     *
     * 1 byte => U+0000  - U+007F
     * 2 byte => U+0080  - U+07FF
     * 3 byte => U+0800  - U+FFFF
     * 4 byte => U+10000 - U+10FFFF
     *
     * EXAMPLE: UTF8::chr_size_list('中文空白-test'); // [3, 3, 3, 3, 1, 1, 1, 1, 1]
     *
     * @param string $str

The original unicode string.

* * @psalm-pure * * @return int[] *

An array of byte lengths of each character.

*/
    public static function chr_size_list(string $str): array
    {
        if ($str === '') {
            return [];
        }

        // Normal case: plain strlen() per split-off character.
        if (self::$SUPPORT['mbstring_func_overload'] !== true) {
            return \array_map('\strlen', self::str_split($str));
        }

        // "mb_" is available if overload is used, so use it ...
        $byte_length = static function (string $data): int {
            return \mb_strlen($data, 'CP850'); // 8-BIT
        };

        return \array_map($byte_length, self::str_split($str));
    }

    /**
     * Get a decimal code representation of a specific character.
     *
     * INFO: opposite to UTF8::decimal_to_chr()
     *
     * EXAMPLE: UTF8::chr_to_decimal('§'); // 0xa7
     *
     * @param string $char

The input character.

*
     * @psalm-pure
     *
     * @return int
     *             The code point of the (first) character, or 0 for an empty string.
     */
    public static function chr_to_decimal(string $char): int
    {
        // Nothing to decode: without this guard the code below would call
        // unpack() on an empty string or read $char[0] from an empty string.
        if ($char === '') {
            return 0;
        }

        if (self::$SUPPORT['iconv'] === true) {
            // UCS-4LE stores each code point as a 32-bit little-endian integer,
            // which unpack('V') reads directly.
            $chr_tmp = \iconv('UTF-8', 'UCS-4LE', $char);
            if ($chr_tmp !== false) {
                /** @noinspection OffsetOperationsInspection */
                return \unpack('V', $chr_tmp)[1];
            }
        }

        // Manual decoding: the lead byte determines the sequence length.
        $code = self::ord($char[0]);
        $bytes = 1;

        if (!($code & 0x80)) {
            // 0xxxxxxx
            return $code;
        }

        if (($code & 0xe0) === 0xc0) {
            // 110xxxxx
            $bytes = 2;
            $code &= ~0xc0;
        } elseif (($code & 0xf0) === 0xe0) {
            // 1110xxxx
            $bytes = 3;
            $code &= ~0xe0;
        } elseif (($code & 0xf8) === 0xf0) {
            // 11110xxx
            $bytes = 4;
            $code &= ~0xf0;
        }

        // Append the payload bits of every continuation byte.
        for ($i = 2; $i <= $bytes; ++$i) {
            // 10xxxxxx
            $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
        }

        return $code;
    }

    /**
     * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
     *
     * EXAMPLE: UTF8::chr_to_hex('§'); // U+00a7
     *
     * @param int|string $char

The input character

* @param string $prefix [optional] * * @psalm-pure * * @return string *

The code point encoded as U+xxxx.

*/
    public static function chr_to_hex($char, string $prefix = 'U+'): string
    {
        if ($char === '') {
            return '';
        }

        // This one special input is passed on to ord() as an empty string.
        $normalized = $char === '�' ? '' : (string) $char;

        return self::int_to_hex(self::ord($normalized), $prefix);
    }

    /**
     * Alias for "UTF8::chr_to_decimal()".
     *
     * @param string $chr
     *
     * @psalm-pure
     *
     * @return int
     *
     * @see UTF8::chr_to_decimal()
     * @deprecated

please use "UTF8::chr_to_decimal()"

*/
    public static function chr_to_int(string $chr): int
    {
        // Pure delegation; kept only for backward compatibility.
        $code_point = self::chr_to_decimal($chr);

        return $code_point;
    }

    /**
     * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
     *
     * EXAMPLE:

UTF8::chunk_split('ABC-ÖÄÜ-中文空白-κόσμε', 3); // "ABC\r\n-ÖÄ\r\nÜ-中\r\n文空白\r\n-κό\r\nσμε"

* * @param string $body

The original string to be split.

* @param int $chunk_length [optional]

The maximum character length of a chunk.

* @param string $end [optional]

The character(s) to be inserted at the end of each chunk.

* * @psalm-pure * * @return string *

The chunked string.

*/
    public static function chunk_split(string $body, int $chunk_length = 76, string $end = "\r\n"): string
    {
        $chunks = self::str_split($body, $chunk_length);

        // implode() puts $end between the chunks only; nothing is appended after the last one.
        return \implode($end, $chunks);
    }

    /**
     * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
     *
     * EXAMPLE:

UTF8::clean("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - DÃ¼sseldorf", true, true); // '„Abcdef  …” — 😃 - DÃ¼sseldorf'

* * @param string $str

The string to be sanitized.

* @param bool $remove_bom [optional]

Set to true if you need to remove the UTF-BOM.

* @param bool $normalize_whitespace [optional]

Set to true, if you need to normalize the * whitespace.

* @param bool $normalize_msword [optional]

Set to true, if you need to normalize MS * Word chars e.g.: "…" * => "..."

* @param bool $keep_non_breaking_space [optional]

Set to true to keep non-breaking spaces, in combination with $normalize_whitespace.

* @param bool $replace_diamond_question_mark [optional]

Set to true, if you need to remove diamond * question mark e.g.: "�"

* @param bool $remove_invisible_characters [optional]

Set to false if you do not want to remove invisible characters, e.g.: "\0"

* @param bool $remove_invisible_characters_url_encoded [optional]

Set to true if you also want to remove invisible URL-encoded characters, e.g.: "%0B"
WARNING: this may produce false positives, e.g. aa%0Baa -> aaaa.

* * @psalm-pure * * @return string *

A clean UTF-8 encoded string.

*
     * @noinspection PhpTooManyParametersInspection
     */
    public static function clean(
        string $str,
        bool $remove_bom = false,
        bool $normalize_whitespace = false,
        bool $normalize_msword = false,
        bool $keep_non_breaking_space = false,
        bool $replace_diamond_question_mark = false,
        bool $remove_invisible_characters = true,
        bool $remove_invisible_characters_url_encoded = false
    ): string {
        // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
        // caused connection reset problem on larger strings

        // NOTE: the pattern uses the "x" modifier, so each "#" comment runs to
        // the end of its line. The pattern therefore MUST stay multi-line:
        // collapsed onto one line, the first "#" would comment out the rest.
        //
        // Group 1 captures runs of well-formed sequences; any other byte is
        // matched by group 2 or 3 and dropped by the '$1' replacement.
        $regex = '/
          (
            (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
            |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
            |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
            |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
            ){1,100}                      # ...one or more times
          )
        | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
        | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
        /x';
        /** @noinspection NotOptimalRegularExpressionsInspection */
        $str = (string) \preg_replace($regex, '$1', $str);

        // The optional steps run in this fixed order.
        if ($replace_diamond_question_mark) {
            $str = self::replace_diamond_question_mark($str);
        }

        if ($remove_invisible_characters) {
            $str = self::remove_invisible_characters($str, $remove_invisible_characters_url_encoded);
        }

        if ($normalize_whitespace) {
            $str = self::normalize_whitespace($str, $keep_non_breaking_space);
        }

        if ($normalize_msword) {
            $str = self::normalize_msword($str);
        }

        if ($remove_bom) {
            $str = self::remove_bom($str);
        }

        return $str;
    }

    /**
     * Clean-up a string and show only printable UTF-8 chars at the end + fix UTF-8 encoding.
     *
     * EXAMPLE:

UTF8::cleanup("\xEF\xBB\xBF„Abcdef\xc2\xa0\x20…” — 😃 - DÃ¼sseldorf"); // '„Abcdef  …” — 😃 - Düsseldorf'

* * @param string $str

The input string.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function cleanup($str): string
    {
        // init
        $input = (string) $str;
        if ($input === '') {
            return '';
        }

        // fixed ISO <-> UTF-8 Errors
        $fixed = self::fix_simple_utf8($input);

        // remove all none UTF-8 symbols
        // && remove diamond question mark (�)
        // && remove invisible characters (e.g. "\0")
        // && remove BOM
        // && normalize whitespace chars (but keep non-breaking-spaces)
        return self::clean(
            $fixed,
            true,  // $remove_bom
            true,  // $normalize_whitespace
            false, // $normalize_msword
            true,  // $keep_non_breaking_space
            true   // $replace_diamond_question_mark
        );
    }

    /**
     * Accepts a string or an array of strings and returns an array of Unicode code points.
     *
     * INFO: opposite to UTF8::string()
     *
     * EXAMPLE:


     * UTF8::codepoints('κöñ'); // array(954, 246, 241)
     * // ... OR ...
     * UTF8::codepoints('κöñ', true); // array('U+03ba', 'U+00f6', 'U+00f1')
     *

* * @param string|string[] $arg

A UTF-8 encoded string or an array of such strings.

* @param bool $use_u_style

If true, code points are returned in U+xxxx format; by default they are returned as integers.

* * @psalm-pure * * @return int[]|string[] *

* The array of code points:
* int[] for $use_u_style === false
* string[] for $use_u_style === true
*

*/
    public static function codepoints($arg, bool $use_u_style = false): array
    {
        // A plain string is first split into its characters.
        if (\is_string($arg)) {
            $arg = self::str_split($arg);
        }

        /**
         * @psalm-suppress DocblockTypeContradiction
         */
        if (!\is_array($arg) || $arg === []) {
            return [];
        }

        $code_points = \array_map([self::class, 'ord'], $arg);
        if (!$use_u_style) {
            return $code_points;
        }

        // U+xxxx style: convert every integer code point via int_to_hex().
        return \array_map([self::class, 'int_to_hex'], $code_points);
    }

    /**
     * Trims the string and replaces consecutive whitespace characters with a
     * single space. This includes tabs and newline characters, as well as
     * multibyte whitespace such as the thin space and ideographic space.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return string *

A string with trimmed $str and condensed whitespace.

*/
    public static function collapse_whitespace(string $str): string
    {
        // Without mbstring, the regex helper of this class does the replacement.
        if (self::$SUPPORT['mbstring'] !== true) {
            return \trim(self::regex_replace($str, '[[:space:]]+', ' '));
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        $collapsed = (string) \mb_ereg_replace('[[:space:]]+', ' ', $str);

        return \trim($collapsed);
    }

    /**
     * Returns count of characters used in a string.
     *
     * EXAMPLE: UTF8::count_chars('κaκbκc'); // array('κ' => 3, 'a' => 1, 'b' => 1, 'c' => 1)
     *
     * @param string $str

The input string.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param bool $try_to_use_mb_functions [optional]

Set to false if you don't want to use the "mb_" functions. * * @psalm-pure * * @return int[] *

An associative array with the characters as keys and their counts as values.

*/
    public static function count_chars(
        string $str,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split into chunks of length 1, then count how often each one occurs.
        $characters = self::str_split($str, 1, $clean_utf8, $try_to_use_mb_functions);

        return \array_count_values($characters);
    }

    /**
     * Create a valid CSS identifier for e.g. "class"- or "id"-attributes.
     *
     * EXAMPLE: UTF8::css_identifier('123foo/bar!!!'); // _23foo-bar
     *
     * copy&past from https://github.com/drupal/core/blob/8.8.x/lib/Drupal/Component/Utility/Html.php#L95
     *
     * @param string $str

INFO: if no identifier is given e.g. " " or "", we will create a unique string automatically

* @param string[] $filter     Map of "search => replacement" pairs applied via str_replace().
     * @param bool     $strip_tags When true, strip_tags() is applied to the string.
     * @param bool     $strtolower When true, strtolower() is applied to the string.
     *
     * @psalm-pure
     *
     * @return string
     *
     * @phpstan-param array $filter
     */
    public static function css_identifier(
        string $str = '',
        array $filter = [
            ' ' => '-',
            '/' => '-',
            '[' => '',
            ']' => '',
        ],
        bool $strip_tags = false,
        bool $strtolower = true
    ): string {
        // We could also use strtr() here but its much slower than str_replace(). In
        // order to keep '__' to stay '__' we first replace it with a different
        // placeholder after checking that it is not defined as a filter.
        $placeholder_count = 0;

        // Fallback: a blank input gets an auto-generated unique identifier.
        $str = \trim($str) === ''
            ? \uniqid('auto-generated-css-class', true)
            : self::clean($str);

        if ($strip_tags) {
            $str = \strip_tags($str);
        }

        if ($strtolower) {
            $str = \strtolower($str);
        }

        if (!isset($filter['__'])) {
            $str = \str_replace('__', '##', $str, $placeholder_count);
        }

        /* @noinspection ArrayValuesMissUseInspection */
        $str = \str_replace(\array_keys($filter), \array_values($filter), $str);

        // Replace temporary placeholder '##' with '__' only if the original
        // $identifier contained '__'.
        if ($placeholder_count > 0) {
            $str = \str_replace('##', '__', $str);
        }

        // Valid characters in a CSS identifier are:
        // - the hyphen (U+002D)
        // - 0-9 (U+0030 - U+0039)
        // - A-Z (U+0041 - U+005A)
        // - the underscore (U+005F)
        // - a-z (U+0061 - U+007A)
        // - ISO 10646 characters U+00A1 and higher
        // We strip out any character not in the above list.
        $allowed_only = \preg_replace('/[^\x{002D}\x{0030}-\x{0039}\x{0041}-\x{005A}\x{005F}\x{0061}-\x{007A}\x{00A1}-\x{FFFF}]/u', '', $str);
        $str = (string) $allowed_only;

        // Identifiers cannot start with a digit, two hyphens, or a hyphen followed by a digit.
        $safe_start = \preg_replace(['/^[0-9]/', '/^(-[0-9])|^(--)/'], ['_', '__'], $str);
        $str = (string) $safe_start;

        return \trim($str, '-');
    }

    /**
     * Remove css media-queries.
*
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function css_stripe_media_queries(string $str): string
    {
        $media_query_pattern = '#@media\\s+(?:only\\s)?(?:[\\s{(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#isumU';

        // Every match is replaced with an empty string.
        return (string) \preg_replace($media_query_pattern, '', $str);
    }

    /**
     * Checks whether ctype is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function ctype_loaded(): bool
    {
        $is_loaded = \extension_loaded('ctype');

        return $is_loaded;
    }

    /**
     * Converts an int value into a UTF-8 character.
     *
     * The value is wrapped into a numeric HTML entity ("&#<int>;") and decoded
     * via UTF8::html_entity_decode().
     *
     * INFO: opposite to UTF8::chr_to_decimal()
     *
     * EXAMPLE: UTF8::decimal_to_chr(931); // 'Σ'
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function decimal_to_chr($int): string
    {
        $entity = '&#' . $int . ';';

        return self::html_entity_decode($entity, \ENT_QUOTES | \ENT_HTML5);
    }

    /**
     * Decodes a MIME header field
     *
     * @param string $str
     * @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|string *

A decoded MIME field on success, * or false if an error occurs during the decoding.

*/
    public static function decode_mimeheader($str, string $encoding = 'UTF-8')
    {
        // 'UTF-8' and 'CP850' are used as given; every other name is normalized first.
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // always fallback via symfony polyfill
        return \iconv_mime_decode($str, \ICONV_MIME_DECODE_CONTINUE_ON_ERROR, $encoding);
    }

    /**
     * Convert any two-letter country code (ISO 3166-1) to the corresponding Emoji.
     *
     * @see https://en.wikipedia.org/wiki/ISO_3166-1
     *
     * @param string $country_code_iso_3166_1

e.g. DE

* * @return string *

Emoji or empty string on error.

*/
    public static function emoji_from_country_code(string $country_code_iso_3166_1): string
    {
        if ($country_code_iso_3166_1 === '') {
            return '';
        }

        if (self::strlen($country_code_iso_3166_1) !== 2) {
            return '';
        }

        $country_code_iso_3166_1 = \strtoupper($country_code_iso_3166_1);

        // Only the letters A-Z can be shifted onto the flag range below. Any other
        // two-character input (digits, punctuation, non-ASCII letters) would
        // produce unrelated code points, so it is treated as an error.
        if (\preg_match('/\A[A-Z]{2}\z/', $country_code_iso_3166_1) !== 1) {
            return '';
        }

        // 'A' (0x41) maps to U+1F1E6, 'B' to the next code point, and so on.
        $flagOffset = 0x1F1E6;
        $asciiOffset = 0x41;

        return (self::chr((self::ord($country_code_iso_3166_1[0]) - $asciiOffset + $flagOffset)) ?? '') .
               (self::chr((self::ord($country_code_iso_3166_1[1]) - $asciiOffset + $flagOffset)) ?? '');
    }

    /**
     * Decodes a string which was encoded by "UTF8::emoji_encode()".
     *
     * INFO: opposite to UTF8::emoji_encode()
     *
     * EXAMPLE:


     * UTF8::emoji_decode('foo CHARACTER_OGRE', false); // 'foo 👹'
     * //
     * UTF8::emoji_decode('foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_', true); // 'foo 👹'
     *

* * @param string $str

The input string.

* @param bool $use_reversible_string_mappings [optional]

* When TRUE, we use a reversible string mapping between "emoji_encode" and "emoji_decode".

*
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_decode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Choose which key list is searched for; the replacement list is the same in both modes.
        $search = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) $search,
            (array) self::$EMOJI_VALUES_CACHE,
            $str
        );
    }

    /**
     * Encode a string with emoji chars into a non-emoji string.
     *
     * INFO: opposite to UTF8::emoji_decode()
     *
     * EXAMPLE:


     * UTF8::emoji_encode('foo 👹', false)); // 'foo CHARACTER_OGRE'
     * //
     * UTF8::emoji_encode('foo 👹', true)); // 'foo _-_PORTABLE_UTF8_-_308095726_-_627590803_-_8FTU_ELBATROP_-_'
     *

* * @param string $str

The input string

* @param bool $use_reversible_string_mappings [optional]

* when TRUE, we use a reversible string mapping * between "emoji_encode" and "emoji_decode"

*
     * @psalm-pure
     *
     * @return string
     */
    public static function emoji_encode(
        string $str,
        bool $use_reversible_string_mappings = false
    ): string {
        self::initEmojiData();

        // Choose which key list is used as replacement; the search list is the same in both modes.
        $replacement = $use_reversible_string_mappings
            ? self::$EMOJI_KEYS_REVERSIBLE_CACHE
            : self::$EMOJI_KEYS_CACHE;

        return (string) \str_replace(
            (array) self::$EMOJI_VALUES_CACHE,
            (array) $replacement,
            $str
        );
    }

    /**
     * Encode a string with a new charset-encoding.
     *
     * INFO: This function will also try to fix broken / double encoding,
     *       so you can call this function also on a UTF-8 string and you don't mess up the string.
     *
     * EXAMPLE:


     * UTF8::encode('ISO-8859-1', '-ABC-中文空白-'); // '-ABC-????-'
     * //
     * UTF8::encode('UTF-8', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('HTML', '-ABC-中文空白-'); // '-ABC-中文空白-'
     * //
     * UTF8::encode('BASE64', '-ABC-中文空白-'); // 'LUFCQy3kuK3mlofnqbrnmb0t'
     *

* * @param string $to_encoding

e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.

* @param string $str

The input string

* @param bool $auto_detect_the_from_encoding [optional]

Force the new encoding (we try to fix broken / double * encoding for UTF-8)
otherwise we auto-detect the current * string-encoding

* @param string $from_encoding [optional]

e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.
* An empty string will trigger the autodetect anyway.

*
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnStatement
     */
    public static function encode(
        string $to_encoding,
        string $str,
        bool $auto_detect_the_from_encoding = true,
        string $from_encoding = ''
    ): string {
        // Nothing to convert, or no target encoding given: return the input as-is.
        if ($str === '' || $to_encoding === '') {
            return $str;
        }

        // 'UTF-8' and 'CP850' are used as given; other names are normalized first.
        if ($to_encoding !== 'UTF-8' && $to_encoding !== 'CP850') {
            $to_encoding = self::normalize_encoding($to_encoding, 'UTF-8');
        }

        if ($from_encoding && $from_encoding !== 'UTF-8' && $from_encoding !== 'CP850') {
            $from_encoding = self::normalize_encoding($from_encoding);
        }

        // Source and target are explicitly the same encoding: no work to do.
        if (
            $to_encoding
            &&
            $from_encoding
            &&
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // The pseudo-encodings JSON, BASE64 and HTML-ENTITIES are handled first:
        // as a target they return immediately; as a source the input is decoded
        // and $from_encoding is reset, so it is treated as unknown from here on.
        if ($to_encoding === 'JSON') {
            $return = self::json_encode($str);
            if ($return === false) {
                throw new \InvalidArgumentException('The input string [' . $str . '] can not be used for json_encode().');
            }

            return $return;
        }
        if ($from_encoding === 'JSON') {
            $str = self::json_decode($str);
            $from_encoding = '';
        }

        if ($to_encoding === 'BASE64') {
            return \base64_encode($str);
        }
        if ($from_encoding === 'BASE64') {
            // NOTE(review): strict base64_decode() returns false on invalid input and
            // that value is passed on unchecked - confirm the callees accept it.
            $str = \base64_decode($str, true);
            $from_encoding = '';
        }

        if ($to_encoding === 'HTML-ENTITIES') {
            return self::html_encode($str, true);
        }
        if ($from_encoding === 'HTML-ENTITIES') {
            $str = self::html_entity_decode($str, \ENT_COMPAT);
            $from_encoding = '';
        }

        // Detect the source encoding when requested, or when none is known (any more).
        $from_encoding_auto_detected = false;
        if (
            $auto_detect_the_from_encoding
            ||
            !$from_encoding
        ) {
            $from_encoding_auto_detected = self::str_detect_encoding($str);
        }

        // DEBUG
        //var_dump($to_encoding, $from_encoding, $from_encoding_auto_detected, $str, "\n\n");

        if ($from_encoding_auto_detected !== false) {
            // A detected encoding overrides the one passed in.
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $from_encoding = $from_encoding_auto_detected;
        } elseif ($auto_detect_the_from_encoding) {
            // fallback for the "autodetect"-mode
            return self::to_utf8($str);
        }

        // Still no source encoding, or it already equals the target: nothing to convert.
        if (
            !$from_encoding
            ||
            $from_encoding === $to_encoding
        ) {
            return $str;
        }

        // These common pairs are handled by the dedicated helpers of this class.
        if (
            $to_encoding === 'UTF-8'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'ISO-8859-1'
            )
        ) {
            return self::to_utf8($str);
        }

        if (
            $to_encoding === 'ISO-8859-1'
            &&
            (
                $from_encoding === 'WINDOWS-1252'
                ||
                $from_encoding === 'UTF-8'
            )
        ) {
            return self::to_iso8859($str);
        }

        // Warn (without aborting) when the target encoding needs mbstring and it is not available.
        /** @noinspection InArrayCanBeUsedInspection */
        if (
            $to_encoding !== 'UTF-8'
            &&
            $to_encoding !== 'ISO-8859-1'
            &&
            $to_encoding !== 'WINDOWS-1252'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::encode() without mbstring cannot handle "' . $to_encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            // warning: do not use the symfony polyfill here
            $str_encoded = \mb_convert_encoding(
                $str,
                $to_encoding,
                $from_encoding
            );

            // A falsy result (false or an empty string) falls through to iconv below.
            if ($str_encoded) {
                \assert(\is_string($str_encoded));

                return $str_encoded;
            }
        }

        /** @noinspection PhpUsageOfSilenceOperatorInspection - Detected an incomplete multibyte character in input string */
        $return = @\iconv($from_encoding, $to_encoding, $str);
        if ($return !== false) {
            return $return;
        }

        // Last resort: return the string as it is at this point.
        return $str;
    }

    /**
     * Encodes a string as a MIME header field.
     *
     * @param string $str
     * @param string $from_charset [optional]

Set the input charset.

* @param string $to_charset [optional]

Set the output charset.

* @param string $transfer_encoding [optional]

Set the transfer encoding.

* @param string $linefeed [optional]

Set the used linefeed.

* @param int $indent [optional]

Set the max length indent.

* * @psalm-pure * * @return false|string *

An encoded MIME field on success, * or false if an error occurs during the encoding.

*/
    public static function encode_mimeheader(
        string $str,
        string $from_charset = 'UTF-8',
        string $to_charset = 'UTF-8',
        string $transfer_encoding = 'Q',
        string $linefeed = "\r\n",
        int $indent = 76
    ) {
        // 'UTF-8' and 'CP850' are passed through untouched, anything else is normalized.
        $charsets = [
            'input-charset'  => $from_charset,
            'output-charset' => $to_charset,
        ];
        foreach ($charsets as $option => $charset) {
            if ($charset !== 'UTF-8' && $charset !== 'CP850') {
                $charsets[$option] = self::normalize_encoding($charset, 'UTF-8');
            }
        }

        $preferences = [
            'scheme'           => $transfer_encoding,
            'line-length'      => $indent,
            'input-charset'    => $charsets['input-charset'],
            'output-charset'   => $charsets['output-charset'],
            'line-break-chars' => $linefeed,
        ];

        // always fallback via symfony polyfill
        return \iconv_mime_encode('', $str, $preferences);
    }

    /**
     * Create an extract from a sentence, so if the search-string was found, it try to centered in the output.
     *
     * @param string $str

The input string.

* @param string $search

The searched string.

* @param int|null $length [optional]

Default: null === text->length / 2

* @param string $replacer_for_skipped_text [optional]

Default: …

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function extract_text(
        string $str,
        string $search = '',
        int $length = null,
        string $replacer_for_skipped_text = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // 'UTF-8' and 'CP850' are used as-is; other names are normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Characters stripped from the cut edges of the extract.
        $trim_chars = "\t\r\n -_()!~?=+/*\\,.:;\"'[]{}`&";

        // Default extract length: half of the input length.
        if ($length === null) {
            $length = (int) \round((int) self::strlen($str, $encoding) / 2);
        }

        // No search term: cut from the start of the string, at the first ' ' or '.'
        // found at or after position ($length - 1).
        if ($search === '') {
            if ($encoding === 'UTF-8') {
                if ($length > 0) {
                    $string_length = (int) \mb_strlen($str);
                    // clamp the search offset to the string length
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
                } else {
                    $end = 0;
                }

                // NOTE(review): mb_strpos() returns false for a missing needle; if only one
                // of the two delimiters is found, min() appears to yield false and the
                // (int) cast collapses the position to 0 - confirm this is intended.
                $pos = (int) \min(
                    \mb_strpos($str, ' ', $end),
                    \mb_strpos($str, '.', $end)
                );
            } else {
                if ($length > 0) {
                    $string_length = (int) self::strlen($str, $encoding);
                    $end = ($length - 1) > $string_length ? $string_length : ($length - 1);
                } else {
                    $end = 0;
                }

                $pos = (int) \min(
                    self::strpos($str, ' ', $end, $encoding),
                    self::strpos($str, '.', $end, $encoding)
                );
            }

            if ($pos) {
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, 0, $pos);
                } else {
                    $str_sub = self::substr($str, 0, $pos, $encoding);
                }

                if ($str_sub === false) {
                    return '';
                }

                return \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
            }

            // no cut position: return the input unchanged
            return $str;
        }

        // Case-insensitive position of the search term, and the offset at which an
        // extract of $length chars would start if the term were centered in it.
        if ($encoding === 'UTF-8') {
            $word_position = (int) \mb_stripos($str, $search);
            $half_side = (int) ($word_position - $length / 2 + (int) \mb_strlen($search) / 2);
        } else {
            $word_position = (int) self::stripos($str, $search, 0, $encoding);
            $half_side = (int) ($word_position - $length / 2 + (int) self::strlen($search, $encoding) / 2);
        }

        // Move the start back to the last ' ' or '.' before the centered window.
        $pos_start = 0;
        if ($half_side > 0) {
            if ($encoding === 'UTF-8') {
                $half_text = \mb_substr($str, 0, $half_side);
            } else {
                $half_text = self::substr($str, 0, $half_side, $encoding);
            }
            if ($half_text !== false) {
                if ($encoding === 'UTF-8') {
                    $pos_start = (int) \max(
                        \mb_strrpos($half_text, ' '),
                        \mb_strrpos($half_text, '.')
                    );
                } else {
                    $pos_start = (int) \max(
                        self::strrpos($half_text, ' ', 0, $encoding),
                        self::strrpos($half_text, '.', 0, $encoding)
                    );
                }
            }
        }

        if ($word_position && $half_side > 0) {
            // The term sits far enough into the text: the extract starts at $pos_start
            // and gets the replacer prepended.
            $offset = $pos_start + $length - 1;
            $real_length = (int) self::strlen($str, $encoding);

            // clamp the search offset to the string length
            if ($offset > $real_length) {
                $offset = $real_length;
            }

            // length of the extract, measured from $pos_start to the next delimiter
            if ($encoding === 'UTF-8') {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                ) - $pos_start;
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                ) - $pos_start;
            }

            if (!$pos_end || $pos_end <= 0) {
                // no usable end position: take everything from $pos_start onwards
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, $pos_start, (int) \mb_strlen($str));
                } else {
                    $str_sub = self::substr($str, $pos_start, (int) self::strlen($str, $encoding), $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \ltrim($str_sub, $trim_chars);
                } else {
                    $extract = '';
                }
            } else {
                // text is skipped on both sides
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, $pos_start, $pos_end);
                } else {
                    $str_sub = self::substr($str, $pos_start, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = $replacer_for_skipped_text . \trim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            }
        } else {
            // Term at position 0 / not found, or the window would start at or before
            // the beginning: cut from the start of the string.
            $offset = $length - 1;
            $true_length = (int) self::strlen($str, $encoding);

            if ($offset > $true_length) {
                $offset = $true_length;
            }

            if ($encoding === 'UTF-8') {
                $pos_end = (int) \min(
                    \mb_strpos($str, ' ', $offset),
                    \mb_strpos($str, '.', $offset)
                );
            } else {
                $pos_end = (int) \min(
                    self::strpos($str, ' ', $offset, $encoding),
                    self::strpos($str, '.', $offset, $encoding)
                );
            }

            if ($pos_end) {
                if ($encoding === 'UTF-8') {
                    $str_sub = \mb_substr($str, 0, $pos_end);
                } else {
                    $str_sub = self::substr($str, 0, $pos_end, $encoding);
                }
                if ($str_sub !== false) {
                    $extract = \rtrim($str_sub, $trim_chars) . $replacer_for_skipped_text;
                } else {
                    $extract = '';
                }
            } else {
                // no cut position: the whole input is the extract
                $extract = $str;
            }
        }

        return $extract;
    }

    /**
     * Reads entire file into a string.
     *
     * EXAMPLE: UTF8::file_get_contents('utf16le.txt'); // ...
     *
     * WARNING: Do not use UTF-8 Option ($convert_to_utf8) for binary files (e.g.: images) !!!
     *
     * @see http://php.net/manual/en/function.file-get-contents.php
     *
     * @param string $filename

* Name of the file to read. *

* @param bool $use_include_path [optional]

* Prior to PHP 5, this parameter is called * use_include_path and is a bool. * As of PHP 5 the FILE_USE_INCLUDE_PATH can be used * to trigger include path * search. *

* @param resource|null $context [optional]

* A valid context resource created with * stream_context_create. If you don't need to use a * custom context, you can skip this parameter by &null;. *

* @param int|null $offset [optional]

* The offset where the reading starts. *

* @param int|null $max_length [optional]

* Maximum length of data read. The default is to read until end * of file is reached. *

* @param int $timeout

The time in seconds for the timeout.

* @param bool $convert_to_utf8 WARNING!!!

Maybe you can't use this option for * some files, because they used non default utf-8 chars. Binary files * like images or pdf will not be converted.

* @param string $from_encoding [optional]

e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.
* An empty string will trigger the autodetect anyway.

* * @psalm-pure * * @return false|string *

The function returns the read data as string or false on failure.

*
     * @noinspection PhpTooManyParametersInspection
     */
    public static function file_get_contents(
        string $filename,
        bool $use_include_path = false,
        $context = null,
        int $offset = null,
        int $max_length = null,
        int $timeout = 10,
        bool $convert_to_utf8 = true,
        string $from_encoding = ''
    ) {
        // The filename is run through the string sanitize filter before use.
        $sanitized_filename = \filter_var($filename, \FILTER_SANITIZE_STRING);
        if ($sanitized_filename === false) {
            return false;
        }

        // Without a caller-supplied context, create one that carries the http timeout.
        if ($timeout && $context === null) {
            $context = \stream_context_create(['http' => ['timeout' => $timeout]]);
        }

        // The length argument is only passed along when the caller supplied one.
        $arguments = [$sanitized_filename, $use_include_path, $context, $offset ?? 0];
        if (\is_int($max_length)) {
            $arguments[] = $max_length;
        }

        $data = \file_get_contents(...$arguments);
        if ($data === false) {
            // return false on error
            return false;
        }

        if (!$convert_to_utf8) {
            return $data;
        }

        // Convert unless the data is flagged as binary; UTF-16 / UTF-32 content is
        // converted even then.
        $should_convert = !self::is_binary($data, true)
                          ||
                          self::is_utf16($data, false) !== false
                          ||
                          self::is_utf32($data, false) !== false;
        if ($should_convert) {
            $data = self::cleanup(
                self::encode('UTF-8', $data, false, $from_encoding)
            );
        }

        return $data;
    }

    /**
     * Checks if a file starts with BOM (Byte Order Mark) character.
     *
     * EXAMPLE: UTF8::file_has_bom('utf8_with_bom.txt'); // true
     *
     * @param string $file_path

Path to a valid file.

* * @throws \RuntimeException if file_get_contents() returned false * * @return bool *

true if the file has BOM at the start, false otherwise

*
     * @psalm-pure
     */
    public static function file_has_bom(string $file_path): bool
    {
        $raw_content = \file_get_contents($file_path);
        if ($raw_content !== false) {
            return self::string_has_bom($raw_content);
        }

        throw new \RuntimeException('file_get_contents() returned false for:' . $file_path);
    }

    /**
     * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * EXAMPLE: UTF8::filter(array("\xE9", 'à', 'a')); // array('é', 'à', 'a')
     *
     * @param array|object|string $var
     * @param int                 $normalization_form
     * @param string              $leading_combining
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @template TFilter
     * @phpstan-param TFilter $var
     * @phpstan-return TFilter
     */
    public static function filter(
        $var,
        int $normalization_form = \Normalizer::NFC,
        string $leading_combining = '◌'
    ) {
        $var_type = \gettype($var);

        if ($var_type === 'object' || $var_type === 'array') {
            // Containers are filtered element by element, in place.
            foreach ($var as &$element) {
                $element = self::filter($element, $normalization_form, $leading_combining);
            }
            unset($element);
        } elseif ($var_type === 'string') {
            if (\strpos($var, "\r") !== false) {
                $var = self::normalize_line_ending($var);
            }

            // Pure ASCII needs no normalization.
            if (!ASCII::is_ascii($var)) {
                // '-' marks "already normalized"; otherwise this holds the normalizer output.
                $normalized = '-';
                if (!\Normalizer::isNormalized($var, $normalization_form)) {
                    $normalized = \Normalizer::normalize($var, $normalization_form);
                    // An empty / failed normalization falls back to a re-encode as UTF-8.
                    $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
                }

                \assert(\is_string($var));

                $starts_with_combining_mark = $var[0] >= "\x80"
                                              &&
                                              isset($normalized[0], $leading_combining[0])
                                              &&
                                              \preg_match('/^\\p{Mn}/u', $var);
                if ($starts_with_combining_mark) {
                    // Prevent leading combining chars
                    // for NFC-safe concatenations.
                    $var = $leading_combining . $var;
                }
            }
        }
        // every other type is returned untouched

        /** @noinspection PhpSillyAssignmentInspection */
        /** @phpstan-var TFilter $var */
        $var = $var;

        return $var;
    }

    /**
     * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets a specific external variable by name and optionally filters it.
     *
     * EXAMPLE:


     * // _GET['foo'] = 'bar';
     * UTF8::filter_input(INPUT_GET, 'foo', FILTER_SANITIZE_STRING); // 'bar'
     *

* * @see http://php.net/manual/en/function.filter-input.php * * @param int $type

* One of INPUT_GET, INPUT_POST, * INPUT_COOKIE, INPUT_SERVER, or * INPUT_ENV. *

* @param string $variable_name

* Name of a variable to get. *

* @param int $filter [optional]

* The ID of the filter to apply. The * manual page lists the available filters. *

* @param int|int[]|null $options [optional]

* Associative array of options or bitwise disjunction of flags. If filter * accepts options, flags can be provided in "flags" field of array. *

* * @psalm-pure * * @return mixed *

* Value of the requested variable on success, FALSE if the filter fails, or NULL if the * variable_name variable is not set. If the flag FILTER_NULL_ON_FAILURE is used, it * returns FALSE if the variable is not set and NULL if the filter fails. *

*/
    public static function filter_input(
        int $type,
        string $variable_name,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_options = $options !== null && \func_num_args() >= 4;

        // Forward $options only when the caller actually supplied them.
        $raw_value = $has_options
            ? \filter_input($type, $variable_name, $filter, $options)
            : \filter_input($type, $variable_name, $filter);

        return self::filter($raw_value);
    }

    /**
     * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets external variables and optionally filters them.
     *
     * EXAMPLE:


     * // _GET['foo'] = 'bar';
     * UTF8::filter_input_array(INPUT_GET, array('foo' => 'FILTER_SANITIZE_STRING')); // array('bar')
     *

* * @see http://php.net/manual/en/function.filter-input-array.php * * @param int $type

* One of INPUT_GET, INPUT_POST, * INPUT_COOKIE, INPUT_SERVER, or * INPUT_ENV. *

* @param array|null $definition [optional]

* An array defining the arguments. A valid key is a string * containing a variable name and a valid value is either a filter type, or an array * optionally specifying the filter, flags and options. If the value is an * array, valid keys are filter which specifies the * filter type, * flags which specifies any flags that apply to the * filter, and options which specifies any options that * apply to the filter. See the example below for a better understanding. *

* This parameter can be also an integer holding a filter constant. Then all values in the * input array are filtered by this filter. *

* @param bool $add_empty [optional]

* Add missing keys as NULL to the return value. *

* * @psalm-pure * * @return mixed *

* An array containing the values of the requested variables on success, or FALSE on failure. * An array value will be FALSE if the filter fails, or NULL if the variable is not * set. Or if the flag FILTER_NULL_ON_FAILURE is used, it returns FALSE if the variable * is not set and NULL if the filter fails. *

*/
    public static function filter_input_array(
        int $type,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $has_definition = $definition !== null && \func_num_args() >= 2;

        // Without a definition the native function is called with the input type only.
        $raw_values = $has_definition
            ? \filter_input_array($type, $definition, $add_empty)
            : \filter_input_array($type);

        return self::filter($raw_values);
    }

    /**
     * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Filters a variable with a specified filter.
     *
     * EXAMPLE: UTF8::filter_var('-ABC-中文空白-', FILTER_VALIDATE_URL); // false
     *
     * @see http://php.net/manual/en/function.filter-var.php
     *
     * @param float|int|string|null $variable

* Value to filter. *

* @param int $filter [optional]

* The ID of the filter to apply. The * manual page lists the available filters. *

* @param int|int[]|null $options [optional]

* Associative array of options or bitwise disjunction of flags. If filter * accepts options, flags can be provided in "flags" field of array. For * the "callback" filter, callable type should be passed. The * callback must accept one argument, the value to be filtered, and return * the value after filtering/sanitizing it. *

* * // for filters that accept options, use this format * $options = array( * 'options' => array( * 'default' => 3, // value to return if the filter fails * // other options here * 'min_range' => 0 * ), * 'flags' => FILTER_FLAG_ALLOW_OCTAL, * ); * $var = filter_var('0755', FILTER_VALIDATE_INT, $options); * // for filter that only accept flags, you can pass them directly * $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE); * // for filter that only accept flags, you can also pass as an array * $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, * array('flags' => FILTER_NULL_ON_FAILURE)); * // callback validate filter * function foo($value) * { * // Expected format: Surname, GivenNames * if (strpos($value, ", ") === false) return false; * list($surname, $givennames) = explode(", ", $value, 2); * $empty = (empty($surname) || empty($givennames)); * $notstrings = (!is_string($surname) || !is_string($givennames)); * if ($empty || $notstrings) { * return false; * } else { * return $value; * } * } * $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo')); * *

* * @psalm-pure * * @return mixed *

The filtered data, or FALSE if the filter fails.

*/
    public static function filter_var(
        $variable,
        int $filter = \FILTER_DEFAULT,
        $options = null
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $options_given = \func_num_args() >= 3;

        // $options is forwarded only when the caller passed a third argument.
        $filtered = $options_given
            ? \filter_var($variable, $filter, $options)
            : \filter_var($variable, $filter);

        return self::filter($filtered);
    }

    /**
     * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
     *
     * Gets multiple variables and optionally filters them.
     *
     * EXAMPLE:


     * $filters = [
     *     'name'  => ['filter'  => FILTER_CALLBACK, 'options' => [UTF8::class, 'ucwords']],
     *     'age'   => ['filter'  => FILTER_VALIDATE_INT, 'options' => ['min_range' => 1, 'max_range' => 120]],
     *     'email' => FILTER_VALIDATE_EMAIL,
     * ];
     *
     * $data = [
     *     'name' => 'κόσμε',
     *     'age' => '18',
     *     'email' => 'foo@bar.de'
     * ];
     *
     * UTF8::filter_var_array($data, $filters, true); // ['name' => 'Κόσμε', 'age' => 18, 'email' => 'foo@bar.de']
     *

* * @see http://php.net/manual/en/function.filter-var-array.php * * @param array $data

* An array with string keys containing the data to filter. *

* @param array|int|null $definition [optional]

* An array defining the arguments. A valid key is a string * containing a variable name and a valid value is either a * filter type, or an * array optionally specifying the filter, flags and options. * If the value is an array, valid keys are filter * which specifies the filter type, * flags which specifies any flags that apply to the * filter, and options which specifies any options that * apply to the filter. See the example below for a better understanding. *

* This parameter can be also an integer holding a filter constant. Then all values * in the input array are filtered by this filter. *

* @param bool $add_empty [optional]

* Add missing keys as NULL to the return value. *

* * @psalm-pure * * @return mixed *

* An array containing the values of the requested variables on success, or FALSE on failure. * An array value will be FALSE if the filter fails, or NULL if the variable is not * set. *

*/
    public static function filter_var_array(
        array $data,
        $definition = null,
        bool $add_empty = true
    ) {
        /**
         * @psalm-suppress ImpureFunctionCall - we use func_num_args only for args count matching here
         */
        $definition_given = \func_num_args() >= 2;

        // With only $data supplied, the native function is called without a definition.
        $filtered = $definition_given
            ? \filter_var_array($data, $definition, $add_empty)
            : \filter_var_array($data);

        return self::filter($filtered);
    }

    /**
     * Checks whether finfo is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function finfo_loaded(): bool
    {
        // finfo counts as available when its class can be resolved.
        $finfo_class_exists = \class_exists('finfo');

        return $finfo_class_exists;
    }

    /**
     * Returns the first $n characters of the string.
     *
     * @param string $str

The input string.

* @param int $n

Number of characters to retrieve from the start.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function first_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to take from an empty string or for a non-positive count.
        if ($n <= 0 || $str === '') {
            return '';
        }

        // UTF-8 goes straight to mbstring, other encodings through the class helper.
        $head = $encoding === 'UTF-8'
            ? \mb_substr($str, 0, $n)
            : self::substr($str, 0, $n, $encoding);

        return (string) $head;
    }

    /**
     * Check if the number of Unicode characters isn't greater than the specified integer.
     *
     * EXAMPLE: UTF8::fits_inside('κόσμε', 6); // true
     *
     * @param string $str      the original string to be checked
     * @param int    $box_size the size in number of chars to be checked against string
     *
     * @psalm-pure
     *
     * @return bool
     *

TRUE if string is less than or equal to $box_size, FALSE otherwise.

*/
    public static function fits_inside(string $str, int $box_size): bool
    {
        $char_count = (int) self::strlen($str);

        return $char_count <= $box_size;
    }

    /**
     * Try to fix simple broken UTF-8 strings.
     *
     * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
     *
     * EXAMPLE: UTF8::fix_simple_utf8('DÃ¼sseldorf'); // 'Düsseldorf'
     *
     * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
     * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
     * See: http://en.wikipedia.org/wiki/Windows-1252
     *
     * @param string $str

The input string

*
     * @psalm-pure
     *
     * @return string
     */
    public static function fix_simple_utf8(string $str): string
    {
        if ($str === '') {
            return '';
        }

        /**
         * Broken sequences to search for, built once per request.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array|null
         */
        static $SEARCH_CACHE = null;

        /**
         * Replacements, in the same order as the search list.
         *
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array|null
         */
        static $REPLACE_CACHE = null;

        if ($SEARCH_CACHE === null) {
            // Load the fix table lazily, then split it into search / replace lists.
            if (self::$BROKEN_UTF8_FIX === null) {
                self::$BROKEN_UTF8_FIX = self::getData('utf8_fix');
            }

            $SEARCH_CACHE = \array_keys(self::$BROKEN_UTF8_FIX);
            $REPLACE_CACHE = self::$BROKEN_UTF8_FIX;
        }

        \assert(\is_array($REPLACE_CACHE));

        return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
    }

    /**
     * Fix a double (or multiple) encoded UTF8 string.
     *
     * EXAMPLE: UTF8::fix_utf8('FÃÂÂÂÂ©dÃÂÂÂÂ©ration'); // 'Fédération'
     *
     * @param string|string[] $str you can use a string or an array of strings
     *
     * @psalm-pure
     *
     * @return string|string[]
     *                         Will return the fixed input-"array" or
     *                         the fixed input-"string"
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function fix_utf8($str)
    {
        if (\is_array($str)) {
            // Arrays are fixed element by element, keys untouched.
            foreach ($str as &$item) {
                $item = self::fix_utf8($item);
            }
            unset($item);

            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        $str = (string) $str;
        if ($str === '') {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return $str;
        }

        // Decode and re-encode until the string no longer changes.
        do {
            $previous = $str;
            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = self::to_utf8(
                self::utf8_decode($str, true)
            );
        } while ($previous !== $str);

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return $str;
    }

    /**
     * Get the text direction of a specific character.
     *
     * EXAMPLE: UTF8::getCharDirection('ا'); // 'RTL'
     *
     * @param string $char
     *
     * @psalm-pure
     *
     * @return string
     *

'RTL' or 'LTR'.

*/
    public static function getCharDirection(string $char): string
    {
        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_direction = \IntlChar::charDirection($char);

            // from "IntlChar"-Class
            $direction_classes = [
                'LTR' => [0, 11, 12, 20],
                'RTL' => [1, 13, 14, 15, 21],
            ];

            // Any other class (or no result) falls through to the code point tables below.
            foreach ($direction_classes as $direction => $classes) {
                if (\in_array($intl_direction, $classes, true)) {
                    return $direction;
                }
            }
        }

        $c = static::chr_to_decimal($char);

        // Everything outside 0x5be..0x10b7f is treated as left-to-right.
        if (!($c >= 0x5be && $c <= 0x10b7f)) {
            return 'LTR';
        }

        if ($c <= 0x85e) {
            $is_rtl = $c === 0x5be ||
                      $c === 0x5c0 ||
                      $c === 0x5c3 ||
                      $c === 0x5c6 ||
                      ($c >= 0x5d0 && $c <= 0x5ea) ||
                      ($c >= 0x5f0 && $c <= 0x5f4) ||
                      $c === 0x608 ||
                      $c === 0x60b ||
                      $c === 0x60d ||
                      $c === 0x61b ||
                      ($c >= 0x61e && $c <= 0x64a) ||
                      ($c >= 0x66d && $c <= 0x66f) ||
                      ($c >= 0x671 && $c <= 0x6d5) ||
                      ($c >= 0x6e5 && $c <= 0x6e6) ||
                      ($c >= 0x6ee && $c <= 0x6ef) ||
                      ($c >= 0x6fa && $c <= 0x70d) ||
                      $c === 0x710 ||
                      ($c >= 0x712 && $c <= 0x72f) ||
                      ($c >= 0x74d && $c <= 0x7a5) ||
                      $c === 0x7b1 ||
                      ($c >= 0x7c0 && $c <= 0x7ea) ||
                      ($c >= 0x7f4 && $c <= 0x7f5) ||
                      $c === 0x7fa ||
                      ($c >= 0x800 && $c <= 0x815) ||
                      $c === 0x81a ||
                      $c === 0x824 ||
                      $c === 0x828 ||
                      ($c >= 0x830 && $c <= 0x83e) ||
                      ($c >= 0x840 && $c <= 0x858) ||
                      $c === 0x85e;
        } elseif ($c === 0x200f) {
            $is_rtl = true;
        } elseif ($c >= 0xfb1d) {
            $is_rtl = $c === 0xfb1d ||
                      ($c >= 0xfb1f && $c <= 0xfb28) ||
                      ($c >= 0xfb2a && $c <= 0xfb36) ||
                      ($c >= 0xfb38 && $c <= 0xfb3c) ||
                      $c === 0xfb3e ||
                      ($c >= 0xfb40 && $c <= 0xfb41) ||
                      ($c >= 0xfb43 && $c <= 0xfb44) ||
                      ($c >= 0xfb46 && $c <= 0xfbc1) ||
                      ($c >= 0xfbd3 && $c <= 0xfd3d) ||
                      ($c >= 0xfd50 && $c <= 0xfd8f) ||
                      ($c >= 0xfd92 && $c <= 0xfdc7) ||
                      ($c >= 0xfdf0 && $c <= 0xfdfc) ||
                      ($c >= 0xfe70 && $c <= 0xfe74) ||
                      ($c >= 0xfe76 && $c <= 0xfefc) ||
                      ($c >= 0x10800 && $c <= 0x10805) ||
                      $c === 0x10808 ||
                      ($c >= 0x1080a && $c <= 0x10835) ||
                      ($c >= 0x10837 && $c <= 0x10838) ||
                      $c === 0x1083c ||
                      ($c >= 0x1083f && $c <= 0x10855) ||
                      ($c >= 0x10857 && $c <= 0x1085f) ||
                      ($c >= 0x10900 && $c <= 0x1091b) ||
                      ($c >= 0x10920 && $c <= 0x10939) ||
                      $c === 0x1093f ||
                      $c === 0x10a00 ||
                      ($c >= 0x10a10 && $c <= 0x10a13) ||
                      ($c >= 0x10a15 && $c <= 0x10a17) ||
                      ($c >= 0x10a19 && $c <= 0x10a33) ||
                      ($c >= 0x10a40 && $c <= 0x10a47) ||
                      ($c >= 0x10a50 && $c <= 0x10a58) ||
                      ($c >= 0x10a60 && $c <= 0x10a7f) ||
                      ($c >= 0x10b00 && $c <= 0x10b35) ||
                      ($c >= 0x10b40 && $c <= 0x10b55) ||
                      ($c >= 0x10b58 && $c <= 0x10b72) ||
                      ($c >= 0x10b78 && $c <= 0x10b7f);
        } else {
            // code points between the two tables (other than 0x200f)
            $is_rtl = false;
        }

        return $is_rtl ? 'RTL' : 'LTR';
    }

    /**
     * Check for php-support.
     *
     * @param string|null $key
     *
     * @psalm-pure
     *
     * @return mixed
     *               Return the full support-"array", if $key === null
* return bool-value, if $key is used and available
* otherwise return null
     */
    public static function getSupportInfo(string $key = null)
    {
        if ($key !== null) {
            // Load the transliterator list lazily on the first keyed lookup.
            if (self::$INTL_TRANSLITERATOR_LIST === null) {
                self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
            }

            // compatibility fix for old versions
            self::$SUPPORT['intl__transliterator_list_ids'] = self::$INTL_TRANSLITERATOR_LIST;

            return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
        }

        // No key: hand back the whole support table as it currently is.
        return self::$SUPPORT;
    }

    /**
     * Warning: this method only works for some file-types (png, jpg)
     *          if you need more supported types, please use e.g. "finfo"
     *
     * @param string $str
     * @param array  $fallback

with this keys: 'ext', 'mime', 'type' * * @psalm-pure * * @return null[]|string[] *

with this keys: 'ext', 'mime', 'type'

*
     * @phpstan-param array{ext: null|string, mime: null|string, type: null|string} $fallback
     */
    public static function get_file_type(
        string $str,
        array $fallback = [
            'ext'  => null,
            'mime' => 'application/octet-stream',
            'type' => null,
        ]
    ): array {
        // Detection is based on the first two bytes only; shorter input cannot match.
        /** @var false|string $signature - needed for PhpStan (stubs error) */
        $signature = \substr($str, 0, 2);
        if ($signature === false || \strlen($signature) !== 2) {
            return $fallback;
        }

        //
        // info: https://en.wikipedia.org/wiki/Magic_number_%28programming%29#Format_indicator
        //
        // WARNING: do not add too simple comparisons, because of false-positive results:
        //
        // 3780 => 'pdf', 7790 => 'exe', 7784 => 'midi', 8075 => 'zip',
        // 8297 => 'rar', 7173 => 'gif', 7373 => 'tiff' 6677 => 'bmp', ...
        //
        $known_signatures = [
            // bytes 255, 216
            "\xFF\xD8" => [
                'ext'  => 'jpg',
                'mime' => 'image/jpeg',
                'type' => 'binary',
            ],
            // bytes 137, 80
            "\x89\x50" => [
                'ext'  => 'png',
                'mime' => 'image/png',
                'type' => 'binary',
            ],
        ];

        if (isset($known_signatures[$signature])) {
            return $known_signatures[$signature];
        }

        return $fallback;
    }

    /**
     * @param int $length

Length of the random string.

* @param string $possible_chars [optional]

Characters string for the random selection.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @return string
     */
    public static function get_random_string(
        int $length,
        string $possible_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789',
        string $encoding = 'UTF-8'
    ): string {
        // The literal 'UTF-8' uses mbstring directly; every other value is normalized
        // and handled by the class helpers.
        $use_mbstring = $encoding === 'UTF-8';
        if (!$use_mbstring) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $pool_size = $use_mbstring
            ? (int) \mb_strlen($possible_chars)
            : (int) self::strlen($possible_chars, $encoding);
        if ($pool_size === 0) {
            return '';
        }

        //
        // add random chars
        //

        $random_string = '';
        $picked = 0;
        while ($picked < $length) {
            try {
                $index = \random_int(0, $pool_size - 1);
            } catch (\Exception $e) {
                /** @noinspection RandomApiMigrationInspection */
                $index = \mt_rand(0, $pool_size - 1);
            }

            $char = $use_mbstring
                ? \mb_substr($possible_chars, $index, 1)
                : self::substr($possible_chars, $index, 1, $encoding);

            // A failed extraction does not count towards the requested length.
            if ($char === false) {
                continue;
            }

            $random_string .= $char;
            ++$picked;
        }

        return $random_string;
    }

    /**
     * @param int|string $extra_entropy [optional]

Extra entropy via a string or int value.

* @param bool $use_md5 [optional]

Return the unique identifier as md5-hash? Default: true

*
     * @return string
     */
    public static function get_unique_string($extra_entropy = '', bool $use_md5 = true): string
    {
        try {
            $random_number = \random_int(0, \mt_getrandmax());
        } catch (\Exception $e) {
            /** @noinspection RandomApiMigrationInspection */
            $random_number = \mt_rand(0, \mt_getrandmax());
        }

        // Prefix for uniqid(): random number, session id, client / server address
        // (when present) and the caller's extra entropy.
        $entropy_prefix = $random_number . \session_id();
        $entropy_prefix .= $_SERVER['REMOTE_ADDR'] ?? '';
        $entropy_prefix .= $_SERVER['SERVER_ADDR'] ?? '';
        $entropy_prefix .= $extra_entropy;

        $unique_id = \uniqid($entropy_prefix, true);
        if (!$use_md5) {
            return $unique_id;
        }

        return \md5($unique_id . $entropy_prefix);
    }

    /**
     * alias for "UTF8::string_has_bom()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::string_has_bom()
     * @deprecated

please use "UTF8::string_has_bom()"

*/
    public static function hasBom(string $str): bool
    {
        // Pure alias: the check itself lives in string_has_bom().
        $has_bom = self::string_has_bom($str);

        return $has_bom;
    }

    /**
     * Returns true if the string contains a lower case char, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not the string contains a lower case character.

*/
    public static function has_lowercase(string $str): bool
    {
        $pattern = '.*[[:lower:]]';

        // Use mbstring's regex engine when available, otherwise the class helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Returns true if the string contains whitespace, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not the string contains whitespace.

*/
    public static function has_whitespace(string $str): bool
    {
        $pattern = '.*[[:space:]]';

        // Use mbstring's regex engine when available, otherwise the class helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Returns true if the string contains an upper case char, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not the string contains an upper case character.

*/
    public static function has_uppercase(string $str): bool
    {
        $pattern = '.*[[:upper:]]';

        // Use mbstring's regex engine when available, otherwise the class helper.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, $pattern);
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match($pattern, $str);
    }

    /**
     * Converts a hexadecimal value into a UTF-8 character.
     *
     * INFO: opposite to UTF8::chr_to_hex()
     *
     * EXAMPLE: UTF8::hex_to_chr('U+00a7'); // '§'
     *
     * @param string $hexdec

The hexadecimal value.

*
     * @psalm-pure
     *
     * @return false|string one single UTF-8 character
     */
    public static function hex_to_chr(string $hexdec)
    {
        /** @noinspection PhpUsageOfSilenceOperatorInspection - Invalid characters passed for attempted conversion, these have been ignored */
        $code_point = (int) @\hexdec($hexdec);

        return self::decimal_to_chr($code_point);
    }

    /**
     * Converts hexadecimal U+xxxx code point representation to integer.
     *
     * INFO: opposite to UTF8::int_to_hex()
     *
     * EXAMPLE: UTF8::hex_to_int('U+00f1'); // 241
     *
     * @param string $hexdec

The hexadecimal code point representation.

* * @psalm-pure * * @return false|int *

The code point, or false on failure.

*/
    public static function hex_to_int($hexdec)
    {
        // init
        $hexdec = (string) $hexdec;

        if ($hexdec === '') {
            return false;
        }

        // Accepted shapes: optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal digits.
        // Only real hex digits are allowed: letters such as "g".."z" used to slip through
        // and were silently converted to 0 by intval() instead of being reported as a failure.
        if (\preg_match('/^(?:\\\u|U\+|)([a-fA-F0-9]{4,6})$/', $hexdec, $match)) {
            return \intval($match[1], 16);
        }

        return false;
    }

    /**
     * alias for "UTF8::html_entity_decode()"
     *
     * @param string   $str
     * @param int|null $flags
     * @param string   $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::html_entity_decode()
     * @deprecated

please use "UTF8::html_entity_decode()"

*/
    public static function html_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Deprecated alias: forwards all arguments unchanged.
        $decoded = self::html_entity_decode($str, $flags, $encoding);

        return $decoded;
    }

    /**
     * Converts a UTF-8 string to a series of HTML numbered entities.
     *
     * INFO: opposite to UTF8::html_decode()
     *
     * EXAMPLE: <code>UTF8::html_encode('中文空白'); // '&#20013;&#25991;&#31354;&#30333;'</code>
     *
     * @param string $str

The Unicode string to be encoded as numbered entities.

* @param bool $keep_ascii_chars [optional]

Keep ASCII chars.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string HTML numbered entities
     */
    public static function html_encode(
        string $str,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
        if (self::$SUPPORT['mbstring'] === true) {
            // The conversion map starts at 0x80 when ASCII chars are kept, otherwise at 0x00.
            $conv_map = [$keep_ascii_chars ? 0x80 : 0x00, 0xfffff, 0, 0xfffff];

            if ($encoding === 'UTF-8') {
                /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
                $encoded = \mb_encode_numericentity($str, $conv_map);
                if ($encoded !== null && $encoded !== false) {
                    return $encoded;
                }
            }

            // Either a non UTF-8 encoding, or the call above failed: try with the explicit encoding.
            /** @var false|string|null $encoded - needed for PhpStan (stubs error) */
            $encoded = \mb_encode_numericentity($str, $conv_map, $encoding);
            if ($encoded !== null && $encoded !== false) {
                return $encoded;
            }
        }

        //
        // fallback via vanilla php
        //

        $encode_chr = static function (string $chr) use ($keep_ascii_chars, $encoding): string {
            return self::single_chr_html_encode($chr, $keep_ascii_chars, $encoding);
        };

        return \implode('', \array_map($encode_chr, self::str_split($str)));
    }

    /**
     * UTF-8 version of html_entity_decode()
     *
     * The reason we are not using html_entity_decode() by itself is because
     * while it is not technically correct to leave out the semicolon
     * at the end of an entity most browsers will still interpret the entity
     * correctly. html_entity_decode() does not convert entities without
     * semicolons, so we are left with our own little solution here. Bummer.
     *
     * Convert all HTML entities to their applicable characters.
     *
     * INFO: opposite to UTF8::html_encode()
     *
     * EXAMPLE: <code>UTF8::html_entity_decode('&#20013;&#25991;&#31354;&#30333;'); // '中文空白'</code>
     *
     * @see http://php.net/manual/en/function.html-entity-decode.php
     *
     * @param string $str

* The input string. *

* @param int|null $flags [optional]

* A bitmask of one or more of the following flags, which specify how to handle quotes * and which document type to use. The default is ENT_COMPAT | ENT_HTML401. * * Available flags constants * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Constant Name Description
ENT_COMPAT Will convert double-quotes and leave single-quotes alone.
ENT_QUOTES Will convert both double and single quotes.
ENT_NOQUOTES Will leave both double and single quotes unconverted.
ENT_HTML401 * Handle code as HTML 4.01. *
ENT_XML1 * Handle code as XML 1. *
ENT_XHTML * Handle code as XHTML. *
ENT_HTML5 * Handle code as HTML 5. *
*

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string the decoded string
     */
    public static function html_entity_decode(
        string $str,
        int $flags = null,
        string $encoding = 'UTF-8'
    ): string {
        // Too short to hold an entity; examples: &; || &x;
        if (!isset($str[3])) {
            return $str;
        }

        // no "&"
        if (\strpos($str, '&') === false) {
            return $str;
        }

        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($flags === null) {
            $flags = \ENT_QUOTES | \ENT_HTML5;
        }

        /** @noinspection InArrayCanBeUsedInspection */
        if (
            !($encoding === 'UTF-8' || $encoding === 'ISO-8859-1' || $encoding === 'WINDOWS-1252')
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        // Decode repeatedly until a pass no longer changes the string,
        // so nested encodings such as "&amp;lt;" are resolved completely.
        do {
            $previous = $str;

            if (\strpos($str, '&') !== false) {
                if (\strpos($str, '&#') !== false) {
                    // decode also numeric & UTF16 two byte entities
                    // (appends the missing ";" so that html_entity_decode() accepts them)
                    $str = (string) \preg_replace(
                        '/(&#(?:x0*[0-9a-fA-F]{2,6}(?![0-9a-fA-F;])|(?:0*\d{2,6}(?![0-9;]))))/S',
                        '$1;',
                        $str
                    );
                }

                $str = \html_entity_decode($str, $flags, $encoding);
            }
        } while ($previous !== $str);

        return $str;
    }

    /**
     * Create a escape html version of the string via "UTF8::htmlspecialchars()".
     *
     * @param string $str
     * @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function html_escape(string $str, string $encoding = 'UTF-8'): string
    {
        // Escape both quote styles and substitute invalid byte sequences.
        $flags = \ENT_QUOTES | \ENT_SUBSTITUTE;

        return self::htmlspecialchars($str, $flags, $encoding);
    }

    /**
     * Remove empty html-tag.
     *
     * e.g.: <tag></tag>

*
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function html_stripe_empty_tags(string $str): string
    {
        // An opening tag, optional whitespace, then a closing tag.
        $empty_tag_pattern = '/<[^\\/>]*?>\\s*?<\\/[^>]*?>/u';

        $stripped = \preg_replace($empty_tag_pattern, '', $str);

        // preg_replace() yields null on error, which is cast to an empty string.
        return (string) $stripped;
    }

    /**
     * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities().
     *
     * EXAMPLE: <code>UTF8::htmlentities('<白-öäü>'); // '&lt;&#30333;-&ouml;&auml;&uuml;&gt;'</code>
     *
     * @see http://php.net/manual/en/function.htmlentities.php
     *
     * @param string $str

* The input string. *

* @param int $flags [optional]

* A bitmask of one or more of the following flags, which specify how to handle * quotes, invalid code unit sequences and the used document type. The default is * ENT_COMPAT | ENT_HTML401. * * Available flags constants * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Constant Name Description
ENT_COMPAT Will convert double-quotes and leave single-quotes alone.
ENT_QUOTES Will convert both double and single quotes.
ENT_NOQUOTES Will leave both double and single quotes unconverted.
ENT_IGNORE * Silently discard invalid code unit sequences instead of returning * an empty string. Using this flag is discouraged as it * may have security implications. *
ENT_SUBSTITUTE * Replace invalid code unit sequences with a Unicode Replacement Character * U+FFFD (UTF-8) or &#FFFD; (otherwise) instead of returning an empty * string. *
ENT_DISALLOWED * Replace invalid code points for the given document type with a * Unicode Replacement Character U+FFFD (UTF-8) or &#FFFD; * (otherwise) instead of leaving them as is. This may be useful, for * instance, to ensure the well-formedness of XML documents with * embedded external content. *
ENT_HTML401 * Handle code as HTML 4.01. *
ENT_XML1 * Handle code as XML 1. *
ENT_XHTML * Handle code as XHTML. *
ENT_HTML5 * Handle code as HTML 5. *
*

* @param string $encoding [optional]

* Like htmlspecialchars, * htmlentities takes an optional third argument * encoding which defines encoding used in * conversion. * Although this argument is technically optional, you are highly * encouraged to specify the correct value for your code. *

* @param bool $double_encode [optional]

* When double_encode is turned off PHP will not * encode existing html entities. The default is to convert everything. *

* * @psalm-pure * * @return string *

* The encoded string. *

* If the input string contains an invalid code unit * sequence within the given encoding an empty string * will be returned, unless either the ENT_IGNORE or * ENT_SUBSTITUTE flags are set. *

*/
    public static function htmlentities(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        $str = \htmlentities(
            $str,
            $flags,
            $encoding,
            $double_encode
        );

        /**
         * PHP doesn't replace a backslash to its html entity since this is something
         * that's mostly used to escape characters when inserting in a database. Since
         * we're using a decent database layer, we don't need this and we're replacing
         * the backslashes by their html entity equivalent.
         *
         * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
         */
        // The replacement must be the numeric entity "&#92;"; a bare backslash here
        // would escape the closing quote and break the string literal.
        $str = \str_replace('\\', '&#92;', $str);

        // Finally turn every remaining non-ASCII char into a numeric entity (ASCII is kept).
        return self::html_encode($str, true, $encoding);
    }

    /**
     * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
     *
     * INFO: Take a look at "UTF8::htmlentities()"
     *
     * EXAMPLE: <code>UTF8::htmlspecialchars('<白-öäü>'); // '&lt;白-öäü&gt;'</code>
     *
     * @see http://php.net/manual/en/function.htmlspecialchars.php
     *
     * @param string $str

* The string being converted. *

* @param int $flags [optional]

* @param string $encoding [optional]

* Defines encoding used in conversion. *

* For the purposes of this function, the encodings * ISO-8859-1, ISO-8859-15, * UTF-8, cp866, * cp1251, cp1252, and * KOI8-R are effectively equivalent, provided the * string itself is valid for the encoding, as * the characters affected by htmlspecialchars occupy * the same positions in all of these encodings. *

* @param bool $double_encode [optional]

* When double_encode is turned off PHP will not * encode existing html entities, the default is to convert everything. *

* * @psalm-pure * * @return string the converted string. *

* If the input string contains an invalid code unit
     * sequence within the given encoding an empty string
     * will be returned, unless either the ENT_IGNORE or
     * ENT_SUBSTITUTE flags are set
     */
    public static function htmlspecialchars(
        string $str,
        int $flags = \ENT_COMPAT,
        string $encoding = 'UTF-8',
        bool $double_encode = true
    ): string {
        // "UTF-8" and "CP850" are passed through as-is; anything else is normalized first.
        if (!($encoding === 'UTF-8' || $encoding === 'CP850')) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        return \htmlspecialchars($str, $flags, $encoding, $double_encode);
    }

    /**
     * Checks whether iconv is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function iconv_loaded(): bool
    {
        // Pure runtime check: is the "iconv" extension loaded?
        $is_loaded = \extension_loaded('iconv');

        return $is_loaded;
    }

    /**
     * alias for "UTF8::decimal_to_chr()"
     *
     * @param int|string $int
     *
     * @phpstan-param int|numeric-string $int
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::decimal_to_chr()
     * @deprecated

please use "UTF8::decimal_to_chr()"

*/
    public static function int_to_chr($int): string
    {
        // Deprecated alias: forwards the code point unchanged.
        $chr = self::decimal_to_chr($int);

        return $chr;
    }

    /**
     * Converts Integer to hexadecimal U+xxxx code point representation.
     *
     * INFO: opposite to UTF8::hex_to_int()
     *
     * EXAMPLE: <code>UTF8::int_to_hex(241); // 'U+00f1'</code>
     *
     * @param int $int

The integer to be converted to hexadecimal code point.

* @param string $prefix [optional]
     *
     * @psalm-pure
     *
     * @return string the code point, or empty string on failure
     */
    public static function int_to_hex(int $int, string $prefix = 'U+'): string
    {
        $hex = \dechex($int);

        // Left-pad with zeros to at least four digits; longer values are kept as they are.
        if (\strlen($hex) < 4) {
            $hex = \str_pad($hex, 4, '0', \STR_PAD_LEFT);
        }

        return $prefix . $hex;
    }

    /**
     * Checks whether intl-char is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function intlChar_loaded(): bool
    {
        // Pure runtime check: does the "IntlChar" class exist?
        $is_loaded = \class_exists('IntlChar');

        return $is_loaded;
    }

    /**
     * Checks whether intl is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function intl_loaded(): bool
    {
        // Pure runtime check: is the "intl" extension loaded?
        $is_loaded = \extension_loaded('intl');

        return $is_loaded;
    }

    /**
     * alias for "UTF8::is_ascii()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_ascii()
     * @deprecated

please use "UTF8::is_ascii()"

*/
    public static function isAscii(string $str): bool
    {
        // Deprecated alias: the check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }

    /**
     * alias for "UTF8::is_base64()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_base64()
     * @deprecated

please use "UTF8::is_base64()"

*/
    public static function isBase64($str): bool
    {
        // Deprecated alias: forwards to is_base64() with its default options.
        $is_base64 = self::is_base64($str);

        return $is_base64;
    }

    /**
     * alias for "UTF8::is_binary()"
     *
     * @param int|string $str
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_binary()
     * @deprecated

please use "UTF8::is_binary()"

*/
    public static function isBinary($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged.
        $is_binary = self::is_binary($str, $strict);

        return $is_binary;
    }

    /**
     * alias for "UTF8::is_bom()"
     *
     * @param string $utf8_chr
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_bom()
     * @deprecated

please use "UTF8::is_bom()"

*/
    public static function isBom(string $utf8_chr): bool
    {
        // Deprecated alias: forwards the argument unchanged.
        $is_bom = self::is_bom($utf8_chr);

        return $is_bom;
    }

    /**
     * alias for "UTF8::is_html()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_html()
     * @deprecated

please use "UTF8::is_html()"

*/
    public static function isHtml(string $str): bool
    {
        // Deprecated alias: forwards the argument unchanged.
        $is_html = self::is_html($str);

        return $is_html;
    }

    /**
     * alias for "UTF8::is_json()"
     *
     * @param string $str
     *
     * @return bool
     *
     * @see        UTF8::is_json()
     * @deprecated

please use "UTF8::is_json()"

*/
    public static function isJson(string $str): bool
    {
        // Deprecated alias: forwards to is_json() with its default options.
        $is_json = self::is_json($str);

        return $is_json;
    }

    /**
     * alias for "UTF8::is_utf16()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   false if it's not UTF-16,
* 1 for UTF-16LE,
* 2 for UTF-16BE * * @see UTF8::is_utf16() * @deprecated

please use "UTF8::is_utf16()"

*/
    public static function isUtf16($str)
    {
        // Deprecated alias: forwards to is_utf16() with its default options.
        $utf16_type = self::is_utf16($str);

        return $utf16_type;
    }

    /**
     * alias for "UTF8::is_utf32()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return false|int
     *                   false if it's not UTF-32,
     *                   1 for UTF-32LE,
     *                   2 for UTF-32BE
     *
     * @see        UTF8::is_utf32()
     * @deprecated

please use "UTF8::is_utf32()"

*/
    public static function isUtf32($str)
    {
        // Deprecated alias: forwards to is_utf32() with its default options.
        $utf32_type = self::is_utf32($str);

        return $utf32_type;
    }

    /**
     * alias for "UTF8::is_utf8()"
     *
     * @param string $str
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see        UTF8::is_utf8()
     * @deprecated

please use "UTF8::is_utf8()"

*/
    public static function isUtf8($str, bool $strict = false): bool
    {
        // Deprecated alias: forwards both arguments unchanged.
        $is_utf8 = self::is_utf8($str, $strict);

        return $is_utf8;
    }

    /**
     * Returns true if the string contains only alphabetic chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only alphabetic chars.

*/
    public static function is_alpha(string $str): bool
    {
        // Without mbstring, delegate to self::str_matches_pattern() with the same pattern.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:alpha:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:alpha:]]*$', $str);
    }

    /**
     * Returns true if the string contains only alphabetic and numeric chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only alphanumeric chars.

*/
    public static function is_alphanumeric(string $str): bool
    {
        // Same pattern for both engines; mbstring is preferred when available.
        return self::$SUPPORT['mbstring'] === true
            ? /** @noinspection PhpComposerExtensionStubsInspection */ \mb_ereg_match('^[[:alnum:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:alnum:]]*$');
    }

    /**
     * Returns true if the string contains only punctuation chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only punctuation chars.

*/
    public static function is_punctuation(string $str): bool
    {
        // Anchored pattern: every char (possibly none) must be punctuation.
        $only_punctuation = self::str_matches_pattern($str, '^[[:punct:]]*$');

        return $only_punctuation;
    }

    /**
     * Returns true if the string contains only printable (non-invisible) chars, false otherwise.
     *
     * @param string $str

The input string.

* @param bool $ignore_control_characters [optional]

Ignore control characters like [LRM] or [LSEP].

* * @psalm-pure * * @return bool *

Whether or not $str contains only printable (non-invisible) chars.

*/
    public static function is_printable(string $str, bool $ignore_control_characters = false): bool
    {
        // The string is printable when removing invisible characters leaves it unchanged.
        $cleaned = self::remove_invisible_characters($str, false, '', $ignore_control_characters);

        return $cleaned === $str;
    }

    /**
     * Checks if a string is 7 bit ASCII.
     *
     * EXAMPLE: <code>UTF8::is_ascii('白'); // false</code>
     *
     * @param string $str

The string to check.

* * @psalm-pure * * @return bool *

* true if it is ASCII
* false otherwise *

*/
    public static function is_ascii(string $str): bool
    {
        // The check itself is done by ASCII::is_ascii().
        $is_ascii = ASCII::is_ascii($str);

        return $is_ascii;
    }

    /**
     * Returns true if the string is base64 encoded, false otherwise.
     *
     * EXAMPLE: <code>UTF8::is_base64('4KSu4KWL4KSo4KS/4KSa'); // true</code>
     *
     * @param string|null $str

The input string.

* @param bool $empty_string_is_valid [optional]

Is an empty string valid base64 or not?

* * @psalm-pure * * @return bool *

Whether or not $str is base64 encoded.

*/
    public static function is_base64($str, bool $empty_string_is_valid = false): bool
    {
        // Only strings can be base64 (null and other types are rejected).
        if (!\is_string($str)) {
            return false;
        }

        if ($str === '' && !$empty_string_is_valid) {
            return false;
        }

        // Strict decode, then re-encode: only a lossless round-trip counts as base64.
        $decoded = \base64_decode($str, true);
        if ($decoded === false) {
            return false;
        }

        return \base64_encode($decoded) === $str;
    }

    /**
     * Check if the input is binary... (it looks like a hack).
     *
     * EXAMPLE: <code>UTF8::is_binary(01); // true</code>
     *
     * @param int|string $input
     * @param bool       $strict
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_binary($input, bool $strict = false): bool
    {
        $input = (string) $input;
        if ($input === '') {
            return false;
        }

        // A string made only of "0" and "1" is treated as binary.
        if (\preg_match('~^[01]+$~', $input)) {
            return true;
        }

        $file_type = self::get_file_type($input);
        if ($file_type['type'] === 'binary') {
            return true;
        }

        // More than 25% NUL bytes is treated as binary.
        $byte_count = \strlen($input);
        $null_byte_count = \substr_count($input, "\x0", 0, $byte_count);
        if (($null_byte_count / $byte_count) > 0.25) {
            return true;
        }

        if (!$strict) {
            return false;
        }

        if (self::$SUPPORT['finfo'] === false) {
            throw new \RuntimeException('ext-fileinfo: is not installed');
        }

        /**
         * @noinspection PhpComposerExtensionStubsInspection
         * @psalm-suppress ImpureMethodCall - it will return the same result for the same file ...
         */
        $finfo_encoding = (new \finfo(\FILEINFO_MIME_ENCODING))->buffer($input);

        return $finfo_encoding === 'binary';
    }

    /**
     * Check if the file is binary.
     *
     * EXAMPLE: <code>UTF8::is_binary_file('./utf32.txt'); // true</code>
     *
     * @param string $file
     *
     * @return bool
     */
    public static function is_binary_file($file): bool
    {
        $handle = \fopen($file, 'rb');
        if (!\is_resource($handle)) {
            // The file could not be opened: there is nothing to inspect.
            return false;
        }

        // Only the first 512 bytes are inspected.
        $block = \fread($handle, 512);
        \fclose($handle);

        if ($block === '' || $block === false) {
            return false;
        }

        return self::is_binary($block, true);
    }

    /**
     * Returns true if the string contains only whitespace chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only whitespace characters.

*/
    public static function is_blank(string $str): bool
    {
        // Without mbstring, delegate to self::str_matches_pattern() with the same pattern.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:space:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:space:]]*$', $str);
    }

    /**
     * Checks if the given string is equal to any "Byte Order Mark".
     *
     * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
     *
     * EXAMPLE: <code>UTF8::is_bom("\xef\xbb\xbf"); // true</code>
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

true if $str is a Byte Order Mark, false otherwise.

*/
    public static function is_bom($str): bool
    {
        // self::$BOM maps each BOM byte sequence (key) to its byte length (value),
        // so a key lookup is enough. The former by-reference foreach turned the
        // elements of the static array into references and left one dangling
        // after the loop, although the value was never used.
        // The is_string() guard keeps the original strict (===) comparison semantics.
        return \is_string($str) && isset(self::$BOM[$str]);
    }

    /**
     * Determine whether the string is considered to be empty.
     *
     * A variable is considered empty if it does not exist or if its value equals FALSE.
     * empty() does not generate a warning if the variable does not exist.
     *
     * @param array|float|int|string $str
     *
     * @psalm-pure
     *
     * @return bool
     *

Whether or not $str is empty().

*/
    public static function is_empty($str): bool
    {
        // Plain empty() semantics: '', '0', 0, 0.0, [], null and false are all "empty".
        $is_empty = empty($str);

        return $is_empty;
    }

    /**
     * Returns true if the string contains only hexadecimal chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only hexadecimal chars.

*/
    public static function is_hexadecimal(string $str): bool
    {
        // Same pattern for both engines; mbstring is preferred when available.
        return self::$SUPPORT['mbstring'] === true
            ? /** @noinspection PhpComposerExtensionStubsInspection */ \mb_ereg_match('^[[:xdigit:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:xdigit:]]*$');
    }

    /**
     * Check if the string contains any HTML tags.
     *
     * EXAMPLE: <code>UTF8::is_html('<b>lall</b>'); // true</code>
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains html elements.

*/
    public static function is_html(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // init
        $matches = [];

        $str = self::emoji_encode($str); // hack for emoji support :/
        // Looks for one opening, closing or self-closing tag, optionally with attributes
        // (double-quoted, single-quoted or unquoted values).
        \preg_match("/<\\/?\\w+(?:(?:\\s+\\w+(?:\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)*\\s*|\\s*)\\/?>/u", $str, $matches);

        // preg_match() only fills $matches when a tag was found.
        return $matches !== [];
    }

    /**
     * Check if $url is a correct url.
     *
     * Only "http://" and "https://" URLs (case-insensitive prefix) are accepted.
     *
     * @param string $url
     * @param bool   $disallow_localhost <p>Reject URLs that start with localhost / 127.0.0.1 / ::1
     *                                   or that contain ".localhost".</p>
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_url(string $url, bool $disallow_localhost = false): bool
    {
        if ($url === '') {
            return false;
        }

        // WARNING: keep this as hack protection
        if (!self::str_istarts_with_any($url, ['http://', 'https://'])) {
            return false;
        }

        // e.g. -> the server itself connect to "https://foo.localhost/phpmyadmin/...
        if ($disallow_localhost) {
            if (self::str_istarts_with_any(
                $url,
                [
                    'http://localhost',
                    'https://localhost',
                    'http://127.0.0.1',
                    'https://127.0.0.1',
                    'http://::1',
                    'https://::1',
                ]
            )) {
                return false;
            }

            // Also reject any URL that contains ".localhost" after the scheme.
            $regex = '/^(?:http(?:s)?:\/\/).*?(?:\.localhost)/iu';
            /** @noinspection BypassedUrlValidationInspection */
            if (\preg_match($regex, $url)) {
                return false;
            }
        }

        // INFO: this is needed for e.g. "http://müller.de/" (internationalized domain names) and non ASCII-parameters
        /** @noinspection SuspiciousAssignmentsInspection - false-positive - https://github.com/kalessil/phpinspectionsea/issues/1500 */
        $regex = '/^(?:http(?:s)?:\\/\\/)(?:[\p{L}0-9][\p{L}0-9_-]*(?:\\.[\p{L}0-9][\p{L}0-9_-]*))(?:\\d+)?(?:\\/\\.*)?/iu';
        /** @noinspection BypassedUrlValidationInspection */
        if (\preg_match($regex, $url)) {
            return true;
        }

        // Last resort: let PHP's own URL filter decide.
        /** @noinspection BypassedUrlValidationInspection */
        return \filter_var($url, \FILTER_VALIDATE_URL) !== false;
    }

    /**
     * Try to check if "$str" is a JSON-string.
     *
     * EXAMPLE: <code>UTF8::is_json('{"array":[1,"¥","ä"]}'); // true</code>
     *
     * @param string $str

The input string.

* @param bool $only_array_or_object_results_are_valid [optional]

Only array and objects are valid json * results.

* * @return bool *

Whether or not the $str is in JSON format.

*/
    public static function is_json(string $str, bool $only_array_or_object_results_are_valid = true): bool
    {
        if ($str === '') {
            return false;
        }

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        $decoded = self::json_decode($str);

        // A null result is only acceptable when the input is the literal "null" (any case).
        if ($decoded === null && \strtoupper($str) !== 'NULL') {
            return false;
        }

        // Optionally reject scalar results.
        if (
            $only_array_or_object_results_are_valid
            &&
            !(\is_object($decoded) || \is_array($decoded))
        ) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_last_error() === \JSON_ERROR_NONE;
    }

    /**
     * Returns true if the string contains only lower case chars, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only lowercase chars.

*/
    public static function is_lowercase(string $str): bool
    {
        // Without mbstring, delegate to self::str_matches_pattern() with the same pattern.
        if (self::$SUPPORT['mbstring'] !== true) {
            return self::str_matches_pattern($str, '^[[:lower:]]*$');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \mb_ereg_match('^[[:lower:]]*$', $str);
    }

    /**
     * Returns true if the string is serialized, false otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str is serialized.

*/
    public static function is_serialized(string $str): bool
    {
        if ($str === '') {
            return false;
        }

        // "b:0;" is a serialized FALSE: unserialize() returns false for it,
        // which would be indistinguishable from a failure below.
        if ($str === 'b:0;') {
            return true;
        }

        // SECURITY: $str may be untrusted. We only need to know whether the payload
        // is well-formed, so no class is ever instantiated ("allowed_classes" => false);
        // serialized objects are unserialized as __PHP_Incomplete_Class.
        // This prevents object injection through this check.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        /** @noinspection UnserializeExploitsInspection */
        return @\unserialize($str, ['allowed_classes' => false]) !== false;
    }

    /**
     * Returns true if the string contains only upper case chars, false
     * otherwise.
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

Whether or not $str contains only upper case characters.

*/
    public static function is_uppercase(string $str): bool
    {
        // Same pattern for both engines; mbstring is preferred when available.
        return self::$SUPPORT['mbstring'] === true
            ? /** @noinspection PhpComposerExtensionStubsInspection */ \mb_ereg_match('^[[:upper:]]*$', $str)
            : self::str_matches_pattern($str, '^[[:upper:]]*$');
    }

    /**
     * Check if the string is UTF-16.
     *
     * EXAMPLE:


     * UTF8::is_utf16(file_get_contents('utf-16-le.txt')); // 1
     * //
     * UTF8::is_utf16(file_get_contents('utf-16-be.txt')); // 2
     * //
     * UTF8::is_utf16(file_get_contents('utf-8.txt')); // false
     *

* * @param string $str

The input string.

* @param bool $check_if_string_is_binary * * @psalm-pure * * @return false|int * false if it's not UTF-16,
* 1 for UTF-16LE,
* 2 for UTF-16BE
     */
    public static function is_utf16($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // By default only input that is_binary() (strict mode) reports as binary is examined.
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf16() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score for "little endian": interpret the input as UTF-16LE and make sure
        // the conversion survives a round-trip (UTF-8 -> UTF-16LE -> UTF-8).
        $maybe_utf16le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    // computed lazily and shared with the big endian pass below
                    $str_chars = self::count_chars($str, true, false);
                }
                // One point for every key of count_chars($test3) that is found among the values of $str_chars.
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16le;
                    }
                }
                unset($test3charEmpty);
            }
        }

        // Score for "big endian": same procedure with UTF-16BE.
        $maybe_utf16be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-16BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-16BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-16BE');
            if ($test3 === $test) {
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf16be;
                    }
                }
                unset($test3charEmpty);
            }
        }

        // The higher score wins; equal scores (including 0 / 0) mean "not UTF-16".
        if ($maybe_utf16be !== $maybe_utf16le) {
            if ($maybe_utf16le > $maybe_utf16be) {
                return 1;
            }

            return 2;
        }

        return false;
    }

    /**
     * Check if the string is UTF-32.
     *
     * EXAMPLE:


     * UTF8::is_utf32(file_get_contents('utf-32-le.txt')); // 1
     * //
     * UTF8::is_utf32(file_get_contents('utf-32-be.txt')); // 2
     * //
     * UTF8::is_utf32(file_get_contents('utf-8.txt')); // false
     *

* * @param string $str

The input string.

* @param bool $check_if_string_is_binary * * @psalm-pure * * @return false|int * false if it's not UTF-32,
* 1 for UTF-32LE,
* 2 for UTF-32BE
     */
    public static function is_utf32($str, bool $check_if_string_is_binary = true)
    {
        // init
        $str = (string) $str;
        $str_chars = [];

        // By default only input that is_binary() (strict mode) reports as binary is examined.
        if (
            $check_if_string_is_binary
            &&
            !self::is_binary($str, true)
        ) {
            return false;
        }

        if (self::$SUPPORT['mbstring'] === false) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::is_utf32() without mbstring may did not work correctly', \E_USER_WARNING);
        }

        $str = self::remove_bom($str);

        // Score for "little endian": interpret the input as UTF-32LE and make sure
        // the conversion survives a round-trip (UTF-8 -> UTF-32LE -> UTF-8).
        $maybe_utf32le = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32LE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32LE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32LE');
            if ($test3 === $test) {
                /**
                 * @psalm-suppress RedundantCondition
                 */
                if ($str_chars === []) {
                    // computed lazily and shared with the big endian pass below
                    $str_chars = self::count_chars($str, true, false);
                }
                // One point for every key of count_chars($test3) that is found among the values of $str_chars.
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32le;
                    }
                }
                unset($test3charEmpty);
            }
        }

        // Score for "big endian": same procedure with UTF-32BE.
        $maybe_utf32be = 0;
        $test = \mb_convert_encoding($str, 'UTF-8', 'UTF-32BE');
        if ($test) {
            $test2 = \mb_convert_encoding($test, 'UTF-32BE', 'UTF-8');
            $test3 = \mb_convert_encoding($test2, 'UTF-8', 'UTF-32BE');
            if ($test3 === $test) {
                if ($str_chars === []) {
                    $str_chars = self::count_chars($str, true, false);
                }
                foreach (self::count_chars($test3) as $test3char => &$test3charEmpty) {
                    if (\in_array($test3char, $str_chars, true)) {
                        ++$maybe_utf32be;
                    }
                }
                unset($test3charEmpty);
            }
        }

        // The higher score wins; equal scores (including 0 / 0) mean "not UTF-32".
        if ($maybe_utf32be !== $maybe_utf32le) {
            if ($maybe_utf32le > $maybe_utf32be) {
                return 1;
            }

            return 2;
        }

        return false;
    }

    /**
     * Checks whether the passed input contains only byte sequences that appear valid UTF-8.
     *
     * EXAMPLE:


     * UTF8::is_utf8(['Iñtërnâtiônàlizætiøn', 'foo']); // true
     * //
     * UTF8::is_utf8(["Iñtërnâtiônàlizætiøn\xA0\xA1", 'bar']); // false
     *

* * @param int|string|string[]|null $str

The input to be checked.

* @param bool $strict

Check also if the string is not UTF-16 or UTF-32.

*
     * @psalm-pure
     *
     * @return bool
     */
    public static function is_utf8($str, bool $strict = false): bool
    {
        // Scalars (and null) are cast to string and checked directly.
        if (!\is_array($str)) {
            return self::is_utf8_string((string) $str, $strict);
        }

        // Arrays are valid only if every element is valid (checked recursively).
        // Iterate by value: nothing is modified, so a by-reference loop would only
        // force a copy of the array and leave a dangling reference behind.
        foreach ($str as $v) {
            if (!self::is_utf8($v, $strict)) {
                return false;
            }
        }

        return true;
    }

    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
* Decodes a JSON string * * EXAMPLE: UTF8::json_decode('[1,"\u00a5","\u00e4"]'); // array(1, '¥', 'ä') * * @see http://php.net/manual/en/function.json-decode.php * * @param string $json

* The json string being decoded. *

* This function only works with UTF-8 encoded strings. *

PHP implements a superset of * JSON - it will also encode and decode scalar types and NULL. The JSON standard * only supports these values when they are nested inside an array or an object. *

* @param bool $assoc [optional]

* When TRUE, returned objects will be converted into * associative arrays. *

* @param int $depth [optional]

* User specified recursion depth. *

* @param int $options [optional]

* Bitmask of JSON decode options. Currently only * JSON_BIGINT_AS_STRING * is supported (default is to cast large integers as floats) *

* * @psalm-pure * * @return mixed *

The value encoded in json in appropriate PHP type. Values true, false and * null (case-insensitive) are returned as TRUE, FALSE and NULL respectively. * NULL is returned if the json cannot be decoded or if the encoded data * is deeper than the recursion limit.

*/
    public static function json_decode(
        string $json,
        bool $assoc = false,
        int $depth = 512,
        int $options = 0
    ) {
        // The input is passed through self::filter() before decoding.
        $filtered_json = self::filter($json);

        if (self::$SUPPORT['json'] === false) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_decode($filtered_json, $assoc, $depth, $options);
    }

    /**
     * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
* Returns the JSON representation of a value. * * EXAMPLE: UTF8::json_encode(array(1, '¥', 'ä')); // '[1,"\u00a5","\u00e4"]' * * @see http://php.net/manual/en/function.json-encode.php * * @param mixed $value

* The value being encoded. Can be any type except * a resource. *

* All string data must be UTF-8 encoded. *

PHP implements a superset of * JSON - it will also encode and decode scalar types and NULL. The JSON standard * only supports these values when they are nested inside an array or an object. *

* @param int $options [optional]

* Bitmask consisting of JSON_HEX_QUOT, * JSON_HEX_TAG, * JSON_HEX_AMP, * JSON_HEX_APOS, * JSON_NUMERIC_CHECK, * JSON_PRETTY_PRINT, * JSON_UNESCAPED_SLASHES, * JSON_FORCE_OBJECT, * JSON_UNESCAPED_UNICODE. The behaviour of these * constants is described on * the JSON constants page. *

* @param int $depth [optional]

* Set the maximum depth. Must be greater than zero. *

* * @psalm-pure * * @return false|string * A JSON encoded string on success or
* FALSE on failure
     */
    public static function json_encode($value, int $options = 0, int $depth = 512)
    {
        // The value is passed through self::filter() before the extension check (order kept on purpose).
        $filtered_value = self::filter($value);

        $json_extension_available = self::$SUPPORT['json'] !== false;
        if (!$json_extension_available) {
            throw new \RuntimeException('ext-json: is not installed');
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        return \json_encode($filtered_value, $options, $depth);
    }

    /**
     * Checks whether JSON is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function json_loaded(): bool
    {
        // The extension counts as loaded when its decoder function is defined.
        $decoder_is_defined = \function_exists('json_decode');

        return $decoder_is_defined;
    }

    /**
     * Makes string's first char lowercase.
     *
     * EXAMPLE: UTF8::lcfirst('ÑTËRNÂTIÔNÀLIZÆTIØN'); // ñTËRNÂTIÔNÀLIZÆTIØN
     *
     * @param string $str

The input string

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

*
     * @psalm-pure
     *
     * @return string the resulting string
     */
    public static function lcfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // Non-UTF-8 input: normalize the encoding name and go through the class' own helpers.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $tail = (string) self::substr($str, 1, null, $encoding);
            $head = self::strtolower(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );

            return $head . $tail;
        }

        $tail = (string) \mb_substr($str, 1);
        $first_char = (string) \mb_substr($str, 0, 1);

        // Plain mbstring is only used when no language / length-keeping special case was requested.
        if ($lang === null && !$try_to_keep_the_string_length) {
            return \mb_strtolower($first_char) . $tail;
        }

        $head = self::strtolower(
            $first_char,
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );

        return $head . $tail;
    }

    /**
     * alias for "UTF8::lcfirst()"
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated

please use "UTF8::lcfirst()"

*/
    public static function lcword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: forwards every argument unchanged to lcfirst().
        $lowercased = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $lowercased;
    }

    /**
     * Lowercase for all words in the string.
     *
     * @param string $str

The input string.

* @param string[] $exceptions [optional]

Exclusion for some words.

* @param string $char_list [optional]

Additional chars that contains to words and do * not start a new word.

* @param string $encoding [optional]

Set the charset.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

*
     * @psalm-pure
     *
     * @return string
     */
    public static function lcwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Compare against '' explicitly: a loose check would also treat the string "0" as empty.
        if ($str === '') {
            return '';
        }

        $words = self::str_to_words($str, $char_list);

        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: nothing is written back, and a by-reference loop would leave a dangling reference.
        foreach ($words as $word) {
            // Skip only truly empty chunks, so that a "0" chunk is kept in the output.
            if ((string) $word === '') {
                continue;
            }

            if ($use_exceptions && \in_array($word, $exceptions, true)) {
                // Words listed in $exceptions are copied over untouched.
                $words_str .= $word;

                continue;
            }

            $words_str .= self::lcfirst($word, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        return $words_str;
    }

    /**
     * alias for "UTF8::lcfirst()"
     *
     * @param string      $str
     * @param string      $encoding
     * @param bool        $clean_utf8
     * @param string|null $lang
     * @param bool        $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::lcfirst()
     * @deprecated

please use "UTF8::lcfirst()"

*/
    public static function lowerCaseFirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // Deprecated alias: forwards every argument unchanged to lcfirst().
        $lowercased = self::lcfirst($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);

        return $lowercased;
    }

    /**
     * Strip whitespace or other characters from the beginning of a UTF-8 string.
     *
     * EXAMPLE: UTF8::ltrim('　中文空白　 '); // '中文空白　 '
     *
     * @param string $str

The string to be trimmed

* @param string|null $chars

Optional characters to be stripped

*
     * @psalm-pure
     *
     * @return string the string with unwanted characters stripped from the left
     */
    public static function ltrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid class "[]" and the failed
        // replace would be cast to ''; there is simply nothing to strip in that case.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
                $pattern = "^[{$chars}]+";
            } else {
                $pattern = '^[\\s]+';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: same pattern, but quoted for the "/" delimiter used by regex_replace().
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+";
        } else {
            $pattern = '^[\\s]+';
        }

        return self::regex_replace($str, $pattern, '');
    }

    /**
     * Returns the UTF-8 character with the maximum code point in the given data.
     *
     * EXAMPLE: UTF8::max('abc-äöü-中文空白'); // '空'
     *
     * @param array|string $arg

A UTF-8 encoded string or an array of such strings.

*
     * @psalm-pure
     *
     * @return string|null the character with the highest code point than others, returns null on failure or empty input
     */
    public static function max($arg)
    {
        // Arrays are flattened into one string so that a single code point list is produced.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_point_list = self::codepoints($haystack);

        return $code_point_list === []
            ? null
            : self::chr((int) \max($code_point_list));
    }

    /**
     * Calculates and returns the maximum number of bytes taken by any
     * UTF-8 encoded character in the given string.
     *
     * EXAMPLE: UTF8::max_chr_width('Intërnâtiônàlizætiøn'); // 2
     *
     * @param string $str

The original Unicode string.

* * @psalm-pure * * @return int *

Max byte lengths of the given chars.

*/
    public static function max_chr_width(string $str): int
    {
        $byte_sizes = self::chr_size_list($str);

        // An empty size list yields 0 instead of calling max() on an empty array.
        return $byte_sizes === [] ? 0 : (int) \max($byte_sizes);
    }

    /**
     * Checks whether mbstring is available on the server.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if available, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function mbstring_loaded(): bool
    {
        // Asks the engine directly whether the "mbstring" extension is loaded.
        $is_loaded = \extension_loaded('mbstring');

        return $is_loaded;
    }

    /**
     * Returns the UTF-8 character with the minimum code point in the given data.
     *
     * EXAMPLE: UTF8::min('abc-äöü-中文空白'); // '-'
     *
     * @param string|string[] $arg A UTF-8 encoded string or an array of such strings.
     *
     * @psalm-pure
     *
     * @return string|null
     *

The character with the lowest code point than others, returns null on failure or empty input.

*/
    public static function min($arg)
    {
        // Arrays are flattened into one string so that a single code point list is produced.
        $haystack = \is_array($arg) ? \implode('', $arg) : $arg;

        $code_point_list = self::codepoints($haystack);

        return $code_point_list === []
            ? null
            : self::chr((int) \min($code_point_list));
    }

    /**
     * alias for "UTF8::normalize_encoding()"
     *
     * @param mixed $encoding
     * @param mixed $fallback
     *
     * @psalm-pure
     *
     * @return mixed
     *
     * @see        UTF8::normalize_encoding()
     * @deprecated

please use "UTF8::normalize_encoding()"

*/
    public static function normalizeEncoding($encoding, $fallback = '')
    {
        // Deprecated alias: forwards both arguments unchanged to normalize_encoding().
        $normalized = self::normalize_encoding($encoding, $fallback);

        return $normalized;
    }

    /**
     * Normalize the encoding-"name" input.
     *
     * EXAMPLE: UTF8::normalize_encoding('UTF8'); // 'UTF-8'
     *
     * @param mixed $encoding

e.g.: ISO, UTF8, WINDOWS-1251 etc.

* @param mixed $fallback

e.g.: UTF-8

* * @psalm-pure * * @return mixed|string *

e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.
Will return an empty string as fallback (by default)

*
     * @template TNormalizeEncodingFallback
     * @phpstan-param string|TNormalizeEncodingFallback $fallback
     * @phpstan-return string|TNormalizeEncodingFallback
     */
    public static function normalize_encoding($encoding, $fallback = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array
         */
        static $STATIC_NORMALIZE_ENCODING_CACHE = [];

        // init
        $encoding = (string) $encoding;

        if (!$encoding) {
            return $fallback;
        }

        // Fast path for the most common spellings (exact, case-sensitive match).
        $well_known = [
            'UTF-8'         => 'UTF-8',
            'UTF8'          => 'UTF-8',
            '8BIT'          => 'CP850',
            'BINARY'        => 'CP850',
            'HTML'          => 'HTML-ENTITIES',
            'HTML-ENTITIES' => 'HTML-ENTITIES',
            'ISO'           => 'ISO-8859-1',
            'ISO-8859-1'    => 'ISO-8859-1',
        ];
        if (isset($well_known[$encoding])) {
            return $well_known[$encoding];
        }

        // only a fallback, for non "strict_types" usage ...
        if (\in_array($encoding, ['1', '0'], true)) {
            return $fallback;
        }

        if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
            return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
        }

        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // Names already present in the loaded encoding list are returned (and cached) as they are.
        if (\in_array($encoding, self::$ENCODINGS, true)) {
            $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $encoding;

            return $encoding;
        }

        $encoding_original = $encoding;
        $encoding = \strtoupper($encoding);
        // Lookup key: the upper-cased name with everything but letters and digits removed.
        $encoding_upper_helper = (string) \preg_replace('/[^a-zA-Z0-9]/u', '', $encoding);

        $equivalences = [
            'ISO8859'     => 'ISO-8859-1',
            'ISO88591'    => 'ISO-8859-1',
            'ISO'         => 'ISO-8859-1',
            'LATIN'       => 'ISO-8859-1',
            'LATIN1'      => 'ISO-8859-1', // Western European
            'ISO88592'    => 'ISO-8859-2',
            'LATIN2'      => 'ISO-8859-2', // Central European
            'ISO88593'    => 'ISO-8859-3',
            'LATIN3'      => 'ISO-8859-3', // Southern European
            'ISO88594'    => 'ISO-8859-4',
            'LATIN4'      => 'ISO-8859-4', // Northern European
            'ISO88595'    => 'ISO-8859-5', // Cyrillic
            'ISO88596'    => 'ISO-8859-6', // Arabic
            'ISO88597'    => 'ISO-8859-7', // Greek
            'ISO88598'    => 'ISO-8859-8', // Hebrew
            'ISO88599'    => 'ISO-8859-9',
            'LATIN5'      => 'ISO-8859-9', // Turkish
            'ISO885911'   => 'ISO-8859-11',
            'TIS620'      => 'ISO-8859-11', // Thai
            'ISO885910'   => 'ISO-8859-10',
            'LATIN6'      => 'ISO-8859-10', // Nordic
            'ISO885913'   => 'ISO-8859-13',
            'LATIN7'      => 'ISO-8859-13', // Baltic
            'ISO885914'   => 'ISO-8859-14',
            'LATIN8'      => 'ISO-8859-14', // Celtic
            'ISO885915'   => 'ISO-8859-15',
            'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
            'ISO885916'   => 'ISO-8859-16',
            'LATIN10'     => 'ISO-8859-16', // Southeast European
            'CP1250'      => 'WINDOWS-1250',
            'WIN1250'     => 'WINDOWS-1250',
            'WINDOWS1250' => 'WINDOWS-1250',
            'CP1251'      => 'WINDOWS-1251',
            'WIN1251'     => 'WINDOWS-1251',
            'WINDOWS1251' => 'WINDOWS-1251',
            'CP1252'      => 'WINDOWS-1252',
            'WIN1252'     => 'WINDOWS-1252',
            'WINDOWS1252' => 'WINDOWS-1252',
            'CP1253'      => 'WINDOWS-1253',
            'WIN1253'     => 'WINDOWS-1253',
            'WINDOWS1253' => 'WINDOWS-1253',
            'CP1254'      => 'WINDOWS-1254',
            'WIN1254'     => 'WINDOWS-1254',
            'WINDOWS1254' => 'WINDOWS-1254',
            'CP1255'      => 'WINDOWS-1255',
            'WIN1255'     => 'WINDOWS-1255',
            'WINDOWS1255' => 'WINDOWS-1255',
            'CP1256'      => 'WINDOWS-1256',
            'WIN1256'     => 'WINDOWS-1256',
            'WINDOWS1256' => 'WINDOWS-1256',
            'CP1257'      => 'WINDOWS-1257',
            'WIN1257'     => 'WINDOWS-1257',
            'WINDOWS1257' => 'WINDOWS-1257',
            'CP1258'      => 'WINDOWS-1258',
            'WIN1258'     => 'WINDOWS-1258',
            'WINDOWS1258' => 'WINDOWS-1258',
            'UTF16'       => 'UTF-16',
            'UTF32'       => 'UTF-32',
            'UTF8'        => 'UTF-8',
            'UTF'         => 'UTF-8',
            'UTF7'        => 'UTF-7',
            '8BIT'        => 'CP850',
            'BINARY'      => 'CP850',
        ];

        // Unknown names fall through as their upper-cased form.
        $encoding = $equivalences[$encoding_upper_helper] ?? $encoding;

        // The cache is keyed by the spelling the caller actually passed in.
        $STATIC_NORMALIZE_ENCODING_CACHE[$encoding_original] = $encoding;

        return $encoding;
    }

    /**
     * Standardize line ending to unix-like.
     *
     * @param string $str

The input string.

* @param string|string[] $replacer

The replacer char e.g. "\n" (Linux) or "\r\n" (Windows). You can also use \PHP_EOL * here.

* * @psalm-pure * * @return string *

A string with normalized line ending.

*/
    public static function normalize_line_ending(string $str, $replacer = "\n"): string
    {
        // str_replace() with a list of needles works sequentially on its own output, so a
        // replacer that itself contains "\r" or "\n" (e.g. "\r\n") was replaced again and
        // again ("\r\n" became "\r\r\n\r\n"). strtr() with a map matches the longest needle
        // first and never rescans text it has already replaced.
        if (\is_array($replacer)) {
            // Same pairing as str_replace(): n-th needle => n-th replacement, '' when missing.
            $replacements = \array_values($replacer);
            $map = [
                "\r\n" => (string) ($replacements[0] ?? ''),
                "\r"   => (string) ($replacements[1] ?? ''),
                "\n"   => (string) ($replacements[2] ?? ''),
            ];
        } else {
            $replacement = (string) $replacer;
            $map = [
                "\r\n" => $replacement,
                "\r"   => $replacement,
                "\n"   => $replacement,
            ];
        }

        return \strtr($str, $map);
    }

    /**
     * Normalize some MS Word special characters.
     *
     * EXAMPLE: UTF8::normalize_msword('„Abcdef…”'); // '"Abcdef..."'
     *
     * @param string $str

The string to be normalized.

* * @psalm-pure * * @return string *

A string with normalized characters for commonly used chars in Word documents.

*/
    public static function normalize_msword(string $str): string
    {
        // Thin wrapper: the actual work is done by ASCII::normalize_msword().
        $normalized = ASCII::normalize_msword($str);

        return $normalized;
    }

    /**
     * Normalize the whitespace.
     *
     * EXAMPLE: UTF8::normalize_whitespace("abc-\xc2\xa0-öäü-\xe2\x80\xaf-\xE2\x80\xAC", true); // "abc-\xc2\xa0-öäü- -"
     *
     * @param string $str

The string to be normalized.

* @param bool $keep_non_breaking_space [optional]

Set to true, to keep non-breaking-spaces.

* @param bool $keep_bidi_unicode_controls [optional]

Set to true, to keep non-printable (for the web) * bidirectional text chars.

* @param bool $normalize_control_characters [optional]

Set to true, to convert e.g. LINE-, PARAGRAPH-SEPARATOR with "\n" and LINE TABULATION with "\t".

* * @psalm-pure * * @return string *

A string with normalized whitespace.

*/
    public static function normalize_whitespace(
        string $str,
        bool $keep_non_breaking_space = false,
        bool $keep_bidi_unicode_controls = false,
        bool $normalize_control_characters = false
    ): string {
        // Thin wrapper: all flags are handed over unchanged to ASCII::normalize_whitespace().
        $normalized = ASCII::normalize_whitespace(
            $str,
            $keep_non_breaking_space,
            $keep_bidi_unicode_controls,
            $normalize_control_characters
        );

        return $normalized;
    }

    /**
     * Calculates Unicode code point of the given UTF-8 encoded character.
     *
     * INFO: opposite to UTF8::chr()
     *
     * EXAMPLE: UTF8::ord('☃'); // 0x2603
     *
     * @param string $chr

The character of which to calculate code point.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int *

Unicode code point of the given character,
* 0 on invalid UTF-8 byte sequence

*/
    public static function ord($chr, string $encoding = 'UTF-8'): int
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array
         */
        static $CHAR_CACHE = [];

        // init
        $chr = (string) $chr;

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Results are memoized per (character, encoding) pair.
        $cache_key = $chr . '_' . $encoding;
        if (isset($CHAR_CACHE[$cache_key])) {
            return $CHAR_CACHE[$cache_key];
        }

        // check again, if it's still not UTF-8
        if ($encoding !== 'UTF-8') {
            $chr = self::encode($encoding, $chr);
        }

        // First choice: the lazily loaded lookup table.
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (isset(self::$ORD[$chr])) {
            return $CHAR_CACHE[$cache_key] = self::$ORD[$chr];
        }

        //
        // fallback via "IntlChar"
        //

        if (self::$SUPPORT['intlChar'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $intl_code = \IntlChar::ord($chr);
            if ($intl_code) {
                return $CHAR_CACHE[$cache_key] = $intl_code;
            }
        }

        //
        // fallback via vanilla php
        //

        // unpack('C*') yields a 1-based list of the (at most four) leading bytes.
        $bytes = \unpack('C*', (string) \substr($chr, 0, 4));
        /** @noinspection OffsetOperationsInspection */
        $lead_byte = $bytes ? $bytes[1] : 0;

        /** @noinspection OffsetOperationsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        if ($lead_byte >= 0xF0 && isset($bytes[4])) {
            // four-byte sequence
            $code_point = (int) ((($lead_byte - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80);
        } elseif ($lead_byte >= 0xE0 && isset($bytes[3])) {
            // three-byte sequence
            $code_point = (int) ((($lead_byte - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80);
        } elseif ($lead_byte >= 0xC0 && isset($bytes[2])) {
            // two-byte sequence
            $code_point = (int) ((($lead_byte - 0xC0) << 6) + $bytes[2] - 0x80);
        } else {
            // single byte (or nothing to decode at all)
            $code_point = $lead_byte;
        }

        return $CHAR_CACHE[$cache_key] = $code_point;
    }

    /**
     * Parses the string into an array (into the second parameter).
     *
     * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
     *          if the second parameter is not set!
     *
     * EXAMPLE:


     * UTF8::parse_str('Iñtërnâtiônéàlizætiøn=測試&arr[]=foo+測試&arr[]=ການທົດສອບ', $array);
     * echo $array['Iñtërnâtiônéàlizætiøn']; // '測試'
     *

* * @see http://php.net/manual/en/function.parse-str.php * * @param string $str

The input string.

* @param array $result

The result will be returned into this reference parameter.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return bool *

Will return false if php can't parse the string and we haven't any $result.

*/
    public static function parse_str(string $str, &$result, bool $clean_utf8 = false): bool
    {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        if (self::$SUPPORT['mbstring'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - we use the second parameter, so we don't change variables by magic
             */
            \parse_str($str, $result);

            return $result !== [];
        }

        // With mbstring available the multi-byte aware parser is used; an empty result counts as failure.
        $parsed = \mb_parse_str($str, $result);

        return $parsed !== false && $result !== [];
    }

    /**
     * Checks if \u modifier is available that enables Unicode support in PCRE.
     *
     * @psalm-pure
     *
     * @return bool
     *

* true if support is available,
* false otherwise *

*/
    public static function pcre_utf8_support(): bool
    {
        // Probe with an empty pattern that carries the "u" modifier; warnings are silenced on purpose.
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $probe_result = @\preg_match('//u', '');

        return (bool) $probe_result;
    }

    /**
     * Create an array containing a range of UTF-8 characters.
     *
     * EXAMPLE: UTF8::range('κ', 'ζ'); // array('κ', 'ι', 'θ', 'η', 'ζ',)
     *
     * @param int|string $var1

Numeric or hexadecimal code points, or a UTF-8 character to start from.

* @param int|string $var2

Numeric or hexadecimal code points, or a UTF-8 character to end at.

* @param bool $use_ctype

use ctype to detect numeric and hexadecimal, otherwise we will use a simple * "is_numeric"

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param float|int $step [optional]

* If a step value is given, it will be used as the * increment between elements in the sequence. step * should be given as a positive number. If not specified, * step will default to 1. *

*
     * @psalm-pure
     *
     * @return string[]
     */
    public static function range(
        $var1,
        $var2,
        bool $use_ctype = true,
        string $encoding = 'UTF-8',
        $step = 1
    ): array {
        if (!$var1 || !$var2) {
            return [];
        }

        if ($step !== 1) {
            /**
             * @psalm-suppress RedundantConditionGivenDocblockType
             * @psalm-suppress DocblockTypeContradiction
             */
            if (!\is_numeric($step)) {
                throw new \InvalidArgumentException('$step need to be a number, type given: ' . \gettype($step));
            }

            /**
             * @psalm-suppress RedundantConditionGivenDocblockType - false-positive from psalm?
             */
            if ($step <= 0) {
                throw new \InvalidArgumentException('$step need to be a positive number, given: ' . $step);
            }
        }

        if ($use_ctype && self::$SUPPORT['ctype'] === false) {
            throw new \RuntimeException('ext-ctype: is not installed');
        }

        $is_digit = false;
        $is_xdigit = false;

        // Both bounds are int|string. ctype_*() treats small integers as ASCII codes (and
        // non-string arguments are deprecated since PHP 8.1), so every ctype call gets an
        // explicit string - the hexadecimal check used to receive the raw values.
        /** @noinspection PhpComposerExtensionStubsInspection */
        if ($use_ctype && \ctype_digit((string) $var1) && \ctype_digit((string) $var2)) {
            $is_digit = true;
            $start = (int) $var1;
        } /** @noinspection PhpComposerExtensionStubsInspection */ elseif ($use_ctype && \ctype_xdigit((string) $var1) && \ctype_xdigit((string) $var2)) {
            $is_xdigit = true;
            $start = (int) self::hex_to_int((string) $var1);
        } elseif (!$use_ctype && \is_numeric($var1)) {
            $start = (int) $var1;
        } else {
            // Anything else is treated as a character and converted to its code point.
            $start = self::ord((string) $var1);
        }

        if (!$start) {
            return [];
        }

        // The end value is interpreted the same way as the start value.
        if ($is_digit) {
            $end = (int) $var2;
        } elseif ($is_xdigit) {
            $end = (int) self::hex_to_int((string) $var2);
        } elseif (!$use_ctype && \is_numeric($var2)) {
            $end = (int) $var2;
        } else {
            $end = self::ord((string) $var2);
        }

        if (!$end) {
            return [];
        }

        $array = [];
        foreach (\range($start, $end, $step) as $i) {
            $array[] = (string) self::chr((int) $i, $encoding);
        }

        return $array;
    }

    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: UTF8::rawurldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest+test'
     *
     * e.g:
     * 'test+test'                     => 'test+test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str

The input string.

* @param bool $multi_decode

Decode as often as possible.

* * @psalm-pure * * @return string *

The decoded URL, as a string.

*/
    public static function rawurldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $str = self::urldecode_unicode_helper($str);

        // One decoding pass is always done; with $multi_decode the pass is repeated
        // until the string no longer changes.
        do {
            $before_pass = $str;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $str = \rawurldecode(
                self::html_entity_decode(
                    self::to_utf8($str),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $before_pass !== $str);

        return self::fix_simple_utf8($str);
    }

    /**
     * Replaces all occurrences of $pattern in $str by $replacement.
     *
     * @param string $str

The input string.

* @param string $pattern

The regular expression pattern.

* @param string $replacement

The string to replace with.

* @param string $options [optional]

Matching conditions to be used.

* @param string $delimiter [optional]

Delimiter for the regex. Default: '/'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function regex_replace(
        string $str,
        string $pattern,
        string $replacement,
        string $options = '',
        string $delimiter = '/'
    ): string {
        // The option string "msr" is mapped to "ms" before it is appended as pattern modifiers.
        $modifiers = $options === 'msr' ? 'ms' : $options;

        // fallback
        $used_delimiter = $delimiter ?: '/';

        // The "u" modifier is always added in front of the caller supplied modifiers.
        $full_pattern = $used_delimiter . $pattern . $used_delimiter . 'u' . $modifiers;

        return (string) \preg_replace($full_pattern, $replacement, $str);
    }

    /**
     * alias for "UTF8::remove_bom()"
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see        UTF8::remove_bom()
     * @deprecated

please use "UTF8::remove_bom()"

*/
    public static function removeBOM(string $str): string
    {
        // Deprecated alias: forwards the string unchanged to remove_bom().
        $without_bom = self::remove_bom($str);

        return $without_bom;
    }

    /**
     * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
     *
     * EXAMPLE: UTF8::remove_bom("\xEF\xBB\xBFΜπορώ να"); // 'Μπορώ να'
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return string *

A string without UTF-BOM.

*/
    public static function remove_bom(string $str): string
    {
        if ($str === '') {
            return '';
        }

        $remaining_length = \strlen($str);

        // Every known BOM is tried in table order; a string may lose more than one of them.
        foreach (self::$BOM as $bom => $bom_length) {
            if (\strncmp($str, $bom, $bom_length) !== 0) {
                continue;
            }

            /** @var false|string $stripped - needed for PhpStan (stubs error) */
            $stripped = \substr($str, $bom_length, $remaining_length);
            if ($stripped === false) {
                return '';
            }

            $remaining_length -= (int) $bom_length;
            $str = (string) $stripped;
        }

        return $str;
    }

    /**
     * Removes duplicate occurrences of a string in another string.
     *
     * EXAMPLE: UTF8::remove_duplicates('öäü-κόσμεκόσμε-äöü', 'κόσμε'); // 'öäü-κόσμε-äöü'
     *
     * @param string $str

The base string.

* @param string|string[] $what

String to search for in the base string.

* * @psalm-pure * * @return string *

A string with removed duplicates.

*/
    public static function remove_duplicates(string $str, $what = ' '): string
    {
        if (\is_string($what)) {
            $what = [$what];
        }

        /**
         * @psalm-suppress RedundantConditionGivenDocblockType
         */
        if (\is_array($what)) {
            foreach ($what as $item) {
                $item = (string) $item;

                // An empty needle would build a pattern that matches the empty string everywhere.
                if ($item === '') {
                    continue;
                }

                // Replace each run by the captured needle ('$1') instead of by $item itself:
                // $item is arbitrary text and sequences like "$0" or "\1" inside it would be
                // interpreted as back-references by preg_replace().
                $str = (string) \preg_replace('/(' . \preg_quote($item, '/') . ')+/u', '$1', $str);
            }
        }

        return $str;
    }

    /**
     * Remove html via "strip_tags()" from the string.
     *
     * @param string $str

The input string.

* @param string $allowable_tags [optional]

You can use the optional second parameter to specify tags which * should not be stripped. Default: null *

* * @psalm-pure * * @return string *

A string without html tags.

*/
    public static function remove_html(string $str, string $allowable_tags = ''): string
    {
        // Delegates to the native strip_tags(); $allowable_tags lists the tags that are kept.
        $stripped = \strip_tags($str, $allowable_tags);

        return $stripped;
    }

    /**
     * Remove all breaks [<br> | \r\n | \r | \n | ...] from the string.
     *
     * @param string $str

The input string.

* @param string $replacement [optional]

Default is an empty string.

* * @psalm-pure * * @return string *

A string without breaks.

*/
    public static function remove_html_breaks(string $str, string $replacement = ''): string
    {
        // Matches, in this order: a Windows line break (as ONE break), a lone CR, a lone LF
        // and any HTML <br> tag (with or without attributes / self-closing slash).
        //
        // The former pattern ended in an empty alternative, which matches at every offset
        // (a non-empty $replacement was inserted between all characters), and started with
        // a stray "/" so that "\r\n" was only ever matched as two separate breaks.
        return (string) \preg_replace('#\r\n|\r|\n|<br\b[^>]*>#i', $replacement, $str);
    }

    /**
     * Remove invisible characters from a string.
     *
     * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
     *
     * EXAMPLE: UTF8::remove_invisible_characters("κόσ\0με"); // 'κόσμε'
     *
     * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
     *
     * @param string $str

The input string.

* @param bool $url_encoded [optional]

* Try to remove url encoded control character. * WARNING: maybe contains false-positives e.g. aa%0Baa -> aaaa. *
* Default: false *

* @param string $replacement [optional]

The replacement character.

* @param bool $keep_basic_control_characters [optional]

Keep control characters like [LRM] or [LSEP].

* * @psalm-pure * * @return string *

A string without invisible chars.

*/
    public static function remove_invisible_characters(
        string $str,
        bool $url_encoded = false,
        string $replacement = '',
        bool $keep_basic_control_characters = true
    ): string {
        // Thin wrapper: all arguments are handed over unchanged to ASCII::remove_invisible_characters().
        $cleaned = ASCII::remove_invisible_characters(
            $str,
            $url_encoded,
            $replacement,
            $keep_basic_control_characters
        );

        return $cleaned;
    }

    /**
     * Returns a new string with the prefix $substring removed, if present.
     *
     * @param string $str

The input string.

* @param string $substring

The prefix to remove.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string *

A string without the prefix $substring.

*/
    public static function remove_left(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty prefix or when $str does not start with it (byte-wise check).
        // The comparison is explicit on purpose: a truthiness test would ignore the prefix "0".
        if (
            $substring === ''
            ||
            \strncmp($str, $substring, \strlen($substring)) !== 0
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Cut by character count, not by byte count.
            return (string) \mb_substr(
                $str,
                (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            (int) self::strlen($substring, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Returns a new string with the suffix $substring removed, if present.
     *
     * @param string $str
     * @param string $substring

The suffix to remove.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string *

A string having a $str without the suffix $substring.

*/
    public static function remove_right(
        string $str,
        string $substring,
        string $encoding = 'UTF-8'
    ): string {
        // Nothing to do for an empty suffix or when $str does not end with it (byte-wise check).
        // The comparison is explicit on purpose: a truthiness test would ignore the suffix "0".
        if (
            $substring === ''
            ||
            \substr($str, -\strlen($substring)) !== $substring
        ) {
            return $str;
        }

        if ($encoding === 'UTF-8') {
            // Keep everything except the trailing characters of the suffix (character based).
            return (string) \mb_substr(
                $str,
                0,
                (int) \mb_strlen($str) - (int) \mb_strlen($substring)
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return (string) self::substr(
            $str,
            0,
            (int) self::strlen($str, $encoding) - (int) self::strlen($substring, $encoding),
            $encoding
        );
    }

    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str

The input string.

* @param string $search

The needle to search for.

* @param string $replacement

The string to replace with.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* * @psalm-pure * * @return string *

A string with replaced parts.

*/
    public static function replace(
        string $str,
        string $search,
        string $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-insensitive replacing is delegated to the class' own str_ireplace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }

    /**
     * Replaces all occurrences of $search in $str by $replacement.
     *
     * @param string $str

The input string.

* @param array $search

The elements to search for.

* @param array|string $replacement

The string to replace with.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* * @psalm-pure * * @return string *

A string with replaced parts.

*/
    public static function replace_all(
        string $str,
        array $search,
        $replacement,
        bool $case_sensitive = true
    ): string {
        // Case-insensitive replacing is delegated to the class' own str_ireplace().
        if (!$case_sensitive) {
            return self::str_ireplace($search, $replacement, $str);
        }

        return \str_replace($search, $replacement, $str);
    }

    /**
     * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
     *
     * EXAMPLE: UTF8::replace_diamond_question_mark('中文空白�', ''); // '中文空白'
     *
     * @param string $str

The input string

* @param string $replacement_char

The replacement character.

* @param bool $process_invalid_utf8_chars

Convert invalid UTF-8 chars

* * @psalm-pure * * @return string *

A string without diamond question marks (�).

*/
    public static function replace_diamond_question_mark(
        string $str,
        string $replacement_char = '',
        bool $process_invalid_utf8_chars = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($process_invalid_utf8_chars) {
            // mb_substitute_character() is given either the keyword "none" or a numeric value.
            $substitute = $replacement_char === '' ? 'none' : \ord($replacement_char);

            if (self::$SUPPORT['mbstring'] === false) {
                // if there is no native support for "mbstring",
                // then we need to clean the string before ...
                $str = self::clean($str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - we will reset the value in the next step
             */
            $previous_substitute = \mb_substitute_character();
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore "Unknown character" warnings, it's working anyway */
            @\mb_substitute_character($substitute);
            // the polyfill maybe return false, so cast to string
            $str = (string) \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
            // restore the substitute character that was active before this call
            \mb_substitute_character($previous_substitute);
        }

        $needles = [
            "\xEF\xBF\xBD",
            '�',
        ];
        $replacements = [
            $replacement_char,
            $replacement_char,
        ];

        return \str_replace($needles, $replacements, $str);
    }

    /**
     * Strip whitespace or other characters from the end of a UTF-8 string.
     *
     * EXAMPLE: UTF8::rtrim('-ABC-中文空白-  '); // '-ABC-中文空白-'
     *
     * @param string $str

The string to be trimmed.

* @param string|null $chars

Optional characters to be stripped.

* * @psalm-pure * * @return string *

A string with unwanted characters stripped from the right.

*/
    public static function rtrim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // An empty character list would build the invalid class "[]" and the failed
        // replace would be cast to ''; there is simply nothing to strip in that case.
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated since PHP 8.2.
                $pattern = "[{$chars}]+$";
            } else {
                $pattern = '[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        // Fallback without mbstring: same pattern, but quoted for the "/" delimiter used by regex_replace().
        if ($chars !== null) {
            $chars = \preg_quote($chars, '/');
            $pattern = "[{$chars}]+$";
        } else {
            $pattern = '[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }

    /**
     * WARNING: Print native UTF-8 support (libs) by default, e.g. for debugging.
     *
     * @param bool $useEcho
     *
     * @psalm-pure
     *
     * @return string|void
     */
    public static function showSupport(bool $useEcho = true)
    {
        // init
        $html = '';

        // NOTE(review): the markup of the three literals below was lost in this copy of the
        // file (only line breaks were left); <pre>, <br> and </pre> are restored - confirm.
        $html .= '<pre>';
        // Iterate by value: the support table is only read, and a by-reference loop would
        // leave a dangling reference to its last element.
        foreach (self::$SUPPORT as $key => $value) {
            $html .= $key . ' - ' . \print_r($value, true) . "\n<br>";
        }
        $html .= '</pre>';

        if ($useEcho) {
            echo $html;
        }

        return $html;
    }

    /**
     * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
     *
     * EXAMPLE: UTF8::single_chr_html_encode('κ'); // '&#954;'
     *
     * @param string $char

The Unicode character to be encoded as numbered entity.

* @param bool $keep_ascii_chars

Set to true to keep ASCII chars. * @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The HTML numbered entity for the given character.

*/
    public static function single_chr_html_encode(
        string $char,
        bool $keep_ascii_chars = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($char === '') {
            return '';
        }

        // ASCII input is only passed through untouched when the caller asked for it.
        if ($keep_ascii_chars && ASCII::is_ascii($char)) {
            return $char;
        }

        return '&#' . self::ord($char, $encoding) . ';';
    }

    /**
     * Converts every run of $tab_length spaces into one tab character.
     *
     * @param string $str
     * @param int    $tab_length
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function spaces_to_tabs(string $str, int $tab_length = 4): string
    {
        // A run of zero (or fewer) spaces cannot be searched for.
        if ($tab_length <= 0) {
            return $str;
        }

        // Always derive the run from $tab_length: the hard-coded literals for the lengths
        // 4 and 2 did not contain 4 / 2 spaces, so every single space became a tab.
        $tab = \str_repeat(' ', $tab_length);

        return \str_replace($tab, "\t", $str);
    }

    /**
     * alias for "UTF8::str_split()"
     *
     * @param int|string $str
     * @param int        $length
     * @param bool       $clean_utf8
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @see        UTF8::str_split()
     * @deprecated

please use "UTF8::str_split()"

*/
    public static function split(
        $str,
        int $length = 1,
        bool $clean_utf8 = false
    ): array {
        // Deprecated alias: all of the work happens in str_split().
        /** @var string[] $chunks */
        $chunks = self::str_split($str, $length, $clean_utf8);

        return $chunks;
    }

    /**
     * alias for "UTF8::str_starts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see UTF8::str_starts_with()
     * @deprecated

please use "UTF8::str_starts_with()"

*/
    public static function str_begins(string $haystack, string $needle): bool
    {
        // Deprecated alias: the prefix check itself lives in str_starts_with().
        $starts_with = self::str_starts_with($haystack, $needle);

        return $starts_with;
    }

    /**
     * Returns a camelCase version of the string. Trims surrounding spaces,
     * capitalizes letters following digits, spaces, dashes and underscores,
     * and removes spaces, dashes, as well as underscores.
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Default: 'UTF-8'

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // 'UTF-8' and 'CP850' are used as given; any other name is normalized first.
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Trim, lower-case the first character, then drop leading dashes/underscores.
        $str = self::lcfirst(
            \trim($str),
            $encoding,
            false,
            $lang,
            $try_to_keep_the_string_length
        );
        $str = (string) \preg_replace('/^[-_]+/', '', $str);

        // The plain mb_* functions are only used when neither of the two
        // special-case options ($lang, $try_to_keep_the_string_length) is set.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Pass 1: remove each run of dashes, underscores and whitespace and
        // upper-case the single character that follows it (if there is one).
        $str = (string) \preg_replace_callback(
            '/[-_\\s]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($use_mb_functions, $encoding, $lang, $try_to_keep_the_string_length): string {
                if (isset($match[1])) {
                    if ($use_mb_functions) {
                        if ($encoding === 'UTF-8') {
                            return \mb_strtoupper($match[1]);
                        }

                        return \mb_strtoupper($match[1], $encoding);
                    }

                    return self::strtoupper($match[1], $encoding, false, $lang, $try_to_keep_the_string_length);
                }

                // The separator run was at the very end: it is simply removed.
                return '';
            },
            $str
        );

        // Pass 2: upper-case each run of numeric characters together with the one
        // character after it (the whole match, $match[0], is converted).
        return (string) \preg_replace_callback(
            '/[\\p{N}]+(.)?/u',
            /**
             * @param array $match
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $match) use ($use_mb_functions, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length): string {
                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper($match[0]);
                    }

                    return \mb_strtoupper($match[0], $encoding);
                }

                return self::strtoupper($match[0], $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
            },
            $str
        );
    }

    /**
     * Returns the string with the first letter of each word capitalized,
     * except for when the word is a name which shouldn't be capitalized.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *

A string with $str capitalized.

*/
    public static function str_capitalize_name(string $str): string
    {
        // Collapse the whitespace first, then run the helper once with a space
        // and once with a dash as its second argument.
        $collapsed = self::collapse_whitespace($str);
        $by_space = self::str_capitalize_name_helper($collapsed, ' ');

        return self::str_capitalize_name_helper($by_space, '-');
    }

    /**
     * Returns true if the string contains $needle, false otherwise. By default
     * the comparison is case-sensitive, but can be made insensitive by setting
     * $case_sensitive to false.
     *
     * @param string $haystack

The input string.

* @param string $needle

Substring to look for.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* * @psalm-pure * * @return bool *

Whether or not $haystack contains $needle.

*/
    public static function str_contains(
        string $haystack,
        string $needle,
        bool $case_sensitive = true
    ): bool {
        // Case-insensitive lookups go through mbstring.
        if (!$case_sensitive) {
            return \mb_stripos($haystack, $needle) !== false;
        }

        // Before PHP 8 there is no native str_contains().
        if (\PHP_VERSION_ID < 80000) {
            return \strpos($haystack, $needle) !== false;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_contains($haystack, $needle);
    }

    /**
     * Returns true if the string contains all $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * @param string $haystack

The input string.

* @param array $needles

SubStrings to look for.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* * @psalm-pure * * @return bool *

Whether or not $haystack contains $needle.

*/
    public static function str_contains_all(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Compare against the empty string instead of testing truthiness, so
            // that the perfectly valid needle "0" is searched for and not rejected.
            $needle = (string) $needle;
            if ($needle === '') {
                return false;
            }

            // Exactly one search per needle, chosen by the requested sensitivity.
            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position === false) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns true if the string contains any $needles, false otherwise. By
     * default the comparison is case-sensitive, but can be made insensitive by
     * setting $case_sensitive to false.
     *
     * @param string $haystack

The input string.

* @param array $needles

SubStrings to look for.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* * @psalm-pure * * @return bool *

Whether or not $str contains $needle.

*/
    public static function str_contains_any(
        string $haystack,
        array $needles,
        bool $case_sensitive = true
    ): bool {
        if ($haystack === '' || $needles === []) {
            return false;
        }

        foreach ($needles as $needle) {
            // Skip only genuinely empty needles; a truthiness test would also
            // skip the valid needle "0".
            $needle = (string) $needle;
            if ($needle === '') {
                continue;
            }

            $position = $case_sensitive
                ? \strpos($haystack, $needle)
                : \mb_stripos($haystack, $needle);

            if ($position !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns a lowercase and trimmed string separated by dashes. Dashes are
     * inserted before uppercase characters (with the exception of the first
     * character of the string), and in place of spaces as well as underscores.
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_dasherize(string $str, string $encoding = 'UTF-8'): string
    {
        // Dasherizing is delimiting with "-" as the separator.
        $dasherized = self::str_delimit($str, '-', $encoding);

        return $dasherized;
    }

    /**
     * Returns a lowercase and trimmed string separated by the given delimiter.
     * Delimiters are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces, dashes,
     * and underscores. Alpha delimiters are not converted to lowercase.
     *
     * @param string $str

The input string.

* @param string $delimiter

Sequence used to separate parts of the string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ -> * ß

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_delimit(
        string $str,
        string $delimiter,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        $has_mbstring = self::$SUPPORT['mbstring'] === true;
        $trimmed = \trim($str);

        // Step 1: put a dash in front of every upper-case letter that does not
        // start a word; the dash is swapped for the delimiter in step 3.
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $str = (string) \mb_ereg_replace('\\B(\\p{Lu})', '-\1', $trimmed);
        } else {
            $str = (string) \preg_replace('/\\B(\\p{Lu})/u', '-\1', $trimmed);
        }

        // Step 2: lower-case; the plain mb_* call is only used when none of the
        // special-case options is requested and the encoding is UTF-8.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;
        if ($use_mb_functions && $encoding === 'UTF-8') {
            $str = \mb_strtolower($str);
        } else {
            $str = self::strtolower($str, $encoding, $clean_utf8, $lang, $try_to_keep_the_string_length);
        }

        // Step 3: collapse every run of dashes, underscores and whitespace into
        // the delimiter (inserted after lower-casing, so it keeps its own case).
        if ($has_mbstring) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace('[\\-_\\s]+', $delimiter, $str);
        }

        return (string) \preg_replace('/[\\-_\\s]+/u', $delimiter, $str);
    }

    /**
     * Optimized "mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
     *
     * EXAMPLE:


     * UTF8::str_detect_encoding('中文空白'); // 'UTF-8'
     * UTF8::str_detect_encoding('Abc'); // 'ASCII'
     *

* * @param string $str

The input string.

* * @psalm-pure * * @return false|string *

* The detected string-encoding e.g. UTF-8 or UTF-16BE,
* otherwise it will return false e.g. for BINARY or not detected encoding. *

*/
    public static function str_detect_encoding($str)
    {
        // init
        $str = (string) $str;

        //
        // 1.) check binary strings (010001001...) like UTF-16 / UTF-32 / PDF / Images / ...
        //

        if (self::is_binary($str, true)) {
            // A result of 1 is reported as little-endian, 2 as big-endian.
            $is_utf32 = self::is_utf32($str, false);
            if ($is_utf32 === 1) {
                return 'UTF-32LE';
            }
            if ($is_utf32 === 2) {
                return 'UTF-32BE';
            }

            $is_utf16 = self::is_utf16($str, false);
            if ($is_utf16 === 1) {
                return 'UTF-16LE';
            }
            if ($is_utf16 === 2) {
                return 'UTF-16BE';
            }

            // is binary but not "UTF-16" or "UTF-32"
            return false;
        }

        //
        // 2.) simple check for ASCII chars
        //

        if (ASCII::is_ascii($str)) {
            return 'ASCII';
        }

        //
        // 3.) simple check for UTF-8 chars
        //

        if (self::is_utf8_string($str)) {
            return 'UTF-8';
        }

        //
        // 4.) check via "mb_detect_encoding()"
        //
        // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "mb_detect_encoding()"

        // Candidates are tried in this order; the list is passed with strict mode on.
        $encoding_detecting_order = [
            'ISO-8859-1',
            'ISO-8859-2',
            'ISO-8859-3',
            'ISO-8859-4',
            'ISO-8859-5',
            'ISO-8859-6',
            'ISO-8859-7',
            'ISO-8859-8',
            'ISO-8859-9',
            'ISO-8859-10',
            'ISO-8859-13',
            'ISO-8859-14',
            'ISO-8859-15',
            'ISO-8859-16',
            'WINDOWS-1251',
            'WINDOWS-1252',
            'WINDOWS-1254',
            'CP932',
            'CP936',
            'CP950',
            'CP866',
            'CP850',
            'CP51932',
            'CP50220',
            'CP50221',
            'CP50222',
            'ISO-2022-JP',
            'ISO-2022-KR',
            'JIS',
            'JIS-ms',
            'EUC-CN',
            'EUC-JP',
        ];

        if (self::$SUPPORT['mbstring'] === true) {
            // info: do not use the symfony polyfill here
            $encoding = \mb_detect_encoding($str, $encoding_detecting_order, true);
            if ($encoding) {
                return $encoding;
            }
        }

        //
        // 5.) check via "iconv()"
        //

        // The encoding list is loaded once and kept in the static property.
        if (self::$ENCODINGS === null) {
            self::$ENCODINGS = self::getData('encodings');
        }

        // The first encoding for which converting the string onto itself returns
        // the identical bytes is reported.
        foreach (self::$ENCODINGS as $encoding_tmp) {
            // INFO: //IGNORE but still throw notice
            /** @noinspection PhpUsageOfSilenceOperatorInspection */
            if ((string) @\iconv($encoding_tmp, $encoding_tmp . '//IGNORE', $str) === $str) {
                return $encoding_tmp;
            }
        }

        return false;
    }

    /**
     * alias for "UTF8::str_ends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see UTF8::str_ends_with()
     * @deprecated

please use "UTF8::str_ends_with()"

*/
    public static function str_ends(string $haystack, string $needle): bool
    {
        // Deprecated alias: the suffix check itself lives in str_ends_with().
        $ends_with = self::str_ends_with($haystack, $needle);

        return $ends_with;
    }

    /**
     * Check if the string ends with the given substring.
     *
     * EXAMPLE:


     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_ends_with('BeginMiddleΚόσμε', 'κόσμε'); // false
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

*
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_ends_with(string $haystack, string $needle): bool
    {
        // Every string ends with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but the empty string ends with nothing else.
        if ($haystack === '') {
            return false;
        }

        // Before PHP 8 the suffix is compared byte-wise by hand.
        if (\PHP_VERSION_ID < 80000) {
            $suffix = \substr($haystack, -\strlen($needle));

            return $suffix === $needle;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_ends_with($haystack, $needle);
    }

    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str

The input string.

* @param string[] $substrings

Substrings to look for.

* * @psalm-pure * * @return bool *

Whether or not $str ends with $substring.

*/
    public static function str_ends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            // Byte-wise comparison of the tail that is as long as the candidate.
            $tail = \substr($str, -\strlen($candidate));
            if ($tail === $candidate) {
                return true;
            }
        }

        return false;
    }

    /**
     * Ensures that the string begins with $substring. If it doesn't, it's
     * prepended.
     *
     * @param string $str

The input string.

* @param string $substring

The substring to add if not present.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_left(string $str, string $substring): string
    {
        // Prepend unless a non-empty $substring already sits at offset 0.
        if ($substring === '' || \strpos($str, $substring) !== 0) {
            return $substring . $str;
        }

        return $str;
    }

    /**
     * Ensures that the string ends with $substring. If it doesn't, it's appended.
     *
     * @param string $str

The input string.

* @param string $substring

The substring to add if not present.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_ensure_right(string $str, string $substring): string
    {
        // Nothing is appended only when both strings are non-empty and the tail of
        // $str already equals $substring (byte-wise).
        $already_terminated = $str !== ''
            && $substring !== ''
            && \substr($str, -\strlen($substring)) === $substring;

        return $already_terminated ? $str : $str . $substring;
    }

    /**
     * Capitalizes the first word of the string, replaces underscores with
     * spaces, and strips '_id'.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function str_humanize($str): string
    {
        // Order matters: '_id' is removed before the remaining underscores
        // are turned into spaces.
        $search = ['_id', '_'];
        $replace = ['', ' '];
        $str = \str_replace($search, $replace, $str);

        return self::ucfirst(\trim($str));
    }

    /**
     * alias for "UTF8::str_istarts_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see UTF8::str_istarts_with()
     * @deprecated

please use "UTF8::str_istarts_with()"

*/
    public static function str_ibegins(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards unchanged to str_istarts_with().
        $starts_with = self::str_istarts_with($haystack, $needle);

        return $starts_with;
    }

    /**
     * alias for "UTF8::str_iends_with()"
     *
     * @param string $haystack
     * @param string $needle
     *
     * @psalm-pure
     *
     * @return bool
     *
     * @see UTF8::str_iends_with()
     * @deprecated

please use "UTF8::str_iends_with()"

*/
    public static function str_iends(string $haystack, string $needle): bool
    {
        // Deprecated alias: forwards unchanged to str_iends_with().
        $ends_with = self::str_iends_with($haystack, $needle);

        return $ends_with;
    }

    /**
     * Check if the string ends with the given substring, case-insensitive.
     *
     * EXAMPLE:


     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'Κόσμε'); // true
     * UTF8::str_iends_with('BeginMiddleΚόσμε', 'κόσμε'); // true
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

*
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_iends_with(string $haystack, string $needle): bool
    {
        if ($needle === '') {
            return true;
        }

        if ($haystack === '') {
            return false;
        }

        // Take as many trailing bytes as the needle has and compare them
        // case-insensitively.
        $tail = \substr($haystack, -\strlen($needle));

        return self::strcasecmp($tail, $needle) === 0;
    }

    /**
     * Returns true if the string ends with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str

The input string.

* @param string[] $substrings

Substrings to look for.

* * @psalm-pure * * @return bool *

Whether or not $str ends with $substring.

*/
    public static function str_iends_with_any(string $str, array $substrings): bool
    {
        if ($substrings === []) {
            return false;
        }

        // The first candidate that matches case-insensitively settles the answer.
        foreach ($substrings as $candidate) {
            if (self::str_iends_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * @param string $str

The input string.

* @param string $needle

Substring to look for.

* @param int $offset [optional]

Offset from which to search. Default: 0

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|int *

The occurrence's index if found, otherwise false.

* * @see UTF8::stripos() * @deprecated

please use "UTF8::stripos()"

*/
    public static function str_iindex_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to stripos().
        $position = self::stripos($str, $needle, $offset, $encoding);

        return $position;
    }

    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * @param string $str

The input string.

* @param string $needle

Substring to look for.

* @param int $offset [optional]

Offset from which to search. Default: 0

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|int *

The last occurrence's index if found, otherwise false.

* * @see UTF8::strripos() * @deprecated

please use "UTF8::strripos()"

*/
    public static function str_iindex_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strripos().
        $position = self::strripos($str, $needle, $offset, $encoding);

        return $position;
    }

    /**
     * Returns the index of the first occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search.
     *
     * @param string $str

The input string.

* @param string $needle

Substring to look for.

* @param int $offset [optional]

Offset from which to search. Default: 0

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|int *

The occurrence's index if found, otherwise false.

* * @see UTF8::strpos() * @deprecated

please use "UTF8::strpos()"

*/
    public static function str_index_first(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strpos().
        $position = self::strpos($str, $needle, $offset, $encoding);

        return $position;
    }

    /**
     * Returns the index of the last occurrence of $needle in the string,
     * and false if not found. Accepts an optional offset from which to begin
     * the search. Offsets may be negative to count from the last character
     * in the string.
     *
     * @param string $str

The input string.

* @param string $needle

Substring to look for.

* @param int $offset [optional]

Offset from which to search. Default: 0

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|int *

The last occurrence's index if found, otherwise false.

* * @see UTF8::strrpos() * @deprecated

please use "UTF8::strrpos()"

*/
    public static function str_index_last(
        string $str,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8'
    ) {
        // Deprecated alias: forwards all arguments unchanged to strrpos().
        $position = self::strrpos($str, $needle, $offset, $encoding);

        return $position;
    }

    /**
     * Inserts $substring into the string at the $index provided.
     *
     * @param string $str

The input string.

* @param string $substring

String to be inserted.

* @param int $index

The index at which to insert the substring.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_insert(
        string $str,
        string $substring,
        int $index,
        string $encoding = 'UTF-8'
    ): string {
        if ($encoding === 'UTF-8') {
            $len = (int) \mb_strlen($str);
            // An index past the end leaves the string untouched.
            if ($index > $len) {
                return $str;
            }

            $head = (string) \mb_substr($str, 0, $index);
            $tail = (string) \mb_substr($str, $index, $len);

            return $head . $substring . $tail;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $len = (int) self::strlen($str, $encoding);
        if ($index > $len) {
            return $str;
        }

        $head = (string) self::substr($str, 0, $index, $encoding);
        $tail = (string) self::substr($str, $index, $len, $encoding);

        return $head . $substring . $tail;
    }

    /**
     * Case-insensitive and UTF-8 safe version of str_replace.
     *
     * EXAMPLE:


     * UTF8::str_ireplace('lIzÆ', 'lise', 'Iñtërnâtiônàlizætiøn'); // 'Iñtërnâtiônàlisetiøn'
     *

* * @see http://php.net/manual/en/function.str-ireplace.php * * @param string|string[] $search

* Every replacement with search array is * performed on the result of previous replacement. *

* @param string|string[] $replacement

The replacement.

* @param string|string[] $subject

* If subject is an array, then the search and * replace is performed with every entry of * subject, and the return value is an array as * well. *

* @param int $count [optional]

* The number of matched and replaced needles will * be returned in count which is passed by * reference. *

* * @psalm-pure * * @return string|string[] *

A string or an array of replacements.

*
     * @template TStrIReplaceSubject
     * @phpstan-param TStrIReplaceSubject $subject
     * @phpstan-return TStrIReplaceSubject
     */
    public static function str_ireplace($search, $replacement, $subject, &$count = null)
    {
        $search = (array) $search;

        // Turn every search term into a case-insensitive, UTF-8 aware regex.
        /** @noinspection AlterInForeachInspection */
        foreach ($search as &$s) {
            $s = (string) $s;
            if ($s === '') {
                // Placeholder for an empty search term: a look-behind for a
                // character directly at the start of the subject cannot succeed.
                $s = '/^(?<=.)$/';
            } else {
                $s = '/' . \preg_quote($s, '/') . '/ui';
            }
        }

        // fallback
        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($replacement === null) {
            $replacement = '';
        }

        /** @phpstan-ignore-next-line - only a fallback for PHP8 */
        if ($subject === null) {
            $subject = '';
        }

        // NOTE(review): $replacement is handed to preg_replace() as-is, so sequences
        // like "$1" or "\1" are presumably treated as back-references rather than as
        // literal text - confirm whether callers rely on that.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrIReplaceSubject $subject
         */
        $subject = \preg_replace($search, $replacement, $subject, -1, $count);

        return $subject;
    }

    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * @param string $str

The input string.

* @param string $search

The string to search for.

* @param string $replacement

The replacement.

* * @psalm-pure * * @return string *

The string after the replacement.

*/
    public static function str_ireplace_beginning(string $str, string $search, string $replacement): string
    {
        // Nothing to search in and nothing to insert.
        if ($str === '' && $replacement === '') {
            return '';
        }

        // An empty search term appends the replacement (for an empty $str this
        // is just the replacement itself).
        if ($search === '') {
            return $str . $replacement;
        }

        // Byte-wise, case-insensitive comparison of the leading bytes.
        $prefix_length = \strlen($search);
        if (\strncasecmp($str, $search, $prefix_length) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_length);
    }

    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * @param string $str

The input string.

* @param string $search

The string to search for.

* @param string $replacement

The replacement.

* * @psalm-pure * * @return string *

The string after the replacement.

*/
    public static function str_ireplace_ending(string $str, string $search, string $replacement): string
    {
        if ($str === '') {
            if ($replacement === '') {
                return '';
            }

            if ($search === '') {
                return $replacement;
            }
        }

        // An empty search term appends the replacement.
        if ($search === '') {
            return $str . $replacement;
        }

        $str_length = \strlen($str);
        $search_length = \strlen($search);

        // A search term longer than the string cannot be its ending. Without this
        // guard the offset below would point before the start of the string, which
        // makes stripos() throw a ValueError on PHP 8 (and warn on PHP 7.1+).
        if ($search_length > $str_length) {
            return $str;
        }

        // Only the last $search_length bytes are inspected, case-insensitively.
        if (\stripos($str, $search, $str_length - $search_length) !== false) {
            $str = \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }

    /**
     * Check if the string starts with the given substring, case-insensitive.
     *
     * EXAMPLE:


     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_istarts_with('ΚόσμεMiddleEnd', 'κόσμε'); // true
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

*
     * @psalm-pure
     *
     * @return bool
     */
    public static function str_istarts_with(string $haystack, string $needle): bool
    {
        // Every string starts with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... but the empty string starts with nothing else.
        if ($haystack === '') {
            return false;
        }

        // A case-insensitive hit at position 0 means the haystack starts with the needle.
        $position = self::stripos($haystack, $needle);

        return $position === 0;
    }

    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-insensitive
     *
     * @param string $str

The input string.

* @param array $substrings

Substrings to look for.

* * @psalm-pure * * @return bool *

Whether or not $str starts with $substring.

*/
    public static function str_istarts_with_any(string $str, array $substrings): bool
    {
        // An empty string or an empty candidate list can never produce a match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_istarts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so that
        // the offset and the substring are measured in the same character units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so that
        // the offset and the substring are measured in the same character units.
        $offset = self::strripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $str,
                $offset + (int) self::strlen($separator)
            );
        }

        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        // Search with the same encoding that is used for cutting below, so that
        // the offset and the substring are measured in the same character units.
        $offset = self::stripos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }

    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        $is_utf8 = $encoding === 'UTF-8';

        // UTF-8 goes straight to mbstring; other encodings use the class helpers.
        $offset = $is_utf8
            ? \mb_strripos($str, $separator)
            : self::strripos($str, $separator, 0, $encoding);

        // No separator found: there is no "before" part.
        if ($offset === false) {
            return '';
        }

        if ($is_utf8) {
            return (string) \mb_substr($str, 0, $offset);
        }

        return (string) self::substr($str, 0, $offset, $encoding);
    }

    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * @param string $str

The input string.

* @param string $needle

The string to look for.

* @param bool $before_needle [optional]

Default: false

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::stristr($str, $needle, $before_needle, $encoding);

        // A miss is reported as an empty string instead of false.
        return $part === false ? '' : $part;
    }

    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * @param string $str

The input string.

* @param string $needle

The string to look for.

* @param bool $before_needle [optional]

Default: false

* @param string $encoding [optional]

Default: 'UTF-8'

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_isubstr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $needle === '') {
            return '';
        }

        $part = self::strrichr($str, $needle, $before_needle, $encoding);

        // A miss is reported as an empty string instead of false.
        return $part === false ? '' : $part;
    }

    /**
     * Returns the last $n characters of the string.
     *
     * @param string $str

The input string.

* @param int $n

Number of characters to retrieve from the end.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_last_char(
        string $str,
        int $n = 1,
        string $encoding = 'UTF-8'
    ): string {
        if ($n <= 0 || $str === '') {
            return '';
        }

        // Non-UTF-8 input goes through the class helpers with a normalized encoding.
        if ($encoding !== 'UTF-8') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, -$n, null, $encoding);
        }

        return (string) \mb_substr($str, -$n);
    }

    /**
     * Limit the number of characters in a string.
     *
     * @param string $str

The input string.

* @param int $length [optional]

Default: 100

* @param string $str_add_on [optional]

Default: …

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: returned as-is, without the add-on.
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // Room left for the text once the add-on is accounted for. It is clamped
            // at 0: a negative substr length would cut from the END of the string
            // and could return more characters than the requested limit.
            $keep = $length - (int) self::strlen($str_add_on);
            if ($keep < 0) {
                $keep = 0;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $keep) . $str_add_on;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ((int) self::strlen($str, $encoding) <= $length) {
            return $str;
        }

        $keep = $length - (int) self::strlen($str_add_on);
        if ($keep < 0) {
            $keep = 0;
        }

        return ((string) self::substr($str, 0, $keep, $encoding)) . $str_add_on;
    }

    /**
     * Limit the number of characters in a string, but also after the next word.
     *
     * EXAMPLE: UTF8::str_limit_after_word('fòô bàř fòô', 8, ''); // 'fòô bàř'
     *
     * @param string $str

The input string.

* @param int $length [optional]

Default: 100

* @param string $str_add_on [optional]

Default: …

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_limit_after_word(
        string $str,
        int $length = 100,
        string $str_add_on = '…',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $length <= 0) {
            return '';
        }

        if ($encoding === 'UTF-8') {
            // Short enough already: returned as-is, without the add-on.
            /** @noinspection UnnecessaryCastingInspection */
            if ((int) \mb_strlen($str) <= $length) {
                return $str;
            }

            // The character at the limit is a space: cut right before it.
            if (\mb_substr($str, $length - 1, 1) === ' ') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }

            $str = \mb_substr($str, 0, $length);

            // Split on spaces and drop the last piece (limit -1), i.e. the word
            // that was cut by the hard limit above.
            $array = \explode(' ', $str, -1);
            $new_str = \implode(' ', $array);

            // Nothing is left after dropping the last piece: fall back to a hard
            // cut at $length - 1.
            if ($new_str === '') {
                return ((string) \mb_substr($str, 0, $length - 1)) . $str_add_on;
            }
        } else {
            // Same steps as above, using the encoding-aware class helpers.
            if ((int) self::strlen($str, $encoding) <= $length) {
                return $str;
            }

            if (self::substr($str, $length - 1, 1, $encoding) === ' ') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = self::substr($str, 0, $length, $encoding);
            // The helper reported a failure: only the add-on is returned.
            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            if ($str === false) {
                return '' . $str_add_on;
            }

            $array = \explode(' ', $str, -1);
            $new_str = \implode(' ', $array);

            if ($new_str === '') {
                return ((string) self::substr($str, 0, $length - 1, $encoding)) . $str_add_on;
            }
        }

        return $new_str . $str_add_on;
    }

    /**
     * Returns the longest common prefix between the $str1 and $str2.
     *
     * @param string $str1

The input string.

* @param string $str2

Second string for comparison.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return string
     */
    public static function str_longest_common_prefix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        // init
        $prefix = '';

        if ($encoding === 'UTF-8') {
            // The prefix cannot be longer than the shorter of the two strings.
            $limit = (int) \min(
                \mb_strlen($str1),
                \mb_strlen($str2)
            );

            $i = 0;
            while ($i < $limit) {
                $char = \mb_substr($str1, $i, 1);

                // Stop at the first position where the characters differ.
                if ($char === false || $char !== \mb_substr($str2, $i, 1)) {
                    break;
                }

                $prefix .= $char;
                ++$i;
            }

            return $prefix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        $i = 0;
        while ($i < $limit) {
            $char = self::substr($str1, $i, 1, $encoding);

            if ($char === false || $char !== self::substr($str2, $i, 1, $encoding)) {
                break;
            }

            $prefix .= $char;
            ++$i;
        }

        return $prefix;
    }

    /**
     * Returns the longest common substring between the $str1 and $str2.
     * In the case of ties, it returns that which occurs first.
     *
     * @param string $str1
     * @param string $str2

Second string for comparison.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

A string with its $str being the longest common substring.

*/
    public static function str_longest_common_substring(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        // Uses dynamic programming to solve
        // http://en.wikipedia.org/wiki/Longest_common_substring_problem
        if ($encoding === 'UTF-8') {
            $str_length = (int) \mb_strlen($str1);
            $other_length = (int) \mb_strlen($str2);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $str_length = (int) self::strlen($str1, $encoding);
            $other_length = (int) self::strlen($str2, $encoding);
        }

        // Return if either string is empty
        if ($str_length === 0 || $other_length === 0) {
            return '';
        }

        // Re-evaluated on purpose: normalization may have mapped an alias to 'UTF-8'.
        $is_utf8 = $encoding === 'UTF-8';

        // Extract every character of $str2 once instead of once per table cell.
        $other_chars = [];
        for ($j = 0; $j < $other_length; ++$j) {
            $other_chars[$j] = $is_utf8
                ? \mb_substr($str2, $j, 1)
                : self::substr($str2, $j, 1, $encoding);
        }

        $len = 0; // length of the best match found so far
        $end = 0; // 1-based end position of that match inside $str1

        // Only the previous row of the DP table is ever read, so two rows suffice.
        $previous_row = \array_fill(0, $other_length + 1, 0);

        for ($i = 1; $i <= $str_length; ++$i) {
            $str_char = $is_utf8
                ? \mb_substr($str1, $i - 1, 1)
                : self::substr($str1, $i - 1, 1, $encoding);

            $current_row = [0];
            for ($j = 1; $j <= $other_length; ++$j) {
                if ($str_char === $other_chars[$j - 1]) {
                    $run = $previous_row[$j - 1] + 1;
                    $current_row[$j] = $run;

                    // Strict ">" keeps the first match on ties.
                    if ($run > $len) {
                        $len = $run;
                        $end = $i;
                    }
                } else {
                    $current_row[$j] = 0;
                }
            }

            $previous_row = $current_row;
        }

        if ($is_utf8) {
            return (string) \mb_substr($str1, $end - $len, $len);
        }

        return (string) self::substr($str1, $end - $len, $len, $encoding);
    }

    /**
     * Returns the longest common suffix between the $str1 and $str2.
     *
     * @param string $str1
     * @param string $str2

Second string for comparison.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string */
    public static function str_longest_common_suffix(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): string {
        if ($str1 === '' || $str2 === '') {
            return '';
        }

        $suffix = '';

        if ($encoding === 'UTF-8') {
            $limit = (int) \min(
                \mb_strlen($str1, $encoding),
                \mb_strlen($str2, $encoding)
            );

            // Walk backwards from the last character, prepending each match.
            $back = 1;
            while ($back <= $limit) {
                $current = \mb_substr($str1, -$back, 1);
                if ($current === false || $current !== \mb_substr($str2, -$back, 1)) {
                    break;
                }

                $suffix = $current . $suffix;
                ++$back;
            }

            return $suffix;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $limit = (int) \min(
            self::strlen($str1, $encoding),
            self::strlen($str2, $encoding)
        );

        // Walk backwards from the last character, prepending each match.
        $back = 1;
        while ($back <= $limit) {
            $current = self::substr($str1, -$back, 1, $encoding);
            if ($current === false || $current !== self::substr($str2, -$back, 1, $encoding)) {
                break;
            }

            $suffix = $current . $suffix;
            ++$back;
        }

        return $suffix;
    }

    /**
     * Returns true if $str matches the supplied pattern, false otherwise.
     *
     * @param string $str

The input string.

* @param string $pattern

Regex pattern to match against.

* * @psalm-pure * * @return bool *

Whether or not $str matches the pattern.

*/
    public static function str_matches_pattern(string $str, string $pattern): bool
    {
        // The pattern is wrapped in "/" delimiters and matched in UTF-8 mode.
        $regex = '/' . $pattern . '/u';

        return (bool) \preg_match($regex, $str);
    }

    /**
     * Returns whether or not a character exists at an index. Offsets may be
     * negative to count from the last character in the string. Implements
     * part of the ArrayAccess interface.
     *
     * @param string $str

The input string.

* @param int $offset

The index to check.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return bool *

Whether or not the index exists.

*/
    public static function str_offset_exists(string $str, int $offset, string $encoding = 'UTF-8'): bool
    {
        $char_count = (int) self::strlen($str, $encoding);

        // Non-negative offsets are zero-based; negative ones count back from the end.
        return $offset >= 0
            ? $char_count > $offset
            : $char_count >= \abs($offset);
    }

    /**
     * Returns the character at the given index. Offsets may be negative to
     * count from the last character in the string. Implements part of the
     * ArrayAccess interface, and throws an OutOfBoundsException if the index
     * does not exist.
     *
     * @param string $str

The input string.

* @param int $index

The index from which to retrieve the char.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @throws \OutOfBoundsException if the positive or negative offset does not exist * * @return string *

The character at the specified index.

* * @psalm-pure */
    public static function str_offset_get(string $str, int $index, string $encoding = 'UTF-8'): string
    {
        // Measure in the caller's encoding, so the bounds check agrees with
        // str_offset_exists() and with the char_at() lookup below.
        $length = (int) self::strlen($str, $encoding);

        if (
            ($index >= 0 && $length <= $index)
            ||
            $length < \abs($index)
        ) {
            throw new \OutOfBoundsException('No character exists at the index');
        }

        return self::char_at($str, $index, $encoding);
    }

    /**
     * Pad a UTF-8 string to a given length with another string.
     *
     * EXAMPLE: UTF8::str_pad('中文空白', 10, '_', STR_PAD_BOTH); // '___中文空白___'
     *
     * @param string $str

The input string.

* @param int $pad_length

The length of return string.

* @param string $pad_string [optional]

String to use for padding the input string.

* @param int|string $pad_type [optional]

* Can be STR_PAD_RIGHT (default), [or string "right"]
* STR_PAD_LEFT [or string "left"] or
* STR_PAD_BOTH [or string "both"] *

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string *

Returns the padded string.

*/
    public static function str_pad(
        string $str,
        int $pad_length,
        string $pad_string = ' ',
        $pad_type = \STR_PAD_RIGHT,
        string $encoding = 'UTF-8'
    ): string {
        if ($pad_length === 0 || $pad_string === '') {
            return $str;
        }

        // Anything that is not already an int must be one of the string aliases.
        if ($pad_type !== (int) $pad_type) {
            if ($pad_type === 'left') {
                $pad_type = \STR_PAD_LEFT;
            } elseif ($pad_type === 'right') {
                $pad_type = \STR_PAD_RIGHT;
            } elseif ($pad_type === 'both') {
                $pad_type = \STR_PAD_BOTH;
            } else {
                throw new \InvalidArgumentException(
                    'Pad expects $pad_type to be "STR_PAD_*" or '
                    . "to be one of 'left', 'right' or 'both'"
                );
            }
        }

        // Pick the measuring / cutting primitives once, then share the padding logic.
        if ($encoding === 'UTF-8') {
            $measure = static function (string $text): int {
                return (int) \mb_strlen($text);
            };
            $take = static function (string $text, int $chars): string {
                return (string) \mb_substr($text, 0, $chars);
            };
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            $measure = static function (string $text) use ($encoding): int {
                return (int) self::strlen($text, $encoding);
            };
            $take = static function (string $text, int $chars) use ($encoding): string {
                return (string) self::substr($text, 0, $chars, $encoding);
            };
        }

        $current_length = $measure($str);
        if ($pad_length < $current_length) {
            return $str;
        }

        $missing = $pad_length - $current_length;

        if ($pad_type === \STR_PAD_BOTH) {
            // An odd remainder goes to the right side.
            $left_size = (int) \floor($missing / 2);
            $right_size = (int) \ceil($missing / 2);

            $left = $take(\str_repeat($pad_string, $left_size), $left_size);
            $right = $take(\str_repeat($pad_string, $right_size), $right_size);

            return $left . $str . $right;
        }

        // Repeat the pad string often enough, then cut it to the exact size.
        $pad_unit_length = $measure($pad_string);
        $filler = $take(
            \str_repeat($pad_string, (int) \ceil($missing / $pad_unit_length)),
            $missing
        );

        if ($pad_type === \STR_PAD_LEFT) {
            return $filler . $str;
        }

        // STR_PAD_RIGHT and any unknown integer type pad on the right.
        return $str . $filler;
    }

    /**
     * Returns a new string of a given length such that both sides of the
     * string are padded. Alias for "UTF8::str_pad()" with a $pad_type of 'both'.
     *
     * @param string $str
     * @param int $length

Desired string length after padding.

* @param string $pad_str [optional]

String used to pad, defaults to space. Default: ' '

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The string with padding applied.

*/
    public static function str_pad_both(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: delegate with the pad type fixed to "both sides".
        $pad_type = \STR_PAD_BOTH;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }

    /**
     * Returns a new string of a given length such that the beginning of the
     * string is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'left'.
     *
     * @param string $str
     * @param int $length

Desired string length after padding.

* @param string $pad_str [optional]

String used to pad, defaults to space. Default: ' '

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The string with left padding.

*/
    public static function str_pad_left(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: delegate with the pad type fixed to "left side".
        $pad_type = \STR_PAD_LEFT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }

    /**
     * Returns a new string of a given length such that the end of the string
     * is padded. Alias for "UTF8::str_pad()" with a $pad_type of 'right'.
     *
     * @param string $str
     * @param int $length

Desired string length after padding.

* @param string $pad_str [optional]

String used to pad, defaults to space. Default: ' '

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The string with right padding.

*/
    public static function str_pad_right(
        string $str,
        int $length,
        string $pad_str = ' ',
        string $encoding = 'UTF-8'
    ): string {
        // Thin alias: delegate with the pad type fixed to "right side".
        $pad_type = \STR_PAD_RIGHT;

        return self::str_pad($str, $length, $pad_str, $pad_type, $encoding);
    }

    /**
     * Repeat a string.
     *
     * EXAMPLE: UTF8::str_repeat("°~\xf0\x90\x28\xbc", 2); // '°~ð(¼°~ð(¼'
     *
     * @param string $str

* The string to be repeated. *

* @param int $multiplier

* Number of times the input string should be * repeated. *

* multiplier has to be greater than or equal to 0. * If the multiplier is set to 0, the function * will return an empty string. *

* * @psalm-pure * * @return string *

The repeated string.

*/
    public static function str_repeat(string $str, int $multiplier): string
    {
        // The input is passed through filter() first, then repeated byte-wise.
        return \str_repeat(self::filter($str), $multiplier);
    }

    /**
     * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
     *
     * Replace all occurrences of the search string with the replacement string
     *
     * @see http://php.net/manual/en/function.str-replace.php
     *
     * @param string|string[] $search

* The value being searched for, otherwise known as the needle. * An array may be used to designate multiple needles. *

* @param string|string[] $replace

* The replacement value that replaces found search * values. An array may be used to designate multiple replacements. *

* @param string|string[] $subject

* The string or array of strings being searched and replaced on, * otherwise known as the haystack. *

* If subject is an array, then the search and * replace is performed with every entry of * subject, and the return value is an array as * well. *

* @param int|null $count [optional]

* If passed, this will hold the number of matched and replaced needles. *

* * @psalm-pure * * @return string|string[] *

This function returns a string or an array with the replaced values.

* * @template TStrReplaceSubject * @phpstan-param TStrReplaceSubject $subject * @phpstan-return TStrReplaceSubject * * @deprecated please use \str_replace() instead */
    public static function str_replace(
        $search,
        $replace,
        $subject,
        int &$count = null
    ) {
        // Plain pass-through to the native function; $count is filled by reference.
        /**
         * @psalm-suppress PossiblyNullArgument
         * @phpstan-var TStrReplaceSubject $replaced;
         */
        $replaced = \str_replace($search, $replace, $subject, $count);

        return $replaced;
    }

    /**
     * Replaces $search from the beginning of string with $replacement.
     *
     * @param string $str

The input string.

* @param string $search

The string to search for.

* @param string $replacement

The replacement.

* * @psalm-pure * * @return string *

A string after the replacements.

*/
    public static function str_replace_beginning(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty search term appends the replacement. This also covers every
        // empty-$str case that has an empty $search.
        if ($search === '') {
            return $str . $replacement;
        }

        $prefix_bytes = \strlen($search);

        // Byte-wise prefix comparison.
        if (\strncmp($str, $search, $prefix_bytes) !== 0) {
            return $str;
        }

        return $replacement . \substr($str, $prefix_bytes);
    }

    /**
     * Replaces $search from the ending of string with $replacement.
     *
     * @param string $str

The input string.

* @param string $search

The string to search for.

* @param string $replacement

The replacement.

* * @psalm-pure * * @return string *

A string after the replacements.

*/
    public static function str_replace_ending(
        string $str,
        string $search,
        string $replacement
    ): string {
        // An empty search term appends the replacement. This also covers every
        // empty-$str case that has an empty $search.
        if ($search === '') {
            return $str . $replacement;
        }

        $search_length = \strlen($search);

        // A search term longer than the string can never be its ending. Guarding
        // here also avoids the out-of-range negative offset that used to be
        // handed to strpos() (a ValueError on PHP 8).
        if ($search_length > \strlen($str)) {
            return $str;
        }

        // Byte-wise comparison of the last $search_length bytes.
        if (\substr_compare($str, $search, -$search_length) === 0) {
            return \substr($str, 0, -$search_length) . $replacement;
        }

        return $str;
    }

    /**
     * Replace the first "$search"-term with the "$replace"-term.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_first(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strpos($subject, $search);

        // Term not found: hand the subject back untouched.
        if ($pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );
    }

    /**
     * Replace the last "$search"-term with the "$replace"-term.
     *
     * @param string $search
     * @param string $replace
     * @param string $subject
     *
     * @psalm-pure
     *
     * @return string
     *
     * @psalm-suppress InvalidReturnType
     */
    public static function str_replace_last(
        string $search,
        string $replace,
        string $subject
    ): string {
        $pos = self::strrpos($subject, $search);

        // Term not found: hand the subject back untouched.
        if ($pos === false) {
            return $subject;
        }

        /**
         * @psalm-suppress InvalidReturnStatement
         */
        return self::substr_replace(
            $subject,
            $replace,
            $pos,
            (int) self::strlen($search)
        );
    }

    /**
     * Shuffles all the characters in the string.
     *
     * INFO: uses random algorithm which is weak for cryptography purposes
     *
     * EXAMPLE: UTF8::str_shuffle('fòô bàř fòô'); // 'àòôřb ffòô '
     *
     * @param string $str

The input string

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @return string *

The shuffled string.

*/
    public static function str_shuffle(string $str, string $encoding = 'UTF-8'): string
    {
        $is_utf8 = $encoding === 'UTF-8';

        if ($is_utf8) {
            $length = (int) \mb_strlen($str);
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
            $length = (int) self::strlen($str, $encoding);
        }

        // range(0, -1) counts downwards and yields [0, -1], so an empty input
        // must not reach the index generation below.
        if ($length <= 0) {
            return '';
        }

        $indexes = \range(0, $length - 1);
        /** @noinspection NonSecureShuffleUsageInspection */
        \shuffle($indexes);

        $shuffled_str = '';
        // Iterate by value: nothing is written back to $indexes.
        foreach ($indexes as $index) {
            $char = $is_utf8
                ? \mb_substr($str, $index, 1)
                : self::substr($str, $index, 1, $encoding);

            if ($char !== false) {
                $shuffled_str .= $char;
            }
        }

        return $shuffled_str;
    }

    /**
     * Returns the substring beginning at $start, and up to, but not including
     * the index specified by $end. If $end is omitted, the function extracts
     * the remaining string. If $end is negative, it is computed from the end
     * of the string.
     *
     * @param string $str
     * @param int $start

Initial index from which to begin extraction.

* @param int|null $end [optional]

Index at which to end extraction. Default: null

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return false|string *

The extracted substring.

If str is shorter than start * characters long, FALSE will be returned. */
    public static function str_slice(
        string $str,
        int $start,
        int $end = null,
        string $encoding = 'UTF-8'
    ) {
        $is_utf8 = $encoding === 'UTF-8';
        if (!$is_utf8) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // A non-negative end at or before the start selects nothing.
        if ($end !== null && $end >= 0 && $end <= $start) {
            return '';
        }

        if ($end === null || $end < 0) {
            $total = $is_utf8
                ? (int) \mb_strlen($str)
                : (int) self::strlen($str, $encoding);

            // No end: the full length is always enough to reach the end of the
            // string. Negative end: count it back from the end.
            $length = $end === null ? $total : $total + $end - $start;
        } else {
            $length = $end - $start;
        }

        if ($is_utf8) {
            return \mb_substr($str, $start, $length);
        }

        return self::substr($str, $start, $length, $encoding);
    }

    /**
     * Convert a string to e.g.: "snake_case"
     *
     * @param string $str
     * @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

A string in snake_case.

*/
    public static function str_snakeize(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // Dashes become underscores after the whitespace has been normalized.
        $str = \str_replace(
            '-',
            '_',
            self::normalize_whitespace($str)
        );

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // Prefix every digit / upper-case letter with "_". The class must not
        // contain "|": inside [...] it is a literal pipe, not an alternation,
        // and would add an underscore in front of every "|" in the input.
        $str = (string) \preg_replace_callback(
            '/([\\p{N}\\p{Lu}])/u',
            /**
             * @param string[] $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches) use ($encoding): string {
                $match = $matches[1];
                $match_int = (int) $match;

                // A digit that round-trips through int is fenced on both sides.
                if ((string) $match_int === $match) {
                    return '_' . $match . '_';
                }

                if ($encoding === 'UTF-8') {
                    return '_' . \mb_strtolower($match);
                }

                return '_' . self::strtolower($match, $encoding);
            },
            $str
        );

        $str = (string) \preg_replace(
            [
                '/\\s+/u',           // convert spaces to "_"
                '/^\\s+|\\s+$/u', // trim leading & trailing spaces
                '/_+/',                 // remove double "_"
            ],
            [
                '_',
                '',
                '_',
            ],
            $str
        );

        return \trim(\trim($str, '_')); // trim leading & trailing "_" + whitespace
    }

    /**
     * Sort all characters according to code points.
     *
     * EXAMPLE: UTF8::str_sort('  -ABC-中文空白-  '); // '    ---ABC中文白空'
     *
     * @param string $str

A UTF-8 string.

* @param bool $unique

Sort unique. If true, repeated characters are ignored.

* @param bool $desc

If true, will sort characters in reverse code point order.

* * @psalm-pure * * @return string *

A string of sorted characters.

*/
    public static function str_sort(string $str, bool $unique = false, bool $desc = false): string
    {
        $code_points = self::codepoints($str);

        // Flipping twice removes duplicate values.
        if ($unique) {
            $code_points = \array_flip(\array_flip($code_points));
        }

        if (!$desc) {
            \asort($code_points);
        } else {
            \arsort($code_points);
        }

        return self::string($code_points);
    }

    /**
     * Convert a string to an array of Unicode characters.
     *
     * EXAMPLE:


     * UTF8::str_split_array(['中文空白', 'test'], 2); // [['中文', '空白'], ['te', 'st']]
     *

* * @param int[]|string[] $input

The string[] or int[] to split into array.

* @param int $length [optional]

Max character length of each array * element.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the * string.

* @param bool $try_to_use_mb_functions [optional]

Set to false, if you don't want to use * "mb_substr"

* * @psalm-pure * * @return string[][] *

An array containing chunks of the input.

*/
    public static function str_split_array(
        array $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        // Split every element on its own; array_map() keeps the original keys.
        /** @var string[][] $chunks */
        $chunks = \array_map(
            static function ($element) use ($length, $clean_utf8, $try_to_use_mb_functions) {
                return self::str_split(
                    $element,
                    $length,
                    $clean_utf8,
                    $try_to_use_mb_functions
                );
            },
            $input
        );

        return $chunks;
    }

    /**
     * Convert a string to an array of unicode characters.
     *
     * EXAMPLE: UTF8::str_split('中文空白'); // array('中', '文', '空', '白')
     *
     * @param int|string $input

The string or int to split into array.

* @param int $length [optional]

Max character length of each array * element.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the * string.

* @param bool $try_to_use_mb_functions [optional]

Set to false, if you don't want to use * "mb_substr"

* * @psalm-pure * * @return string[] *

An array containing chunks of chars from the input.

* * @noinspection SuspiciousBinaryOperationInspection * @noinspection OffsetOperationsInspection */
    public static function str_split(
        $input,
        int $length = 1,
        bool $clean_utf8 = false,
        bool $try_to_use_mb_functions = true
    ): array {
        if ($length <= 0) {
            return [];
        }

        // this is only an old fallback: arrays are delegated element by element
        /** @var int|int[]|string|string[] $input */
        if (\is_array($input)) {
            /**
             * @psalm-suppress InvalidReturnStatement
             */
            return self::str_split_array(
                $input,
                $length,
                $clean_utf8,
                $try_to_use_mb_functions
            );
        }

        $input = (string) $input;
        if ($input === '') {
            return [];
        }

        if ($clean_utf8) {
            $input = self::clean($input);
        }

        if ($try_to_use_mb_functions && self::$SUPPORT['mbstring'] === true) {
            // Fast path: the native splitter already honours $length.
            if (\function_exists('mb_str_split')) {
                /**
                 * @psalm-suppress ImpureFunctionCall - why?
                 */
                $native = \mb_str_split($input, $length);
                if ($native !== false) {
                    return $native;
                }
            }

            $char_count = \mb_strlen($input);
            if ($char_count <= 127) {
                // Short strings are split with one mb_substr() call per character.
                $chars = [];
                for ($pos = 0; $pos < $char_count; ++$pos) {
                    $chars[] = \mb_substr($input, $pos, 1);
                }
            } else {
                $matches = [];
                \preg_match_all('/./us', $input, $matches);
                $chars = $matches[0] ?? [];
            }
        } elseif (self::$SUPPORT['pcre_utf8'] === true) {
            $matches = [];
            \preg_match_all('/./us', $input, $matches);
            $chars = $matches[0] ?? [];
        } else {
            // fallback: decode the UTF-8 byte sequences by hand
            $chars = [];
            $byte_count = \strlen($input);

            // true if the byte has the 10xxxxxx continuation-byte layout
            $is_continuation = static function (string $byte): bool {
                return ($byte & "\xC0") === "\x80";
            };

            /** @noinspection ForeachInvariantsInspection */
            for ($i = 0; $i < $byte_count; ++$i) {
                $lead = $input[$i];

                if (($lead & "\x80") === "\x00") {
                    // single byte (ASCII)
                    $chars[] = $lead;
                } elseif (
                    isset($input[$i + 1])
                    &&
                    ($lead & "\xE0") === "\xC0"
                ) {
                    // two-byte sequence; a lead byte without a valid tail is dropped
                    if ($is_continuation($input[$i + 1])) {
                        $chars[] = $lead . $input[$i + 1];
                        ++$i;
                    }
                } elseif (
                    isset($input[$i + 2])
                    &&
                    ($lead & "\xF0") === "\xE0"
                ) {
                    // three-byte sequence
                    if (
                        $is_continuation($input[$i + 1])
                        &&
                        $is_continuation($input[$i + 2])
                    ) {
                        $chars[] = $lead . $input[$i + 1] . $input[$i + 2];
                        $i += 2;
                    }
                } elseif (
                    isset($input[$i + 3])
                    &&
                    ($lead & "\xF8") === "\xF0"
                ) {
                    // four-byte sequence
                    if (
                        $is_continuation($input[$i + 1])
                        &&
                        $is_continuation($input[$i + 2])
                        &&
                        $is_continuation($input[$i + 3])
                    ) {
                        $chars[] = $lead . $input[$i + 1] . $input[$i + 2] . $input[$i + 3];
                        $i += 3;
                    }
                }
            }
        }

        // Group the single characters into chunks of the requested size.
        if ($length > 1) {
            return \array_map(
                static function (array $item): string {
                    return \implode('', $item);
                },
                \array_chunk($chars, $length)
            );
        }

        if (isset($chars[0]) && $chars[0] === '') {
            return [];
        }

        return $chars;
    }

    /**
     * Splits the string with the provided regular expression, returning an
     * array of strings. An optional integer $limit will truncate the
     * results.
     *
     * @param string $str
     * @param string $pattern

The regex with which to split the string.

* @param int $limit [optional]

Maximum number of results to return. Default: -1 === no limit

* * @psalm-pure * * @return string[] *

An array of strings.

*/
    public static function str_split_pattern(string $str, string $pattern, int $limit = -1): array
    {
        if ($limit === 0) {
            return [];
        }

        if ($pattern === '') {
            return [$str];
        }

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $parts = \mb_split($pattern, $str);

            // Negative limit: no truncation.
            if ($limit < 0) {
                return $parts;
            }

            // Keep only the first $limit pieces; the remainder is dropped.
            $kept = [];
            foreach ($parts as $part) {
                if (\count($kept) === $limit) {
                    break;
                }

                $kept[] = $part;
            }

            return $kept;
        }

        // Without mbstring the pattern is quoted, i.e. matched literally.
        // Ask for one extra piece so the unsplit remainder can be popped off below.
        $pcre_limit = $limit > 0 ? $limit + 1 : -1;

        $pieces = \preg_split('/' . \preg_quote($pattern, '/') . '/u', $str, $pcre_limit);
        if ($pieces === false) {
            return [];
        }

        if ($pcre_limit > 0 && \count($pieces) === $pcre_limit) {
            \array_pop($pieces);
        }

        return $pieces;
    }

    /**
     * Check if the string starts with the given substring.
     *
     * EXAMPLE:


     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'Κόσμε'); // true
     * UTF8::str_starts_with('ΚόσμεMiddleEnd', 'κόσμε'); // false
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* * @psalm-pure * * @return bool */
    public static function str_starts_with(string $haystack, string $needle): bool
    {
        // Every string starts with the empty string ...
        if ($needle === '') {
            return true;
        }

        // ... and an empty string cannot start with anything else.
        if ($haystack === '') {
            return false;
        }

        // Older PHP versions get the byte-wise prefix comparison.
        if (\PHP_VERSION_ID < 80000) {
            return \strncmp($haystack, $needle, \strlen($needle)) === 0;
        }

        /** @phpstan-ignore-next-line - only for PHP8 */
        return \str_starts_with($haystack, $needle);
    }

    /**
     * Returns true if the string begins with any of $substrings, false otherwise.
     *
     * - case-sensitive
     *
     * @param string $str

The input string.

* @param array $substrings

Substrings to look for.

* * @psalm-pure * * @return bool *

Whether or not $str starts with $substring.

*/
    public static function str_starts_with_any(string $str, array $substrings): bool
    {
        // Neither an empty subject nor an empty candidate list can match.
        if ($str === '' || $substrings === []) {
            return false;
        }

        foreach ($substrings as $candidate) {
            if (self::str_starts_with($str, $candidate)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Gets the substring after the first occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_after_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($separator === '' || $str === '') {
            return '';
        }

        if ($encoding === 'UTF-8') {
            $offset = \mb_strpos($str, $separator);
            if ($offset === false) {
                return '';
            }

            // Skip the separator itself and return everything behind it.
            return (string) \mb_substr(
                $str,
                $offset + (int) \mb_strlen($separator)
            );
        }

        $offset = self::strpos($str, $separator, 0, $encoding);
        if ($offset === false) {
            return '';
        }

        // Use the class' own substr() here, like the other separator helpers,
        // instead of calling mb_substr() directly with the caller's encoding.
        return (string) self::substr(
            $str,
            $offset + (int) self::strlen($separator, $encoding),
            null,
            $encoding
        );
    }

    /**
     * Gets the substring after the last occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_after_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $last_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($last_pos === false) {
                return '';
            }

            // Start right behind the separator and read to the end.
            $from = $last_pos + (int) self::strlen($separator, $encoding);

            return (string) self::substr($str, $from, null, $encoding);
        }

        $last_pos = \mb_strrpos($str, $separator);
        if ($last_pos === false) {
            return '';
        }

        // Start right behind the separator and read to the end.
        $from = $last_pos + (int) \mb_strlen($separator);

        return (string) \mb_substr($str, $from);
    }

    /**
     * Gets the substring before the first occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_before_first_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $first_pos = self::strpos($str, $separator, 0, $encoding);
            if ($first_pos === false) {
                return '';
            }

            // Everything in front of the separator.
            return (string) self::substr($str, 0, $first_pos, $encoding);
        }

        $first_pos = \mb_strpos($str, $separator);
        if ($first_pos === false) {
            return '';
        }

        // Everything in front of the separator.
        return (string) \mb_substr($str, 0, $first_pos);
    }

    /**
     * Gets the substring before the last occurrence of a separator.
     *
     * @param string $str

The input string.

* @param string $separator

The string separator.

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_before_last_separator(
        string $str,
        string $separator,
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '' || $separator === '') {
            return '';
        }

        if ($encoding !== 'UTF-8') {
            $last_pos = self::strrpos($str, $separator, 0, $encoding);
            if ($last_pos === false) {
                return '';
            }

            // The encoding is normalized only for the final cut.
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            return (string) self::substr($str, 0, $last_pos, $encoding);
        }

        $last_pos = \mb_strrpos($str, $separator);
        if ($last_pos === false) {
            return '';
        }

        // Everything in front of the last separator.
        return (string) \mb_substr($str, 0, $last_pos);
    }

    /**
     * Gets the substring after (or before via "$before_needle") the first occurrence of the "$needle".
     *
     * @param string $str

The input string.

* @param string $needle

The string to look for.

* @param bool $before_needle [optional]

Default: false

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_first(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strstr() already defaults $before_needle to false, so the flag
        // can be handed through directly.
        $found = $encoding === 'UTF-8'
            ? \mb_strstr($str, $needle, $before_needle)
            : self::strstr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }

    /**
     * Gets the substring after (or before via "$before_needle") the last occurrence of the "$needle".
     *
     * @param string $str

The input string.

* @param string $needle

The string to look for.

* @param bool $before_needle [optional]

Default: false

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string */
    public static function str_substr_last(
        string $str,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8'
    ): string {
        if ($needle === '' || $str === '') {
            return '';
        }

        // mb_strrchr() already defaults $before_needle to false, so the flag
        // can be handed through directly.
        $found = $encoding === 'UTF-8'
            ? \mb_strrchr($str, $needle, $before_needle)
            : self::strrchr($str, $needle, $before_needle, $encoding);

        if ($found === false) {
            return '';
        }

        return $found;
    }

    /**
     * Surrounds $str with the given substring.
     *
     * @param string $str
     * @param string $substring

The substring to add to both sides.

* * @psalm-pure * * @return string *

A string with the substring both prepended and appended.

*/
    public static function str_surround(string $str, string $substring): string
    {
        // The same substring goes in front of and behind the input.
        return "{$substring}{$str}{$substring}";
    }

    /**
     * Returns a trimmed string with the first letter of each word capitalized.
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * @param string $str
     * @param array|string[]|null $ignore [optional]

An array of words not to capitalize or * null. Default: null

* @param string $encoding [optional]

Default: 'UTF-8'

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the * string.

* @param string|null $lang [optional]

Set the language for special cases: az, * el, lt, tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: * e.g. ẞ -> ß

* @param bool $use_trim_first [optional]

true === trim the input string, * first

* @param string|null $word_define_chars [optional]

A string of chars that will be used as * whitespace separator === words.

* * @psalm-pure * * @return string *

The titleized string.

* * @noinspection PhpTooManyParametersInspection */
    public static function str_titleize(
        string $str,
        array $ignore = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false,
        bool $use_trim_first = true,
        string $word_define_chars = null
    ): string {
        if ($str === '') {
            return '';
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($use_trim_first) {
            $str = \trim($str);
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // The plain mb_* functions are only used when no language specific or
        // length preserving case mapping was requested.
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        // Compare explicitly instead of relying on truthiness: the string '0'
        // is falsy in PHP, but it is a valid word separator.
        if ($word_define_chars !== null && $word_define_chars !== '') {
            $word_define_chars = \preg_quote($word_define_chars, '/');
        } else {
            $word_define_chars = '';
        }

        // A "word" is every run of characters that is neither whitespace nor
        // one of the extra separator characters.
        $str = (string) \preg_replace_callback(
            '/([^\\s' . $word_define_chars . ']+)/u',
            static function (array $match) use ($try_to_keep_the_string_length, $lang, $ignore, $use_mb_functions, $encoding): string {
                // Ignored words are returned exactly as they were written.
                if ($ignore !== null && \in_array($match[0], $ignore, true)) {
                    return $match[0];
                }

                if ($use_mb_functions) {
                    if ($encoding === 'UTF-8') {
                        return \mb_strtoupper(\mb_substr($match[0], 0, 1))
                               . \mb_strtolower(\mb_substr($match[0], 1));
                    }

                    return \mb_strtoupper(\mb_substr($match[0], 0, 1, $encoding), $encoding)
                           . \mb_strtolower(\mb_substr($match[0], 1, null, $encoding), $encoding);
                }

                return self::ucfirst(
                    self::strtolower(
                        $match[0],
                        $encoding,
                        false,
                        $lang,
                        $try_to_keep_the_string_length
                    ),
                    $encoding,
                    false,
                    $lang,
                    $try_to_keep_the_string_length
                );
            },
            $str
        );

        return $str;
    }

    /**
     * Convert a string into an obfuscated string.
     *
     * EXAMPLE:


     *
     * UTF8::str_obfuscate('lars@moelleken.org', 0.5, '*', ['@', '.']); // e.g. "l***@m**lleke*.*r*"
     *

* * @param string $str * @param float $percent * @param string $obfuscateChar * @param string[] $keepChars * * @psalm-pure * * @return string *

The obfuscate string.

*/
    public static function str_obfuscate(
        string $str,
        float $percent = 0.5,
        string $obfuscateChar = '*',
        array $keepChars = []
    ): string {
        // Occurrences of the obfuscation char that are already in the input are
        // swapped for a helper char (U+2603) and restored right before returning.
        $obfuscateCharHelper = "\u{2603}";
        $str = \str_replace($obfuscateChar, $obfuscateCharHelper, $str);

        $chars = self::chars($str);
        $charsMax = \count($chars);
        $charsMaxChange = \round($charsMax * $percent);
        if ($charsMaxChange > $charsMax) {
            // A $percent above 1.0 asks for more chars than exist. Without this cap
            // the counter below can never reach the target and the loop never ends.
            $charsMaxChange = $charsMax;
        }

        $charsCounter = 0;
        $charKeyDone = [];

        // Keep sweeping over the chars, picking roughly every second one at random,
        // until the requested number of chars has been processed.
        while ($charsCounter < $charsMaxChange) {
            foreach ($chars as $charKey => $char) {
                if (isset($charKeyDone[$charKey])) {
                    continue;
                }

                if (\random_int(0, 100) > 50) {
                    continue;
                }

                if ($char === $obfuscateChar) {
                    continue;
                }

                ++$charsCounter;
                $charKeyDone[$charKey] = true;

                if ($charsCounter > $charsMaxChange) {
                    break;
                }

                // chars from $keepChars count as processed, but stay readable
                if (\in_array($char, $keepChars, true)) {
                    continue;
                }

                $chars[$charKey] = $obfuscateChar;
            }
        }

        $str = \implode('', $chars);

        return \str_replace($obfuscateCharHelper, $obfuscateChar, $str);
    }

    /**
     * Returns a trimmed string in proper title case.
     *
     * Also accepts an array, $ignore, allowing you to list words not to be
     * capitalized.
     *
     * Adapted from John Gruber's script.
     *
     * @see https://gist.github.com/gruber/9f9e8650d68b13ce4d78
     *
     * @param string $str
     * @param array $ignore

An array of words not to capitalize.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The titleized string.

*/ public static function str_titleize_for_humans( string $str, array $ignore = [], string $encoding = 'UTF-8' ): string { if ($str === '') { return ''; } $small_words = [ '(? In-Flight $str = (string) \preg_replace_callback( '~\\b (? "Stand-In" (Stand is already capped at this point) $str = (string) \preg_replace_callback( '~\\b (?UTF8::str_to_binary('😃'); // '11110000100111111001100010000011' * * @param string $str

The input string.

* * @psalm-pure * * @return false|string *

false on error

*/
    public static function str_to_binary(string $str)
    {
        /** @var array|false $value - needed for PhpStan (stubs error) */
        $value = \unpack('H*', $str);
        if ($value === false) {
            return false;
        }

        // Convert one hex digit (4 bits) at a time. "base_convert()" on the whole
        // hex string goes through a float and silently loses precision as soon as
        // the input is longer than a few bytes.
        /** @noinspection OffsetOperationsInspection */
        $hex = (string) $value[1];
        $hex_length = \strlen($hex);
        $bits = '';
        for ($i = 0; $i < $hex_length; ++$i) {
            $bits .= \str_pad(\decbin((int) \hexdec($hex[$i])), 4, '0', \STR_PAD_LEFT);
        }

        // same output shape as before: no leading zeros, "0" for an all-zero / empty input
        $bits = \ltrim($bits, '0');

        return $bits === '' ? '0' : $bits;
    }

    /**
     * @param string $str
     * @param bool $remove_empty_values

Remove empty values.

* @param int|null $remove_short_values

The min. string length or null to disable

* * @psalm-pure * * @return string[] */
    public static function str_to_lines(string $str, bool $remove_empty_values = false, int $remove_short_values = null): array
    {
        // result used for empty input and for a failed split
        $nothing_found = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing_found;
        }

        // one or two consecutive CR / LF bytes are treated as a single line break
        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpComposerExtensionStubsInspection */
            $lines = \mb_split("[\r\n]{1,2}", $str);
        } else {
            $lines = \preg_split("/[\r\n]{1,2}/u", $str);
        }

        if ($lines === false) {
            return $nothing_found;
        }

        if ($remove_empty_values || $remove_short_values !== null) {
            return self::reduce_string_array(
                $lines,
                $remove_empty_values,
                $remove_short_values
            );
        }

        return $lines;
    }

    /**
     * Convert a string into an array of words.
     *
     * EXAMPLE: UTF8::str_to_words('中文空白 oöäü#s', '#') // array('', '中文空白', ' ', 'oöäü#s', '')
     *
     * @param string $str
     * @param string $char_list

Additional chars for the definition of "words".

* @param bool $remove_empty_values

Remove empty values.

* @param int|null $remove_short_values

The min. string length or null to disable

* * @psalm-pure * * @return string[] */
    public static function str_to_words(
        string $str,
        string $char_list = '',
        bool $remove_empty_values = false,
        int $remove_short_values = null
    ): array {
        $nothing_found = $remove_empty_values ? [] : [''];

        if ($str === '') {
            return $nothing_found;
        }

        // character class for "word" chars: letters plus the additional chars
        $word_class = self::rxClass($char_list, '\pL');

        // split on words; the captured delimiters (the words themselves) are kept,
        // so words and the text between them alternate in the result
        $parts = \preg_split(
            "/({$word_class}+(?:[\p{Pd}’']{$word_class}+)*)/u",
            $str,
            -1,
            \PREG_SPLIT_DELIM_CAPTURE
        );
        if ($parts === false) {
            return $nothing_found;
        }

        if (!$remove_empty_values && $remove_short_values === null) {
            return $parts;
        }

        $reduced = self::reduce_string_array(
            $parts,
            $remove_empty_values,
            $remove_short_values
        );

        // make sure that every remaining entry is a string
        foreach ($reduced as $key => $part) {
            $reduced[$key] = (string) $part;
        }

        return $reduced;
    }

    /**
     * alias for "UTF8::to_ascii()"
     *
     * @param string $str
     * @param string $unknown
     * @param bool $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated

please use "UTF8::to_ascii()"

*/
    public static function str_transliterate(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        // deprecated alias: forwards all arguments unchanged to "UTF8::to_ascii()"
        return self::to_ascii($str, $unknown, $strict);
    }

    /**
     * Truncates the string to a given length. If $substring is provided, and
     * truncating occurs, the string is further truncated so that the substring
     * may be appended without exceeding the desired length.
     *
     * @param string $str
     * @param int $length

Desired length of the truncated string.

* @param string $substring [optional]

The substring to append if it can fit. Default: ''

* @param string $encoding [optional]

Default: 'UTF-8'

* * @psalm-pure * * @return string *

A string after truncating.

*/
    public static function str_truncate(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8'
    ): string {
        if ($str === '') {
            return '';
        }

        // A non-negative $length must not turn negative once the room for
        // $substring is reserved: a negative length would make "substr" cut
        // from the end of the string instead, and the result would then be
        // longer than requested. A negative $length passed in by the caller
        // keeps its previous meaning.
        $requested_length = $length;

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            if ($substring !== '') {
                $length -= (int) \mb_strlen($substring);
                if ($requested_length >= 0 && $length < 0) {
                    $length = 0;
                }

                /** @noinspection UnnecessaryCastingInspection */
                return (string) \mb_substr($str, 0, $length) . $substring;
            }

            /** @noinspection UnnecessaryCastingInspection */
            return (string) \mb_substr($str, 0, $length);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        if ($substring !== '') {
            $length -= (int) self::strlen($substring, $encoding);
            if ($requested_length >= 0 && $length < 0) {
                $length = 0;
            }
        }

        return (
               (string) self::substr(
                   $str,
                   0,
                   $length,
                   $encoding
               )
               ) . $substring;
    }

    /**
     * Truncates the string to a given length, while ensuring that it does not
     * split words. If $substring is provided, and truncating occurs, the
     * string is further truncated so that the substring may be appended without
     * exceeding the desired length.
     *
     * @param string $str
     * @param int $length

Desired length of the truncated string.

* @param string $substring [optional]

The substring to append if it can fit. * Default: * ''

* @param string $encoding [optional]

Default: 'UTF-8'

* @param bool $ignore_do_not_split_words_for_one_word [optional]

Default: false

* * @psalm-pure * * @return string *

A string after truncating.

*/
    public static function str_truncate_safe(
        string $str,
        int $length,
        string $substring = '',
        string $encoding = 'UTF-8',
        bool $ignore_do_not_split_words_for_one_word = false
    ): string {
        if ($str === '' || $length <= 0) {
            return $substring;
        }

        if ($encoding === 'UTF-8') {
            if ($length >= (int) \mb_strlen($str)) {
                return $str;
            }

            // reserve room for the substring that gets appended
            $length -= (int) \mb_strlen($substring);
            if ($length <= 0) {
                return $substring;
            }

            /** @var false|string $head - needed for PhpStan (stubs error) */
            $head = \mb_substr($str, 0, $length);
            if ($head === false) {
                return '';
            }

            // no space right behind the cut => the last word was split
            $next_space = \mb_strpos($str, ' ', $length - 1);
            if ($next_space !== $length) {
                // cut back to the last space inside the truncated part
                $last_space = \mb_strrpos($head, ' ', 0);

                $cut_back = $last_space !== false
                            || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
                if ($cut_back) {
                    $head = (string) \mb_substr($head, 0, (int) $last_space);
                }
            }

            return $head . $substring;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($length >= (int) self::strlen($str, $encoding)) {
            return $str;
        }

        // reserve room for the substring that gets appended
        $length -= (int) self::strlen($substring, $encoding);
        if ($length <= 0) {
            return $substring;
        }

        $head = self::substr($str, 0, $length, $encoding);
        if ($head === false) {
            return '';
        }

        // no space right behind the cut => the last word was split
        $next_space = self::strpos($str, ' ', $length - 1, $encoding);
        if ($next_space !== $length) {
            // cut back to the last space inside the truncated part
            $last_space = self::strrpos($head, ' ', 0, $encoding);

            $cut_back = $last_space !== false
                        || ($next_space !== false && !$ignore_do_not_split_words_for_one_word);
            if ($cut_back) {
                $head = (string) self::substr($head, 0, (int) $last_space, $encoding);
            }
        }

        return $head . $substring;
    }

    /**
     * Returns a lowercase and trimmed string separated by underscores.
     * Underscores are inserted before uppercase characters (with the exception
     * of the first character of the string), and in place of spaces as well as
     * dashes.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *

The underscored string.

*/
    public static function str_underscored(string $str): string
    {
        // thin wrapper: "UTF8::str_delimit()" with "_" as the delimiter
        return self::str_delimit($str, '_');
    }

    /**
     * Returns an UpperCamelCase version of the supplied string. It trims
     * surrounding spaces, capitalizes letters following digits, spaces, dashes
     * and underscores, and removes spaces, dashes, underscores.
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Default: 'UTF-8'

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

* * @psalm-pure * * @return string *

A string in UpperCamelCase.

*/
    public static function str_upper_camelize(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // camelCase first, then upper-case the very first character
        $camelized = self::str_camelize($str, $encoding);

        return self::ucfirst(
            $camelized,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    /**
     * alias for "UTF8::ucfirst()"
     *
     * @param string $str
     * @param string $encoding
     * @param bool $clean_utf8
     * @param string|null $lang
     * @param bool $try_to_keep_the_string_length
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated

please use "UTF8::ucfirst()"

*/
    public static function str_upper_first(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // deprecated alias: forwards all arguments unchanged to "UTF8::ucfirst()"
        return self::ucfirst(
            $str,
            $encoding,
            $clean_utf8,
            $lang,
            $try_to_keep_the_string_length
        );
    }

    /**
     * Get the number of words in a specific string.
     *
     * EXAMPLES:


     * // format: 0 -> return only word count (int)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c'); // 4
     * UTF8::str_word_count('中文空白 öäü abc#c', 0, '#'); // 3
     *
     * // format: 1 -> return words (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 1); // array('中文空白', 'öäü', 'abc', 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 1, '#'); // array('中文空白', 'öäü', 'abc#c')
     *
     * // format: 2 -> return words with offset (array)
     * //
     * UTF8::str_word_count('中文空白 öäü abc#c', 2); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc', 13 => 'c')
     * UTF8::str_word_count('中文空白 öäü abc#c', 2, '#'); // array(0 => '中文空白', 5 => 'öäü', 9 => 'abc#c')
     *

* * @param string $str

The input string.

* @param int $format [optional]

* 0 => return a number of words (default)
* 1 => return an array of words
* 2 => return an array of words with word-offset as key *

* @param string $char_list [optional]

Additional chars that belong to words and do not start a new word.

* * @psalm-pure * * @return int|string[] *

The number of words in the string.

*/
    public static function str_word_count(string $str, int $format = 0, string $char_list = '')
    {
        // the words sit at the odd indexes, the text between them at the even ones
        $parts = self::str_to_words($str, $char_list);
        $part_count = \count($parts);

        if ($format === 1) {
            $words = [];
            for ($index = 1; $index < $part_count; $index += 2) {
                $words[] = $parts[$index];
            }

            return $words;
        }

        if ($format === 2) {
            $words = [];
            // the first word starts behind the leading non-word part
            $offset = (int) self::strlen($parts[0]);
            for ($index = 1; $index < $part_count; $index += 2) {
                $word = $parts[$index];
                $words[$offset] = $word;

                // skip the word itself and the non-word part that follows it
                $offset += (int) self::strlen($word) + (int) self::strlen($parts[$index + 1]);
            }

            return $words;
        }

        return (int) (($part_count - 1) / 2);
    }

    /**
     * Case-insensitive string comparison.
     *
     * INFO: Case-insensitive version of UTF8::strcmp()
     *
     * EXAMPLE: UTF8::strcasecmp("iñtërnâtiôn\nàlizætiøn", "Iñtërnâtiôn\nàlizætiøn"); // 0
     *
     * @param string $str1

The first string.

* @param string $str2

The second string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int * < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2,
* 0 if they are equal */
    public static function strcasecmp(
        string $str1,
        string $str2,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both sides first, then compare them case-sensitively
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strcmp($folded1, $folded2);
    }

    /**
     * alias for "UTF8::strstr()"
     *
     * @param string $haystack
     * @param string $needle
     * @param bool $before_needle
     * @param string $encoding
     * @param bool $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see UTF8::strstr()
     * @deprecated

please use "UTF8::strstr()"

*/
    public static function strchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: forwards all arguments unchanged to "UTF8::strstr()"
        return self::strstr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $clean_utf8
        );
    }

    /**
     * Case-sensitive string comparison.
     *
     * EXAMPLE: UTF8::strcmp("iñtërnâtiôn\nàlizætiøn", "iñtërnâtiôn\nàlizætiøn"); // 0
     *
     * @param string $str1

The first string.

* @param string $str2

The second string.

* * @psalm-pure * * @return int * < 0 if str1 is less than str2
* > 0 if str1 is greater than str2
* 0 if they are equal */
    public static function strcmp(string $str1, string $str2): int
    {
        if ($str1 === $str2) {
            return 0;
        }

        // Compare the canonically decomposed (NFD) forms, so that equivalent
        // sequences (e.g. precomposed vs. combining accents) compare as equal.
        $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
        $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

        // "Normalizer::normalize()" returns false on failure (e.g. invalid UTF-8);
        // fall back to the raw input instead of handing false to "\strcmp()".
        return \strcmp(
            $normalized1 === false ? $str1 : $normalized1,
            $normalized2 === false ? $str2 : $normalized2
        );
    }

    /**
     * Find length of initial segment not matching mask.
     *
     * @param string $str
     * @param string $char_list
     * @param int $offset
     * @param int|null $length
     * @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int */
    public static function strcspn(
        string $str,
        string $char_list,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ): int {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // an empty mask can not match anything => the whole string counts
        if ($char_list === '') {
            return (int) self::strlen($str, $encoding);
        }

        // reduce the input to the requested window first
        if ($offset || $length !== null) {
            if ($encoding !== 'UTF-8') {
                $window = self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $window = \mb_substr($str, $offset);
            } else {
                $window = \mb_substr($str, $offset, $length);
            }

            if ($window === false) {
                return 0;
            }

            /** @noinspection CallableParameterUseCaseInTypeContextInspection - FP */
            $str = $window;
        }

        if ($str === '') {
            return 0;
        }

        // lazily capture everything in front of the first char from the mask
        $found = [];
        $pattern = '/^(.*?)' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $str, $found)) {
            return (int) self::strlen($str, $encoding);
        }

        $segment_length = self::strlen($found[1], $encoding);

        return $segment_length === false ? 0 : $segment_length;
    }

    /**
     * alias for "UTF8::stristr()"
     *
     * @param string $haystack
     * @param string $needle
     * @param bool $before_needle
     * @param string $encoding
     * @param bool $clean_utf8
     *
     * @psalm-pure
     *
     * @return false|string
     *
     * @see UTF8::stristr()
     * @deprecated

please use "UTF8::stristr()"

*/
    public static function strichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // deprecated alias: forwards all arguments unchanged to "UTF8::stristr()"
        return self::stristr(
            $haystack,
            $needle,
            $before_needle,
            $encoding,
            $clean_utf8
        );
    }

    /**
     * Create a UTF-8 string from code points.
     *
     * INFO: opposite to UTF8::codepoints()
     *
     * EXAMPLE: UTF8::string(array(246, 228, 252)); // 'öäü'
     *
     * @param int|int[]|string|string[] $intOrHex

Integer or Hexadecimal codepoints.

* * @phpstan-param int[]|numeric-string[]|int|numeric-string $intOrHex * * @psalm-pure * * @return string *

A UTF-8 encoded string.

*/
    public static function string($intOrHex): string
    {
        if ($intOrHex === []) {
            return '';
        }

        // a single value is handled like a list with one entry
        $code_points = \is_array($intOrHex) ? $intOrHex : [$intOrHex];

        // build one numeric HTML entity per code point and decode them all at once
        $entities = \implode(
            '',
            \array_map(
                static function ($code_point): string {
                    return '&#' . (int) $code_point . ';';
                },
                $code_points
            )
        );

        return self::html_entity_decode($entities, \ENT_QUOTES | \ENT_HTML5);
    }

    /**
     * Checks if string starts with "BOM" (Byte Order Mark Character) character.
     *
     * EXAMPLE: UTF8::string_has_bom("\xef\xbb\xbf foobar"); // true
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return bool *

* true if the string has BOM at the start,
* false otherwise *

*/
    public static function string_has_bom(string $str): bool
    {
        // "self::$BOM" maps each known BOM byte sequence to its byte length.
        // It is iterated by value: the table is only read here, and a
        // by-reference loop would turn the entries of the shared static
        // array into references for no benefit.
        foreach (self::$BOM as $bom_string => $bom_byte_length) {
            // binary-safe comparison of the first bytes of the input
            if (\strncmp($str, $bom_string, $bom_byte_length) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Strip HTML and PHP tags from a string + clean invalid UTF-8.
     *
     * EXAMPLE: UTF8::strip_tags("κόσμε\xa0\xa1"); // 'κόσμε'
     *
     * @see http://php.net/manual/en/function.strip-tags.php
     *
     * @param string $str

* The input string. *

* @param string|null $allowable_tags [optional]

* You can use the optional second parameter to specify tags which should * not be stripped. *

* HTML comments and PHP tags are also stripped. This is hardcoded and * can not be changed with allowable_tags. *

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return string *

The stripped string.

*/
    public static function strip_tags(
        string $str,
        string $allowable_tags = null,
        bool $clean_utf8 = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            $str = self::clean($str);
        }

        // only hand over the second argument if the caller provided one
        return $allowable_tags === null
            ? \strip_tags($str)
            : \strip_tags($str, $allowable_tags);
    }

    /**
     * Strip all whitespace characters. This includes tabs and newline
     * characters, as well as multibyte whitespace such as the thin space
     * and ideographic space.
     *
     * EXAMPLE: UTF8::strip_whitespace('   Ο     συγγραφέας  '); // 'Οσυγγραφέας'
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function strip_whitespace(string $str): string
    {
        // nothing to strip from an empty string
        return $str === ''
            ? ''
            : (string) \preg_replace('/[[:space:]]+/u', '', $str);
    }

    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * INFO: use UTF8::stripos_in_byte() for the byte-length
     *
     * EXAMPLE: UTF8::stripos('aσσb', 'ΣΣ'); // 1 (σσ == ΣΣ)
     *
     * @see http://php.net/manual/en/function.mb-stripos.php
     *
     * @param string $haystack

The string from which to get the position of the first occurrence of needle.

* @param string $needle

The string to find in haystack.

* @param int $offset [optional]

The position in haystack to start searching.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int * Return the (int) numeric position of the first occurrence of needle in the * haystack string,
or false if needle is not found */
    public static function stripos(
        string $haystack,
        string $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            // PHP >= 8: an empty needle is found at position 0
            return ($is_php8 && $needle === '') ? 0 : false;
        }

        if (!$is_php8 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $haystack = self::clean($haystack);
            $needle = self::clean($needle);
        }

        //
        // via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stripos($haystack, $needle, $offset);
            }

            return \mb_stripos(
                $haystack,
                $needle,
                $offset,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        //
        // fallback via intl
        //

        $can_use_grapheme = $encoding === 'UTF-8' // INFO: "grapheme_stripos()" can't handle other encodings
                            && $offset >= 0 // grapheme_stripos() can't handle negative offset
                            && self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_position = \grapheme_stripos($haystack, $needle, $offset);
            if ($grapheme_position !== false) {
                return $grapheme_position;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \stripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both sides, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding, null, false);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding, null, false);

        return self::strpos($folded_haystack, $folded_needle, $offset, $encoding);
    }

    /**
     * Returns all of haystack starting from and including the first occurrence of needle to the end.
     *
     * EXAMPLE:


     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'NÂT';
     *
     * UTF8::stristr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::stristr($str, $search, true)); // 'iñtër'
     *

* * @param string $haystack

The input string. Must be valid UTF-8.

* @param string $needle

The string to look for. Must be valid UTF-8.

* @param bool $before_needle [optional]

* If TRUE, it returns the part of the * haystack before the first occurrence of the needle (excluding the needle). *

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|string *

A sub-string,
or false if needle is not found.

*/
    public static function stristr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        $is_php8 = \PHP_VERSION_ID >= 80000;

        if ($haystack === '') {
            // PHP >= 8: an empty needle matches an empty haystack
            return ($is_php8 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($needle === '') {
            // PHP >= 8: an empty needle matches at the very beginning
            return $is_php8 ? $haystack : false;
        }

        //
        // via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_stristr($haystack, $needle, $before_needle);
            }

            return \mb_stristr(
                $haystack,
                $needle,
                $before_needle,
                self::normalize_encoding($encoding, 'UTF-8')
            );
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $can_use_grapheme = $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
                            && self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_result = \grapheme_stristr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($needle . $haystack)) {
            return \stristr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: lazily capture everything in front of the needle
        //

        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/usi', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $before = $match[1];
        if ($before_needle) {
            return $before;
        }

        // the rest of the haystack, starting right behind the captured prefix
        $needle_position = (int) self::strlen($before, $encoding);

        return self::substr($haystack, $needle_position, null, $encoding);
    }

    /**
     * Get the string length, not the byte-length!
     *
     * INFO: use UTF8::strwidth() for the char-length
     *
     * EXAMPLE: UTF8::strlen("Iñtërnâtiôn\xE9àlizætiøn")); // 20
     *
     * @see http://php.net/manual/en/function.mb-strlen.php
     *
     * @param string $str

The string being checked for length.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int *

* The number (int) of characters in the string $str having character encoding * $encoding. * (One multi-byte character counted as +1). *
* Can return false, if e.g. mbstring is not installed and we process invalid * chars. *

*/
    public static function strlen(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($str === '') {
            return 0;
        }

        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strlen" and "\iconv_strlen" returns wrong length,
            // if invalid characters are found in $str
            $str = self::clean($str);
        }

        //
        // via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - ignore warnings, it's working anyway */
            return $encoding === 'UTF-8'
                ? @\mb_strlen($str)
                : @\mb_strlen($str, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (\in_array($encoding, ['CP850', 'ASCII'], true)) {
            return \strlen($str);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
            &&
            self::$SUPPORT['iconv'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv
        //

        if (self::$SUPPORT['iconv'] === true) {
            $iconv_length = \iconv_strlen($str, $encoding);
            if ($iconv_length !== false) {
                return $iconv_length;
            }
        }

        //
        // fallback via intl
        //

        $can_use_grapheme = $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
                            && self::$SUPPORT['intl'] === true;
        if ($can_use_grapheme) {
            $grapheme_length = \grapheme_strlen($str);
            if ($grapheme_length !== null) {
                return $grapheme_length;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \strlen($str);
        }

        //
        // fallback via vanilla php: count the single-char matches
        //

        \preg_match_all('/./us', $str, $parts);
        $char_count = \count($parts[0]);

        // the string is not empty here, so zero matches is reported as an error
        return $char_count === 0 ? false : $char_count;
    }

    /**
     * Get string length in byte.
* * @param string $str * * @psalm-pure * * @return int */
    public static function strlen_in_byte(string $str): int
    {
        if ($str === '') {
            return 0;
        }

        // "mb_" is available if overload is used, so use it ... (CP850 === 8-BIT)
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strlen($str, 'CP850')
            : \strlen($str);
    }

    /**
     * Case-insensitive string comparisons using a "natural order" algorithm.
     *
     * INFO: natural order version of UTF8::strcasecmp()
     *
     * EXAMPLES:


     * UTF8::strnatcasecmp('2', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcasecmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcasecmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     *

* * @param string $str1

The first string.

* @param string $str2

The second string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int * < 0 if str1 is less than str2
* > 0 if str1 is greater than str2
* 0 if they are equal */
    public static function strnatcasecmp(string $str1, string $str2, string $encoding = 'UTF-8'): int
    {
        // case-fold both sides first, then compare them in natural order
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        return self::strnatcmp($folded1, $folded2);
    }

    /**
     * String comparisons using a "natural order" algorithm
     *
     * INFO: natural order version of UTF8::strcmp()
     *
     * EXAMPLES:


     * UTF8::strnatcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // -1
     * UTF8::strcmp('2Hello world 中文空白!', '10Hello WORLD 中文空白!'); // 1
     *
     * UTF8::strnatcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // 1
     * UTF8::strcmp('10Hello world 中文空白!', '2Hello WORLD 中文空白!'); // -1
     *

* * @see http://php.net/manual/en/function.strnatcmp.php * * @param string $str1

The first string.

* @param string $str2

The second string.

* * @psalm-pure * * @return int * < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal */
    public static function strnatcmp(string $str1, string $str2): int
    {
        // identical strings need no folding at all
        if ($str1 === $str2) {
            return 0;
        }

        $folded1 = (string) self::strtonatfold($str1);
        $folded2 = (string) self::strtonatfold($str2);

        return \strnatcmp($folded1, $folded2);
    }

    /**
     * Case-insensitive string comparison of the first n characters.
     *
     * EXAMPLE:


     * UTF8::strncasecmp("iñtërnâtiôn\nàlizætiøn321", "iñtërnâtiôn\nàlizætiøn123", 5); // 0
     *

* * @see http://php.net/manual/en/function.strncasecmp.php * * @param string $str1

The first string.

* @param string $str2

The second string.

* @param int $len

The length of strings to be used in the comparison.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int * < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal */
    public static function strncasecmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        // case-fold both sides first ...
        $folded1 = self::strtocasefold($str1, true, false, $encoding, null, false);
        $folded2 = self::strtocasefold($str2, true, false, $encoding, null, false);

        // ... then compare the first $len characters. The encoding is handed
        // over as well, so that the prefix is cut with the same charset that
        // was used for the case folding (and not always as UTF-8).
        return self::strncmp($folded1, $folded2, $len, $encoding);
    }

    /**
     * String comparison of the first n characters.
     *
     * EXAMPLE:


     * UTF8::strncmp("Iñtërnâtiôn\nàlizætiøn321", "Iñtërnâtiôn\nàlizætiøn123", 5); // 0
     *

* * @see http://php.net/manual/en/function.strncmp.php * * @param string $str1

The first string.

* @param string $str2

The second string.

* @param int $len

Number of characters to use in the comparison.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int * < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2;
* 0 if they are equal */
    public static function strncmp(
        string $str1,
        string $str2,
        int $len,
        string $encoding = 'UTF-8'
    ): int {
        $is_known_encoding = $encoding === 'UTF-8' || $encoding === 'CP850';
        if (!$is_known_encoding) {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // cut both strings down to their first $len characters
        if ($encoding !== 'UTF-8') {
            $prefix1 = (string) self::substr($str1, 0, $len, $encoding);
            $prefix2 = (string) self::substr($str2, 0, $len, $encoding);
        } else {
            $prefix1 = (string) \mb_substr($str1, 0, $len);
            $prefix2 = (string) \mb_substr($str2, 0, $len);
        }

        return self::strcmp($prefix1, $prefix2);
    }

    /**
     * Search a string for any of a set of characters.
     *
     * EXAMPLE: UTF8::strpbrk('-中文空白-', '白'); // '白-'
     *
     * @see http://php.net/manual/en/function.strpbrk.php
     *
     * @param string $haystack

The string where char_list is looked for.

* @param string $char_list

This parameter is case-sensitive.

* * @psalm-pure * * @return false|string *

The string starting from the character found, or false if it is not found.

*/
    public static function strpbrk(string $haystack, string $char_list)
    {
        if ($haystack === '' || $char_list === '') {
            return false;
        }

        // look for the first char out of the list
        $pattern = '/' . self::rxClass($char_list) . '/us';
        if (!\preg_match($pattern, $haystack, $m)) {
            return false;
        }

        // byte offset of the matched char, everything from there on is returned
        $byte_offset = (int) \strpos($haystack, $m[0]);

        return \substr($haystack, $byte_offset);
    }

    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * INFO: use UTF8::strpos_in_byte() for the byte-length
     *
     * EXAMPLE: UTF8::strpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 8
     *
     * @see http://php.net/manual/en/function.mb-strpos.php
     *
     * @param string $haystack

The string from which to get the position of the first occurrence of needle.

* @param int|string $needle

The string to find in haystack.
Or a code point as int.

* @param int $offset [optional]

The search offset. If it is not specified, 0 is used.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int * The (int) numeric position of the first occurrence of needle in the haystack * string.
If needle is not found it returns false. */
    public static function strpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // PHP < 8: nothing can be found in an empty haystack
        // (for PHP >= 8 this is decided below, once $needle is a string)
        if ($haystack === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        if ($haystack === '') {
            // PHP >= 8: an empty needle is found at position 0
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
                return @\mb_strpos($haystack, $needle, $offset);
            }

            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\mb_strpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['iconv'] === false
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_strpos() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $offset >= 0 // iconv_strpos() can't handle negative offset
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // ignore invalid negative offset to keep compatibility
            // with php < 5.5.35, < 5.6.21, < 7.0.6
            $return_tmp = \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            /** @noinspection PhpUsageOfSilenceOperatorInspection - Offset not contained in string */
            return @\strpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        if ($offset < 0) {
            // Translate a negative offset (counted from the end) into the
            // absolute character position, so that the result below refers to
            // the whole haystack and not only to the searched tail.
            $offset = \max(0, (int) self::strlen($haystack, $encoding) + $offset);
        }

        $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        if ($haystack_tmp === false) {
            $haystack_tmp = '';
        }
        $haystack = (string) $haystack_tmp;

        // byte position inside the searched part ...
        $pos = \strpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        // ... converted into a character position of the whole haystack
        if ($pos) {
            return $offset + (int) self::strlen(\substr($haystack, 0, $pos), $encoding);
        }

        return $offset + 0;
    }

    /**
     * Find the position of the first occurrence of a substring in a string.
     *
     * @param string $haystack

* The string being checked. *

* @param string $needle

* The string to search for in haystack. *

* @param int $offset [optional]

* The search offset. If it is not specified, 0 is used. *

* * @psalm-pure * * @return false|int *

The numeric position of the first occurrence of needle in the * haystack string. If needle is not found, it returns false.

*/
    public static function strpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // byte-wise search: an empty operand never yields a position
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strpos($haystack, $needle, $offset, 'CP850')
            : \strpos($haystack, $needle, $offset);
    }

    /**
     * Find the position of the first occurrence of a substring in a string, case-insensitive.
     *
     * @param string $haystack

* The string being checked. *

* @param string $needle

* The string to search for in haystack. *

* @param int $offset [optional]

* The search offset. If it is not specified, 0 is used. *

* * @psalm-pure * * @return false|int *

The numeric position of the first occurrence of needle in the * haystack string. If needle is not found, it returns false.

*/
    public static function stripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // byte-wise, case-insensitive search: an empty operand never yields a position
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_stripos($haystack, $needle, $offset, 'CP850')
            : \stripos($haystack, $needle, $offset);
    }

    /**
     * Find the last occurrence of a character in a string within another.
     *
     * EXAMPLE: UTF8::strrchr('κόσμεκόσμε-äöü', 'κόσμε'); // 'κόσμε-äöü'
     *
     * @see http://php.net/manual/en/function.mb-strrchr.php
     *
     * @param string $haystack

The string from which to get the last occurrence of needle.

* @param string $needle

The string to find in haystack

* @param bool $before_needle [optional]

* Determines which portion of haystack * this function returns. * If set to true, it returns all of haystack * from the beginning to the last occurrence of needle. * If set to false, it returns all of haystack * from the last occurrence of needle to the end, *

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|string *

The portion of haystack or false if needle is not found.

*/
    public static function strrchr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrchr($haystack, $needle, $before_needle)
                : \mb_strrchr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only: the native function is sufficient
        // (it has no "before needle" mode, hence the extra condition)
        //

        $is_single_byte_encoding = $encoding === 'CP850' || $encoding === 'ASCII';
        if (!$before_needle && $is_single_byte_encoding) {
            return \strrchr($haystack, $needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrchr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via iconv, otherwise via vanilla php:
        // both look for the first character of the needle only
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }
        $first_char = (string) $first_char;

        if (self::$SUPPORT['iconv'] === true) {
            $last_pos = \iconv_strrpos($haystack, $first_char, $encoding);
        } else {
            $last_pos = self::strrpos($haystack, $first_char, 0, $encoding);
        }

        if ($last_pos === false) {
            return false;
        }

        // cut the haystack at the found position
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }

    /**
     * Reverses characters order in the string.
     *
     * EXAMPLE: UTF8::strrev('κ-öäü'); // 'üäö-κ'
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

The string with characters in the reverse sequence.

*/
    public static function strrev(string $str, string $encoding = 'UTF-8'): string
    {
        if ($str === '') {
            return '';
        }

        // emoji are encoded up front and decoded again on return
        $encoded = self::emoji_encode($str, true);

        // single units (graphemes / characters), collected from the end to the start
        $pieces = [];

        if ($encoding === 'UTF-8') {
            if (self::$SUPPORT['intl'] === true) {
                // try "grapheme" first: https://stackoverflow.com/questions/17496493/strrev-dosent-support-utf-8
                for ($idx = (int) \grapheme_strlen($encoded) - 1; $idx >= 0; --$idx) {
                    $unit = \grapheme_substr($encoded, $idx, 1);
                    if ($unit !== false) {
                        $pieces[] = $unit;
                    }
                }
            } else {
                for ($idx = (int) \mb_strlen($encoded) - 1; $idx >= 0; --$idx) {
                    $unit = \mb_substr($encoded, $idx, 1);
                    if ($unit !== false) {
                        $pieces[] = $unit;
                    }
                }
            }
        } else {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');

            for ($idx = (int) self::strlen($encoded, $encoding) - 1; $idx >= 0; --$idx) {
                $unit = self::substr($encoded, $idx, 1, $encoding);
                if ($unit !== false) {
                    $pieces[] = $unit;
                }
            }
        }

        return self::emoji_decode(\implode('', $pieces), true);
    }

    /**
     * Find the last occurrence of a character in a string within another, case-insensitive.
     *
     * EXAMPLE: UTF8::strrichr('Aκόσμεκόσμε-äöü', 'aκόσμε'); // 'Aκόσμεκόσμε-äöü'
     *
     * @see http://php.net/manual/en/function.mb-strrichr.php
     *
     * @param string $haystack

The string from which to get the last occurrence of needle.

* @param string $needle

The string to find in haystack.

* @param bool $before_needle [optional]

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|string *

The portion of haystack or
false if needle is not found.

*/
    public static function strrichr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        if ($haystack === '' || $needle === '') {
            return false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strrichr($haystack, $needle, $before_needle)
                : \mb_strrichr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // fallback via vanilla php: only the first character of the needle is searched
        //

        $first_char = self::substr($needle, 0, 1, $encoding);
        if ($first_char === false) {
            return false;
        }

        $last_pos = self::strripos($haystack, (string) $first_char, 0, $encoding);
        if ($last_pos === false) {
            return false;
        }

        // cut the haystack at the found position
        return $before_needle
            ? self::substr($haystack, 0, $last_pos, $encoding)
            : self::substr($haystack, $last_pos, null, $encoding);
    }

    /**
     * Find the position of the last occurrence of a substring in a string, case-insensitive.
     *
     * EXAMPLE: UTF8::strripos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13
     *
     * @param string $haystack

The string to look in.

* @param int|string $needle

The string to look for.

* @param int $offset [optional]

Number of characters to ignore in the beginning or end.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int *

The (int) numeric position of the last occurrence of needle in the haystack * string.
If needle is not found, it returns false.

*/
    public static function strripos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Nothing can be found in an empty haystack, except (PHP >= 8) an empty needle.
        if ($haystack === '') {
            if (\PHP_VERSION_ID < 80000) {
                return false;
            }

            if ($needle === '') {
                return 0;
            }
        }

        // iconv and mbstring do not support integer $needle,
        // so a non-negative integer is converted via self::chr() first
        $needle_is_code_point = (int) $needle === $needle && $needle >= 0;
        if ($needle_is_code_point) {
            $needle = self::chr($needle);
        }
        $needle = (string) $needle;

        // check again, now that the needle is guaranteed to be a string
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? 0 : false;
        }

        if (\PHP_VERSION_ID < 80000 && $needle === '') {
            return false;
        }

        if ($clean_utf8) {
            // mb_strripos() && iconv_strripos() is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strripos($haystack, $needle, $offset)
                : \mb_strripos($haystack, $needle, $offset, $encoding);
        }

        //
        // binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strripos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strripos()" can't handle other encodings
                       &&
                       $offset >= 0 // grapheme_strripos() can't handle negative offset
                       &&
                       self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_pos = \grapheme_strripos($haystack, $needle, $offset);
            if ($grapheme_pos !== false) {
                return $grapheme_pos;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strripos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php: case-fold both operands, then search case-sensitively
        //

        $folded_haystack = self::strtocasefold($haystack, true, false, $encoding);
        $folded_needle = self::strtocasefold($needle, true, false, $encoding);

        return self::strrpos($folded_haystack, $folded_needle, $offset, $encoding, $clean_utf8);
    }

    /**
     * Finds position of last occurrence of a string within another, case-insensitive.
     *
     * @param string $haystack

* The string from which to get the position of the last occurrence * of needle. *

* @param string $needle

* The string to find in haystack. *

* @param int $offset [optional]

* The position in haystack * to start searching. *

* * @psalm-pure * * @return false|int *

Return the numeric position of the last occurrence of needle in the * haystack string, or false if needle is not found.

*/
    public static function strripos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // byte-wise, case-insensitive search: an empty operand never yields a position
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strripos($haystack, $needle, $offset, 'CP850')
            : \strripos($haystack, $needle, $offset);
    }

    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * EXAMPLE: UTF8::strrpos('ABC-ÖÄÜ-中文空白-中文空白', '中'); // 13
     *
     * @see http://php.net/manual/en/function.mb-strrpos.php
     *
     * @param string $haystack

The string being checked, for the last occurrence of needle

* @param int|string $needle

The string to find in haystack.
Or a code point as int.

* @param int $offset [optional]

May be specified to begin searching an arbitrary number of characters * into the string. Negative values will stop searching at an arbitrary point prior to * the end of the string. *

* @param string $encoding [optional]

Set the charset.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int *

The (int) numeric position of the last occurrence of needle in the haystack * string.
If needle is not found, it returns false.

*/
    public static function strrpos(
        string $haystack,
        $needle,
        int $offset = 0,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // Empty haystack: PHP >= 8 reports an empty needle at position 0,
        // older PHP versions never find anything in an empty string.
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000) {
                if ($needle === '') {
                    return 0;
                }
            } else {
                return false;
            }
        }

        // iconv and mbstring do not support integer $needle
        if ((int) $needle === $needle && $needle >= 0) {
            $needle = (string) self::chr($needle);
        }
        $needle = (string) $needle;

        // check again, now that the needle is guaranteed to be a string
        if ($haystack === '') {
            if (\PHP_VERSION_ID >= 80000 && $needle === '') {
                return 0;
            }

            return false;
        }

        if ($needle === '' && \PHP_VERSION_ID < 80000) {
            return false;
        }

        if ($clean_utf8) {
            // mb_strrpos && iconv_strrpos is not tolerant to invalid characters
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            if ($encoding === 'UTF-8') {
                return \mb_strrpos($haystack, $needle, $offset);
            }

            return \mb_strrpos($haystack, $needle, $offset, $encoding);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            return \strrpos($haystack, $needle, $offset);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $offset >= 0 // grapheme_strrpos() can't handle negative offset
            &&
            $encoding === 'UTF-8' // INFO: "grapheme_strrpos()" can't handle other encodings
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_strrpos($haystack, $needle, $offset);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strrpos($haystack, $needle, $offset);
        }

        //
        // fallback via vanilla php
        //

        // Restrict the haystack to the search window. The requested encoding is
        // passed on, so that the window is cut (and later measured) in the same
        // character units the caller's offset refers to.
        $haystack_tmp = null;
        if ($offset > 0) {
            $haystack_tmp = self::substr($haystack, $offset, null, $encoding);
        } elseif ($offset < 0) {
            $haystack_tmp = self::substr($haystack, 0, $offset, $encoding);
            $offset = 0;
        }

        if ($haystack_tmp !== null) {
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        // byte position inside the search window ...
        $pos = \strrpos($haystack, $needle);
        if ($pos === false) {
            return false;
        }

        /** @var false|string $str_tmp - needed for PhpStan (stubs error) */
        $str_tmp = \substr($haystack, 0, $pos);
        if ($str_tmp === false) {
            return false;
        }

        // ... converted into a character position inside the original string
        return $offset + (int) self::strlen($str_tmp, $encoding);
    }

    /**
     * Find the position of the last occurrence of a substring in a string.
     *
     * @param string $haystack

* The string being checked, for the last occurrence * of needle. *

* @param string $needle

* The string to find in haystack. *

* @param int $offset [optional]

May be specified to begin searching an arbitrary number of characters into * the string. Negative values will stop searching at an arbitrary point * prior to the end of the string. *

* * @psalm-pure * * @return false|int *

The numeric position of the last occurrence of needle in the * haystack string. If needle is not found, it returns false.

*/
    public static function strrpos_in_byte(string $haystack, string $needle, int $offset = 0)
    {
        // byte-wise search: an empty operand never yields a position
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strrpos($haystack, $needle, $offset, 'CP850')
            : \strrpos($haystack, $needle, $offset);
    }

    /**
     * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
     * mask.
     *
     * EXAMPLE: UTF8::strspn('iñtërnâtiônàlizætiøn', 'itñ'); // '3'
     *
     * @param string $str

The input string.

* @param string $mask

The mask of chars

* @param int $offset [optional] * @param int|null $length [optional] * @param string $encoding [optional]

Set the charset.

*
     * @psalm-pure
     *
     * @return false|int
     */
    public static function strspn(
        string $str,
        string $mask,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8'
    ) {
        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        // reduce the input to the requested window first
        $needs_window = $offset || $length !== null;
        if ($needs_window) {
            if ($encoding !== 'UTF-8') {
                $str = (string) self::substr($str, $offset, $length, $encoding);
            } elseif ($length === null) {
                $str = (string) \mb_substr($str, $offset);
            } else {
                $str = (string) \mb_substr($str, $offset, $length);
            }
        }

        if ($str === '' || $mask === '') {
            return 0;
        }

        // match the leading run of mask characters and measure it
        $leading_run = [];
        if (\preg_match('/^' . self::rxClass($mask) . '+/u', $str, $leading_run)) {
            return (int) self::strlen($leading_run[0], $encoding);
        }

        return 0;
    }

    /**
     * Returns part of haystack string from the first occurrence of needle to the end of haystack.
     *
     * EXAMPLE:


     * $str = 'iñtërnâtiônàlizætiøn';
     * $search = 'nât';
     *
     * UTF8::strstr($str, $search)); // 'nâtiônàlizætiøn'
     * UTF8::strstr($str, $search, true)); // 'iñtër'
     *

* * @param string $haystack

The input string. Must be valid UTF-8.

* @param string $needle

The string to look for. Must be valid UTF-8.

* @param bool $before_needle [optional]

* If TRUE, strstr() returns the part of the * haystack before the first occurrence of the needle (excluding the needle). *

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|string *

A sub-string,
or false if needle is not found.

*/
    public static function strstr(
        string $haystack,
        string $needle,
        bool $before_needle = false,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty haystack: only PHP >= 8 "finds" an empty needle in it
        if ($haystack === '') {
            return (\PHP_VERSION_ID >= 80000 && $needle === '') ? '' : false;
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // empty needle: PHP >= 8 returns the whole haystack
        if ($needle === '') {
            return \PHP_VERSION_ID >= 80000 ? $haystack : false;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strstr($haystack, $needle, $before_needle)
                : \mb_strstr($haystack, $needle, $before_needle, $encoding);
        }

        //
        // binary || ascii only
        //

        if ($encoding === 'CP850' || $encoding === 'ASCII') {
            return \strstr($haystack, $needle, $before_needle);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        $intl_usable = $encoding === 'UTF-8' // INFO: "grapheme_strstr()" can't handle other encodings
                       &&
                       self::$SUPPORT['intl'] === true;
        if ($intl_usable) {
            $grapheme_result = \grapheme_strstr($haystack, $needle, $before_needle);
            if ($grapheme_result !== false) {
                return $grapheme_result;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($haystack . $needle)) {
            return \strstr($haystack, $needle, $before_needle);
        }

        //
        // fallback via vanilla php: capture everything in front of the first needle
        //

        $match = [];
        \preg_match('/^(.*?)' . \preg_quote($needle, '/') . '/us', $haystack, $match);

        if (!isset($match[1])) {
            return false;
        }

        $before = $match[1];
        if ($before_needle) {
            return $before;
        }

        // skip the part in front of the needle and return the rest
        return self::substr($haystack, (int) self::strlen($before));
    }

    /**
     * Finds first occurrence of a string within another.
     *
     * @param string $haystack

* The string from which to get the first occurrence * of needle. *

* @param string $needle

* The string to find in haystack. *

* @param bool $before_needle [optional]

* Determines which portion of haystack * this function returns. * If set to true, it returns all of haystack * from the beginning to the first occurrence of needle. * If set to false, it returns all of haystack * from the first occurrence of needle to the end, *

* * @psalm-pure * * @return false|string *

The portion of haystack, * or false if needle is not found.

*/
    public static function strstr_in_byte(
        string $haystack,
        string $needle,
        bool $before_needle = false
    ) {
        // byte-wise search: an empty operand never yields a result
        $has_empty_operand = $haystack === '' || $needle === '';
        if ($has_empty_operand) {
            return false;
        }

        // "mb_" is available if overload is used, so use it with an 8-BIT charset ...
        return self::$SUPPORT['mbstring_func_overload'] === true
            ? \mb_strstr($haystack, $needle, $before_needle, 'CP850')
            : \strstr($haystack, $needle, $before_needle);
    }

    /**
     * Unicode transformation for case-less matching.
     *
     * EXAMPLE: UTF8::strtocasefold('ǰ◌̱'); // 'ǰ◌̱'
     *
     * @see http://unicode.org/reports/tr21/tr21-5.html
     *
     * @param string $str

The input string.

* @param bool $full [optional]

* true, replace full case folding chars (default)
* false, use only limited static array [UTF8::$COMMON_CASE_FOLD] *

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string $encoding [optional]

Set the charset.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, tr

* @param bool $lower [optional]

Use lowercase string, otherwise use uppercase string. PS: uppercase * is for some languages better ...

*
     * @psalm-pure
     *
     * @return string
     */
    public static function strtocasefold(
        string $str,
        bool $full = true,
        bool $clean_utf8 = false,
        string $encoding = 'UTF-8',
        string $lang = null,
        bool $lower = true
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before any case conversion is applied
            $str = self::clean($str);
        }

        // replace the special case-folding characters first ...
        $folded = self::fixStrCaseHelper($str, $lower, $full);

        // ... then convert the case: directly via mbstring for plain UTF-8 input,
        // otherwise via the encoding- and language-aware helpers of this class
        $plain_utf8 = $lang === null && $encoding === 'UTF-8';
        if ($plain_utf8) {
            return $lower ? \mb_strtolower($folded) : \mb_strtoupper($folded);
        }

        return $lower
            ? self::strtolower($folded, $encoding, false, $lang)
            : self::strtoupper($folded, $encoding, false, $lang);
    }

    /**
     * Make a string lowercase.
     *
     * EXAMPLE: UTF8::strtolower('DÉJÀ Σσς Iıİi'); // 'déjà σσς iıii'
     *
     * @see http://php.net/manual/en/function.mb-strtolower.php
     *
     * @param string $str

The string being lowercased.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

* * @psalm-pure * * @return string *

String with all alphabetic characters converted to lowercase.

*/
    public static function strtolower(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the case conversion is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str, true);
        }

        // fast path: plain UTF-8 without language-specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtolower($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        // no language-specific rules requested
        if ($lang === null) {
            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // language-specific rules need the transliterator from "intl"
        if (self::$SUPPORT['intl'] !== true) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() without intl cannot handle the "lang" parameter: ' . $lang, \E_USER_WARNING);

            // always fallback via symfony polyfill
            return \mb_strtolower($str, $encoding);
        }

        // the list of transliterator ids is loaded once, on first use
        if (self::$INTL_TRANSLITERATOR_LIST === null) {
            self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
        }

        $transliterator_id = $lang . '-Lower';
        if (!\in_array($transliterator_id, self::$INTL_TRANSLITERATOR_LIST, true)) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtolower() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

            // unknown language: use the language-independent transliterator
            $transliterator_id = 'Any-Lower';
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection UnnecessaryCastingInspection */
        return (string) \transliterator_transliterate($transliterator_id, $str);
    }

    /**
     * Make a string uppercase.
     *
     * EXAMPLE: UTF8::strtoupper('Déjà Σσς Iıİi'); // 'DÉJÀ ΣΣΣ IIİI'
     *
     * @see http://php.net/manual/en/function.mb-strtoupper.php
     *
     * @param string $str

The string being uppercased.

* @param string $encoding [optional]

Set the charset.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

* * @psalm-pure * * @return string *

String with all alphabetic characters converted to uppercase.

*/
    public static function strtoupper(
        $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        // init
        $str = (string) $str;

        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // drop invalid byte sequences before the case conversion is applied
            $str = self::clean($str);
        }

        // hack for old php version or for the polyfill ...
        if ($try_to_keep_the_string_length) {
            $str = self::fixStrCaseHelper($str);
        }

        // fast path: plain UTF-8 without language-specific rules
        if ($lang === null && $encoding === 'UTF-8') {
            return \mb_strtoupper($str);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if ($lang !== null) {
            if (self::$SUPPORT['intl'] === true) {
                // the list of transliterator ids is loaded once, on first use
                if (self::$INTL_TRANSLITERATOR_LIST === null) {
                    self::$INTL_TRANSLITERATOR_LIST = self::getData('transliterator_list');
                }

                $language_code = $lang . '-Upper';
                if (!\in_array($language_code, self::$INTL_TRANSLITERATOR_LIST, true)) {
                    // intl is available here, only the requested language is unknown
                    /**
                     * @psalm-suppress ImpureFunctionCall - it is only a warning
                     */
                    \trigger_error('UTF8::strtoupper() cannot handle special language: ' . $lang . ' | supported: ' . \print_r(self::$INTL_TRANSLITERATOR_LIST, true), \E_USER_WARNING);

                    // unknown language: use the language-independent transliterator
                    $language_code = 'Any-Upper';
                }

                /** @noinspection PhpComposerExtensionStubsInspection */
                /** @noinspection UnnecessaryCastingInspection */
                return (string) \transliterator_transliterate($language_code, $str);
            }

            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::strtoupper() without intl cannot handle the "lang"-parameter: ' . $lang, \E_USER_WARNING);
        }

        // always fallback via symfony polyfill
        return \mb_strtoupper($str, $encoding);
    }

    /**
     * Translate characters or replace sub-strings.
     *
     * EXAMPLE:
     *


     * $array = [
     *     'Hello'   => '○●◎',
     *     '中文空白' => 'earth',
     * ];
     * UTF8::strtr('Hello 中文空白', $array); // '○●◎ earth'
     *

* * @see http://php.net/manual/en/function.strtr.php * * @param string $str

The string being translated.

* @param string|string[] $from

The string replacing from.

* @param string|string[] $to [optional]

The string being translated to.

* * @psalm-pure * * @return string *

This function returns a copy of str, translating all occurrences of each character in "from" * to the corresponding character in "to".

*/
    public static function strtr(string $str, $from, $to = ''): string
    {
        if ($str === '') {
            return '';
        }

        // replacing something by itself is a no-op
        if ($from === $to) {
            return $str;
        }

        if ($to !== '') {
            // strings are split into single characters, so that
            // "from" and "to" can be paired up element by element
            if (!\is_array($from)) {
                $from = self::str_split($from);
            }

            if (!\is_array($to)) {
                $to = self::str_split($to);
            }

            // surplus elements of the longer list are ignored
            $count_from = \count($from);
            $count_to = \count($to);

            if ($count_from > $count_to) {
                $from = \array_slice($from, 0, $count_to);
            } elseif ($count_from < $count_to) {
                $to = \array_slice($to, 0, $count_from);
            }

            // keep the result separate from "$from": the error message below
            // has to show the input that could not be combined
            $pairs = \array_combine($from, $to);
            if ($pairs === false) {
                throw new \InvalidArgumentException('The number of elements for each array isn\'t equal or the arrays are empty: (from: ' . \print_r($from, true) . ' | to: ' . \print_r($to, true) . ')');
            }

            return \strtr($str, $pairs);
        }

        // no "to" given: a string "from" is removed from the input ...
        if (\is_string($from)) {
            return \str_replace($from, $to, $str);
        }

        // ... and an array "from" is used as "search => replace" map
        return \strtr($str, $from);
    }

    /**
     * Return the width of a string.
     *
     * INFO: use UTF8::strlen() for the byte-length
     *
     * EXAMPLE: UTF8::strwidth("Iñtërnâtiôn\xE9àlizætiøn")); // 21
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

*
     * @psalm-pure
     *
     * @return int
     */
    public static function strwidth(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): int {
        if ($str === '') {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        //
        // preferred: mbstring
        //

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_strwidth($str)
                : \mb_strwidth($str, $encoding);
        }

        //
        // fallback via vanilla php
        //

        if ($encoding !== 'UTF-8') {
            $str = self::encode('UTF-8', $str, false, $encoding);
        }

        // remove every character from the ranges below and count how many were removed ...
        $wide_count = 0;
        $remaining = (string) \preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $str, -1, $wide_count);

        // ... each removed character counts twice, every remaining character once
        return $wide_count * 2 + (int) self::strlen($remaining);
    }

    /**
     * Get part of a string.
     *
     * EXAMPLE: UTF8::substr('中文空白', 1, 2); // '文空'
     *
     * @see http://php.net/manual/en/function.mb-substr.php
     *
     * @param string $str

The string being checked.

* @param int $offset

The first position used in str.

* @param int|null $length [optional]

The maximum length of the returned string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|string * The portion of str specified by the offset and * length parameters.

If str is shorter than offset * characters long, FALSE will be returned.
     */
    public static function substr(
        string $str,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // empty string
        if ($str === '' || $length === 0) {
            return '';
        }

        if ($clean_utf8) {
            // iconv and mbstring are not tolerant to invalid encoding
            // further, their behaviour is inconsistent with that of PHP's substr
            $str = self::clean($str);
        }

        // whole string
        if (!$offset && $length === null) {
            return $str;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        //
        // fallback via mbstring
        //

        if (self::$SUPPORT['mbstring'] === true && $encoding === 'UTF-8') {
            if ($length === null) {
                return \mb_substr($str, $offset);
            }

            return \mb_substr($str, $offset, $length);
        }

        //
        // fallback for binary || ascii only
        //

        if (
            $encoding === 'CP850'
            ||
            $encoding === 'ASCII'
        ) {
            if ($length === null) {
                return \substr($str, $offset);
            }

            return \substr($str, $offset, $length);
        }

        // otherwise we need the string-length
        $str_length = 0;
        if ($offset || $length === null) {
            $str_length = self::strlen($str, $encoding);
        }

        // e.g.: invalid chars + mbstring not installed
        if ($str_length === false) {
            return false;
        }

        // empty string
        if ($offset === $str_length && !$length) {
            return '';
        }

        // impossible
        if ($offset && $offset > $str_length) {
            return '';
        }

        $length = $length ?? (int) $str_length;

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - it is only a warning
             */
            \trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        //
        // fallback via intl
        //

        if (
            $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
            &&
            $offset >= 0 // grapheme_substr() can't handle negative offset
            &&
            self::$SUPPORT['intl'] === true
        ) {
            $return_tmp = \grapheme_substr($str, $offset, $length);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback via iconv
        //

        if (
            $length >= 0 // "iconv_substr()" can't handle negative length
            &&
            self::$SUPPORT['iconv'] === true
        ) {
            // pass the requested encoding on, otherwise iconv would silently
            // interpret the input in its own default charset
            $return_tmp = \iconv_substr($str, $offset, $length, $encoding);
            if ($return_tmp !== false) {
                return $return_tmp;
            }
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return \substr($str, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split to array, and remove invalid characters
        $array = self::str_split($str);

        // extract relevant part, and join to make string again
        return \implode('', \array_slice($array, $offset, $length));
    }

    /**
     * Binary-safe comparison of two strings from an offset, up to a length of characters.
     *
     * EXAMPLE:
     * UTF8::substr_compare("○●◎\r", '●◎', 0, 2); // -1
     * UTF8::substr_compare("○●◎\r", '◎●', 1, 2); // 1
     * UTF8::substr_compare("○●◎\r", '●◎', 1, 2); // 0
     *
     *
     * @param string $str1

The main string being compared.

* @param string $str2

The secondary string being compared.

* @param int $offset [optional]

The start position for the comparison. If negative, it starts * counting from the end of the string.

* @param int|null $length [optional]

The length of the comparison. The default value is the largest * of the length of the str compared to the length of main_str less the * offset.

* @param bool $case_insensitivity [optional]

If case_insensitivity is TRUE, comparison is case * insensitive.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return int * < 0 if str1 is less than str2;
* > 0 if str1 is greater than str2,
* 0 if they are equal
     */
    public static function substr_compare(
        string $str1,
        string $str2,
        int $offset = 0,
        int $length = null,
        bool $case_insensitivity = false,
        string $encoding = 'UTF-8'
    ): int {
        // only cut the strings if the caller asked for a window
        if (
            $offset !== 0
            ||
            $length !== null
        ) {
            if ($encoding === 'UTF-8') {
                if ($length === null) {
                    $str1 = (string) \mb_substr($str1, $offset);
                } else {
                    $str1 = (string) \mb_substr($str1, $offset, $length);
                }

                // shorten the second string to the length of the extracted window
                $str2 = (string) \mb_substr($str2, 0, (int) self::strlen($str1));
            } else {
                $encoding = self::normalize_encoding($encoding, 'UTF-8');

                $str1 = (string) self::substr($str1, $offset, $length, $encoding);

                // the window must be measured in the same encoding that was used to cut it,
                // otherwise the length of non UTF-8 input is miscounted (or even "false")
                $str2 = (string) self::substr(
                    $str2,
                    0,
                    (int) self::strlen($str1, $encoding),
                    $encoding
                );
            }
        }

        if ($case_insensitivity) {
            return self::strcasecmp($str1, $str2, $encoding);
        }

        return self::strcmp($str1, $str2);
    }

    /**
     * Count the number of substring occurrences.
     *
     * EXAMPLE: UTF8::substr_count('中文空白', '文空', 1, 2); // 1
     *
     * @see http://php.net/manual/en/function.substr-count.php
     *
     * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* @param int $offset [optional]

The offset where to start counting.

* @param int|null $length [optional]

* The maximum length after the specified offset to search for the * substring. It outputs a warning if the offset plus the length is * greater than the haystack length. *

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return false|int *

This function returns the number of occurrences as an integer, or false if the needle is an empty string or the length of the haystack cannot be determined.

*/
    public static function substr_count(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ) {
        // an empty needle cannot be counted
        if ($needle === '') {
            return false;
        }

        // nothing to search in, or nothing to search through
        if ($haystack === '' || $length === 0) {
            return 0;
        }

        if ($encoding !== 'UTF-8' && $encoding !== 'CP850') {
            $encoding = self::normalize_encoding($encoding, 'UTF-8');
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $needle = self::clean($needle);
            $haystack = self::clean($haystack);
        }

        // restrict the haystack to the requested window
        if ($offset || $length > 0) {
            if ($length === null) {
                $haystack_length = self::strlen($haystack, $encoding);
                if ($haystack_length === false) {
                    return false;
                }

                $length = (int) $haystack_length;
            }

            $haystack = $encoding === 'UTF-8'
                ? (string) \mb_substr($haystack, $offset, $length)
                : (string) \mb_substr($haystack, $offset, $length, $encoding);
        }

        if (
            $encoding !== 'UTF-8'
            &&
            self::$SUPPORT['mbstring'] === false
        ) {
            /**
             * @psalm-suppress ImpureFunctionCall - is is only a warning
             */
            \trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', \E_USER_WARNING);
        }

        if (self::$SUPPORT['mbstring'] === true) {
            return $encoding === 'UTF-8'
                ? \mb_substr_count($haystack, $needle)
                : \mb_substr_count($haystack, $needle, $encoding);
        }

        // fallback via regex: count every literal match of the needle
        \preg_match_all('/' . \preg_quote($needle, '/') . '/us', $haystack, $matches, \PREG_SET_ORDER);

        return \count($matches);
    }

    /**
     * Count the number of substring occurrences.
     *
     * @param string $haystack

* The string being checked. *

* @param string $needle

* The string being found. *

* @param int $offset [optional]

* The offset where to start counting *

* @param int|null $length [optional]

* The maximum length after the specified offset to search for the * substring. It outputs a warning if the offset plus the length is * greater than the haystack length. *

* * @psalm-pure * * @return false|int *

The number of times the * needle substring occurs in the * haystack string.

*/
    public static function substr_count_in_byte(
        string $haystack,
        string $needle,
        int $offset = 0,
        int $length = null
    ) {
        if ($haystack === '' || $needle === '') {
            return 0;
        }

        // with "mbstring.func_overload" the window is cut manually,
        // because the overloaded functions do not work byte-wise
        if (
            ($offset || $length !== null)
            &&
            self::$SUPPORT['mbstring_func_overload'] === true
        ) {
            if ($length === null) {
                $length_tmp = self::strlen($haystack);
                if ($length_tmp === false) {
                    return false;
                }
                $length = (int) $length_tmp;
            }

            if (
                (
                    $length !== 0
                    &&
                    $offset !== 0
                )
                &&
                ($length + $offset) <= 0
                &&
                // output from "substr_count()" have changed in PHP 7.1
                // (PHP_VERSION_ID is major * 10000 + minor * 100 + release, so 7.1.0 is 70100)
                \PHP_VERSION_ID < 70100
            ) {
                return false;
            }

            /** @var false|string $haystack_tmp - needed for PhpStan (stubs error) */
            $haystack_tmp = \substr($haystack, $offset, $length);
            if ($haystack_tmp === false) {
                $haystack_tmp = '';
            }
            $haystack = (string) $haystack_tmp;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr_count($haystack, $needle, 'CP850'); // 8-BIT
        }

        // native byte-wise counting; the length is only passed when it was given
        if ($length === null) {
            return \substr_count($haystack, $needle, $offset);
        }

        return \substr_count($haystack, $needle, $offset, $length);
    }

    /**
     * Returns the number of occurrences of $substring in the given string.
     * By default, the comparison is case-sensitive, but can be made insensitive
     * by setting $case_sensitive to false.
     *
     * @param string $str

The input string.

* @param string $substring

The substring to search for.

* @param bool $case_sensitive [optional]

Whether or not to enforce case-sensitivity. Default: true

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

*
     * @psalm-pure
     *
     * @return int
     */
    public static function substr_count_simple(
        string $str,
        string $substring,
        bool $case_sensitive = true,
        string $encoding = 'UTF-8'
    ): int {
        if ($str === '' || $substring === '') {
            return 0;
        }

        if ($encoding === 'UTF-8') {
            if (!$case_sensitive) {
                // compare both sides in upper case
                $str = \mb_strtoupper($str);
                $substring = \mb_strtoupper($substring);
            }

            return (int) \mb_substr_count($str, $substring);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        if (!$case_sensitive) {
            // other encodings are case-folded via the library helper
            $str = self::strtocasefold($str, true, false, $encoding, null, false);
            $substring = self::strtocasefold($substring, true, false, $encoding, null, false);
        }

        return (int) \mb_substr_count($str, $substring, $encoding);
    }

    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack), case-insensitive.
     *
     * EXAMPLE:


     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_ileft('ΚόσμεMiddleEnd', 'κόσμε'); // 'MiddleEnd'
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* * @psalm-pure * * @return string *

Return the sub-string.

*/
    public static function substr_ileft(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_istarts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }

    /**
     * Get part of a string process in bytes.
     *
     * @param string $str

The string being checked.

* @param int $offset

The first position used in str.

* @param int|null $length [optional]

The maximum length of the returned string.

* * @psalm-pure * * @return false|string * The portion of str specified by the offset and * length parameters.

If str is shorter than offset characters long, FALSE will be returned.
     */
    public static function substr_in_byte(string $str, int $offset = 0, int $length = null)
    {
        // nothing to extract
        if ($str === '' || $length === 0) {
            return '';
        }

        // no offset and no limit: the whole string is requested
        if (!$offset && $length === null) {
            return $str;
        }

        if (self::$SUPPORT['mbstring_func_overload'] === true) {
            // "mb_" is available if overload is used, so use it ...
            return \mb_substr($str, $offset, $length, 'CP850'); // 8-BIT
        }

        // "substr()" gets a very large length instead of NULL
        $byte_length = $length === null ? 2147483647 : $length;

        return \substr($str, $offset, $byte_length);
    }

    /**
     * Removes a suffix ($needle) from the end of the string ($haystack), case-insensitive.
     *
     * EXAMPLE:
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_iright('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddle'
     *
     * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* * @psalm-pure * * @return string *

Return the sub-string.

*/
    public static function substr_iright(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_iends_with($haystack, $needle)) {
            return $haystack;
        }

        // keep everything in front of the suffix
        $keep_length = (int) self::strlen($haystack) - (int) self::strlen($needle);

        return (string) \mb_substr($haystack, 0, $keep_length);
    }

    /**
     * Removes a prefix ($needle) from the beginning of the string ($haystack).
     *
     * EXAMPLE:
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'Κόσμε'); // 'MiddleEnd'
     * UTF8::substr_left('ΚόσμεMiddleEnd', 'κόσμε'); // 'ΚόσμεMiddleEnd'
     *
     * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* * @psalm-pure * * @return string *

Return the sub-string.

*/
    public static function substr_left(string $haystack, string $needle): string
    {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        if (!self::str_starts_with($haystack, $needle)) {
            return $haystack;
        }

        // drop as many characters as the needle is long
        $prefix_length = (int) self::strlen($needle);

        return (string) \mb_substr($haystack, $prefix_length);
    }

    /**
     * Replace text within a portion of a string.
     *
     * EXAMPLE:

UTF8::substr_replace(array('Iñtërnâtiônàlizætiøn', 'foo'), 'æ', 1); // array('Iæñtërnâtiônàlizætiøn', 'fæoo')

* * source: https://gist.github.com/stemar/8287074 * * @param string|string[] $str

The input string or an array of strings.

* @param string|string[] $replacement

The replacement string or an array of strings.

* @param int|int[] $offset

* If start is positive, the replacing will begin at the start'th offset * into string. *

* If start is negative, the replacing will begin at the start'th character * from the end of string. *

* @param int|int[]|null $length [optional]

If given and is positive, it represents the length of the * portion of string which is to be replaced. If it is negative, it * represents the number of characters from the end of string at which to * stop replacing. If it is not given, then it will default to strlen( * string ); i.e. end the replacing at the end of string. Of course, if * length is zero then this function will have the effect of inserting * replacement into string at the given start offset.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string|string[] *

The result string is returned. If string is an array then array is returned.

*/
    public static function substr_replace(
        $str,
        $replacement,
        $offset,
        $length = null,
        string $encoding = 'UTF-8'
    ) {
        if (\is_array($str)) {
            $num = \count($str);

            // the replacement: one entry per input string
            if (\is_array($replacement)) {
                $replacement = \array_slice($replacement, 0, $num);
            } else {
                $replacement = \array_pad([$replacement], $num, $replacement);
            }

            // the offset: non-integer entries fall back to 0
            if (\is_array($offset)) {
                $offset = \array_slice($offset, 0, $num);
                foreach ($offset as &$value_tmp) {
                    $value_tmp = (int) $value_tmp === $value_tmp ? $value_tmp : 0;
                }
                unset($value_tmp);
            } else {
                $offset = \array_pad([$offset], $num, $offset);
            }

            // the length: for arrays a missing length means "insert" (length 0),
            // non-integer entries fall back to the number of input strings
            if ($length === null) {
                $length = \array_fill(0, $num, 0);
            } elseif (\is_array($length)) {
                $length = \array_slice($length, 0, $num);
                foreach ($length as &$value_tmp_V2) {
                    $value_tmp_V2 = (int) $value_tmp_V2 === $value_tmp_V2 ? $value_tmp_V2 : $num;
                }
                unset($value_tmp_V2);
            } else {
                $length = \array_pad([$length], $num, $length);
            }

            // recursive call: the closure forwards the requested encoding,
            // a plain callable would silently fall back to the default ("UTF-8")
            return \array_map(
                /**
                 * @param mixed $str_tmp
                 * @param mixed $replacement_tmp
                 * @param mixed $offset_tmp
                 * @param mixed $length_tmp
                 *
                 * @return string|string[]
                 */
                static function ($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp) use ($encoding) {
                    return self::substr_replace($str_tmp, $replacement_tmp, $offset_tmp, $length_tmp, $encoding);
                },
                $str,
                $replacement,
                $offset,
                $length
            );
        }

        // a replacement array for a single string: only the first entry is used
        if (\is_array($replacement)) {
            if ($replacement !== []) {
                $replacement = $replacement[0];
            } else {
                $replacement = '';
            }
        }

        // init
        $str = (string) $str;
        $replacement = (string) $replacement;

        if (\is_array($length)) {
            throw new \InvalidArgumentException('Parameter "$length" can only be an array, if "$str" is also an array.');
        }

        if (\is_array($offset)) {
            throw new \InvalidArgumentException('Parameter "$offset" can only be an array, if "$str" is also an array.');
        }

        if ($str === '') {
            return $replacement;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            $string_length = (int) self::strlen($str, $encoding);

            // clamp the offset into [0, $string_length]
            if ($offset < 0) {
                $offset = (int) \max(0, $string_length + $offset);
            } elseif ($offset > $string_length) {
                $offset = $string_length;
            }

            // a negative length counts from the end, a missing length means "until the end"
            if ($length !== null && $length < 0) {
                $length = (int) \max(0, $string_length - $offset + $length);
            } elseif ($length === null || $length > $string_length) {
                $length = $string_length;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            if (($offset + $length) > $string_length) {
                $length = $string_length - $offset;
            }

            /** @noinspection AdditionOperationOnArraysInspection */
            return ((string) \mb_substr($str, 0, $offset, $encoding)) .
                   $replacement .
                   ((string) \mb_substr($str, $offset + $length, $string_length - $offset - $length, $encoding));
        }

        //
        // fallback for ascii only
        //

        if (ASCII::is_ascii($str)) {
            return ($length === null) ?
                \substr_replace($str, $replacement, $offset) :
                \substr_replace($str, $replacement, $offset, $length);
        }

        //
        // fallback via vanilla php
        //

        // split both strings into single characters
        \preg_match_all('/./us', $str, $str_matches);
        \preg_match_all('/./us', $replacement, $replacement_matches);

        if ($length === null) {
            $length_tmp = self::strlen($str, $encoding);
            if ($length_tmp === false) {
                // e.g.: non mbstring support + invalid chars
                return '';
            }
            $length = (int) $length_tmp;
        }

        \array_splice($str_matches[0], $offset, $length, $replacement_matches[0]);

        return \implode('', $str_matches[0]);
    }

    /**
     * Removes a suffix ($needle) from the end of the string ($haystack).
     *
     * EXAMPLE:


     * UTF8::substr_right('BeginMiddleΚόσμε', 'Κόσμε'); // 'BeginMiddle'
     * UTF8::substr_right('BeginMiddleΚόσμε', 'κόσμε'); // 'BeginMiddleΚόσμε'
     *

* * @param string $haystack

The string to search in.

* @param string $needle

The substring to search for.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* * @psalm-pure * * @return string *

Return the sub-string.

*/
    public static function substr_right(
        string $haystack,
        string $needle,
        string $encoding = 'UTF-8'
    ): string {
        // an empty haystack or an empty needle leaves nothing to strip
        if ($haystack === '' || $needle === '') {
            return $haystack;
        }

        // byte-wise check: does the haystack end with the needle?
        if (\substr($haystack, -\strlen($needle)) !== $needle) {
            return $haystack;
        }

        if ($encoding === 'UTF-8') {
            return (string) \mb_substr(
                $haystack,
                0,
                (int) \mb_strlen($haystack) - (int) \mb_strlen($needle)
            );
        }

        return (string) self::substr(
            $haystack,
            0,
            (int) self::strlen($haystack, $encoding) - (int) self::strlen($needle, $encoding),
            $encoding
        );
    }

    /**
     * Returns a case swapped version of the string.
     *
     * EXAMPLE: UTF8::swapCase('déJÀ σσς iıII'); // 'DÉjà ΣΣΣ IIii'
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* * @psalm-pure * * @return string *

Each character's case swapped.

*/
    public static function swapCase(string $str, string $encoding = 'UTF-8', bool $clean_utf8 = false): string
    {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        if ($encoding === 'UTF-8') {
            $lower = \mb_strtolower($str);
            $upper = \mb_strtoupper($str);
        } else {
            $lower = self::strtolower($str, $encoding);
            $upper = self::strtoupper($str, $encoding);
        }

        // byte-wise XOR of the lower-cased, the upper-cased and the original string
        return (string) ($lower ^ $upper ^ $str);
    }

    /**
     * Checks whether symfony-polyfills are used.
     *
     * @psalm-pure
     *
     * @return bool
     *

true if in use, false otherwise

* * @internal

Please do not use it anymore, we will make it private in the next major version.

*/
    public static function symfony_polyfill_used(): bool
    {
        // a function exists although its extension is not loaded => it comes from a polyfill
        $mbstring_polyfilled = !\extension_loaded('mbstring') && \function_exists('mb_strlen');
        $iconv_polyfilled = !\extension_loaded('iconv') && \function_exists('iconv');

        return $mbstring_polyfilled || $iconv_polyfilled;
    }

    /**
     * Replaces every tab character in the string with $tab_length spaces.
     *
     * @param string $str        <p>The input string.</p>
     * @param int    $tab_length <p>Number of spaces that replace one tab.</p>
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function tabs_to_spaces(string $str, int $tab_length = 4): string
    {
        // always build the replacement from the requested width,
        // so that the result cannot drift from a hard-coded literal
        $spaces = \str_repeat(' ', $tab_length);

        return \str_replace("\t", $spaces, $str);
    }

    /**
     * Converts the first character of each word in the string to uppercase
     * and all other chars to lowercase.
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

* * @psalm-pure * * @return string *

A string with all characters of $str being title-cased.

*/
    public static function titlecase(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // language specific rules or length preservation need the library implementation
        if ($lang !== null || $try_to_keep_the_string_length) {
            return self::str_titleize(
                $str,
                null,
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length,
                false
            );
        }

        if ($encoding === 'UTF-8') {
            return \mb_convert_case($str, \MB_CASE_TITLE);
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        return \mb_convert_case($str, \MB_CASE_TITLE, $encoding);
    }

    /**
     * alias for "UTF8::to_ascii()"
     *
     * @param string $str
     * @param string $subst_chr
     * @param bool   $strict
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see UTF8::to_ascii()
     * @deprecated

please use "UTF8::to_ascii()"

*/
    public static function toAscii(
        string $str,
        string $subst_chr = '?',
        bool $strict = false
    ): string {
        // pure delegation, kept for backward compatibility
        return self::to_ascii($str, $subst_chr, $strict);
    }

    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated please use "UTF8::to_iso8859()"
     */
    public static function toIso8859($str)
    {
        // pure delegation, kept for backward compatibility
        return self::to_iso8859($str);
    }

    /**
     * alias for "UTF8::to_iso8859()" (under its "Latin-1" name)
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated please use "UTF8::to_iso8859()"
     */
    public static function toLatin1($str)
    {
        // pure delegation, kept for backward compatibility
        return self::to_iso8859($str);
    }

    /**
     * alias for "UTF8::to_utf8()"
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see UTF8::to_utf8()
     * @deprecated please use "UTF8::to_utf8()"
     */
    public static function toUTF8($str)
    {
        // pure delegation, kept for backward compatibility
        return self::to_utf8($str);
    }

    /**
     * Convert a string into ASCII.
     *
     * EXAMPLE: UTF8::to_ascii('déjà σσς iıii'); // 'deja sss iiii'
     *
     * @param string $str

The input string.

* @param string $unknown [optional]

Character use if character unknown. (default is ?)

* @param bool $strict [optional]

Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad * performance

*
     * @psalm-pure
     *
     * @return string
     */
    public static function to_ascii(
        string $str,
        string $unknown = '?',
        bool $strict = false
    ): string {
        return ASCII::to_transliterate($str, $unknown, $strict);
    }

    /**
     * Interprets the given value as a boolean.
     *
     * The value is cast to a string first: known keywords ("true", "1", "on", "yes",
     * "false", "0", "off", "no") are matched as given and in lower case, numeric
     * strings are true if they are greater than zero, and everything else is true
     * if something is left after trimming.
     *
     * @param bool|int|float|string $str
     *
     * @psalm-pure
     *
     * @return bool
     */
    public static function to_boolean($str): bool
    {
        // init
        $str = (string) $str;

        if ($str === '') {
            return false;
        }

        // Info: http://php.net/manual/en/filter.filters.validate.php
        $map = [
            'true'  => true,
            '1'     => true,
            'on'    => true,
            'yes'   => true,
            'false' => false,
            '0'     => false,
            'off'   => false,
            'no'    => false,
        ];

        // exact keyword first, then the lower-cased variant
        $known = $map[$str] ?? $map[\strtolower($str)] ?? null;
        if ($known !== null) {
            return $known;
        }

        if (\is_numeric($str)) {
            return ((float) $str + 0) > 0;
        }

        return (bool) \trim($str);
    }

    /**
     * Convert given string to safe filename (and keep string case).
     *
     * @param string $str
     * @param bool   $use_transliterate No transliteration, conversion etc. is done by default - unsafe characters are
     *                                  simply replaced with hyphen.
     * @param string $fallback_char
     *
     * @psalm-pure
     *
     * @return string
     */
    public static function to_filename(
        string $str,
        bool $use_transliterate = false,
        string $fallback_char = '-'
    ): string {
        return ASCII::to_filename(
            $str,
            $use_transliterate,
            $fallback_char
        );
    }

    /**
     * Convert a string into "ISO-8859"-encoding (Latin-1).
     *
     * EXAMPLE: UTF8::to_utf8(UTF8::to_iso8859(' -ABC-中文空白- ')); // ' -ABC-????- '
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     */
    public static function to_iso8859($str)
    {
        if (\is_array($str)) {
            // convert every entry, keys are kept
            foreach ($str as $key => $value) {
                $str[$key] = self::to_iso8859($value);
            }

            return $str;
        }

        $str = (string) $str;

        return $str === '' ? '' : self::utf8_decode($str);
    }

    /**
     * alias for "UTF8::to_iso8859()"
     *
     * @param string|string[] $str
     *
     * @psalm-pure
     *
     * @return string|string[]
     *
     * @see UTF8::to_iso8859()
     * @deprecated

please use "UTF8::to_iso8859()"

*/
    public static function to_latin1($str)
    {
        // pure delegation, kept for backward compatibility
        return self::to_iso8859($str);
    }

    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     *

It decode UTF-8 codepoints and Unicode escape sequences.
It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.
WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this * case.

* * EXAMPLE: UTF8::to_utf8(["\u0063\u0061\u0074"]); // array('cat') * * @param string|string[] $str

Any string or array of strings.

* @param bool $decode_html_entity_to_utf8

Set to true, if you need to decode html-entities.

* * @psalm-pure * * @return string|string[] *

The UTF-8 encoded string

*
     * @template TToUtf8
     * @phpstan-param TToUtf8 $str
     * @phpstan-return TToUtf8
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8($str, bool $decode_html_entity_to_utf8 = false)
    {
        if (!\is_array($str)) {
            /** @phpstan-var TToUtf8 $str */
            $str = self::to_utf8_string($str, $decode_html_entity_to_utf8);

            return $str;
        }

        // convert every entry, keys are kept
        foreach ($str as $key => $value) {
            $str[$key] = self::to_utf8_string($value, $decode_html_entity_to_utf8);
        }

        return $str;
    }

    /**
     * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
     *
     *

It decode UTF-8 codepoints and Unicode escape sequences.
It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.
WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this * case.

* * EXAMPLE: UTF8::to_utf8_string("\u0063\u0061\u0074"); // 'cat' * * @param string $str

Any string.

* @param bool $decode_html_entity_to_utf8

Set to true, if you need to decode html-entities.

* * @psalm-pure * * @return string *

The UTF-8 encoded string

*
     * The input is scanned byte by byte: a lead byte followed by the expected number of
     * continuation bytes (0x80 - 0xBF) is copied unchanged, every other byte >= 0x80 is
     * passed to "to_utf8_convert_helper()". Afterwards "\uXXXX" escape sequences
     * (including surrogate pairs) are decoded.
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function to_utf8_string(string $str, bool $decode_html_entity_to_utf8 = false): string
    {
        if ($str === '') {
            return $str;
        }

        // length in bytes, the loop below works on single bytes
        $max = \strlen($str);
        $buf = '';

        for ($i = 0; $i < $max; ++$i) {
            $c1 = $str[$i];

            if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

                if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

                    // a missing byte at the end of the string is treated as "\x00" (never a continuation byte)
                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

                    if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2;
                        // skip the continuation byte that was just consumed
                        ++$i;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3;
                        // skip both continuation bytes
                        $i += 2;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

                    $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
                    $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
                    $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

                    if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
                        $buf .= $c1 . $c2 . $c3 . $c4;
                        // skip all three continuation bytes
                        $i += 3;
                    } else { // not valid UTF8 - convert it
                        $buf .= self::to_utf8_convert_helper($c1);
                    }
                } else { // doesn't look like UTF8, but should be converted

                    $buf .= self::to_utf8_convert_helper($c1);
                }
            } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

                // a continuation byte (10xxxxxx) without a preceding lead byte
                $buf .= self::to_utf8_convert_helper($c1);
            } else { // it doesn't need conversion

                $buf .= $c1;
            }
        }

        // decode unicode escape sequences + unicode surrogate pairs
        $buf = \preg_replace_callback(
            '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][cdefCDEF][\da-fA-F]{2})|\\\\u([0-9a-fA-F]{4})/',
            /**
             * @param array $matches
             *
             * @psalm-pure
             *
             * @return string
             */
            static function (array $matches): string {
                if (isset($matches[3])) {
                    // single "\uXXXX" escape sequence
                    $cp = (int) \hexdec($matches[3]);
                } else {
                    // surrogate pair: combine the high and the low surrogate into one code point
                    // http://unicode.org/faq/utf_bom.html#utf16-4
                    $cp = ((int) \hexdec($matches[1]) << 10)
                          + (int) \hexdec($matches[2])
                          + 0x10000
                          - (0xD800 << 10)
                          - 0xDC00;
                }

                // https://github.com/php/php-src/blob/php-7.3.2/ext/standard/html.c#L471
                //
                // php_utf32_utf8(unsigned char *buf, unsigned k)

                if ($cp < 0x80) {
                    return (string) self::chr($cp);
                }

                if ($cp < 0xA0) {
                    // 0x80 - 0x9F: build the two bytes of the sequence by hand
                    /** @noinspection UnnecessaryCastingInspection */
                    return (string) self::chr(0xC0 | $cp >> 6) . (string) self::chr(0x80 | $cp & 0x3F);
                }

                return self::decimal_to_chr($cp);
            },
            $buf
        );

        // "preg_replace_callback()" returns NULL on error
        if ($buf === null) {
            return '';
        }

        // decode UTF-8 codepoints
        if ($decode_html_entity_to_utf8) {
            $buf = self::html_entity_decode($buf);
        }

        return $buf;
    }

    /**
     * Returns the given string as an integer, or null if the string isn't numeric.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return int|null
     *

null if the string isn't numeric

*/
    public static function to_int(string $str)
    {
        // only numeric strings are converted, everything else yields null
        return \is_numeric($str) ? (int) $str : null;
    }

    /**
     * Returns the given input as string, or null if the input isn't int|float|string
     * and do not implement the "__toString()" method.
     *
     * @param float|int|object|string|null $input
     *
     * @psalm-pure
     *
     * @return string|null
     *

null if the input isn't int|float|string and has no "__toString()" method

*/
    public static function to_string($input)
    {
        // scalars that have a natural string representation
        if (\is_string($input) || \is_int($input) || \is_float($input)) {
            return (string) $input;
        }

        // objects are only accepted if they can convert themselves
        if (\is_object($input) && \method_exists($input, '__toString')) {
            return (string) $input;
        }

        // null, bool, array, resource and objects without "__toString()"
        return null;
    }

    /**
     * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
     *
     * INFO: This is slower then "trim()"
     *
     * We can only use the original-function, if we use <= 7-Bit in the string / chars
     * but the check for ASCII (7-Bit) cost more time, then we can safe here.
     *
     * EXAMPLE: UTF8::trim(' -ABC-中文空白- '); // '-ABC-中文空白-'
     *
     * @param string $str

The string to be trimmed

* @param string|null $chars [optional]

Optional characters to be stripped

* * @psalm-pure * * @return string *

The trimmed string.

*/
    public static function trim(string $str = '', string $chars = null): string
    {
        if ($str === '') {
            return '';
        }

        // an empty character list would build the character class "[]" below;
        // like PHP's "trim()", strip nothing in that case
        if ($chars === '') {
            return $str;
        }

        if (self::$SUPPORT['mbstring'] === true) {
            if ($chars !== null) {
                /** @noinspection PregQuoteUsageInspection */
                $chars = \preg_quote($chars);
                // "{$var}" instead of "${var}": the latter is deprecated as of PHP 8.2
                $pattern = "^[{$chars}]+|[{$chars}]+\$";
            } else {
                $pattern = '^[\\s]+|[\\s]+$';
            }

            /** @noinspection PhpComposerExtensionStubsInspection */
            return (string) \mb_ereg_replace($pattern, '', $str);
        }

        if ($chars !== null) {
            // here the pattern is handed to "regex_replace()", so the delimiter is quoted as well
            $chars = \preg_quote($chars, '/');
            $pattern = "^[{$chars}]+|[{$chars}]+\$";
        } else {
            $pattern = '^[\\s]+|[\\s]+$';
        }

        return self::regex_replace($str, $pattern, '');
    }

    /**
     * Makes string's first char uppercase.
     *
     * EXAMPLE: UTF8::ucfirst('ñtërnâtiônàlizætiøn foo'); // 'Ñtërnâtiônàlizætiøn foo'
     *
     * @param string $str

The input string.

* @param string $encoding [optional]

Set the charset for e.g. "mb_" function

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

* @param string|null $lang [optional]

Set the language for special cases: az, el, lt, * tr

* @param bool $try_to_keep_the_string_length [optional]

true === try to keep the string length: e.g. ẞ * -> ß

* * @psalm-pure * * @return string *

The resulting string with its first character in uppercase.

*/
    public static function ucfirst(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false,
        string $lang = null,
        bool $try_to_keep_the_string_length = false
    ): string {
        if ($str === '') {
            return '';
        }

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // the plain "mb_" functions are enough without language rules / length preservation
        $use_mb_functions = $lang === null && !$try_to_keep_the_string_length;

        if ($encoding === 'UTF-8') {
            $rest = (string) \mb_substr($str, 1);
            $first_char = (string) \mb_substr($str, 0, 1);

            $first_char_upper = $use_mb_functions
                ? \mb_strtoupper($first_char)
                : self::strtoupper($first_char, $encoding, false, $lang, $try_to_keep_the_string_length);

            return $first_char_upper . $rest;
        }

        $encoding = self::normalize_encoding($encoding, 'UTF-8');

        $rest = (string) self::substr($str, 1, null, $encoding);

        if ($use_mb_functions) {
            $first_char_upper = \mb_strtoupper(
                (string) \mb_substr($str, 0, 1, $encoding),
                $encoding
            );
        } else {
            $first_char_upper = self::strtoupper(
                (string) self::substr($str, 0, 1, $encoding),
                $encoding,
                false,
                $lang,
                $try_to_keep_the_string_length
            );
        }

        return $first_char_upper . $rest;
    }

    /**
     * alias for "UTF8::ucfirst()"
     *
     * @param string $str
     * @param string $encoding
     * @param bool   $clean_utf8
     *
     * @psalm-pure
     *
     * @return string
     *
     * @see UTF8::ucfirst()
     * @deprecated

please use "UTF8::ucfirst()"

*/
    public static function ucword(
        string $str,
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Deprecated alias: forwards its three arguments unchanged to ucfirst().
        return self::ucfirst($str, $encoding, $clean_utf8);
    }

    /**
     * Uppercases the first character of every word in the string.
     *
     * EXAMPLE: UTF8::ucwords('iñt ërn âTi ônà liz æti øn'); // 'Iñt Ërn ÂTi Ônà Liz Æti Øn'
     *
     * @param string $str

The input string.

* @param string[] $exceptions [optional]

Exclusion for some words.

* @param string $char_list [optional]

Additional chars that belong to words and do not start a new word.

* @param string $encoding [optional]

Set the charset.

* @param bool $clean_utf8 [optional]

Remove non UTF-8 chars from the string.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function ucwords(
        string $str,
        array $exceptions = [],
        string $char_list = '',
        string $encoding = 'UTF-8',
        bool $clean_utf8 = false
    ): string {
        // Strict comparison on purpose: the string "0" is valid input and must
        // not be treated as empty.
        if ($str === '') {
            return '';
        }

        // INFO: mb_convert_case($str, MB_CASE_TITLE);
        // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

        if ($clean_utf8) {
            // "mb_strpos()" and "iconv_strpos()" returns wrong position,
            // if invalid characters are found in $haystack before $needle
            $str = self::clean($str);
        }

        // The native function is only good enough for pure ASCII input and
        // when no custom word chars / exceptions were supplied.
        $use_php_default_functions = !(bool) ($char_list . \implode('', $exceptions));

        if (
            $use_php_default_functions
            &&
            ASCII::is_ascii($str)
        ) {
            return \ucwords($str);
        }

        $words = self::str_to_words($str, $char_list);
        $use_exceptions = $exceptions !== [];

        $words_str = '';
        // Iterate by value: nothing is written back into $words.
        foreach ($words as $word) {
            // Only skip real empties; a word consisting of "0" has to be kept.
            if ($word === '') {
                continue;
            }

            if (
                !$use_exceptions
                ||
                !\in_array($word, $exceptions, true)
            ) {
                $words_str .= self::ucfirst($word, $encoding);
            } else {
                // listed as an exception: keep it exactly as written
                $words_str .= $word;
            }
        }

        return $words_str;
    }

    /**
     * Multi decode HTML entity + fix urlencoded-win1252-chars.
     *
     * EXAMPLE: UTF8::urldecode('tes%20öäü%20\u00edtest+test'); // 'tes öäü ítest test'
     *
     * e.g:
     * 'test+test'                     => 'test test'
     * 'D&#252;sseldorf'               => 'Düsseldorf'
     * 'D%FCsseldorf'                  => 'Düsseldorf'
     * 'D&#xFC;sseldorf'               => 'Düsseldorf'
     * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
     * 'DÃ¼sseldorf'                   => 'Düsseldorf'
     * 'D%C3%BCsseldorf'               => 'Düsseldorf'
     * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
     * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
     *
     * @param string $str

The input string.

* @param bool $multi_decode

Decode as often as possible.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function urldecode(string $str, bool $multi_decode = true): string
    {
        if ($str === '') {
            return '';
        }

        $decoded = self::urldecode_unicode_helper($str);

        // One decoding pass always runs. In multi-decode mode the pass is
        // repeated for as long as it still changes the string.
        do {
            $previous = $decoded;

            /**
             * @psalm-suppress PossiblyInvalidArgument
             */
            $decoded = \urldecode(
                self::html_entity_decode(
                    self::to_utf8($previous),
                    \ENT_QUOTES | \ENT_HTML5
                )
            );
        } while ($multi_decode && $previous !== $decoded);

        return self::fix_simple_utf8($decoded);
    }

    /**
     * Returns an array that maps "urlencoded" win1252 sequences to their UTF-8 characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *
     * @deprecated

please use the "UTF8::urldecode()" function to decode a string

*/ public static function urldecode_fix_win1252_chars(): array { return [ '%20' => ' ', '%21' => '!', '%22' => '"', '%23' => '#', '%24' => '$', '%25' => '%', '%26' => '&', '%27' => "'", '%28' => '(', '%29' => ')', '%2A' => '*', '%2B' => '+', '%2C' => ',', '%2D' => '-', '%2E' => '.', '%2F' => '/', '%30' => '0', '%31' => '1', '%32' => '2', '%33' => '3', '%34' => '4', '%35' => '5', '%36' => '6', '%37' => '7', '%38' => '8', '%39' => '9', '%3A' => ':', '%3B' => ';', '%3C' => '<', '%3D' => '=', '%3E' => '>', '%3F' => '?', '%40' => '@', '%41' => 'A', '%42' => 'B', '%43' => 'C', '%44' => 'D', '%45' => 'E', '%46' => 'F', '%47' => 'G', '%48' => 'H', '%49' => 'I', '%4A' => 'J', '%4B' => 'K', '%4C' => 'L', '%4D' => 'M', '%4E' => 'N', '%4F' => 'O', '%50' => 'P', '%51' => 'Q', '%52' => 'R', '%53' => 'S', '%54' => 'T', '%55' => 'U', '%56' => 'V', '%57' => 'W', '%58' => 'X', '%59' => 'Y', '%5A' => 'Z', '%5B' => '[', '%5C' => '\\', '%5D' => ']', '%5E' => '^', '%5F' => '_', '%60' => '`', '%61' => 'a', '%62' => 'b', '%63' => 'c', '%64' => 'd', '%65' => 'e', '%66' => 'f', '%67' => 'g', '%68' => 'h', '%69' => 'i', '%6A' => 'j', '%6B' => 'k', '%6C' => 'l', '%6D' => 'm', '%6E' => 'n', '%6F' => 'o', '%70' => 'p', '%71' => 'q', '%72' => 'r', '%73' => 's', '%74' => 't', '%75' => 'u', '%76' => 'v', '%77' => 'w', '%78' => 'x', '%79' => 'y', '%7A' => 'z', '%7B' => '{', '%7C' => '|', '%7D' => '}', '%7E' => '~', '%7F' => '', '%80' => '`', '%81' => '', '%82' => '‚', '%83' => 'ƒ', '%84' => '„', '%85' => '…', '%86' => '†', '%87' => '‡', '%88' => 'ˆ', '%89' => '‰', '%8A' => 'Š', '%8B' => '‹', '%8C' => 'Œ', '%8D' => '', '%8E' => 'Ž', '%8F' => '', '%90' => '', '%91' => '‘', '%92' => '’', '%93' => '“', '%94' => '”', '%95' => '•', '%96' => '–', '%97' => '—', '%98' => '˜', '%99' => '™', '%9A' => 'š', '%9B' => '›', '%9C' => 'œ', '%9D' => '', '%9E' => 'ž', '%9F' => 'Ÿ', '%A0' => '', '%A1' => '¡', '%A2' => '¢', '%A3' => '£', '%A4' => '¤', '%A5' => '¥', '%A6' => '¦', '%A7' => '§', '%A8' => '¨', '%A9' => '©', 
'%AA' => 'ª', '%AB' => '«', '%AC' => '¬', '%AD' => '', '%AE' => '®', '%AF' => '¯', '%B0' => '°', '%B1' => '±', '%B2' => '²', '%B3' => '³', '%B4' => '´', '%B5' => 'µ', '%B6' => '¶', '%B7' => '·', '%B8' => '¸', '%B9' => '¹', '%BA' => 'º', '%BB' => '»', '%BC' => '¼', '%BD' => '½', '%BE' => '¾', '%BF' => '¿', '%C0' => 'À', '%C1' => 'Á', '%C2' => 'Â', '%C3' => 'Ã', '%C4' => 'Ä', '%C5' => 'Å', '%C6' => 'Æ', '%C7' => 'Ç', '%C8' => 'È', '%C9' => 'É', '%CA' => 'Ê', '%CB' => 'Ë', '%CC' => 'Ì', '%CD' => 'Í', '%CE' => 'Î', '%CF' => 'Ï', '%D0' => 'Ð', '%D1' => 'Ñ', '%D2' => 'Ò', '%D3' => 'Ó', '%D4' => 'Ô', '%D5' => 'Õ', '%D6' => 'Ö', '%D7' => '×', '%D8' => 'Ø', '%D9' => 'Ù', '%DA' => 'Ú', '%DB' => 'Û', '%DC' => 'Ü', '%DD' => 'Ý', '%DE' => 'Þ', '%DF' => 'ß', '%E0' => 'à', '%E1' => 'á', '%E2' => 'â', '%E3' => 'ã', '%E4' => 'ä', '%E5' => 'å', '%E6' => 'æ', '%E7' => 'ç', '%E8' => 'è', '%E9' => 'é', '%EA' => 'ê', '%EB' => 'ë', '%EC' => 'ì', '%ED' => 'í', '%EE' => 'î', '%EF' => 'ï', '%F0' => 'ð', '%F1' => 'ñ', '%F2' => 'ò', '%F3' => 'ó', '%F4' => 'ô', '%F5' => 'õ', '%F6' => 'ö', '%F7' => '÷', '%F8' => 'ø', '%F9' => 'ù', '%FA' => 'ú', '%FB' => 'û', '%FC' => 'ü', '%FD' => 'ý', '%FE' => 'þ', '%FF' => 'ÿ', ]; } /** * Decodes a UTF-8 string to ISO-8859-1. * * EXAMPLE: UTF8::encode('UTF-8', UTF8::utf8_decode('-ABC-中文空白-')); // '-ABC-????-' * * @param string $str

The input string.

* @param bool $keep_utf8_chars [optional] If true, the untouched input is returned whenever
     *                             the decoded result is not shorter than the input
     *                             (measured with self::strlen(), see the final check).
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection SuspiciousBinaryOperationInspection
     */
    public static function utf8_decode(string $str, bool $keep_utf8_chars = false): string
    {
        if ($str === '') {
            return '';
        }

        // save for later comparison
        $str_backup = $str;
        $len = \strlen($str);

        // lazy-load the byte <-> ordinal lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        // placeholder written for every sequence that does not fit into one byte
        $no_char_found = '?';
        // $i reads the input bytes, $j writes the (never longer) result into the same string
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
            // dispatch on the high nibble of the current byte
            switch ($str[$i] & "\xF0") {
                case "\xC0":
                case "\xD0":
                    // lead byte of a 2-byte sequence: combine its low 5 bits with
                    // the low 6 bits of the following byte (which is consumed here)
                    $c = (self::$ORD[$str[$i] & "\x1F"] << 6) | self::$ORD[$str[++$i] & "\x3F"];
                    // only values below 256 can be represented in a single byte
                    $str[$j] = $c < 256 ? self::$CHR[$c] : $no_char_found;

                    break;

                /** @noinspection PhpMissingBreakStatementInspection */
                case "\xF0":
                    // lead byte of a 4-byte sequence: skip one extra byte, then
                    // fall through to the 3-byte handling
                    ++$i;

                // no break

                case "\xE0":
                    // 3-byte (or, via fall-through, 4-byte) sequence: emit the
                    // placeholder and skip the remaining bytes of the sequence
                    $str[$j] = $no_char_found;
                    $i += 2;

                    break;

                default:
                    // every other byte is copied as-is
                    $str[$j] = $str[$i];
            }
        }

        // cut off the stale tail that is left behind the write index
        /** @var false|string $return - needed for PhpStan (stubs error) */
        $return = \substr($str, 0, $j);
        if ($return === false) {
            $return = '';
        }

        // NOTE(review): the result is discarded in favour of the original input
        // when it is not shorter than the input — presumably meaning "nothing
        // was actually decoded"; confirm the intent against the callers.
        if (
            $keep_utf8_chars
            &&
            (int) self::strlen($return) >= (int) self::strlen($str_backup)
        ) {
            return $str_backup;
        }

        return $return;
    }

    /**
     * Encodes an ISO-8859-1 string to UTF-8.
     *
     * EXAMPLE: UTF8::utf8_decode(UTF8::utf8_encode('-ABC-中文空白-')); // '-ABC-中文空白-'
     *
     * @param string $str

The input string.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function utf8_encode(string $str): string
    {
        if ($str === '') {
            return '';
        }

        // "\utf8_encode()" is deprecated since PHP 8.2; converting from
        // ISO-8859-1 with mbstring yields the same bytes.
        /** @var false|string $encoded - the polyfill maybe return false */
        $encoded = \mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

        /** @noinspection CallableParameterUseCaseInTypeContextInspection */
        /** @psalm-suppress TypeDoesNotContainType */
        if ($encoded === false) {
            return '';
        }

        return $encoded;
    }

    /**
     * fix -> utf8-win1252 chars
     *
     * @param string $str

The input string.

* * @psalm-pure * * @return string * * @deprecated

please use "UTF8::fix_simple_utf8()"

*/
    public static function utf8_fix_win1252_chars(string $str): string
    {
        // Deprecated alias: forwards to fix_simple_utf8().
        return self::fix_simple_utf8($str);
    }

    /**
     * Returns an array with all utf8 whitespace characters.
     *
     * @see http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
     *
     * @psalm-pure
     *
     * @return string[]
     *                  An array with all known whitespace characters as values and the type of whitespace as keys
     *                  as defined in above URL
     */
    public static function whitespace_table(): array
    {
        // plain accessor for the static lookup table
        return self::$WHITESPACE_TABLE;
    }

    /**
     * Limit the number of words in a string.
     *
     * EXAMPLE: UTF8::words_limit('fòô bàř fòô', 2, ''); // 'fòô bàř'
     *
     * @param string $str

The input string.

* @param int $limit

The limit of words as integer.

* @param string $str_add_on

Replacement for the stripped part of the string.

*
     * @psalm-pure
     *
     * @return string
     */
    public static function words_limit(
        string $str,
        int $limit = 100,
        string $str_add_on = '…'
    ): string {
        if ($limit < 1 || $str === '') {
            return '';
        }

        // optional leading whitespace followed by at most $limit words,
        // each word including its trailing whitespace
        $pattern = '/^\\s*+(?:[^\\s]++\\s*+){1,' . $limit . '}/u';
        \preg_match($pattern, $str, $matches);

        // nothing matched: hand the input back untouched
        if (!isset($matches[0])) {
            return $str;
        }

        $head = $matches[0];

        // the match already covers the whole string: nothing was cut off
        if (\mb_strlen($str) === (int) \mb_strlen($head)) {
            return $str;
        }

        return \rtrim($head) . $str_add_on;
    }

    /**
     * Wraps a string to a given number of characters
     *
     * EXAMPLE:

UTF8::wordwrap('Iñtërnâtiônàlizætiøn', 2, '
', true)); // 'Iñ
të
rn
ât
iô
nà
li
zæ
ti
øn'

* * @see http://php.net/manual/en/function.wordwrap.php * * @param string $str

The input string.

* @param int $width [optional]

The column width.

* @param string $break [optional]

The line is broken using the optional break parameter.

* @param bool $cut [optional]

* If the cut is set to true, the string is * always wrapped at or before the specified width. So if you have * a word that is larger than the given width, it is broken apart. *

* * @psalm-pure * * @return string *

The given string wrapped at the specified column.

*/
    public static function wordwrap(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false
    ): string {
        if ($str === '' || $break === '') {
            return '';
        }

        // split on already existing breaks, they are re-inserted below
        $str_split = \explode($break, $str);
        if ($str_split === false) {
            return '';
        }

        // Build two parallel structures:
        //  - $charsArray: the pieces returned by self::str_split() plus the
        //    original breaks (presumably one character per piece — confirm
        //    against self::str_split())
        //  - $word_split: a single-byte "shadow" of the text where every piece
        //    is "?" (or " " for a space) and every original break is "#"
        /** @var string[] $charsArray */
        $charsArray = [];
        $word_split = '';
        foreach ($str_split as $i => $i_value) {
            if ($i) {
                $charsArray[] = $break;
                $word_split .= '#';
            }

            foreach (self::str_split($i_value) as $c) {
                $charsArray[] = $c;
                if ($c === ' ') {
                    $word_split .= ' ';
                } else {
                    $word_split .= '?';
                }
            }
        }

        $str_return = '';
        $j = 0; // index into $charsArray
        $b = -1; // position of the current "#" inside the wrapped shadow
        $i = -1; // read position inside the wrapped shadow
        // let the native function decide the break positions on the shadow string
        $word_split = \wordwrap($word_split, $width, '#', $cut);

        $max = \mb_strlen($word_split);
        // replay every "#" of the wrapped shadow onto the real characters
        while (($b = \mb_strpos($word_split, '#', $b + 1)) !== false) {
            // copy (and consume) the characters in front of the break
            for (++$i; $i < $b; ++$i) {
                if (isset($charsArray[$j])) {
                    $str_return .= $charsArray[$j];
                    unset($charsArray[$j]);
                }
                ++$j;

                // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
                if ($i > $max) {
                    break 2;
                }
            }

            // the break replaces an original break or a space: drop that element
            if (
                $break === $charsArray[$j]
                ||
                $charsArray[$j] === ' '
            ) {
                unset($charsArray[$j++]);
            }

            $str_return .= $break;

            // prevent endless loop, e.g. if there is a error in the "mb_*" polyfill
            if ($b > $max) {
                break;
            }
        }

        // whatever was not consumed follows the last break
        return $str_return . \implode('', $charsArray);
    }

    /**
     * Line-Wrap the string after $limit, but split the string by "$delimiter" before ...
     * ... so that we wrap the per line.
     *
     * @param string $str

The input string.

* @param int $width [optional]

The column width.

* @param string $break [optional]

The line is broken using the optional break parameter.

* @param bool $cut [optional]

* If the cut is set to true, the string is * always wrapped at or before the specified width. So if you have * a word that is larger than the given width, it is broken apart. *

* @param bool $add_final_break [optional]

* If this flag is true, then the method will add a $break at the end * of the result string. *

* @param string|null $delimiter [optional]

* You can change the default behavior, where we split the string by newline. *

*
     * @psalm-pure
     *
     * @return string
     */
    public static function wordwrap_per_line(
        string $str,
        int $width = 75,
        string $break = "\n",
        bool $cut = false,
        bool $add_final_break = true,
        string $delimiter = null
    ): string {
        // without an explicit delimiter every kind of newline separates the lines
        $lines = $delimiter === null
            ? \preg_split('/\\r\\n|\\r|\\n/', $str)
            : \explode($delimiter, $str);

        $wrapped_lines = [];
        if ($lines !== false) {
            foreach ($lines as $line) {
                $wrapped_lines[] = self::wordwrap($line, $width, $break, $cut);
            }
        }

        // the lines are glued with the delimiter, or "\n" when none was given
        $glue = $delimiter ?? "\n";
        $suffix = $add_final_break ? $break : '';

        return \implode($glue, $wrapped_lines) . $suffix;
    }

    /**
     * Returns an array of Unicode White Space characters.
     *
     * @psalm-pure
     *
     * @return string[]
     *

An array with numeric code point as key and White Space Character as value.

*/
    public static function ws(): array
    {
        // plain accessor for the static code point => character table
        return self::$WHITESPACE;
    }

    /**
     * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
     *
     * EXAMPLE:


     * UTF8::is_utf8_string('Iñtërnâtiônàlizætiøn'); // true
     * //
     * UTF8::is_utf8_string("Iñtërnâtiônàlizætiøn\xA0\xA1"); // false
     *

* * @see http://hsivonen.iki.fi/php-utf8/ * * @param string $str

The string to be checked.

* @param bool $strict

Check also if the string is not UTF-16 or UTF-32.

*
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function is_utf8_string(string $str, bool $strict = false)
    {
        // an empty string is treated as valid
        if ($str === '') {
            return true;
        }

        if ($strict) {
            // reject binary input that is_utf16() / is_utf32() report as UTF-16 / UTF-32
            $is_binary = self::is_binary($str, true);

            if ($is_binary && self::is_utf16($str, false) !== false) {
                return false;
            }

            if ($is_binary && self::is_utf32($str, false) !== false) {
                return false;
            }
        }

        if (self::$SUPPORT['pcre_utf8']) {
            // If even just the first character can be matched, when the /u
            // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
            // invalid, nothing at all will match, even if the string contains
            // some valid sequences
            return \preg_match('/^./us', $str) === 1;
        }

        // Fallback without PCRE UTF-8 support: byte-wise state machine.
        $mState = 0; // cached expected number of octets after the current octet
        // until the beginning of the next UTF8 character sequence
        $mUcs4 = 0; // cached Unicode character
        $mBytes = 1; // cached expected number of octets in the current sequence

        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        $len = \strlen($str);
        /** @noinspection ForeachInvariantsInspection */
        for ($i = 0; $i < $len; ++$i) {
            // ordinal value of the current byte
            $in = self::$ORD[$str[$i]];

            if ($mState === 0) {
                // When mState is zero we expect either a US-ASCII character or a
                // multi-octet sequence.
                if ((0x80 & $in) === 0) {
                    // US-ASCII, pass straight through.
                    $mBytes = 1;
                } elseif ((0xE0 & $in) === 0xC0) {
                    // First octet of 2 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x1F) << 6;
                    $mState = 1;
                    $mBytes = 2;
                } elseif ((0xF0 & $in) === 0xE0) {
                    // First octet of 3 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x0F) << 12;
                    $mState = 2;
                    $mBytes = 3;
                } elseif ((0xF8 & $in) === 0xF0) {
                    // First octet of 4 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x07) << 18;
                    $mState = 3;
                    $mBytes = 4;
                } elseif ((0xFC & $in) === 0xF8) {
                    /* First octet of 5 octet sequence.
                     *
                     * This is illegal because the encoded codepoint must be either
                     * (a) not the shortest form or
                     * (b) outside the Unicode range of 0-0x10FFFF.
                     * Rather than trying to resynchronize, we will carry on until the end
                     * of the sequence and let the later error handling code catch it.
                     */
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 0x03) << 24;
                    $mState = 4;
                    $mBytes = 5;
                } elseif ((0xFE & $in) === 0xFC) {
                    // First octet of 6 octet sequence, see comments for 5 octet sequence.
                    $mUcs4 = $in;
                    $mUcs4 = ($mUcs4 & 1) << 30;
                    $mState = 5;
                    $mBytes = 6;
                } else {
                    // Current octet is neither in the US-ASCII range nor a legal first
                    // octet of a multi-octet sequence.
                    return false;
                }
            } elseif ((0xC0 & $in) === 0x80) {
                // When mState is non-zero, we expect a continuation of the multi-octet
                // sequence

                // Legal continuation: merge its 6 payload bits at the right position.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;
                // Prefix: End of the multi-octet sequence. mUcs4 now contains the final
                // Unicode code point to be output.
                if (--$mState === 0) {
                    // Check for illegal sequences and code points.
                    //
                    // From Unicode 3.1, non-shortest form is illegal
                    if (
                        ($mBytes === 2 && $mUcs4 < 0x0080)
                        ||
                        ($mBytes === 3 && $mUcs4 < 0x0800)
                        ||
                        ($mBytes === 4 && $mUcs4 < 0x10000)
                        ||
                        ($mBytes > 4)
                        ||
                        // From Unicode 3.2, surrogate characters are illegal.
                        (($mUcs4 & 0xFFFFF800) === 0xD800)
                        ||
                        // Code points outside the Unicode range are illegal.
                        ($mUcs4 > 0x10FFFF)
                    ) {
                        return false;
                    }
                    // initialize UTF8 cache
                    $mState = 0;
                    $mUcs4 = 0;
                    $mBytes = 1;
                }
            } else {
                // ((0xC0 & (*in) != 0x80) && (mState != 0))
                // Incomplete multi-octet sequence.
                return false;
            }
        }

        // a sequence that is still open at the end of the input is invalid
        return $mState === 0;
    }

    /**
     * @param string $str
     * @param bool   $use_lowercase

Use uppercase by default, otherwise use lowercase.

* @param bool $use_full_case_fold

Convert not only common cases.

*
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function fixStrCaseHelper(
        string $str,
        bool $use_lowercase = false,
        bool $use_full_case_fold = false
    ) {
        $upper = self::$COMMON_CASE_FOLD['upper'];
        $lower = self::$COMMON_CASE_FOLD['lower'];

        // common case folding: pick the replacement direction once
        $search = $use_lowercase ? $upper : $lower;
        $replace = $use_lowercase ? $lower : $upper;
        $str = \str_replace($search, $replace, $str);

        if (!$use_full_case_fold) {
            return $str;
        }

        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array|null
         */
        static $FULL_CASE_FOLD = null;
        if ($FULL_CASE_FOLD === null) {
            // loaded once and kept for later calls
            $FULL_CASE_FOLD = self::getData('caseFolding_full');
        }

        // the full table is applied 0 -> 1 for lowercase and 1 -> 0 otherwise
        if ($use_lowercase) {
            return \str_replace($FULL_CASE_FOLD[0], $FULL_CASE_FOLD[1], $str);
        }

        return \str_replace($FULL_CASE_FOLD[1], $FULL_CASE_FOLD[0], $str);
    }

    /**
     * Loads a data file from "/data/*.php" and returns whatever that file returns.
     *
     * @param string $file file name without the ".php" extension
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function getData(string $file)
    {
        $path = __DIR__ . '/data/' . $file . '.php';

        /** @noinspection PhpIncludeInspection */
        /** @noinspection UsingInclusionReturnValueInspection */
        /** @psalm-suppress UnresolvableInclude */
        return include $path;
    }

    /**
     * Fills the emoji lookup caches on the first call.
     *
     * @psalm-pure
     *
     * @return true|null true when the caches were built by this call, null when they already existed
     */
    private static function initEmojiData()
    {
        // already initialized: nothing to do
        if (self::$EMOJI_KEYS_CACHE !== null) {
            return null;
        }

        if (self::$EMOJI === null) {
            self::$EMOJI = self::getData('emoji');
        }

        // order the table by key length, longest keys first
        /**
         * @psalm-suppress ImpureFunctionCall - static sort function is used
         */
        \uksort(
            self::$EMOJI,
            static function (string $a, string $b): int {
                return \strlen($b) <=> \strlen($a);
            }
        );

        self::$EMOJI_KEYS_CACHE = \array_keys(self::$EMOJI);
        self::$EMOJI_VALUES_CACHE = self::$EMOJI;

        // one placeholder token per key, built from the key's crc32 and its reverse
        foreach (self::$EMOJI_KEYS_CACHE as $emoji_key) {
            $checksum = \crc32($emoji_key);
            $checksum_reversed = \strrev((string) $checksum);

            self::$EMOJI_KEYS_REVERSIBLE_CACHE[] = '_-_PORTABLE_UTF8_-_' . $checksum . '_-_' . $checksum_reversed . '_-_8FTU_ELBATROP_-_';
        }

        return true;
    }

    /**
     * Checks whether mbstring "overloaded" is active on the server.
*
     * @psalm-pure
     *
     * @return bool
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function mbstring_overloaded()
    {
        /**
         * INI directive 'mbstring.func_overload' is deprecated since PHP 7.2
         */

        // without the constant the overload feature cannot be active
        if (!\defined('MB_OVERLOAD_STRING')) {
            return false;
        }

        /** @noinspection PhpComposerExtensionStubsInspection */
        /** @noinspection PhpUsageOfSilenceOperatorInspection */
        $overload_flags = (int) @\ini_get('mbstring.func_overload');

        return (bool) ($overload_flags & \MB_OVERLOAD_STRING);
    }

    /**
     * Filters a list of strings.
     *
     * @param array    $strings
     * @param bool     $remove_empty_values  drop entries that are empty after trim()
     * @param int|null $remove_short_values  drop entries whose mb_strlen() is at most this value
     *
     * @psalm-pure
     *
     * @return array
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function reduce_string_array(
        array $strings,
        bool $remove_empty_values,
        int $remove_short_values = null
    ) {
        $kept = [];

        foreach ($strings as $candidate) {
            $is_too_short = $remove_short_values !== null
                            &&
                            \mb_strlen($candidate) <= $remove_short_values;
            if ($is_too_short) {
                continue;
            }

            $is_blank = $remove_empty_values && \trim($candidate) === '';
            if ($is_blank) {
                continue;
            }

            $kept[] = $candidate;
        }

        return $kept;
    }

    /**
     * rxClass
     *
     * Builds (and caches per argument pair) a regex fragment for the given characters.
     *
     * @param string $s
     * @param string $class
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function rxClass(string $s, string $class = '')
    {
        /**
         * @psalm-suppress ImpureStaticVariable
         *
         * @var array
         */
        static $RX_CLASS_CACHE = [];

        $cache_key = $s . '_' . $class;

        if (isset($RX_CLASS_CACHE[$cache_key])) {
            return $RX_CLASS_CACHE[$cache_key];
        }

        // slot 0 collects the character class, further slots hold alternatives
        /** @var string[] $parts */
        $parts = [$class];

        foreach (self::str_split($s) as $piece) {
            if ($piece === '-') {
                // a dash goes to the front of the class
                $parts[0] = '-' . $parts[0];
            } elseif (!isset($piece[2])) {
                // at most two bytes: quote it for use inside "/"-delimited patterns
                $parts[0] .= \preg_quote($piece, '/');
            } elseif (self::strlen($piece) === 1) {
                $parts[0] .= $piece;
            } else {
                $parts[] = $piece;
            }
        }

        if ($parts[0]) {
            $parts[0] = '[' . $parts[0] . ']';
        }

        $rx = \count($parts) === 1
            ? $parts[0]
            : '(?:' . \implode('|', $parts) . ')';

        $RX_CLASS_CACHE[$cache_key] = $rx;

        return $rx;
    }

    /**
     * Personal names such as "Marcus Aurelius" are sometimes typed incorrectly using lowercase ("marcus aurelius").
     *
     * @param string $names
     * @param string $delimiter
     * @param string $encoding
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function str_capitalize_name_helper(
        string $names,
        string $delimiter,
        string $encoding = 'UTF-8'
    ) {
        $name_parts = \explode($delimiter, $names);
        if ($name_parts === false) {
            return '';
        }

        // particles that stay lowercase when they stand alone
        $particles = [
            'ab',
            'af',
            'al',
            'and',
            'ap',
            'bint',
            'binte',
            'da',
            'de',
            'del',
            'den',
            'der',
            'di',
            'dit',
            'ibn',
            'la',
            'mac',
            'nic',
            'of',
            'ter',
            'the',
            'und',
            'van',
            'von',
            'y',
            'zu',
        ];

        // beginnings that keep a name part untouched
        $prefixes = [
            'al-',
            "d'",
            'ff',
            "l'",
            'mac',
            'mc',
            'nic',
        ];

        // with "-" as delimiter the particles are checked as beginnings too
        $blocking_beginnings = $delimiter === '-'
            ? \array_merge($particles, $prefixes)
            : $prefixes;

        foreach ($name_parts as $index => $part) {
            if (\in_array($part, $particles, true)) {
                continue;
            }

            $is_blocked = false;
            foreach ($blocking_beginnings as $beginning) {
                if (\strncmp($part, $beginning, \strlen($beginning)) === 0) {
                    $is_blocked = true;

                    break;
                }
            }

            if (!$is_blocked) {
                $name_parts[$index] = self::ucfirst($part);
            }
        }

        return \implode($delimiter, $name_parts);
    }

    /**
     * Generic case-sensitive transformation for collation matching.
     *
     * @param string $str

The input string

*
     * @psalm-pure
     *
     * @return string|null
     */
    private static function strtonatfold(string $str)
    {
        // decompose first, so that the combining marks become separate code points
        $str = \Normalizer::normalize($str, \Normalizer::NFD);
        /** @phpstan-ignore-next-line - https://github.com/JetBrains/phpstorm-stubs/pull/949 */
        if ($str === false) {
            return '';
        }

        // strip all non-spacing marks (\p{Mn})
        /** @noinspection PhpUndefinedClassInspection */
        return \preg_replace(
            '/\p{Mn}+/u',
            '',
            $str
        );
    }

    /**
     * Converts one input byte into its UTF-8 representation: either via the
     * Windows-1252 special-case table or as a generic two-byte sequence.
     *
     * @param int|string $input
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     * @noinspection SuspiciousBinaryOperationInspection
     */
    private static function to_utf8_convert_helper($input)
    {
        // init
        $buf = '';

        // lazy-load the lookup tables
        if (self::$ORD === null) {
            self::$ORD = self::getData('ord');
        }

        if (self::$CHR === null) {
            self::$CHR = self::getData('chr');
        }

        if (self::$WIN1252_TO_UTF8 === null) {
            self::$WIN1252_TO_UTF8 = self::getData('win1252_to_utf8');
        }

        $ordC1 = self::$ORD[$input];
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
            $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
            // The array key has to be an int: a fractional float key triggers the
            // "implicit conversion from float" deprecation on PHP >= 8.1. The
            // explicit cast truncates exactly like the implicit key conversion did.
            /** @noinspection OffsetOperationsInspection */
            $cc1 = self::$CHR[(int) ($ordC1 / 64)] | "\xC0";
            $cc2 = ((string) $input & "\x3F") | "\x80";
            $buf .= $cc1 . $cc2;
        }

        return $buf;
    }

    /**
     * Rewrites "%uXXXX" escapes (3-4 hex digits) into "&#xXXXX;" entities.
     *
     * @param string $str
     *
     * @psalm-pure
     *
     * @return string
     *
     * @noinspection ReturnTypeCanBeDeclaredInspection
     */
    private static function urldecode_unicode_helper(string $str)
    {
        // cheap pre-check: nothing to do without a "%u" marker
        if (\strpos($str, '%u') === false) {
            return $str;
        }

        $pattern = '/%u([0-9a-fA-F]{3,4})/';
        if (\preg_match($pattern, $str)) {
            $str = (string) \preg_replace($pattern, '&#x\\1;', $str);
        }

        return $str;
    }
}

Constant Name	Description
ENT_COMPAT	Will convert double-quotes and leave single-quotes alone.
ENT_QUOTES	Will convert both double and single quotes.
ENT_NOQUOTES	Will leave both double and single quotes unconverted.
ENT_HTML401	* Handle code as HTML 4.01. *
ENT_XML1	* Handle code as XML 1. *
ENT_XHTML	* Handle code as XHTML. *
ENT_HTML5	* Handle code as HTML 5. *