<?php
/**
 * Part of the Joomla Framework String Package
 *
 * @copyright  Copyright (C) 2005 - 2021 Open Source Matters, Inc. All rights reserved.
 * @license    GNU General Public License version 2 or later; see LICENSE
 */

namespace Joomla\String;

// Make UTF-8 the process-wide default charset as soon as this file is loaded. PHP's mbstring and iconv
// functions fall back to default_charset when their own encoding settings are empty (see the PHP manual).
// The call is error-suppressed with "@" so that a failing ini_set() does not emit a warning.
@ini_set('default_charset', 'UTF-8');

/**
 * String handling class for UTF-8 data wrapping the phputf8 library. All functions assume the validity of UTF-8 strings.
 *
 * Most methods are thin static wrappers that delegate to the global utf8_*() functions. Those functions are not defined in
 * this file and must already be loaded when a method is called.
 *
 * @since  1.3.0
 */
abstract class StringHelper
{
	/**
	 * Increment styles.
	 *
	 * Each style is a two element list: element 0 holds the regular expression(s), element 1 the sprintf format(s).
	 * Either element may be a single string (used for both roles) or a pair:
	 * - expressions: [pattern used to detect and capture the trailing number, pattern used to replace it]
	 * - formats:     [format used when appending a new number, format used when replacing an existing number]
	 *
	 * @var    array
	 * @since  1.3.0
	 */
	protected static $incrementStyles = [
		'dash'    => [
			'#-(\d+)$#',
			'-%d',
		],
		'default' => [
			['#\((\d+)\)$#', '#\(\d+\)$#'],
			[' (%d)', '(%d)'],
		],
	];

	/**
	 * Increments a trailing number in a string.
	 *
	 * Used to easily create distinct labels when copying objects. The method has the following styles:
	 *
	 * default: "Label" becomes "Label (2)"
	 * dash:    "Label" becomes "Label-2"
	 *
	 * @param   string       $string  The source string.
	 * @param   string|null  $style   The style (default|dash). Null or an unknown style selects the default style.
	 * @param   integer      $n       If supplied (non-empty), this number is used for the copy, otherwise it is the 'next' number.
	 *
	 * @return  string  The incremented string.
	 *
	 * @since   1.3.0
	 */
	public static function increment($string, $style = 'default', $n = 0)
	{
		// Coalesce null before indexing so a null style never becomes an array offset.
		$styleSpec = static::$incrementStyles[$style ?? 'default'] ?? static::$incrementStyles['default'];

		// Regular expression search and replace patterns (a single pattern serves both roles).
		$patterns  = $styleSpec[0];
		$rxSearch  = \is_array($patterns) ? $patterns[0] : $patterns;
		$rxReplace = \is_array($patterns) ? $patterns[1] : $patterns;

		// New and old (existing) sprintf formats (a single format serves both roles).
		$formats   = $styleSpec[1];
		$newFormat = \is_array($formats) ? $formats[0] : $formats;
		$oldFormat = \is_array($formats) ? $formats[1] : $formats;

		// No trailing number yet: append one, starting at 2 unless the caller chose a number.
		if (!preg_match($rxSearch, $string, $matches))
		{
			return $string . sprintf($newFormat, empty($n) ? 2 : $n);
		}

		// A trailing number exists: replace it with the caller's number or with the captured number plus one.
		$n = empty($n) ? ($matches[1] + 1) : $n;

		return preg_replace($rxReplace, sprintf($oldFormat, $n), $string);
	}

	/**
	 * Tests whether a string contains only 7bit ASCII bytes.
	 *
	 * You might use this to conditionally check whether a string needs handling as UTF-8 or not, potentially offering performance
	 * benefits by using the native PHP equivalent if it's just ASCII e.g.;
	 *
	 * <code>
	 * if (StringHelper::is_ascii($someString))
	 * {
	 *     // It's just ASCII - use the native PHP version
	 *     $someString = strtolower($someString);
	 * }
	 * else
	 * {
	 *     $someString = StringHelper::strtolower($someString);
	 * }
	 * </code>
	 *
	 * @param   string  $str  The string to test.
	 *
	 * @return  boolean True if the string is all ASCII
	 *
	 * @since   1.3.0
	 */
	public static function is_ascii($str)
	{
		return utf8_is_ascii($str);
	}

	/**
	 * UTF-8 aware alternative to ord()
	 *
	 * Returns the unicode ordinal for a character.
	 *
	 * @param   string  $chr  UTF-8 encoded character
	 *
	 * @return  integer Unicode ordinal for the character
	 *
	 * @link    https://www.php.net/ord
	 * @since   1.4.0
	 */
	public static function ord($chr)
	{
		return utf8_ord($chr);
	}

	/**
	 * UTF-8 aware alternative to strpos()
	 *
	 * Find position of first occurrence of a string.
	 *
	 * @param   string                $str     String being examined
	 * @param   string                $search  String being searched for
	 * @param   integer|null|boolean  $offset  Optional, specifies the position from which the search should be performed.
	 *                                         Boolean false (the default) means "not supplied".
	 *
	 * @return  integer|boolean  Number of characters before the first match or FALSE on failure
	 *
	 * @link    https://www.php.net/strpos
	 * @since   1.3.0
	 */
	public static function strpos($str, $search, $offset = false)
	{
		// Only forward the offset when the caller actually supplied one.
		return $offset === false
			? utf8_strpos($str, $search)
			: utf8_strpos($str, $search, $offset);
	}

	/**
	 * UTF-8 aware alternative to strrpos()
	 *
	 * Finds position of last occurrence of a string.
	 *
	 * @param   string   $str     String being examined.
	 * @param   string   $search  String being searched for.
	 * @param   integer  $offset  Offset from the left of the string.
	 *
	 * @return  integer|boolean  Number of characters before the last match or false on failure
	 *
	 * @link    https://www.php.net/strrpos
	 * @since   1.3.0
	 */
	public static function strrpos($str, $search, $offset = 0)
	{
		return utf8_strrpos($str, $search, $offset);
	}

	/**
	 * UTF-8 aware alternative to substr()
	 *
	 * Return part of a string given character offset (and optionally length).
	 *
	 * @param   string                $str     String being processed
	 * @param   integer               $offset  Number of UTF-8 characters offset (from left)
	 * @param   integer|null|boolean  $length  Optional length in UTF-8 characters from offset.
	 *                                         Boolean false (the default) means "not supplied".
	 *
	 * @return  string|boolean
	 *
	 * @link    https://www.php.net/substr
	 * @since   1.3.0
	 */
	public static function substr($str, $offset, $length = false)
	{
		// Only forward the length when the caller actually supplied one.
		return $length === false
			? utf8_substr($str, $offset)
			: utf8_substr($str, $offset, $length);
	}

	/**
	 * UTF-8 aware alternative to strtolower()
	 *
	 * Make a string lowercase
	 *
	 * Note: The concept of a character's "case" only exists in some alphabets such as Latin, Greek, Cyrillic, Armenian and archaic Georgian -
	 * it does not exist in the Chinese alphabet, for example. See Unicode Standard Annex #21: Case Mappings
	 *
	 * @param   string  $str  String being processed
	 *
	 * @return  string|boolean  Either string in lowercase or FALSE if UTF-8 invalid
	 *
	 * @link    https://www.php.net/strtolower
	 * @since   1.3.0
	 */
	public static function strtolower($str)
	{
		return utf8_strtolower($str);
	}

	/**
	 * UTF-8 aware alternative to strtoupper()
	 *
	 * Make a string uppercase
	 *
	 * Note: The concept of a character's "case" only exists in some alphabets such as Latin, Greek, Cyrillic, Armenian and archaic Georgian -
	 * it does not exist in the Chinese alphabet, for example. See Unicode Standard Annex #21: Case Mappings
	 *
	 * @param   string  $str  String being processed
	 *
	 * @return  string|boolean  Either string in uppercase or FALSE if UTF-8 invalid
	 *
	 * @link    https://www.php.net/strtoupper
	 * @since   1.3.0
	 */
	public static function strtoupper($str)
	{
		return utf8_strtoupper($str);
	}

	/**
	 * UTF-8 aware alternative to strlen()
	 *
	 * Returns the number of characters in the string (NOT THE NUMBER OF BYTES).
	 *
	 * @param   string  $str  UTF-8 string.
	 *
	 * @return  integer  Number of UTF-8 characters in string.
	 *
	 * @link    https://www.php.net/strlen
	 * @since   1.3.0
	 */
	public static function strlen($str)
	{
		return utf8_strlen($str);
	}

	/**
	 * UTF-8 aware alternative to str_ireplace()
	 *
	 * Case-insensitive version of str_replace()
	 *
	 * @param   string                $search   String to search for
	 * @param   string                $replace  Replacement string
	 * @param   string                $str      String in which the replacement takes place
	 * @param   integer|null|boolean  $count    Optional count value forwarded to utf8_ireplace(). Boolean false means "do not forward".
	 *                                          The parameter is NOT taken by reference, so nothing is reported back to the caller.
	 *
	 * @return  string  UTF-8 String
	 *
	 * @link    https://www.php.net/str_ireplace
	 * @since   1.3.0
	 */
	public static function str_ireplace($search, $replace, $str, $count = null)
	{
		// The default (null) is forwarded as-is; only an explicit false drops the argument.
		if ($count === false)
		{
			return utf8_ireplace($search, $replace, $str);
		}

		return utf8_ireplace($search, $replace, $str, $count);
	}

	/**
	 * UTF-8 aware alternative to str_pad()
	 *
	 * Pad a string to a certain length with another string.
	 * $padStr may contain multi-byte characters.
	 *
	 * @param   string   $input   The input string.
	 * @param   integer  $length  If the value is negative, less than, or equal to the length of the input string, no padding takes place.
	 * @param   string   $padStr  The string may be truncated if the number of padding characters can't be evenly divided by the string's length.
	 * @param   integer  $type    The type of padding to apply
	 *
	 * @return  string
	 *
	 * @link    https://www.php.net/str_pad
	 * @since   1.4.0
	 */
	public static function str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT)
	{
		return utf8_str_pad($input, $length, $padStr, $type);
	}

	/**
	 * UTF-8 aware alternative to str_split()
	 *
	 * Convert a string to an array.
	 *
	 * @param   string   $str       UTF-8 encoded string to process
	 * @param   integer  $splitLen  Number of characters to split string by
	 *
	 * @return  array|string|boolean
	 *
	 * @link    https://www.php.net/str_split
	 * @since   1.3.0
	 */
	public static function str_split($str, $splitLen = 1)
	{
		return utf8_str_split($str, $splitLen);
	}

	/**
	 * UTF-8/LOCALE aware alternative to strcasecmp()
	 *
	 * A case insensitive string comparison.
	 *
	 * When a locale is given, LC_COLLATE is switched to it via setlocale() and is NOT restored afterwards.
	 *
	 * @param   string          $str1    string 1 to compare
	 * @param   string          $str2    string 2 to compare
	 * @param   string|boolean  $locale  The locale used by strcoll, or false (any falsy value) to use classical comparison
	 *
	 * @return  integer   < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
	 *
	 * @link    https://www.php.net/strcasecmp
	 * @link    https://www.php.net/strcoll
	 * @link    https://www.php.net/setlocale
	 * @since   1.3.0
	 */
	public static function strcasecmp($str1, $str2, $locale = false)
	{
		// Treat every falsy locale as "no locale", exactly like strcmp() does. Passing null or '' on to setlocale()
		// would silently reset the process collation locale from the environment.
		if (!$locale)
		{
			return utf8_strcasecmp($str1, $str2);
		}

		$encoding = self::collationEncoding($locale);
		$lower1   = utf8_strtolower($str1);
		$lower2   = utf8_strtolower($str2);

		// Locale is UTF-8 (or its encoding is unknown): collate the UTF-8 strings directly.
		if ($encoding === null)
		{
			return strcoll($lower1, $lower2);
		}

		// Otherwise recode to the locale's code page first. The string cast keeps scalar and false inputs comparable.
		return strcoll(
			static::transcode((string) $lower1, 'UTF-8', $encoding),
			static::transcode((string) $lower2, 'UTF-8', $encoding)
		);
	}

	/**
	 * UTF-8/LOCALE aware alternative to strcmp()
	 *
	 * A case sensitive string comparison.
	 *
	 * When a locale is given, LC_COLLATE is switched to it via setlocale() and is NOT restored afterwards.
	 *
	 * @param   string  $str1    string 1 to compare
	 * @param   string  $str2    string 2 to compare
	 * @param   mixed   $locale  The locale used by strcoll, or false (any falsy value) to use classical comparison
	 *
	 * @return  integer  < 0 if str1 is less than str2; > 0 if str1 is greater than str2, and 0 if they are equal.
	 *
	 * @link    https://www.php.net/strcmp
	 * @link    https://www.php.net/strcoll
	 * @link    https://www.php.net/setlocale
	 * @since   1.3.0
	 */
	public static function strcmp($str1, $str2, $locale = false)
	{
		// Without a locale this is the plain byte-wise comparison.
		if (!$locale)
		{
			return strcmp($str1, $str2);
		}

		$encoding = self::collationEncoding($locale);

		// Locale is UTF-8 (or its encoding is unknown): collate the strings as they are.
		if ($encoding === null)
		{
			return strcoll($str1, $str2);
		}

		// Otherwise recode to the locale's code page first. The string cast keeps scalar inputs comparable.
		return strcoll(
			static::transcode((string) $str1, 'UTF-8', $encoding),
			static::transcode((string) $str2, 'UTF-8', $encoding)
		);
	}

	/**
	 * Switches LC_COLLATE to the requested locale and works out whether strings must be recoded before strcoll().
	 *
	 * If the requested locale cannot be set, the locale that was active before the call is inspected instead.
	 *
	 * @param   mixed  $locale  Locale name(s) in any form accepted by setlocale().
	 *
	 * @return  string|null  A code page name of the form "CP<digits>" when the active locale name contains "_", ends in
	 *                       ".<digits>" and does not mention "UTF-8"; null when no recoding should be attempted
	 *                       (UTF-8 locale, or an encoding that cannot be derived from the locale name).
	 */
	private static function collationEncoding($locale)
	{
		// Read the current locale first so it can serve as the fallback.
		$previous = setlocale(LC_COLLATE, 0);
		$active   = setlocale(LC_COLLATE, $locale) ?: $previous;

		if (!stristr($active, 'UTF-8') && stristr($active, '_') && preg_match('~\.(\d+)$~', $active, $m))
		{
			return 'CP' . $m[1];
		}

		return null;
	}

	/**
	 * UTF-8 aware alternative to strcspn()
	 *
	 * Find length of initial segment not matching mask.
	 *
	 * @param   string                $str     The string to process
	 * @param   string                $mask    The mask
	 * @param   integer|boolean|null  $start   Optional starting character position (in characters)
	 * @param   integer|boolean|null  $length  Optional length
	 *
	 * @return  integer  The length of the initial segment of $str which does not contain any of the characters in $mask
	 *
	 * @link    https://www.php.net/strcspn
	 * @since   1.3.0
	 */
	public static function strcspn($str, $mask, $start = null, $length = null)
	{
		// An explicit false drops the corresponding argument; the null defaults are forwarded unchanged to utf8_strcspn().
		if ($start === false && $length === false)
		{
			return utf8_strcspn($str, $mask);
		}

		if ($length === false)
		{
			return utf8_strcspn($str, $mask, $start);
		}

		return utf8_strcspn($str, $mask, $start, $length);
	}

	/**
	 * UTF-8 aware alternative to stristr()
	 *
	 * Returns all of haystack from the first occurrence of needle to the end. Needle and haystack are examined in a case-insensitive manner to
	 * find the first occurrence of a string using case insensitive comparison.
	 *
	 * @param   string  $str     The haystack
	 * @param   string  $search  The needle
	 *
	 * @return  string|boolean
	 *
	 * @link    https://www.php.net/stristr
	 * @since   1.3.0
	 */
	public static function stristr($str, $search)
	{
		return utf8_stristr($str, $search);
	}

	/**
	 * UTF-8 aware alternative to strrev()
	 *
	 * Reverse a string.
	 *
	 * @param   string  $str  String to be reversed
	 *
	 * @return  string   The string in reverse character order
	 *
	 * @link    https://www.php.net/strrev
	 * @since   1.3.0
	 */
	public static function strrev($str)
	{
		return utf8_strrev($str);
	}

	/**
	 * UTF-8 aware alternative to strspn()
	 *
	 * Find length of initial segment matching mask.
	 *
	 * @param   string        $str     The haystack
	 * @param   string        $mask    The mask
	 * @param   integer|null  $start   Start optional
	 * @param   integer|null  $length  Length optional
	 *
	 * @return  integer
	 *
	 * @link    https://www.php.net/strspn
	 * @since   1.3.0
	 */
	public static function strspn($str, $mask, $start = null, $length = null)
	{
		// Forward only the arguments the caller supplied (null means "not supplied").
		if ($start === null && $length === null)
		{
			return utf8_strspn($str, $mask);
		}

		if ($length === null)
		{
			return utf8_strspn($str, $mask, $start);
		}

		return utf8_strspn($str, $mask, $start, $length);
	}

	/**
	 * UTF-8 aware alternative to substr_replace()
	 *
	 * Replace text within a portion of a string.
	 *
	 * @param   string                $str     The haystack
	 * @param   string                $repl    The replacement string
	 * @param   integer               $start   Start
	 * @param   integer|boolean|null  $length  Length (optional). Boolean false drops the argument; the null default is forwarded as-is.
	 *
	 * @return  string
	 *
	 * @link    https://www.php.net/substr_replace
	 * @since   1.3.0
	 */
	public static function substr_replace($str, $repl, $start, $length = null)
	{
		if ($length === false)
		{
			return utf8_substr_replace($str, $repl, $start);
		}

		return utf8_substr_replace($str, $repl, $start, $length);
	}

	/**
	 * UTF-8 aware replacement for ltrim()
	 *
	 * Strip whitespace (or other characters) from the beginning of a string. You only need to use this if you are supplying the charlist
	 * optional arg and it contains UTF-8 characters. Otherwise ltrim will work normally on a UTF-8 string.
	 *
	 * @param   string          $str       The string to be trimmed
	 * @param   string|boolean  $charlist  The optional charlist of characters to trim. False (the default) trims whitespace;
	 *                                     null, an empty string or an empty array trims nothing.
	 *
	 * @return  string  The trimmed string
	 *
	 * @link    https://www.php.net/ltrim
	 * @since   1.3.0
	 */
	public static function ltrim($str, $charlist = false)
	{
		if ($charlist === false)
		{
			return utf8_ltrim($str);
		}

		// Deliberately not empty(): the charlist "0" is a real request to trim zero characters.
		if ($charlist === null || $charlist === '' || $charlist === [])
		{
			return $str;
		}

		return utf8_ltrim($str, $charlist);
	}

	/**
	 * UTF-8 aware replacement for rtrim()
	 *
	 * Strip whitespace (or other characters) from the end of a string. You only need to use this if you are supplying the charlist
	 * optional arg and it contains UTF-8 characters. Otherwise rtrim will work normally on a UTF-8 string.
	 *
	 * @param   string          $str       The string to be trimmed
	 * @param   string|boolean  $charlist  The optional charlist of characters to trim. False (the default) trims whitespace;
	 *                                     null, an empty string or an empty array trims nothing.
	 *
	 * @return  string  The trimmed string
	 *
	 * @link    https://www.php.net/rtrim
	 * @since   1.3.0
	 */
	public static function rtrim($str, $charlist = false)
	{
		if ($charlist === false)
		{
			return utf8_rtrim($str);
		}

		// Deliberately not empty(): the charlist "0" is a real request to trim zero characters.
		if ($charlist === null || $charlist === '' || $charlist === [])
		{
			return $str;
		}

		return utf8_rtrim($str, $charlist);
	}

	/**
	 * UTF-8 aware replacement for trim()
	 *
	 * Strip whitespace (or other characters) from the beginning and end of a string. You only need to use this if you are supplying the charlist
	 * optional arg and it contains UTF-8 characters. Otherwise trim will work normally on a UTF-8 string
	 *
	 * @param   string          $str       The string to be trimmed
	 * @param   string|boolean  $charlist  The optional charlist of characters to trim. False (the default) trims whitespace;
	 *                                     null, an empty string or an empty array trims nothing.
	 *
	 * @return  string  The trimmed string
	 *
	 * @link    https://www.php.net/trim
	 * @since   1.3.0
	 */
	public static function trim($str, $charlist = false)
	{
		if ($charlist === false)
		{
			return utf8_trim($str);
		}

		// Deliberately not empty(): the charlist "0" is a real request to trim zero characters.
		if ($charlist === null || $charlist === '' || $charlist === [])
		{
			return $str;
		}

		return utf8_trim($str, $charlist);
	}

	/**
	 * UTF-8 aware alternative to ucfirst()
	 *
	 * Make a string's first character uppercase or all words' first character uppercase.
	 *
	 * @param   string       $str           String to be processed
	 * @param   string|null  $delimiter     The words delimiter (null means do not split the string)
	 * @param   string|null  $newDelimiter  The new words delimiter (null means equal to $delimiter)
	 *
	 * @return  string  If $delimiter is null, return the string with first character as upper case (if applicable)
	 *                  else consider the string of words separated by the delimiter, apply the ucfirst to each words
	 *                  and return the string with the new delimiter
	 *
	 * @link    https://www.php.net/ucfirst
	 * @since   1.3.0
	 */
	public static function ucfirst($str, $delimiter = null, $newDelimiter = null)
	{
		if ($delimiter === null)
		{
			return utf8_ucfirst($str);
		}

		// Split on the delimiter, capitalise every piece, and glue the pieces back together.
		$words = array_map('utf8_ucfirst', explode($delimiter, $str));

		return implode($newDelimiter ?? $delimiter, $words);
	}

	/**
	 * UTF-8 aware alternative to ucwords()
	 *
	 * Uppercase the first character of each word in a string.
	 *
	 * @param   string  $str  String to be processed
	 *
	 * @return  string  String with first char of each word uppercase
	 *
	 * @link    https://www.php.net/ucwords
	 * @since   1.3.0
	 */
	public static function ucwords($str)
	{
		return utf8_ucwords($str);
	}

	/**
	 * Transcode a string.
	 *
	 * Conversion is done with iconv(), asking it to transliterate and to ignore characters that cannot be represented.
	 * The order of those two flags depends on the iconv implementation in use (see the linked PHP bug).
	 *
	 * @param   string  $source        The string to transcode.
	 * @param   string  $fromEncoding  The source encoding.
	 * @param   string  $toEncoding    The target encoding.
	 *
	 * @return  string|boolean|null  The transcoded string, false if iconv() fails, or null if the source was not a string.
	 *
	 * @link    https://bugs.php.net/bug.php?id=48147
	 *
	 * @since   1.3.0
	 */
	public static function transcode($source, $fromEncoding, $toEncoding)
	{
		// Honour the documented contract: anything that is not a string is not transcoded.
		if (!\is_string($source))
		{
			return null;
		}

		switch (ICONV_IMPL)
		{
			case 'glibc':
				// Error-suppressed on glibc, as in the original implementation.
				return @iconv($fromEncoding, $toEncoding . '//TRANSLIT,IGNORE', $source);

			case 'libiconv':
			default:
				return iconv($fromEncoding, $toEncoding . '//IGNORE//TRANSLIT', $source);
		}
	}

	/**
	 * Tests a string as to whether it's valid UTF-8 and supported by the Unicode standard.
	 *
	 * Note: this function has been modified to simply return true or false.
	 *
	 * @param   string  $str  UTF-8 encoded string.
	 *
	 * @return  boolean  true if valid
	 *
	 * @author  <hsivonen@iki.fi>
	 * @link    https://hsivonen.fi/php-utf8/
	 * @see     compliant
	 * @since   1.3.0
	 */
	public static function valid($str)
	{
		return utf8_is_valid($str);
	}

	/**
	 * Tests whether a string complies as UTF-8.
	 *
	 * This will be much faster than StringHelper::valid() but will pass five and six octet UTF-8 sequences, which are not supported by Unicode and
	 * so cannot be displayed correctly in a browser. In other words it is not as strict as StringHelper::valid() but it's faster. If you use it to
	 * validate user input, you place yourself at the risk that attackers will be able to inject 5 and 6 byte sequences (which may or may not be a
	 * significant risk, depending on what you are doing).
	 *
	 * @param   string  $str  UTF-8 string to check
	 *
	 * @return  boolean  TRUE if string is valid UTF-8
	 *
	 * @see     StringHelper::valid
	 * @link    https://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
	 * @since   1.3.0
	 */
	public static function compliant($str)
	{
		return utf8_compliant($str);
	}

	/**
	 * Converts Unicode sequences to UTF-8 string.
	 *
	 * Every literal "\uXXXX" escape (backslash, "u", four hex digits) is replaced by the UTF-8 encoding of the code unit,
	 * which is read as UCS-2BE. Without the mbstring extension the input is returned unchanged.
	 *
	 * @param   string  $str  Unicode string to convert
	 *
	 * @return  string  UTF-8 string
	 *
	 * @since   1.3.0
	 */
	public static function unicode_to_utf8($str)
	{
		if (!\extension_loaded('mbstring'))
		{
			return $str;
		}

		return preg_replace_callback(
			'/\\\\u([0-9a-fA-F]{4})/',
			static function ($match)
			{
				// Turn the four hex digits into two raw bytes, then recode those bytes to UTF-8.
				return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
			},
			$str
		);
	}

	/**
	 * Converts Unicode sequences to UTF-16 string.
	 *
	 * Every literal "\uXXXX" escape (backslash, "u", four hex digits) is replaced. NOTE: despite the method name, the
	 * code unit is read as UTF-16BE and the replacement is written as UTF-8, so the result is a UTF-8 string.
	 * This long-standing behaviour is kept for backward compatibility. Without the mbstring extension the input is
	 * returned unchanged.
	 *
	 * @param   string  $str  Unicode string to convert
	 *
	 * @return  string  The converted string (UTF-8 encoded, see note above)
	 *
	 * @since   1.3.0
	 */
	public static function unicode_to_utf16($str)
	{
		if (!\extension_loaded('mbstring'))
		{
			return $str;
		}

		return preg_replace_callback(
			'/\\\\u([0-9a-fA-F]{4})/',
			static function ($match)
			{
				// Turn the four hex digits into two raw bytes, then recode those bytes from UTF-16BE.
				return mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UTF-16BE');
			},
			$str
		);
	}
}