* @license GNU General Public License version 2 or later; see LICENSE.txt */ namespace Joomla\CMS\Filter; use Joomla\CMS\String\PunycodeHelper; use Joomla\Filter\InputFilter as BaseInputFilter; // phpcs:disable PSR1.Files.SideEffects \defined('JPATH_PLATFORM') or die; // phpcs:enable PSR1.Files.SideEffects /** * InputFilter is a class for filtering input from any data source * * Forked from the php input filter library by: Daniel Morris * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie. * * @since 1.7.0 */ class InputFilter extends BaseInputFilter { /** * An array containing a list of extensions for files that are typically * executable directly in the webserver context, potentially resulting in code executions * * @since 4.0.0 */ public const FORBIDDEN_FILE_EXTENSIONS = [ 'php', 'phps', 'pht', 'phtml', 'php3', 'php4', 'php5', 'php6', 'php7', 'asp', 'php8', 'phar', 'inc', 'pl', 'cgi', 'fcgi', 'java', 'jar', 'py', 'aspx' ]; /** * A flag for Unicode Supplementary Characters (4-byte Unicode character) stripping. * * @var integer * @since 3.5 */ private $stripUSC = 0; /** * A container for InputFilter instances. * * @var InputFilter[] * @since 4.0.0 */ protected static $instances = array(); /** * Constructor for inputFilter class. Only first parameter is required. 
*
     * @param   array    $tagsArray   List of user-defined tags
     * @param   array    $attrArray   List of user-defined attributes
     * @param   integer  $tagsMethod  The constant static::ONLY_ALLOW_DEFINED_TAGS or static::BLOCK_DEFINED_TAGS
     * @param   integer  $attrMethod  The constant static::ONLY_ALLOW_DEFINED_ATTRIBUTES or static::BLOCK_DEFINED_ATTRIBUTES
     * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blocked tags/attributes = 1
     * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0
     *
     * @since   1.7.0
     */
    public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = 0)
    {
        // The tag/attribute configuration is passed straight through to the base filter
        parent::__construct($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);

        // The 4-byte stripping flag is kept on this subclass; clean() consults it
        $this->stripUSC = $stripUSC;
    }

    /**
     * Returns an input filter object, only creating it if it doesn't already exist.
     *
     * Instances are cached per distinct combination of all six arguments. The $stripUSC flag is
     * part of the cache key, so a filter created with stripping disabled is never handed to a
     * caller that asked for stripping (and vice versa).
     *
     * @param   array    $tagsArray   List of user-defined tags
     * @param   array    $attrArray   List of user-defined attributes
     * @param   integer  $tagsMethod  The constant static::ONLY_ALLOW_DEFINED_TAGS or static::BLOCK_DEFINED_TAGS
     * @param   integer  $attrMethod  The constant static::ONLY_ALLOW_DEFINED_ATTRIBUTES or static::BLOCK_DEFINED_ATTRIBUTES
     * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blocked tags/attributes = 1
     * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0
     *
     * @return  InputFilter  The InputFilter object.
     *
     * @since   1.7.0
     */
    public static function getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = 0)
    {
        // Every constructor argument takes part in the signature, including $stripUSC
        $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto, $stripUSC)));

        if (!isset(self::$instances[$sig])) {
            self::$instances[$sig] = new InputFilter($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto, $stripUSC);
        }

        return self::$instances[$sig];
    }

    /**
     * Method to be called by another php script. Processes for XSS and
     * specified bad code.
*
     * When the instance was created with the 4-byte stripping flag set, the input is first passed
     * through stripUSC(); the actual type filtering is then delegated to the parent class.
     *
     * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
     * @param   string  $type    The return type for the variable:
     *                           INT:       An integer, or an array of integers,
     *                           UINT:      An unsigned integer, or an array of unsigned integers,
     *                           FLOAT:     A floating point number, or an array of floating point numbers,
     *                           BOOLEAN:   A boolean value,
     *                           WORD:      A string containing A-Z or underscores only (not case sensitive),
     *                           ALNUM:     A string containing A-Z or 0-9 only (not case sensitive),
     *                           CMD:       A string containing A-Z, 0-9, underscores, periods or hyphens (not case sensitive),
     *                           BASE64:    A string containing A-Z, 0-9, forward slashes, plus or equals (not case sensitive),
     *                           STRING:    A fully decoded and sanitised string (default),
     *                           HTML:      A sanitised string,
     *                           ARRAY:     An array,
     *                           PATH:      A sanitised file path, or an array of sanitised file paths,
     *                           TRIM:      A string trimmed from normal, non-breaking and multibyte spaces
     *                           USERNAME:  Do not use (use an application specific filter),
     *                           RAW:       The raw string is returned with no filtering,
     *                           unknown:   An unknown filter will act like STRING. If the input is an array it will return an
     *                                      array of fully decoded and sanitised strings.
     *
     * @return  mixed  'Cleaned' version of input parameter
     *
     * @since   1.7.0
     */
    public function clean($source, $type = 'string')
    {
        /*
         * Strip Unicode Supplementary Characters when requested to do so.
         * Alternatively: preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xE2\xAF\x91", $source) but it'd be slower.
         */
        $prepared = $this->stripUSC ? $this->stripUSC($source) : $source;

        return parent::clean($prepared, $type);
    }

    /**
     * Function to punyencode utf8 mail when saving content
     *
     * Looks for quoted mailto: targets in the text and replaces each address found
     * with the result of PunycodeHelper::emailToPunycode().
     *
     * @param   string  $text  The strings to encode
     *
     * @return  string  The punyencoded mail
     *
     * @since   3.5
     */
    public function emailToPunycode($text)
    {
        $mailtoRegex = '/(("mailto:)+[\w\.\-\+]+\@[^"?]+\.+[^."?]+("|\?))/';
        $found       = array();

        // No quoted mailto: target in the text, hand it back untouched
        if (!preg_match_all($mailtoRegex, $text, $found)) {
            return $text;
        }

        foreach ($found[0] as $hit) {
            // Remove every ? and " from the matched span before encoding it
            $address = (string) str_replace(array('?', '"'), '', $hit);
            $encoded = PunycodeHelper::emailToPunycode($address);
            $text    = (string) str_replace($address, $encoded, $text);
        }

        return $text;
    }

/** * Checks an uploaded for suspicious naming and potential PHP contents which could indicate a hacking attempt. * * The options you can define are: * null_byte Prevent files with a null byte in their name (buffer overflow attack) * forbidden_extensions Do not allow these strings anywhere in the file's extension * php_tag_in_content Do not allow ` true, // Forbidden string in extension (e.g. php matched .php, .xxx.php, .php.xxx and so on) 'forbidden_extensions' => self::FORBIDDEN_FILE_EXTENSIONS, // true, // true, // __HALT_COMPILER() 'phar_stub_in_content' => true, // Which file extensions to scan for short tags 'shorttag_extensions' => array( 'inc', 'phps', 'class', 'php3', 'php4', 'php5', 'php6', 'php7', 'php8', 'txt', 'dat', 'tpl', 'tmpl', ), // Forbidden extensions anywhere in the content 'fobidden_ext_in_content' => true, // Which file extensions to scan for .php in the content 'php_ext_content_extensions' => array('zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa'), ); $options = array_merge($defaultOptions, $options); // Make sure we can scan nested file descriptors $descriptors = $file; if (isset($file['name']) && isset($file['tmp_name'])) { $descriptors = static::decodeFileData( array( $file['name'], $file['type'], $file['tmp_name'], $file['error'], $file['size'], ) ); } // Handle non-nested descriptors (single files) if (isset($descriptors['name'])) { $descriptors = 
array($descriptors); } // Scan all descriptors detected foreach ($descriptors as $fileDescriptor) { if (!isset($fileDescriptor['name'])) { // This is a nested descriptor. We have to recurse. if (!static::isSafeFile($fileDescriptor, $options)) { return false; } continue; } $tempNames = $fileDescriptor['tmp_name']; $intendedNames = $fileDescriptor['name']; if (!\is_array($tempNames)) { $tempNames = array($tempNames); } if (!\is_array($intendedNames)) { $intendedNames = array($intendedNames); } $len = \count($tempNames); for ($i = 0; $i < $len; $i++) { $tempName = array_shift($tempNames); $intendedName = array_shift($intendedNames); // 1. Null byte check if ($options['null_byte']) { if (strstr($intendedName, "\x00")) { return false; } } // 2. PHP-in-extension check (.php, .php.xxx[.yyy[.zzz[...]]], .xxx[.yyy[.zzz[...]]].php) if (!empty($options['forbidden_extensions'])) { $explodedName = explode('.', $intendedName); $explodedName = array_reverse($explodedName); array_pop($explodedName); $explodedName = array_map('strtolower', $explodedName); /* * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to * be set, i.e. they should have unique values. */ foreach ($options['forbidden_extensions'] as $ext) { if (\in_array($ext, $explodedName)) { return false; } } } // 3. File contents scanner (PHP tag in file contents) if ( $options['php_tag_in_content'] || $options['shorttag_in_content'] || $options['phar_stub_in_content'] || ($options['fobidden_ext_in_content'] && !empty($options['forbidden_extensions'])) ) { $fp = strlen($tempName) ? 
@fopen($tempName, 'r') : false; if ($fp !== false) { $data = ''; while (!feof($fp)) { $data .= @fread($fp, 131072); if ($options['php_tag_in_content'] && stripos($data, ' $v) { $result[$k] = static::decodeFileData(array($data[0][$k], $data[1][$k], $data[2][$k], $data[3][$k], $data[4][$k])); } return $result; } return array('name' => $data[0], 'type' => $data[1], 'tmp_name' => $data[2], 'error' => $data[3], 'size' => $data[4]); }

    /**
     * Try to convert to plaintext
     *
     * Replaces the named entities of the ISO-8859-1 HTML entity table as well as decimal (&#NN;)
     * and hexadecimal (&#xHH;) character references with the UTF-8 encoded character they stand
     * for. A numeric reference that does not designate a valid Unicode scalar value (out of range,
     * or a surrogate) is left in the text unchanged.
     *
     * @param   string  $source  The source string.
     *
     * @return  string  Plaintext string
     *
     * @since   3.5
     */
    protected function decode($source)
    {
        static $ttr;

        if (!\is_array($ttr)) {
            $ttr = array();

            // Entity decode: the "entity => UTF-8 character" map is built once and memoised in the static local
            $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');

            foreach ($trans_tbl as $k => $v) {
                // The table was requested in ISO-8859-1, so each key is a single byte, i.e. a code point in 0-255
                $ttr[$v] = self::codePointToUtf8(\ord((string) $k));
            }
        }

        $source = strtr($source, $ttr);

        // Convert decimal
        $source = preg_replace_callback(
            '/&#(\d+);/m',
            static function ($m) {
                $digits = ltrim($m[1], '0');

                // The highest code point is 1114111; longer digit runs are invalid and could overflow an integer
                $char = \strlen($digits) > 7 ? null : self::codePointToUtf8((int) $digits);

                return $char === null ? $m[0] : $char;
            },
            $source
        );

        // Convert hex
        $source = preg_replace_callback(
            '/&#x([a-f0-9]+);/mi',
            static function ($m) {
                $digits = ltrim($m[1], '0');

                // The highest code point is 0x10FFFF; hexdec() is used because "0x.." strings are not numeric in PHP 7+
                $char = \strlen($digits) > 6 ? null : self::codePointToUtf8((int) hexdec($digits));

                return $char === null ? $m[0] : $char;
            },
            $source
        );

        return $source;
    }

    /**
     * Encode a single Unicode code point as a UTF-8 byte sequence.
     *
     * @param   integer  $codePoint  The code point to encode
     *
     * @return  string|null  The UTF-8 bytes, or null when the value is negative, above 0x10FFFF or a surrogate
     *
     * @since   __DEPLOY_VERSION__
     */
    private static function codePointToUtf8($codePoint)
    {
        if ($codePoint < 0 || $codePoint > 0x10FFFF || ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)) {
            return null;
        }

        if ($codePoint < 0x80) {
            return \chr($codePoint);
        }

        if ($codePoint < 0x800) {
            return \chr(0xC0 | ($codePoint >> 6))
                . \chr(0x80 | ($codePoint & 0x3F));
        }

        if ($codePoint < 0x10000) {
            return \chr(0xE0 | ($codePoint >> 12))
                . \chr(0x80 | (($codePoint >> 6) & 0x3F))
                . \chr(0x80 | ($codePoint & 0x3F));
        }

        return \chr(0xF0 | ($codePoint >> 18))
            . \chr(0x80 | (($codePoint >> 12) & 0x3F))
            . \chr(0x80 | (($codePoint >> 6) & 0x3F))
            . \chr(0x80 | ($codePoint & 0x3F));
    }

    /**
     * Recursively strip Unicode Supplementary Characters from the source. Note: objects cannot be filtered.
     *
     * Arrays are walked element by element with their keys preserved; objects are returned as they
     * are. For any other value, each byte in the range 0xF0-0xF7 together with the three bytes that
     * follow it is replaced by the three-byte sequence E2 AF 91.
     *
     * @param   mixed  $source  The data to filter
     *
     * @return  mixed  The filtered result
     *
     * @since   3.5
     */
    protected function stripUSC($source)
    {
        if (\is_object($source)) {
            return $source;
        }

        if (\is_array($source)) {
            $filteredArray = array();

            foreach ($source as $k => $v) {
                $filteredArray[$k] = $this->stripUSC($v);
            }

            return $filteredArray;
        }

        return preg_replace('/[\xF0-\xF7].../s', "\xE2\xAF\x91", $source);
    }
}