[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/src/Filter/ -> InputFilter.php (source)

   1  <?php
   2  
   3  /**
   4   * Joomla! Content Management System
   5   *
   6   * @copyright  (C) 2006 Open Source Matters, Inc. <https://www.joomla.org>
   7   * @license    GNU General Public License version 2 or later; see LICENSE.txt
   8   */
   9  
  10  namespace Joomla\CMS\Filter;
  11  
  12  use Joomla\CMS\String\PunycodeHelper;
  13  use Joomla\Filter\InputFilter as BaseInputFilter;
  14  
  15  // phpcs:disable PSR1.Files.SideEffects
  16  \defined('JPATH_PLATFORM') or die;
  17  // phpcs:enable PSR1.Files.SideEffects
  18  
  19  /**
  20   * InputFilter is a class for filtering input from any data source
  21   *
  22   * Forked from the php input filter library by: Daniel Morris <[email protected]>
  23   * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
  24   *
  25   * @since  1.7.0
  26   */
  27  class InputFilter extends BaseInputFilter
  28  {
  29      /**
  30       * An array containing a list of extensions for files that are typically
  31       * executable directly in the webserver context, potentially resulting in code executions
  32       *
  33       * @since 4.0.0
  34       */
  35      public const FORBIDDEN_FILE_EXTENSIONS = [
  36          'php', 'phps', 'pht', 'phtml', 'php3', 'php4', 'php5', 'php6', 'php7', 'asp',
  37          'php8', 'phar', 'inc', 'pl', 'cgi', 'fcgi', 'java', 'jar', 'py', 'aspx'
  38      ];
  39  
  40      /**
  41       * A flag for Unicode Supplementary Characters (4-byte Unicode character) stripping.
  42       *
  43       * @var    integer
  44       * @since  3.5
  45       */
  46      private $stripUSC = 0;
  47  
  48      /**
  49       * A container for InputFilter instances.
  50       *
  51       * @var    InputFilter[]
  52       * @since  4.0.0
  53       */
  54      protected static $instances = array();
  55      /**
  56       * Constructor for inputFilter class. Only first parameter is required.
  57       *
  58       * @param   array    $tagsArray   List of user-defined tags
  59       * @param   array    $attrArray   List of user-defined attributes
  60       * @param   integer  $tagsMethod  The constant static::ONLY_ALLOW_DEFINED_TAGS or static::BLOCK_DEFINED_TAGS
  61       * @param   integer  $attrMethod  The constant static::ONLY_ALLOW_DEFINED_ATTRIBUTES or static::BLOCK_DEFINED_ATTRIBUTES
  62       * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blocked tags/attributes = 1
  63       * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0
  64       *
  65       * @since   1.7.0
  66       */
  67      public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = 0)
  68      {
  69          parent::__construct($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
  70  
  71          // Assign member variables
  72          $this->stripUSC = $stripUSC;
  73      }
  74  
  75      /**
  76       * Returns an input filter object, only creating it if it doesn't already exist.
  77       *
  78       * @param   array    $tagsArray   List of user-defined tags
  79       * @param   array    $attrArray   List of user-defined attributes
  80       * @param   integer  $tagsMethod  The constant static::ONLY_ALLOW_DEFINED_TAGS or static::BLOCK_DEFINED_TAGS
  81       * @param   integer  $attrMethod  The constant static::ONLY_ALLOW_DEFINED_ATTRIBUTES or static::BLOCK_DEFINED_ATTRIBUTES
  82       * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blocked tags/attributes = 1
  83       * @param   integer  $stripUSC    Strip 4-byte unicode characters = 1, no strip = 0
  84       *
  85       * @return  InputFilter  The InputFilter object.
  86       *
  87       * @since   1.7.0
  88       */
  89      public static function getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1, $stripUSC = 0)
  90      {
  91          $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));
  92  
  93          if (empty(self::$instances[$sig])) {
  94              self::$instances[$sig] = new InputFilter($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto, $stripUSC);
  95          }
  96  
  97          return self::$instances[$sig];
  98      }
  99  
 100      /**
 101       * Method to be called by another php script. Processes for XSS and
 102       * specified bad code.
 103       *
 104       * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
 105       * @param   string  $type    The return type for the variable:
 106       *                           INT:       An integer, or an array of integers,
 107       *                           UINT:      An unsigned integer, or an array of unsigned integers,
 108       *                           FLOAT:     A floating point number, or an array of floating point numbers,
 109       *                           BOOLEAN:   A boolean value,
 110       *                           WORD:      A string containing A-Z or underscores only (not case sensitive),
 111       *                           ALNUM:     A string containing A-Z or 0-9 only (not case sensitive),
 112       *                           CMD:       A string containing A-Z, 0-9, underscores, periods or hyphens (not case sensitive),
 113       *                           BASE64:    A string containing A-Z, 0-9, forward slashes, plus or equals (not case sensitive),
 114       *                           STRING:    A fully decoded and sanitised string (default),
 115       *                           HTML:      A sanitised string,
 116       *                           ARRAY:     An array,
 117       *                           PATH:      A sanitised file path, or an array of sanitised file paths,
 118       *                           TRIM:      A string trimmed from normal, non-breaking and multibyte spaces
 119       *                           USERNAME:  Do not use (use an application specific filter),
 120       *                           RAW:       The raw string is returned with no filtering,
 121       *                           unknown:   An unknown filter will act like STRING. If the input is an array it will return an
 122       *                                      array of fully decoded and sanitised strings.
 123       *
 124       * @return  mixed  'Cleaned' version of input parameter
 125       *
 126       * @since   1.7.0
 127       */
 128      public function clean($source, $type = 'string')
 129      {
 130          // Strip Unicode Supplementary Characters when requested to do so
 131          if ($this->stripUSC) {
 132              // Alternatively: preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xE2\xAF\x91", $source) but it'd be slower.
 133              $source = $this->stripUSC($source);
 134          }
 135  
 136          return parent::clean($source, $type);
 137      }
 138  
 139      /**
 140       * Function to punyencode utf8 mail when saving content
 141       *
 142       * @param   string  $text  The strings to encode
 143       *
 144       * @return  string  The punyencoded mail
 145       *
 146       * @since   3.5
 147       */
 148      public function emailToPunycode($text)
 149      {
 150          $pattern = '/(("mailto:)+[\w\.\-\+]+\@[^"?]+\.+[^."?]+("|\?))/';
 151  
 152          if (preg_match_all($pattern, $text, $matches)) {
 153              foreach ($matches[0] as $match) {
 154                  $match  = (string) str_replace(array('?', '"'), '', $match);
 155                  $text   = (string) str_replace($match, PunycodeHelper::emailToPunycode($match), $text);
 156              }
 157          }
 158  
 159          return $text;
 160      }
 161  
 162      /**
 163       * Checks an uploaded for suspicious naming and potential PHP contents which could indicate a hacking attempt.
 164       *
 165       * The options you can define are:
 166       * null_byte                   Prevent files with a null byte in their name (buffer overflow attack)
 167       * forbidden_extensions        Do not allow these strings anywhere in the file's extension
 168       * php_tag_in_content          Do not allow `<?php` tag in content
 169       * phar_stub_in_content        Do not allow the `__HALT_COMPILER()` phar stub in content
 170       * shorttag_in_content         Do not allow short tag `<?` in content
 171       * shorttag_extensions         Which file extensions to scan for short tags in content
 172       * fobidden_ext_in_content     Do not allow forbidden_extensions anywhere in content
 173       * php_ext_content_extensions  Which file extensions to scan for .php in content
 174       *
 175       * This code is an adaptation and improvement of Admin Tools' UploadShield feature,
 176       * relicensed and contributed by its author.
 177       *
 178       * @param   array  $file     An uploaded file descriptor
 179       * @param   array  $options  The scanner options (see the code for details)
 180       *
 181       * @return  boolean  True of the file is safe
 182       *
 183       * @since   3.4
 184       */
 185      public static function isSafeFile($file, $options = array())
 186      {
 187          $defaultOptions = array(
 188  
 189              // Null byte in file name
 190              'null_byte'                  => true,
 191  
 192              // Forbidden string in extension (e.g. php matched .php, .xxx.php, .php.xxx and so on)
 193              'forbidden_extensions'       => self::FORBIDDEN_FILE_EXTENSIONS,
 194  
 195              // <?php tag in file contents
 196              'php_tag_in_content'         => true,
 197  
 198              // <? tag in file contents
 199              'shorttag_in_content'        => true,
 200  
 201              // __HALT_COMPILER()
 202              'phar_stub_in_content'        => true,
 203  
 204              // Which file extensions to scan for short tags
 205              'shorttag_extensions'        => array(
 206                  'inc', 'phps', 'class', 'php3', 'php4', 'php5', 'php6', 'php7', 'php8', 'txt', 'dat', 'tpl', 'tmpl',
 207              ),
 208  
 209              // Forbidden extensions anywhere in the content
 210              'fobidden_ext_in_content'    => true,
 211  
 212              // Which file extensions to scan for .php in the content
 213              'php_ext_content_extensions' => array('zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa'),
 214          );
 215  
 216          $options = array_merge($defaultOptions, $options);
 217  
 218          // Make sure we can scan nested file descriptors
 219          $descriptors = $file;
 220  
 221          if (isset($file['name']) && isset($file['tmp_name'])) {
 222              $descriptors = static::decodeFileData(
 223                  array(
 224                      $file['name'],
 225                      $file['type'],
 226                      $file['tmp_name'],
 227                      $file['error'],
 228                      $file['size'],
 229                  )
 230              );
 231          }
 232  
 233          // Handle non-nested descriptors (single files)
 234          if (isset($descriptors['name'])) {
 235              $descriptors = array($descriptors);
 236          }
 237  
 238          // Scan all descriptors detected
 239          foreach ($descriptors as $fileDescriptor) {
 240              if (!isset($fileDescriptor['name'])) {
 241                  // This is a nested descriptor. We have to recurse.
 242                  if (!static::isSafeFile($fileDescriptor, $options)) {
 243                      return false;
 244                  }
 245  
 246                  continue;
 247              }
 248  
 249              $tempNames     = $fileDescriptor['tmp_name'];
 250              $intendedNames = $fileDescriptor['name'];
 251  
 252              if (!\is_array($tempNames)) {
 253                  $tempNames = array($tempNames);
 254              }
 255  
 256              if (!\is_array($intendedNames)) {
 257                  $intendedNames = array($intendedNames);
 258              }
 259  
 260              $len = \count($tempNames);
 261  
 262              for ($i = 0; $i < $len; $i++) {
 263                  $tempName     = array_shift($tempNames);
 264                  $intendedName = array_shift($intendedNames);
 265  
 266                  // 1. Null byte check
 267                  if ($options['null_byte']) {
 268                      if (strstr($intendedName, "\x00")) {
 269                          return false;
 270                      }
 271                  }
 272  
 273                  // 2. PHP-in-extension check (.php, .php.xxx[.yyy[.zzz[...]]], .xxx[.yyy[.zzz[...]]].php)
 274                  if (!empty($options['forbidden_extensions'])) {
 275                      $explodedName = explode('.', $intendedName);
 276                      $explodedName = array_reverse($explodedName);
 277                      array_pop($explodedName);
 278                      $explodedName = array_map('strtolower', $explodedName);
 279  
 280                      /*
 281                       * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 282                       * be set, i.e. they should have unique values.
 283                       */
 284                      foreach ($options['forbidden_extensions'] as $ext) {
 285                          if (\in_array($ext, $explodedName)) {
 286                              return false;
 287                          }
 288                      }
 289                  }
 290  
 291                  // 3. File contents scanner (PHP tag in file contents)
 292                  if (
 293                      $options['php_tag_in_content']
 294                      || $options['shorttag_in_content'] || $options['phar_stub_in_content']
 295                      || ($options['fobidden_ext_in_content'] && !empty($options['forbidden_extensions']))
 296                  ) {
 297                      $fp = strlen($tempName) ? @fopen($tempName, 'r') : false;
 298  
 299                      if ($fp !== false) {
 300                          $data = '';
 301  
 302                          while (!feof($fp)) {
 303                              $data .= @fread($fp, 131072);
 304  
 305                              if ($options['php_tag_in_content'] && stripos($data, '<?php') !== false) {
 306                                  return false;
 307                              }
 308  
 309                              if ($options['phar_stub_in_content'] && stripos($data, '__HALT_COMPILER()') !== false) {
 310                                  return false;
 311                              }
 312  
 313                              if ($options['shorttag_in_content']) {
 314                                  $suspiciousExtensions = $options['shorttag_extensions'];
 315  
 316                                  if (empty($suspiciousExtensions)) {
 317                                      $suspiciousExtensions = array(
 318                                          'inc', 'phps', 'class', 'php3', 'php4', 'txt', 'dat', 'tpl', 'tmpl',
 319                                      );
 320                                  }
 321  
 322                                  /*
 323                                   * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 324                                   * be set, i.e. they should have unique values.
 325                                   */
 326                                  $collide = false;
 327  
 328                                  foreach ($suspiciousExtensions as $ext) {
 329                                      if (\in_array($ext, $explodedName)) {
 330                                          $collide = true;
 331  
 332                                          break;
 333                                      }
 334                                  }
 335  
 336                                  if ($collide) {
 337                                      // These are suspicious text files which may have the short tag (<?) in them
 338                                      if (strstr($data, '<?')) {
 339                                          return false;
 340                                      }
 341                                  }
 342                              }
 343  
 344                              if ($options['fobidden_ext_in_content'] && !empty($options['forbidden_extensions'])) {
 345                                  $suspiciousExtensions = $options['php_ext_content_extensions'];
 346  
 347                                  if (empty($suspiciousExtensions)) {
 348                                      $suspiciousExtensions = array(
 349                                          'zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa',
 350                                      );
 351                                  }
 352  
 353                                  /*
 354                                   * DO NOT USE array_intersect HERE! array_intersect expects the two arrays to
 355                                   * be set, i.e. they should have unique values.
 356                                   */
 357                                  $collide = false;
 358  
 359                                  foreach ($suspiciousExtensions as $ext) {
 360                                      if (\in_array($ext, $explodedName)) {
 361                                          $collide = true;
 362  
 363                                          break;
 364                                      }
 365                                  }
 366  
 367                                  if ($collide) {
 368                                      /*
 369                                       * These are suspicious text files which may have an executable
 370                                       * file extension in them
 371                                       */
 372                                      foreach ($options['forbidden_extensions'] as $ext) {
 373                                          if (strstr($data, '.' . $ext)) {
 374                                              return false;
 375                                          }
 376                                      }
 377                                  }
 378                              }
 379  
 380                              /*
 381                               * This makes sure that we don't accidentally skip a <?php tag if it's across
 382                               * a read boundary, even on multibyte strings
 383                               */
 384                              $data = substr($data, -10);
 385                          }
 386  
 387                          fclose($fp);
 388                      }
 389                  }
 390              }
 391          }
 392  
 393          return true;
 394      }
 395  
 396      /**
 397       * Method to decode a file data array.
 398       *
 399       * @param   array  $data  The data array to decode.
 400       *
 401       * @return  array
 402       *
 403       * @since   3.4
 404       */
 405      protected static function decodeFileData(array $data)
 406      {
 407          $result = array();
 408  
 409          if (\is_array($data[0])) {
 410              foreach ($data[0] as $k => $v) {
 411                  $result[$k] = static::decodeFileData(array($data[0][$k], $data[1][$k], $data[2][$k], $data[3][$k], $data[4][$k]));
 412              }
 413  
 414              return $result;
 415          }
 416  
 417          return array('name' => $data[0], 'type' => $data[1], 'tmp_name' => $data[2], 'error' => $data[3], 'size' => $data[4]);
 418      }
 419  
 420      /**
 421       * Try to convert to plaintext
 422       *
 423       * @param   string  $source  The source string.
 424       *
 425       * @return  string  Plaintext string
 426       *
 427       * @since   3.5
 428       */
 429      protected function decode($source)
 430      {
 431          static $ttr;
 432  
 433          if (!\is_array($ttr)) {
 434              // Entity decode
 435              $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');
 436  
 437              foreach ($trans_tbl as $k => $v) {
 438                  $ttr[$v] = utf8_encode($k);
 439              }
 440          }
 441  
 442          $source = strtr($source, $ttr);
 443  
 444          // Convert decimal
 445          $source = preg_replace_callback(
 446              '/&#(\d+);/m',
 447              function ($m) {
 448                  return utf8_encode(\chr($m[1]));
 449              },
 450              $source
 451          );
 452  
 453          // Convert hex
 454          $source = preg_replace_callback(
 455              '/&#x([a-f0-9]+);/mi',
 456              function ($m) {
 457                  return utf8_encode(\chr('0x' . $m[1]));
 458              },
 459              $source
 460          );
 461  
 462          return $source;
 463      }
 464  
 465      /**
 466       * Recursively strip Unicode Supplementary Characters from the source. Not: objects cannot be filtered.
 467       *
 468       * @param   mixed  $source  The data to filter
 469       *
 470       * @return  mixed  The filtered result
 471       *
 472       * @since  3.5
 473       */
 474      protected function stripUSC($source)
 475      {
 476          if (\is_object($source)) {
 477              return $source;
 478          }
 479  
 480          if (\is_array($source)) {
 481              $filteredArray = array();
 482  
 483              foreach ($source as $k => $v) {
 484                  $filteredArray[$k] = $this->stripUSC($v);
 485              }
 486  
 487              return $filteredArray;
 488          }
 489  
 490          return preg_replace('/[\xF0-\xF7].../s', "\xE2\xAF\x91", $source);
 491      }
 492  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer