[ Index ]

PHP Cross Reference of Joomla 4.2.2 documentation

title

Body

[close]

/libraries/vendor/enshrined/svg-sanitize/src/ -> Sanitizer.php (source)

   1  <?php
   2  namespace enshrined\svgSanitize;
   3  
   4  use enshrined\svgSanitize\data\AllowedAttributes;
   5  use enshrined\svgSanitize\data\AllowedTags;
   6  use enshrined\svgSanitize\data\AttributeInterface;
   7  use enshrined\svgSanitize\data\TagInterface;
   8  use enshrined\svgSanitize\data\XPath;
   9  use enshrined\svgSanitize\ElementReference\Resolver;
  10  
  11  /**
  12   * Class Sanitizer
  13   *
  14   * @package enshrined\svgSanitize
  15   */
  16  class Sanitizer
  17  {
  18  
  19      /**
  20       * @var \DOMDocument
  21       */
  22      protected $xmlDocument;
  23  
  24      /**
  25       * @var array
  26       */
  27      protected $allowedTags;
  28  
  29      /**
  30       * @var array
  31       */
  32      protected $allowedAttrs;
  33  
  34      /**
  35       * @var
  36       */
  37      protected $xmlLoaderValue;
  38  
  39      /**
  40       * @var bool
  41       */
  42      protected $minifyXML = false;
  43  
  44      /**
  45       * @var bool
  46       */
  47      protected $removeRemoteReferences = false;
  48  
  49      /**
  50       * @var int
  51       */
  52      protected $useThreshold = 1000;
  53  
  54      /**
  55       * @var bool
  56       */
  57      protected $removeXMLTag = false;
  58  
  59      /**
  60       * @var int
  61       */
  62      protected $xmlOptions = LIBXML_NOEMPTYTAG;
  63  
  64      /**
  65       * @var array
  66       */
  67      protected $xmlIssues = array();
  68  
  69      /**
  70       * @var Resolver
  71       */
  72      protected $elementReferenceResolver;
  73  
  74      /**
  75       * @var int
  76       */
  77      protected $useNestingLimit = 15;
  78  
  79      /**
  80       *
  81       */
  82      function __construct()
  83      {
  84          // Load default tags/attributes
  85          $this->allowedAttrs = array_map('strtolower', AllowedAttributes::getAttributes());
  86          $this->allowedTags = array_map('strtolower', AllowedTags::getTags());
  87      }
  88  
  89      /**
  90       * Set up the DOMDocument
  91       */
  92      protected function resetInternal()
  93      {
  94          $this->xmlDocument = new \DOMDocument();
  95          $this->xmlDocument->preserveWhiteSpace = false;
  96          $this->xmlDocument->strictErrorChecking = false;
  97          $this->xmlDocument->formatOutput = !$this->minifyXML;
  98      }
  99  
 100      /**
 101       * Set XML options to use when saving XML
 102       * See: DOMDocument::saveXML
 103       *
 104       * @param int  $xmlOptions
 105       */
 106      public function setXMLOptions($xmlOptions)
 107      {
 108          $this->xmlOptions = $xmlOptions;
 109      }
 110  
 111      /**
 112       * Get XML options to use when saving XML
 113       * See: DOMDocument::saveXML
 114       *
 115       * @return int
 116       */
 117      public function getXMLOptions()
 118      {
 119          return $this->xmlOptions;
 120      }
 121  
 122      /**
 123       * Get the array of allowed tags
 124       *
 125       * @return array
 126       */
 127      public function getAllowedTags()
 128      {
 129          return $this->allowedTags;
 130      }
 131  
 132      /**
 133       * Set custom allowed tags
 134       *
 135       * @param TagInterface $allowedTags
 136       */
 137      public function setAllowedTags(TagInterface $allowedTags)
 138      {
 139          $this->allowedTags = array_map('strtolower', $allowedTags::getTags());
 140      }
 141  
 142      /**
 143       * Get the array of allowed attributes
 144       *
 145       * @return array
 146       */
 147      public function getAllowedAttrs()
 148      {
 149          return $this->allowedAttrs;
 150      }
 151  
 152      /**
 153       * Set custom allowed attributes
 154       *
 155       * @param AttributeInterface $allowedAttrs
 156       */
 157      public function setAllowedAttrs(AttributeInterface $allowedAttrs)
 158      {
 159          $this->allowedAttrs = array_map('strtolower', $allowedAttrs::getAttributes());
 160      }
 161  
 162      /**
 163       * Should we remove references to remote files?
 164       *
 165       * @param bool $removeRemoteRefs
 166       */
 167      public function removeRemoteReferences($removeRemoteRefs = false)
 168      {
 169          $this->removeRemoteReferences = $removeRemoteRefs;
 170      }
 171  
 172      /**
 173       * Get XML issues.
 174       *
 175       * @return array
 176       */
 177      public function getXmlIssues() {
 178          return $this->xmlIssues;
 179      }
 180  
 181  
 182      /**
 183       * Sanitize the passed string
 184       *
 185       * @param string $dirty
 186       * @return string
 187       */
 188      public function sanitize($dirty)
 189      {
 190          // Don't run on an empty string
 191          if (empty($dirty)) {
 192              return '';
 193          }
 194  
 195          // Strip php tags
 196          $dirty = preg_replace('/<\?(=|php)(.+?)\?>/i', '', $dirty);
 197  
 198          $this->resetInternal();
 199          $this->setUpBefore();
 200  
 201          $loaded = $this->xmlDocument->loadXML($dirty);
 202  
 203          // If we couldn't parse the XML then we go no further. Reset and return false
 204          if (!$loaded) {
 205              $this->resetAfter();
 206              return false;
 207          }
 208  
 209          // Pre-process all identified elements
 210          $xPath = new XPath($this->xmlDocument);
 211          $this->elementReferenceResolver = new Resolver($xPath, $this->useNestingLimit);
 212          $this->elementReferenceResolver->collect();
 213          $elementsToRemove = $this->elementReferenceResolver->getElementsToRemove();
 214  
 215          // Start the cleaning proccess
 216          $this->startClean($this->xmlDocument->childNodes, $elementsToRemove);
 217  
 218          // Save cleaned XML to a variable
 219          if ($this->removeXMLTag) {
 220              $clean = $this->xmlDocument->saveXML($this->xmlDocument->documentElement, $this->xmlOptions);
 221          } else {
 222              $clean = $this->xmlDocument->saveXML($this->xmlDocument, $this->xmlOptions);
 223          }
 224  
 225          $this->resetAfter();
 226  
 227          // Remove any extra whitespaces when minifying
 228          if ($this->minifyXML) {
 229              $clean = preg_replace('/\s+/', ' ', $clean);
 230          }
 231  
 232          // Return result
 233          return $clean;
 234      }
 235  
 236      /**
 237       * Set up libXML before we start
 238       */
 239      protected function setUpBefore()
 240      {
 241          // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
 242          // disabled by default, so this function is no longer needed to protect against XXE attacks.
 243          if (\LIBXML_VERSION < 20900) {
 244              // Turn off the entity loader
 245              $this->xmlLoaderValue = libxml_disable_entity_loader(true);
 246          }
 247  
 248          // Suppress the errors because we don't really have to worry about formation before cleansing
 249          libxml_use_internal_errors(true);
 250  
 251          // Reset array of altered XML
 252          $this->xmlIssues = array();
 253      }
 254  
 255      /**
 256       * Reset the class after use
 257       */
 258      protected function resetAfter()
 259      {
 260          // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
 261          // disabled by default, so this function is no longer needed to protect against XXE attacks.
 262          if (\LIBXML_VERSION < 20900) {
 263              // Reset the entity loader
 264              libxml_disable_entity_loader($this->xmlLoaderValue);
 265          }
 266      }
 267  
 268      /**
 269       * Start the cleaning with tags, then we move onto attributes and hrefs later
 270       *
 271       * @param \DOMNodeList $elements
 272       * @param array        $elementsToRemove
 273       */
 274      protected function startClean(\DOMNodeList $elements, array $elementsToRemove)
 275      {
 276          // loop through all elements
 277          // we do this backwards so we don't skip anything if we delete a node
 278          // see comments at: http://php.net/manual/en/class.domnamednodemap.php
 279          for ($i = $elements->length - 1; $i >= 0; $i--) {
 280              /** @var \DOMElement $currentElement */
 281              $currentElement = $elements->item($i);
 282  
 283              /**
 284               * If the element has exceeded the nesting limit, we should remove it.
 285               *
 286               * As it's only <use> elements that cause us issues with nesting DOS attacks
 287               * we should check what the element is before removing it. For now we'll only
 288               * remove <use> elements.
 289               */
 290              if (in_array($currentElement, $elementsToRemove) && 'use' === $currentElement->nodeName) {
 291                  $currentElement->parentNode->removeChild($currentElement);
 292                  $this->xmlIssues[] = array(
 293                      'message' => 'Invalid \'' . $currentElement->tagName . '\'',
 294                      'line'    => $currentElement->getLineNo(),
 295                  );
 296                  continue;
 297              }
 298  
 299              if ($currentElement instanceof \DOMElement) {
 300                  // If the tag isn't in the whitelist, remove it and continue with next iteration
 301                  if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
 302                      $currentElement->parentNode->removeChild($currentElement);
 303                      $this->xmlIssues[] = array(
 304                          'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
 305                          'line' => $currentElement->getLineNo(),
 306                      );
 307                      continue;
 308                  }
 309  
 310                  $this->cleanHrefs( $currentElement );
 311  
 312                  $this->cleanXlinkHrefs( $currentElement );
 313  
 314                  $this->cleanAttributesOnWhitelist($currentElement);
 315  
 316                  if (strtolower($currentElement->tagName) === 'use') {
 317                      if ($this->isUseTagDirty($currentElement)
 318                          || $this->isUseTagExceedingThreshold($currentElement)
 319                      ) {
 320                          $currentElement->parentNode->removeChild($currentElement);
 321                          $this->xmlIssues[] = array(
 322                              'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
 323                              'line' => $currentElement->getLineNo(),
 324                          );
 325                          continue;
 326                      }
 327                  }
 328  
 329                  // Strip out font elements that will break out of foreign content.
 330                  if (strtolower($currentElement->tagName) === 'font') {
 331                      $breaksOutOfForeignContent = false;
 332                      for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) {
 333                          // get attribute name
 334                          $attrName = $currentElement->attributes->item( $x )->name;
 335  
 336                          if (in_array(strtolower($attrName), ['face', 'color', 'size'])) {
 337                              $breaksOutOfForeignContent = true;
 338                          }
 339                      }
 340  
 341                      if ($breaksOutOfForeignContent) {
 342                          $currentElement->parentNode->removeChild($currentElement);
 343                          $this->xmlIssues[] = array(
 344                              'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
 345                              'line' => $currentElement->getLineNo(),
 346                          );
 347                          continue;
 348                      }
 349                  }
 350              }
 351  
 352              $this->cleanUnsafeNodes($currentElement);
 353  
 354              if ($currentElement->hasChildNodes()) {
 355                  $this->startClean($currentElement->childNodes, $elementsToRemove);
 356              }
 357          }
 358      }
 359  
 360      /**
 361       * Only allow attributes that are on the whitelist
 362       *
 363       * @param \DOMElement $element
 364       */
 365      protected function cleanAttributesOnWhitelist(\DOMElement $element)
 366      {
 367          for ($x = $element->attributes->length - 1; $x >= 0; $x--) {
 368              // get attribute name
 369              $attrName = $element->attributes->item($x)->name;
 370  
 371              // Remove attribute if not in whitelist
 372              if (!in_array(strtolower($attrName), $this->allowedAttrs) && !$this->isAriaAttribute(strtolower($attrName)) && !$this->isDataAttribute(strtolower($attrName))) {
 373  
 374                  $element->removeAttribute($attrName);
 375                  $this->xmlIssues[] = array(
 376                      'message' => 'Suspicious attribute \'' . $attrName . '\'',
 377                      'line' => $element->getLineNo(),
 378                  );
 379              }
 380  
 381              /**
 382               * This is used for when a namespace isn't imported properly.
 383               * Such as xlink:href when the xlink namespace isn't imported.
 384               * We have to do this as the link is still ran in this case.
 385               */
 386              if (false !== strpos($attrName, 'href')) {
 387                  $href = $element->getAttribute($attrName);
 388                  if (false === $this->isHrefSafeValue($href)) {
 389                      $element->removeAttribute($attrName);
 390                      $this->xmlIssues[] = array(
 391                          'message' => 'Suspicious attribute \'href\'',
 392                          'line'    => $element->getLineNo(),
 393                      );
 394                  }
 395              }
 396  
 397              // Do we want to strip remote references?
 398              if($this->removeRemoteReferences) {
 399                  // Remove attribute if it has a remote reference
 400                  if (isset($element->attributes->item($x)->value) && $this->hasRemoteReference($element->attributes->item($x)->value)) {
 401                      $element->removeAttribute($attrName);
 402                      $this->xmlIssues[] = array(
 403                          'message' => 'Suspicious attribute \'' . $attrName . '\'',
 404                          'line' => $element->getLineNo(),
 405                      );
 406                  }
 407              }
 408          }
 409      }
 410  
 411      /**
 412       * Clean the xlink:hrefs of script and data embeds
 413       *
 414       * @param \DOMElement $element
 415       */
 416      protected function cleanXlinkHrefs(\DOMElement $element)
 417      {
 418          $xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
 419          if (false === $this->isHrefSafeValue($xlinks)) {
 420              $element->removeAttributeNS( 'http://www.w3.org/1999/xlink', 'href' );
 421              $this->xmlIssues[] = array(
 422                  'message' => 'Suspicious attribute \'href\'',
 423                  'line' => $element->getLineNo(),
 424              );
 425          }
 426      }
 427  
 428      /**
 429       * Clean the hrefs of script and data embeds
 430       *
 431       * @param \DOMElement $element
 432       */
 433      protected function cleanHrefs(\DOMElement $element)
 434      {
 435          $href = $element->getAttribute('href');
 436          if (false === $this->isHrefSafeValue($href)) {
 437              $element->removeAttribute('href');
 438              $this->xmlIssues[] = array(
 439                  'message' => 'Suspicious attribute \'href\'',
 440                  'line' => $element->getLineNo(),
 441              );
 442          }
 443      }
 444  
 445      /**
 446       * Only allow whitelisted starts to be within the href.
 447       *
 448       * This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
 449       * This stops the need for us to use a complicated script regex.
 450       *
 451       * @param $value
 452       * @return bool
 453       */
 454      protected function isHrefSafeValue($value) {
 455  
 456          // Allow empty values
 457          if (empty($value)) {
 458              return true;
 459          }
 460  
 461          // Allow fragment identifiers.
 462          if ('#' === substr($value, 0, 1)) {
 463              return true;
 464          }
 465  
 466          // Allow relative URIs.
 467          if ('/' === substr($value, 0, 1)) {
 468              return true;
 469          }
 470  
 471          // Allow HTTPS domains.
 472          if ('https://' === substr($value, 0, 8)) {
 473              return true;
 474          }
 475  
 476          // Allow HTTP domains.
 477          if ('http://' === substr($value, 0, 7)) {
 478              return true;
 479          }
 480  
 481          // Allow known data URIs.
 482          if (in_array(substr($value, 0, 14), array(
 483              'data:image/png', // PNG
 484              'data:image/gif', // GIF
 485              'data:image/jpg', // JPG
 486              'data:image/jpe', // JPEG
 487              'data:image/pjp', // PJPEG
 488          ))) {
 489              return true;
 490          }
 491  
 492          // Allow known short data URIs.
 493          if (in_array(substr($value, 0, 12), array(
 494              'data:img/png', // PNG
 495              'data:img/gif', // GIF
 496              'data:img/jpg', // JPG
 497              'data:img/jpe', // JPEG
 498              'data:img/pjp', // PJPEG
 499          ))) {
 500              return true;
 501          }
 502  
 503          return false;
 504      }
 505  
 506      /**
 507       * Removes non-printable ASCII characters from string & trims it
 508       *
 509       * @param string $value
 510       * @return bool
 511       */
 512      protected function removeNonPrintableCharacters($value)
 513      {
 514          return trim(preg_replace('/[^ -~]/xu','',$value));
 515      }
 516  
 517      /**
 518       * Does this attribute value have a remote reference?
 519       *
 520       * @param $value
 521       * @return bool
 522       */
 523      protected function hasRemoteReference($value)
 524      {
 525          $value = $this->removeNonPrintableCharacters($value);
 526  
 527          $wrapped_in_url = preg_match('~^url\(\s*[\'"]\s*(.*)\s*[\'"]\s*\)$~xi', $value, $match);
 528          if (!$wrapped_in_url){
 529              return false;
 530          }
 531  
 532          $value = trim($match[1], '\'"');
 533  
 534          return preg_match('~^((https?|ftp|file):)?//~xi', $value);
 535      }
 536  
 537      /**
 538       * Should we minify the output?
 539       *
 540       * @param bool $shouldMinify
 541       */
 542      public function minify($shouldMinify = false)
 543      {
 544          $this->minifyXML = (bool) $shouldMinify;
 545      }
 546  
 547      /**
 548       * Should we remove the XML tag in the header?
 549       *
 550       * @param bool $removeXMLTag
 551       */
 552      public function removeXMLTag($removeXMLTag = false)
 553      {
 554          $this->removeXMLTag = (bool) $removeXMLTag;
 555      }
 556  
 557      /**
 558       * Whether `<use ... xlink:href="#identifier">` elements shall be
 559       * removed in case expansion would exceed this threshold.
 560       *
 561       * @param int $useThreshold
 562       */
 563      public function useThreshold($useThreshold = 1000)
 564      {
 565          $this->useThreshold = (int)$useThreshold;
 566      }
 567  
 568      /**
 569       * Check to see if an attribute is an aria attribute or not
 570       *
 571       * @param $attributeName
 572       *
 573       * @return bool
 574       */
 575      protected function isAriaAttribute($attributeName)
 576      {
 577          return strpos($attributeName, 'aria-') === 0;
 578      }
 579  
 580      /**
 581       * Check to see if an attribute is an data attribute or not
 582       *
 583       * @param $attributeName
 584       *
 585       * @return bool
 586       */
 587      protected function isDataAttribute($attributeName)
 588      {
 589          return strpos($attributeName, 'data-') === 0;
 590      }
 591  
 592      /**
 593       * Make sure our use tag is only referencing internal resources
 594       *
 595       * @param \DOMElement $element
 596       * @return bool
 597       */
 598      protected function isUseTagDirty(\DOMElement $element)
 599      {
 600          $href = Helper::getElementHref($element);
 601          return $href && strpos($href, '#') !== 0;
 602      }
 603  
 604      /**
 605       * Determines whether `<use ... xlink:href="#identifier">` is expanded
 606       * recursively in order to create DoS scenarios. The amount of a actually
 607       * used element needs to be below `$this->useThreshold`.
 608       *
 609       * @param \DOMElement $element
 610       * @return bool
 611       */
 612      protected function isUseTagExceedingThreshold(\DOMElement $element)
 613      {
 614          if ($this->useThreshold <= 0) {
 615              return false;
 616          }
 617          $useId = Helper::extractIdReferenceFromHref(
 618              Helper::getElementHref($element)
 619          );
 620          if ($useId === null) {
 621              return false;
 622          }
 623          foreach ($this->elementReferenceResolver->findByElementId($useId) as $subject) {
 624              if ($subject->countUse() >= $this->useThreshold) {
 625                  return true;
 626              }
 627          }
 628          return false;
 629      }
 630  
 631      /**
 632       * Set the nesting limit for <use> tags.
 633       *
 634       * @param $limit
 635       */
 636      public function setUseNestingLimit($limit)
 637      {
 638          $this->useNestingLimit = (int) $limit;
 639      }
 640  
 641      /**
 642       * Remove nodes that are either invalid or malformed.
 643       *
 644       * @param \DOMNode $currentElement The current element.
 645       */
 646      protected function cleanUnsafeNodes(\DOMNode $currentElement) {
 647          // Replace CDATA node with encoded text node
 648          if ($currentElement instanceof \DOMCdataSection) {
 649              $textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue);
 650              $currentElement->parentNode->replaceChild($textNode, $currentElement);
 651          // If the element doesn't have a tagname, remove it and continue with next iteration
 652          } elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) {
 653              $currentElement->parentNode->removeChild($currentElement);
 654              $this->xmlIssues[] = array(
 655                  'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
 656                  'line' => $currentElement->getLineNo(),
 657              );
 658              return;
 659          }
 660  
 661          if ( $currentElement->childNodes && $currentElement->childNodes->length > 0 ) {
 662              for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) {
 663                  /** @var \DOMElement $childElement */
 664                  $childElement = $currentElement->childNodes->item($j);
 665                  $this->cleanUnsafeNodes($childElement);
 666              }
 667          }
 668      }
 669  }


Generated: Wed Sep 7 05:41:13 2022 Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer