[ Index ] |
PHP Cross Reference of Joomla 4.2.2 documentation |
[Summary view] [Print] [Text view]
<?php
namespace enshrined\svgSanitize;

use enshrined\svgSanitize\data\AllowedAttributes;
use enshrined\svgSanitize\data\AllowedTags;
use enshrined\svgSanitize\data\AttributeInterface;
use enshrined\svgSanitize\data\TagInterface;
use enshrined\svgSanitize\data\XPath;
use enshrined\svgSanitize\ElementReference\Resolver;

/**
 * Class Sanitizer
 *
 * Parses an SVG/XML string into a DOMDocument, removes every tag, attribute,
 * href and node that is not explicitly allowed, and serialises the result
 * back to a string. Everything that was removed is recorded and can be
 * retrieved with getXmlIssues().
 *
 * @package enshrined\svgSanitize
 */
class Sanitizer
{

    /**
     * Document currently being sanitised; recreated on every sanitize() call.
     *
     * @var \DOMDocument
     */
    protected $xmlDocument;

    /**
     * Lower-cased names of the tags that may stay in the document.
     *
     * @var array
     */
    protected $allowedTags;

    /**
     * Lower-cased names of the attributes that may stay in the document.
     *
     * @var array
     */
    protected $allowedAttrs;

    /**
     * Previous return value of libxml_disable_entity_loader(), restored in resetAfter().
     * Only populated when LIBXML_VERSION < 20900.
     *
     * @var bool|null
     */
    protected $xmlLoaderValue;

    /**
     * @var bool
     */
    protected $minifyXML = false;

    /**
     * @var bool
     */
    protected $removeRemoteReferences = false;

    /**
     * @var int
     */
    protected $useThreshold = 1000;

    /**
     * @var bool
     */
    protected $removeXMLTag = false;

    /**
     * @var int
     */
    protected $xmlOptions = LIBXML_NOEMPTYTAG;

    /**
     * Issues recorded during the last sanitize() run; each entry is an
     * array with the keys 'message' and 'line'.
     *
     * @var array
     */
    protected $xmlIssues = array();

    /**
     * @var Resolver
     */
    protected $elementReferenceResolver;

    /**
     * @var int
     */
    protected $useNestingLimit = 15;

    /**
     * Load the default tag and attribute whitelists.
     */
    public function __construct()
    {
        // Load default tags/attributes
        $this->allowedAttrs = array_map('strtolower', AllowedAttributes::getAttributes());
        $this->allowedTags = array_map('strtolower', AllowedTags::getTags());
    }

    /**
     * Set up the DOMDocument
     */
    protected function resetInternal()
    {
        $this->xmlDocument = new \DOMDocument();
        $this->xmlDocument->preserveWhiteSpace = false;
        $this->xmlDocument->strictErrorChecking = false;
        $this->xmlDocument->formatOutput = !$this->minifyXML;
    }

    /**
     * Set XML options to use when saving XML
     * See: DOMDocument::saveXML
     *
     * @param int $xmlOptions
     */
    public function setXMLOptions($xmlOptions)
    {
        $this->xmlOptions = $xmlOptions;
    }

    /**
     * Get XML options to use when saving XML
     * See: DOMDocument::saveXML
     *
     * @return int
     */
    public function getXMLOptions()
    {
        return $this->xmlOptions;
    }

    /**
     * Get the array of allowed tags
     *
     * @return array
     */
    public function getAllowedTags()
    {
        return $this->allowedTags;
    }

    /**
     * Set custom allowed tags
     *
     * @param TagInterface $allowedTags
     */
    public function setAllowedTags(TagInterface $allowedTags)
    {
        $this->allowedTags = array_map('strtolower', $allowedTags::getTags());
    }

    /**
     * Get the array of allowed attributes
     *
     * @return array
     */
    public function getAllowedAttrs()
    {
        return $this->allowedAttrs;
    }

    /**
     * Set custom allowed attributes
     *
     * @param AttributeInterface $allowedAttrs
     */
    public function setAllowedAttrs(AttributeInterface $allowedAttrs)
    {
        $this->allowedAttrs = array_map('strtolower', $allowedAttrs::getAttributes());
    }

    /**
     * Should we remove references to remote files?
     *
     * @param bool $removeRemoteRefs
     */
    public function removeRemoteReferences($removeRemoteRefs = false)
    {
        $this->removeRemoteReferences = $removeRemoteRefs;
    }

    /**
     * Get XML issues.
     *
     * @return array
     */
    public function getXmlIssues()
    {
        return $this->xmlIssues;
    }

    /**
     * Sanitize the passed string
     *
     * @param string $dirty
     * @return string|false The cleaned XML, an empty string when $dirty is empty,
     *                      or false when the input could not be parsed as XML.
     */
    public function sanitize($dirty)
    {
        // Don't run on an empty string
        if (empty($dirty)) {
            return '';
        }

        // Strip php tags. The `s` modifier lets `.` match newlines so that
        // PHP blocks spanning several lines are stripped as well.
        $dirty = preg_replace('/<\?(=|php)(.+?)\?>/is', '', $dirty);

        $this->resetInternal();
        $this->setUpBefore();

        // preg_replace() returns null on failure, and nothing may be left once
        // the PHP tags are gone. DOMDocument::loadXML() throws a ValueError for
        // an empty source on PHP 8, so both cases are treated as unparseable.
        $loaded = is_string($dirty) && $dirty !== '' && $this->xmlDocument->loadXML($dirty);

        // If we couldn't parse the XML then we go no further. Reset and return false
        if (!$loaded) {
            $this->resetAfter();
            return false;
        }

        // Pre-process all identified elements
        $xPath = new XPath($this->xmlDocument);
        $this->elementReferenceResolver = new Resolver($xPath, $this->useNestingLimit);
        $this->elementReferenceResolver->collect();
        $elementsToRemove = $this->elementReferenceResolver->getElementsToRemove();

        // Start the cleaning process
        $this->startClean($this->xmlDocument->childNodes, $elementsToRemove);

        // Save cleaned XML to a variable
        if ($this->removeXMLTag) {
            // Serialising only the root element leaves the XML declaration out
            $clean = $this->xmlDocument->saveXML($this->xmlDocument->documentElement, $this->xmlOptions);
        } else {
            $clean = $this->xmlDocument->saveXML($this->xmlDocument, $this->xmlOptions);
        }

        $this->resetAfter();

        // Remove any extra whitespaces when minifying
        if ($this->minifyXML) {
            $clean = preg_replace('/\s+/', ' ', $clean);
        }

        // Return result
        return $clean;
    }

    /**
     * Set up libXML before we start
     */
    protected function setUpBefore()
    {
        // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
        // disabled by default, so this function is no longer needed to protect against XXE attacks.
        if (\LIBXML_VERSION < 20900) {
            // Turn off the entity loader
            $this->xmlLoaderValue = libxml_disable_entity_loader(true);
        }

        // Suppress the errors because we don't really have to worry about formation before cleansing
        libxml_use_internal_errors(true);

        // Reset array of altered XML
        $this->xmlIssues = array();
    }

    /**
     * Reset the class after use
     */
    protected function resetAfter()
    {
        // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading is
        // disabled by default, so this function is no longer needed to protect against XXE attacks.
        if (\LIBXML_VERSION < 20900) {
            // Reset the entity loader
            libxml_disable_entity_loader($this->xmlLoaderValue);
        }
    }

    /**
     * Start the cleaning with tags, then we move onto attributes and hrefs later
     *
     * @param \DOMNodeList $elements
     * @param array $elementsToRemove
     */
    protected function startClean(\DOMNodeList $elements, array $elementsToRemove)
    {
        // loop through all elements
        // we do this backwards so we don't skip anything if we delete a node
        // see comments at: http://php.net/manual/en/class.domnamednodemap.php
        for ($i = $elements->length - 1; $i >= 0; $i--) {
            /** @var \DOMElement $currentElement */
            $currentElement = $elements->item($i);

            /**
             * If the element has exceeded the nesting limit, we should remove it.
             *
             * As it's only <use> elements that cause us issues with nesting DOS attacks
             * we should check what the element is before removing it. For now we'll only
             * remove <use> elements.
             *
             * NOTE(review): in_array() compares loosely here. Whether that is meant as an
             * identity check depends on what Resolver::getElementsToRemove() returns, which
             * is not visible from this class - confirm before switching to a strict comparison.
             */
            if (in_array($currentElement, $elementsToRemove) && 'use' === $currentElement->nodeName) {
                $currentElement->parentNode->removeChild($currentElement);
                $this->xmlIssues[] = array(
                    'message' => 'Invalid \'' . $currentElement->tagName . '\'',
                    'line' => $currentElement->getLineNo(),
                );
                continue;
            }

            if ($currentElement instanceof \DOMElement) {
                $tagName = strtolower($currentElement->tagName);

                // If the tag isn't in the whitelist, remove it and continue with next iteration
                if (!in_array($tagName, $this->allowedTags, true)) {
                    $currentElement->parentNode->removeChild($currentElement);
                    $this->xmlIssues[] = array(
                        'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
                        'line' => $currentElement->getLineNo(),
                    );
                    continue;
                }

                $this->cleanHrefs($currentElement);

                $this->cleanXlinkHrefs($currentElement);

                $this->cleanAttributesOnWhitelist($currentElement);

                if ($tagName === 'use') {
                    if ($this->isUseTagDirty($currentElement)
                        || $this->isUseTagExceedingThreshold($currentElement)
                    ) {
                        $currentElement->parentNode->removeChild($currentElement);
                        $this->xmlIssues[] = array(
                            'message' => 'Suspicious \'' . $currentElement->tagName . '\'',
                            'line' => $currentElement->getLineNo(),
                        );
                        continue;
                    }
                }

                // Strip out font elements that will break out of foreign content.
                if ($tagName === 'font') {
                    $breaksOutOfForeignContent = false;
                    for ($x = $currentElement->attributes->length - 1; $x >= 0; $x--) {
                        // get attribute name
                        $attrName = $currentElement->attributes->item($x)->name;

                        if (in_array(strtolower($attrName), array('face', 'color', 'size'), true)) {
                            $breaksOutOfForeignContent = true;
                        }
                    }

                    if ($breaksOutOfForeignContent) {
                        $currentElement->parentNode->removeChild($currentElement);
                        $this->xmlIssues[] = array(
                            'message' => 'Suspicious tag \'' . $currentElement->tagName . '\'',
                            'line' => $currentElement->getLineNo(),
                        );
                        continue;
                    }
                }
            }

            $this->cleanUnsafeNodes($currentElement);

            if ($currentElement->hasChildNodes()) {
                $this->startClean($currentElement->childNodes, $elementsToRemove);
            }
        }
    }

    /**
     * Only allow attributes that are on the whitelist
     *
     * Besides the whitelist check, every attribute whose name contains "href"
     * is validated with isHrefSafeValue(), and - when enabled - attributes
     * holding a remote reference are removed.
     *
     * @param \DOMElement $element
     */
    protected function cleanAttributesOnWhitelist(\DOMElement $element)
    {
        // Iterate backwards so removing an attribute does not shift the ones still to visit
        for ($x = $element->attributes->length - 1; $x >= 0; $x--) {
            // get attribute name
            $attrName = $element->attributes->item($x)->name;
            $lowerAttrName = strtolower($attrName);

            // Remove attribute if not in whitelist
            if (!in_array($lowerAttrName, $this->allowedAttrs, true)
                && !$this->isAriaAttribute($lowerAttrName)
                && !$this->isDataAttribute($lowerAttrName)
            ) {
                $element->removeAttribute($attrName);
                $this->xmlIssues[] = array(
                    'message' => 'Suspicious attribute \'' . $attrName . '\'',
                    'line' => $element->getLineNo(),
                );
            }

            /**
             * This is used for when a namespace isn't imported properly.
             * Such as xlink:href when the xlink namespace isn't imported.
             * We have to do this as the link is still ran in this case.
             */
            if (false !== strpos($attrName, 'href')) {
                $href = $element->getAttribute($attrName);
                if (false === $this->isHrefSafeValue($href)) {
                    $element->removeAttribute($attrName);
                    $this->xmlIssues[] = array(
                        'message' => 'Suspicious attribute \'href\'',
                        'line' => $element->getLineNo(),
                    );
                }
            }

            // Do we want to strip remote references?
            if ($this->removeRemoteReferences) {
                // Remove attribute if it has a remote reference
                if (isset($element->attributes->item($x)->value) && $this->hasRemoteReference($element->attributes->item($x)->value)) {
                    $element->removeAttribute($attrName);
                    $this->xmlIssues[] = array(
                        'message' => 'Suspicious attribute \'' . $attrName . '\'',
                        'line' => $element->getLineNo(),
                    );
                }
            }
        }
    }

    /**
     * Clean the xlink:hrefs of script and data embeds
     *
     * @param \DOMElement $element
     */
    protected function cleanXlinkHrefs(\DOMElement $element)
    {
        $xlinks = $element->getAttributeNS('http://www.w3.org/1999/xlink', 'href');
        if (false === $this->isHrefSafeValue($xlinks)) {
            $element->removeAttributeNS('http://www.w3.org/1999/xlink', 'href');
            $this->xmlIssues[] = array(
                'message' => 'Suspicious attribute \'href\'',
                'line' => $element->getLineNo(),
            );
        }
    }

    /**
     * Clean the hrefs of script and data embeds
     *
     * @param \DOMElement $element
     */
    protected function cleanHrefs(\DOMElement $element)
    {
        $href = $element->getAttribute('href');
        if (false === $this->isHrefSafeValue($href)) {
            $element->removeAttribute('href');
            $this->xmlIssues[] = array(
                'message' => 'Suspicious attribute \'href\'',
                'line' => $element->getLineNo(),
            );
        }
    }

    /**
     * Only allow whitelisted starts to be within the href.
     *
     * This will stop scripts etc from being passed through, with or without attempting to hide bypasses.
     * This stops the need for us to use a complicated script regex.
     *
     * @param string $value
     * @return bool True when the value is empty or starts with an allowed prefix.
     */
    protected function isHrefSafeValue($value)
    {
        // Allow empty values
        if (empty($value)) {
            return true;
        }

        // Allow fragment identifiers.
        if ('#' === substr($value, 0, 1)) {
            return true;
        }

        // Allow relative URIs.
        if ('/' === substr($value, 0, 1)) {
            return true;
        }

        // Allow HTTPS domains.
        if ('https://' === substr($value, 0, 8)) {
            return true;
        }

        // Allow HTTP domains.
        if ('http://' === substr($value, 0, 7)) {
            return true;
        }

        // Allow known data URIs.
        if (in_array(substr($value, 0, 14), array(
            'data:image/png', // PNG
            'data:image/gif', // GIF
            'data:image/jpg', // JPG
            'data:image/jpe', // JPEG
            'data:image/pjp', // PJPEG
        ), true)) {
            return true;
        }

        // Allow known short data URIs.
        if (in_array(substr($value, 0, 12), array(
            'data:img/png', // PNG
            'data:img/gif', // GIF
            'data:img/jpg', // JPG
            'data:img/jpe', // JPEG
            'data:img/pjp', // PJPEG
        ), true)) {
            return true;
        }

        return false;
    }

    /**
     * Removes non-printable ASCII characters from string & trims it
     *
     * @param string $value
     * @return string
     */
    protected function removeNonPrintableCharacters($value)
    {
        // preg_replace() returns null on failure (e.g. malformed UTF-8 with the
        // `u` modifier); the cast keeps trim() from receiving null.
        return trim((string) preg_replace('/[^ -~]/xu', '', $value));
    }

    /**
     * Does this attribute value have a remote reference?
     *
     * A value counts as remote when it is a `url(...)` wrapper - quoted or
     * unquoted - whose target starts with `//` or with an http, https, ftp
     * or file scheme followed by `//`.
     *
     * @param string $value
     * @return bool
     */
    protected function hasRemoteReference($value)
    {
        $value = $this->removeNonPrintableCharacters($value);

        // The quotes are optional: url(http://example.com) is as valid as url('http://example.com')
        $wrapped_in_url = preg_match('~^url\(\s*[\'"]?\s*(.*?)\s*[\'"]?\s*\)$~xi', $value, $match);
        if (!$wrapped_in_url) {
            return false;
        }

        $value = trim($match[1], '\'"');

        return 1 === preg_match('~^((https?|ftp|file):)?//~xi', $value);
    }

    /**
     * Should we minify the output?
     *
     * @param bool $shouldMinify
     */
    public function minify($shouldMinify = false)
    {
        $this->minifyXML = (bool) $shouldMinify;
    }

    /**
     * Should we remove the XML tag in the header?
     *
     * @param bool $removeXMLTag
     */
    public function removeXMLTag($removeXMLTag = false)
    {
        $this->removeXMLTag = (bool) $removeXMLTag;
    }

    /**
     * Whether `<use ... xlink:href="#identifier">` elements shall be
     * removed in case expansion would exceed this threshold.
     *
     * @param int $useThreshold
     */
    public function useThreshold($useThreshold = 1000)
    {
        $this->useThreshold = (int) $useThreshold;
    }

    /**
     * Check to see if an attribute is an aria attribute or not
     *
     * @param string $attributeName
     *
     * @return bool
     */
    protected function isAriaAttribute($attributeName)
    {
        return strpos($attributeName, 'aria-') === 0;
    }

    /**
     * Check to see if an attribute is an data attribute or not
     *
     * @param string $attributeName
     *
     * @return bool
     */
    protected function isDataAttribute($attributeName)
    {
        return strpos($attributeName, 'data-') === 0;
    }

    /**
     * Make sure our use tag is only referencing internal resources
     *
     * @param \DOMElement $element
     * @return bool True when the element has an href that does not start with '#'.
     */
    protected function isUseTagDirty(\DOMElement $element)
    {
        $href = Helper::getElementHref($element);
        return $href && strpos($href, '#') !== 0;
    }

    /**
     * Determines whether `<use ... xlink:href="#identifier">` is expanded
     * recursively in order to create DoS scenarios. The amount of a actually
     * used element needs to be below `$this->useThreshold`.
     *
     * @param \DOMElement $element
     * @return bool
     */
    protected function isUseTagExceedingThreshold(\DOMElement $element)
    {
        // A threshold of zero or less disables the check
        if ($this->useThreshold <= 0) {
            return false;
        }
        $useId = Helper::extractIdReferenceFromHref(
            Helper::getElementHref($element)
        );
        if ($useId === null) {
            return false;
        }
        foreach ($this->elementReferenceResolver->findByElementId($useId) as $subject) {
            if ($subject->countUse() >= $this->useThreshold) {
                return true;
            }
        }
        return false;
    }

    /**
     * Set the nesting limit for <use> tags.
     *
     * @param int $limit
     */
    public function setUseNestingLimit($limit)
    {
        $this->useNestingLimit = (int) $limit;
    }

    /**
     * Remove nodes that are either invalid or malformed.
     *
     * CDATA sections are replaced by plain text nodes; any other node that is
     * neither an element nor a text node is removed and recorded as an issue.
     * The check is then applied recursively to the node's children.
     *
     * @param \DOMNode $currentElement The current element.
     */
    protected function cleanUnsafeNodes(\DOMNode $currentElement)
    {
        // Replace CDATA node with encoded text node
        if ($currentElement instanceof \DOMCdataSection) {
            $textNode = $currentElement->ownerDocument->createTextNode($currentElement->nodeValue);
            $currentElement->parentNode->replaceChild($textNode, $currentElement);
        // If the element doesn't have a tagname, remove it and continue with next iteration
        } elseif (!$currentElement instanceof \DOMElement && !$currentElement instanceof \DOMText) {
            $currentElement->parentNode->removeChild($currentElement);
            $this->xmlIssues[] = array(
                'message' => 'Suspicious node \'' . $currentElement->nodeName . '\'',
                'line' => $currentElement->getLineNo(),
            );
            return;
        }

        if ($currentElement->childNodes && $currentElement->childNodes->length > 0) {
            // Walk backwards so removals do not shift the children still to visit
            for ($j = $currentElement->childNodes->length - 1; $j >= 0; $j--) {
                /** @var \DOMElement $childElement */
                $childElement = $currentElement->childNodes->item($j);
                $this->cleanUnsafeNodes($childElement);
            }
        }
    }
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Sep 7 05:41:13 2022 | Chilli.vc Blog - For Webmaster,Blog-Writer,System Admin and Domainer |