From 04b6207fdfaf70051c38bcd16958ad5a05db37e2 Mon Sep 17 00:00:00 2001 From: Titouan Galopin Date: Thu, 10 Oct 2024 10:56:48 +0200 Subject: [PATCH] [HtmlSanitizer] Add ability to sanitize a whole document --- .../Component/HtmlSanitizer/HtmlSanitizer.php | 49 +++--------- src/Symfony/Component/HtmlSanitizer/README.md | 6 ++ .../HtmlSanitizer/Reference/W3CReference.php | 3 + .../Tests/HtmlSanitizerAllTest.php | 26 ++++++ .../HtmlSanitizer/Visitor/DomVisitor.php | 80 +++++++++++++++++-- .../HtmlSanitizer/Visitor/Model/Cursor.php | 6 +- 6 files changed, 123 insertions(+), 47 deletions(-) diff --git a/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php index 430960edcb86f..95f1e2b74fb1e 100644 --- a/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php +++ b/src/Symfony/Component/HtmlSanitizer/HtmlSanitizer.php @@ -25,15 +25,14 @@ final class HtmlSanitizer implements HtmlSanitizerInterface private ParserInterface $parser; /** - * @var array + * @var ?DomVisitor */ - private array $domVisitors = []; + private ?DomVisitor $domVisitor = null; public function __construct( private HtmlSanitizerConfig $config, ?ParserInterface $parser = null, ) { - $this->config = $config; $this->parser = $parser ?? new MastermindsParser(); } @@ -58,7 +57,7 @@ private function sanitizeWithContext(string $context, string $input): string } // Other context: build a DOM visitor - $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context); + $this->domVisitor ??= $this->createDomVisitor(); // Prevent DOS attack induced by extremely long HTML strings if (-1 !== $this->config->getMaxInputLength() && \strlen($input) > $this->config->getMaxInputLength()) { @@ -80,7 +79,9 @@ private function sanitizeWithContext(string $context, string $input): string } // Visit the DOM tree and render the sanitized nodes - return $this->domVisitors[$context]->visit($parsed)?->render() ?? 
''; + $sanitized = $this->domVisitor->visit($context, $parsed)?->render() ?? ''; + + return W3CReference::CONTEXT_DOCUMENT === $context ? ''.$sanitized : $sanitized; } private function isValidUtf8(string $html): bool @@ -89,50 +90,20 @@ private function isValidUtf8(string $html): bool return '' === $html || preg_match('//u', $html); } - private function createDomVisitorForContext(string $context): DomVisitor + private function createDomVisitor(): DomVisitor { $elementsConfig = []; - // Head: only a few elements are allowed - if (W3CReference::CONTEXT_HEAD === $context) { - foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { - if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$allowedElement] = $allowedAttributes; - } - } - - foreach ($this->config->getBlockedElements() as $blockedElement => $v) { - if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block; - } - } - - foreach ($this->config->getDroppedElements() as $droppedElement => $v) { - if (\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop; - } - } - - return new DomVisitor($this->config, $elementsConfig); - } - - // Body: allow any configured element that isn't in foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { - if (!\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$allowedElement] = $allowedAttributes; - } + $elementsConfig[$allowedElement] = $allowedAttributes; } foreach ($this->config->getBlockedElements() as $blockedElement => $v) { - if (!\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block; - } + $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block; } foreach ($this->config->getDroppedElements() as 
$droppedElement => $v) { - if (!\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) { - $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop; - } + $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop; } return new DomVisitor($this->config, $elementsConfig); diff --git a/src/Symfony/Component/HtmlSanitizer/README.md b/src/Symfony/Component/HtmlSanitizer/README.md index f528da047d62e..22a1ffa0b931e 100644 --- a/src/Symfony/Component/HtmlSanitizer/README.md +++ b/src/Symfony/Component/HtmlSanitizer/README.md @@ -98,6 +98,12 @@ $sanitizer->sanitize($userInput); // Sanitize the given string for a usage in a <head> tag $sanitizer->sanitizeFor('head', $userInput); +// Sanitize the given string for a usage in a <body> tag +$sanitizer->sanitizeFor('body', $userInput); + +// Sanitize the given string as a whole document (including <head> and <body>) +$sanitizer->sanitizeFor('document', $userInput); + // Sanitize the given string for a usage in another tag $sanitizer->sanitizeFor('title', $userInput); // Will encode as HTML entities $sanitizer->sanitizeFor('textarea', $userInput); // Will encode as HTML entities diff --git a/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php b/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php index e519f76a46a43..e92f746f0b7ff 100644 --- a/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php +++ b/src/Symfony/Component/HtmlSanitizer/Reference/W3CReference.php @@ -28,13 +28,16 @@ final class W3CReference * A parent element name can be passed as an argument to {@see HtmlSanitizer::sanitizeFor()}. * When doing so, depending on the given context, different elements will be allowed.
*/ + public const CONTEXT_DOCUMENT = 'document'; public const CONTEXT_HEAD = 'head'; public const CONTEXT_BODY = 'body'; public const CONTEXT_TEXT = 'text'; // Which context to apply depending on the passed parent element name public const CONTEXTS_MAP = [ + 'document' => self::CONTEXT_DOCUMENT, 'head' => self::CONTEXT_HEAD, + 'body' => self::CONTEXT_BODY, 'textarea' => self::CONTEXT_TEXT, 'title' => self::CONTEXT_TEXT, ]; diff --git a/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php index 8699879f67bfd..c58788f6bf45a 100644 --- a/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php +++ b/src/Symfony/Component/HtmlSanitizer/Tests/HtmlSanitizerAllTest.php @@ -31,6 +31,32 @@ private function createSanitizer(): HtmlSanitizer ); } + /** + * @dataProvider provideSanitizeDocument + */ + public function testSanitizeDocument(string $input, string $expected) + { + $this->assertSame($expected, $this->createSanitizer()->sanitizeFor('document', $input)); + } + + public static function provideSanitizeDocument() + { + $heads = iterator_to_array(self::provideSanitizeHead()); + $bodies = iterator_to_array(self::provideSanitizeBody()); + + $cases = []; + foreach ($heads as $head) { + foreach ($bodies as $body) { + $cases[] = [ + ''.$head[0].''.$body[0].'', + ''.$head[1].''.$body[1].'', + ]; + } + } + + return $cases; + } + /** * @dataProvider provideSanitizeHead */ diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php b/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php index e6d34a0967b79..c14040d186022 100644 --- a/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php +++ b/src/Symfony/Component/HtmlSanitizer/Visitor/DomVisitor.php @@ -13,6 +13,7 @@ use Symfony\Component\HtmlSanitizer\HtmlSanitizerAction; use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig; +use Symfony\Component\HtmlSanitizer\Reference\W3CReference; use 
Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer; use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface; use Symfony\Component\HtmlSanitizer\Visitor\Model\Cursor; @@ -51,6 +52,13 @@ final class DomVisitor */ private array $attributeSanitizers = []; + /** + * Registry of elements configuration for each sanitization context used in the document. + * + * @var array>> $elementsConfigByContext + */ + private array $elementsConfigByContext = []; + /** * @param array> $elementsConfig Registry of allowed/blocked elements: * * If an element is present as a key and contains an array, the element should be allowed @@ -75,9 +83,9 @@ public function __construct( $this->defaultAction = $config->getDefaultAction(); } - public function visit(\DOMDocumentFragment $domNode): ?NodeInterface + public function visit(?string $context, \DOMDocumentFragment $domNode): ?NodeInterface { - $cursor = new Cursor(new DocumentNode()); + $cursor = new Cursor([$context], new DocumentNode()); $this->visitChildren($domNode, $cursor); return $cursor->node; @@ -87,24 +95,35 @@ private function visitNode(\DOMNode $domNode, Cursor $cursor): void { $nodeName = StringSanitizer::htmlLower($domNode->nodeName); + if (array_key_exists($nodeName, W3CReference::CONTEXTS_MAP)) { + $cursor->contextsPath[] = $nodeName; + } + // Visit recursively if the node was not dropped if ($this->enterNode($nodeName, $domNode, $cursor)) { $this->visitChildren($domNode, $cursor); $cursor->node = $cursor->node->getParent(); } + + if (array_key_exists($nodeName, W3CReference::CONTEXTS_MAP)) { + array_pop($cursor->contextsPath); + } } private function enterNode(string $domNodeName, \DOMNode $domNode, Cursor $cursor): bool { - if (!\array_key_exists($domNodeName, $this->elementsConfig)) { + $context = array_reverse($cursor->contextsPath)[0] ?? 
'body'; + $this->elementsConfigByContext[$context] ??= $this->createContextElementsConfig($context); + + if (!\array_key_exists($domNodeName, $this->elementsConfigByContext[$context])) { $action = $this->defaultAction; $allowedAttributes = []; } else { - if (\is_array($this->elementsConfig[$domNodeName])) { + if (\is_array($this->elementsConfigByContext[$context][$domNodeName])) { $action = HtmlSanitizerAction::Allow; - $allowedAttributes = $this->elementsConfig[$domNodeName]; + $allowedAttributes = $this->elementsConfigByContext[$context][$domNodeName]; } else { - $action = $this->elementsConfig[$domNodeName]; + $action = $this->elementsConfigByContext[$context][$domNodeName]; $allowedAttributes = []; } } @@ -185,4 +204,53 @@ private function setAttributes(string $domNodeName, \DOMNode $domNode, Node $nod } } } + + private function createContextElementsConfig(string $context): array + { + $elementsConfig = []; + + // Head: only a few elements are allowed + if (W3CReference::CONTEXT_HEAD === $context) { + foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { + if (\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$allowedElement] = $allowedAttributes; + } + } + + foreach ($this->config->getBlockedElements() as $blockedElement => $v) { + if (\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block; + } + } + + foreach ($this->config->getDroppedElements() as $droppedElement => $v) { + if (\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop; + } + } + + return $elementsConfig; + } + + // Body: allow any configured element that isn't in <head> + foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) { + if (!\array_key_exists($allowedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$allowedElement] =
$allowedAttributes; + } + } + + foreach ($this->config->getBlockedElements() as $blockedElement => $v) { + if (!\array_key_exists($blockedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block; + } + } + + foreach ($this->config->getDroppedElements() as $droppedElement => $v) { + if (!\array_key_exists($droppedElement, W3CReference::HEAD_ELEMENTS)) { + $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop; + } + } + + return $elementsConfig; + } } diff --git a/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php b/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php index 5214c09b77d20..966ec86d134df 100644 --- a/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php +++ b/src/Symfony/Component/HtmlSanitizer/Visitor/Model/Cursor.php @@ -20,7 +20,9 @@ */ final class Cursor { - public function __construct(public ?NodeInterface $node) - { + public function __construct( + public array $contextsPath, + public ?NodeInterface $node + ) { } }