File tree 2 files changed +24
-2
lines changed
Filter options
src/Symfony/Component/DomCrawler
2 files changed +24
-2
lines changed
Original file line number Diff line number Diff line change @@ -1151,12 +1151,30 @@ protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling')
1151
1151
1152
1152
/**
 * Parses HTML content with the configured HTML5 parser.
 *
 * When supportsEncoding() reports that $charset cannot be handled, the
 * content is first passed through convertToHtmlEntities() and the charset
 * handed to the parser falls back to UTF-8. Otherwise the content is given
 * to the parser untouched, together with the declared charset.
 *
 * @param string $htmlContent The HTML markup to parse
 * @param string $charset     The charset the markup is declared to use
 *
 * @return \DOMDocument The document returned by the HTML5 parser
 */
private function parseHtml5(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
{
    if (!$this->supportsEncoding($charset)) {
        // Unknown charset: normalise the content ourselves and tell the
        // parser to treat the result as UTF-8.
        $htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
        $charset = 'UTF-8';
    }

    return $this->html5Parser->parse($htmlContent, ['encoding' => $charset]);
}
1161
+
1162
/**
 * Tells whether mb_convert_encoding() accepts the given encoding name.
 *
 * The check converts an empty string from UTF-8 to $encoding and only
 * succeeds when the result is exactly an empty string.
 *
 * @param string $encoding The encoding name to probe
 *
 * @return bool True when the probe conversion returns '', false when it
 *              returns anything else or throws
 */
private function supportsEncoding(string $encoding): bool
{
    try {
        // "@" silences the warning emitted for an unknown encoding; a
        // non-string result (e.g. false) fails the strict comparison.
        return '' === @mb_convert_encoding('', $encoding, 'UTF-8');
    } catch (\Throwable $e) {
        // Any error or exception raised by the probe means "not supported".
        return false;
    }
}
1156
1170
1157
1171
private function parseXhtml (string $ htmlContent , string $ charset = 'UTF-8 ' ): \DOMDocument
1158
1172
{
1159
- $ htmlContent = $ this ->convertToHtmlEntities ($ htmlContent , $ charset );
1173
+ if ('UTF-8 ' === $ charset && preg_match ('//u ' , $ htmlContent )) {
1174
+ $ htmlContent = '<?xml encoding="UTF-8"> ' .$ htmlContent ;
1175
+ } else {
1176
+ $ htmlContent = $ this ->convertToHtmlEntities ($ htmlContent , $ charset );
1177
+ }
1160
1178
1161
1179
$ internalErrors = libxml_use_internal_errors (true );
1162
1180
if (\LIBXML_VERSION < 20900 ) {
Original file line number Diff line number Diff line change @@ -194,6 +194,10 @@ public function testAddContent()
194
194
$ crawler = $ this ->createCrawler ();
195
195
$ crawler ->addContent ($ this ->getDoctype ().'<html><meta http-equiv="Content-Type" content="text/html; charset=unicode" /><div class="foo"></html></html> ' );
196
196
$ this ->assertEquals ('foo ' , $ crawler ->filterXPath ('//div ' )->attr ('class ' ), '->addContent() ignores bad charset ' );
197
+
198
+ $ crawler = $ this ->createCrawler ();
199
+ $ crawler ->addContent ($ this ->getDoctype ().'<html><script>var foo = "bär";</script></html> ' , 'text/html; charset=UTF-8 ' );
200
+ $ this ->assertEquals ('var foo = "bär"; ' , $ crawler ->filterXPath ('//script ' )->text (), '->addContent() does not interfere with script content ' );
197
201
}
198
202
199
203
/**
You can’t perform that action at this time.
0 commit comments