Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 991c2ba

Browse files
committed
feature #48940 [DomCrawler] Add argument $normalizeWhitespace to Crawler::innerText() and make it return the first non-empty text (otsch)
This PR was merged into the 6.3 branch.

Discussion
----------

[DomCrawler] Add argument `$normalizeWhitespace` to `Crawler::innerText()` and make it return the first non-empty text

This is a new PR instead of #48684 with target branch 6.3 as requested.

| Q             | A
| ------------- | ---
| Branch?       | 6.3
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Tickets       | Fix #48682
| License       | MIT

Commits
-------

bb0c214 [DomCrawler] Add argument `$normalizeWhitespace` to `Crawler::innerText()` and make it return the first non-empty text
2 parents be5fbce + bb0c214 commit 991c2ba
Copy full SHA for 991c2ba

File tree

3 files changed

+80
-10
lines changed
Filter options

3 files changed

+80
-10
lines changed

‎src/Symfony/Component/DomCrawler/CHANGELOG.md

Copy file name to clipboardExpand all lines: src/Symfony/Component/DomCrawler/CHANGELOG.md
+2Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ CHANGELOG
55
---
66

77
* Add `CrawlerSelectorCount` test constraint
8+
* Add argument `$normalizeWhitespace` to `Crawler::innerText()`
9+
* Make `Crawler::innerText()` return the first non-empty text
810

911
6.0
1012
---

‎src/Symfony/Component/DomCrawler/Crawler.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/DomCrawler/Crawler.php
+24-3Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -553,18 +553,34 @@ public function text(string $default = null, bool $normalizeWhitespace = true):
553553
$text = $this->getNode(0)->nodeValue;
554554

555555
if ($normalizeWhitespace) {
556-
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $text), " \n\r\t\x0C");
556+
return $this->normalizeWhitespace($text);
557557
}
558558

559559
return $text;
560560
}
561561

562562
/**
563563
* Returns only the inner text that is the direct descendent of the current node, excluding any child nodes.
564+
*
565+
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
564566
*/
565-
public function innerText(): string
567+
public function innerText(/* bool $normalizeWhitespace = true */): string
566568
{
567-
return $this->filterXPath('.//text()')->text();
569+
$normalizeWhitespace = 1 <= \func_num_args() ? func_get_arg(0) : true;
570+
571+
foreach ($this->getNode(0)->childNodes as $childNode) {
572+
if (\XML_TEXT_NODE !== $childNode->nodeType) {
573+
continue;
574+
}
575+
if (!$normalizeWhitespace) {
576+
return $childNode->nodeValue;
577+
}
578+
if ('' !== trim($childNode->nodeValue)) {
579+
return $this->normalizeWhitespace($childNode->nodeValue);
580+
}
581+
}
582+
583+
return '';
568584
}
569585

570586
/**
@@ -1189,4 +1205,9 @@ private function isValidHtml5Heading(string $heading): bool
11891205
{
11901206
return 1 === preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);
11911207
}
1208+
1209+
private function normalizeWhitespace(string $string): string
1210+
{
1211+
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string), " \n\r\t\x0C");
1212+
}
11921213
}

‎src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/DomCrawler/Tests/AbstractCrawlerTest.php
+54-7Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -348,12 +348,56 @@ public function testText()
348348
$this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->text('my value'));
349349
}
350350

351-
public function testInnerText()
351+
public function provideInnerTextExamples()
352352
{
353-
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath('//*[@id="complex-element"]'));
353+
return [
354+
[
355+
'//*[@id="complex-elements"]/*[@class="one"]', // XPath query
356+
'Parent text Child text', // Result of Crawler::text()
357+
'Parent text', // Result of Crawler::innerText()
358+
' Parent text ', // Result of Crawler::innerText(false)
359+
],
360+
[
361+
'//*[@id="complex-elements"]/*[@class="two"]',
362+
'Child text Parent text',
363+
'Parent text',
364+
' ',
365+
],
366+
[
367+
'//*[@id="complex-elements"]/*[@class="three"]',
368+
'Parent text Child text Parent text',
369+
'Parent text',
370+
' Parent text ',
371+
],
372+
[
373+
'//*[@id="complex-elements"]/*[@class="four"]',
374+
'Child text',
375+
'',
376+
' ',
377+
],
378+
[
379+
'//*[@id="complex-elements"]/*[@class="five"]',
380+
'Child text Another child',
381+
'',
382+
' ',
383+
],
384+
];
385+
}
386+
387+
/**
388+
* @dataProvider provideInnerTextExamples
389+
*/
390+
public function testInnerText(
391+
string $xPathQuery,
392+
string $expectedText,
393+
string $expectedInnerText,
394+
string $expectedInnerTextNormalizeWhitespaceFalse,
395+
) {
396+
self::assertCount(1, $crawler = $this->createTestCrawler()->filterXPath($xPathQuery));
354397

355-
self::assertSame('Parent text Child text', $crawler->text());
356-
self::assertSame('Parent text', $crawler->innerText());
398+
self::assertSame($expectedText, $crawler->text());
399+
self::assertSame($expectedInnerText, $crawler->innerText());
400+
self::assertSame($expectedInnerTextNormalizeWhitespaceFalse, $crawler->innerText(false));
357401
}
358402

359403
public function testHtml()
@@ -1265,9 +1309,12 @@ public function createTestCrawler($uri = null)
12651309
<div id="child2" xmlns:foo="http://example.com"></div>
12661310
</div>
12671311
<div id="sibling"><img /></div>
1268-
<div id="complex-element">
1269-
Parent text
1270-
<span>Child text</span>
1312+
<div id="complex-elements">
1313+
<div class="one"> Parent text <span>Child text</span> </div>
1314+
<div class="two"> <span>Child text</span> Parent text </div>
1315+
<div class="three"> Parent text <span>Child text</span> Parent text </div>
1316+
<div class="four"> <span>Child text</span> </div>
1317+
<div class="five"><span>Child text</span> <span>Another child</span></div>
12711318
</div>
12721319
</body>
12731320
</html>

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.