Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 80438c2

Browse filesBrowse files
committed
Fixed the XPath filtering to have the same behavior as Symfony 2.4
1 parent 711ac32 commit 80438c2
Copy full SHA for 80438c2

File tree

Expand file treeCollapse file tree

1 file changed

+101
-13
lines changed
Filter options
Expand file treeCollapse file tree

1 file changed

+101
-13
lines changed

‎src/Symfony/Component/DomCrawler/Crawler.php

Copy file name to clipboardExpand all lines: src/Symfony/Component/DomCrawler/Crawler.php
+101-13Lines changed: 101 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ public function addHtmlContent($content, $charset = 'UTF-8')
170170

171171
$this->addDocument($dom);
172172

173-
$base = $this->filterXPath('descendant-or-self::base')->extract(array('href'));
173+
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));
174174

175175
$baseHref = current($base);
176176
if (count($base) && !empty($baseHref)) {
@@ -580,6 +580,11 @@ public function extract($attributes)
580580
/**
581581
* Filters the list of nodes with an XPath expression.
582582
*
583+
* The XPath expression is evaluated in the context of the crawler, which
584+
* is considered as a fake parent of the elements inside it.
585+
* This means that a child selector "div" or "./div" will match only
586+
* the div elements of the current crawler, not their children.
587+
*
583588
* @param string $xpath An XPath expression
584589
*
585590
* @return Crawler A new instance of Crawler with the filtered list of nodes
@@ -588,14 +593,14 @@ public function extract($attributes)
588593
*/
589594
public function filterXPath($xpath)
590595
{
591-
$crawler = new static(null, $this->uri);
596+
$xpath = $this->relativize($xpath);
592597

593-
foreach ($this as $node) {
594-
$domxpath = new \DOMXPath($node->ownerDocument);
595-
$crawler->add($domxpath->query($xpath, $node));
598+
// If we dropped all expressions in the XPath while preparing it, there would be no match
599+
if ('' === $xpath) {
600+
return new static(null, $this->uri);
596601
}
597602

598-
return $crawler;
603+
return $this->filterRelativeXPath($xpath);
599604
}
600605

601606
/**
@@ -619,7 +624,8 @@ public function filter($selector)
619624
// @codeCoverageIgnoreEnd
620625
}
621626

622-
return $this->filterXPath(CssSelector::toXPath($selector));
627+
// The CssSelector already prefixes the selector with descendant-or-self::
628+
return $this->filterRelativeXPath(CssSelector::toXPath($selector));
623629
}
624630

625631
/**
@@ -633,10 +639,10 @@ public function filter($selector)
633639
*/
634640
public function selectLink($value)
635641
{
636-
$xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')).
637-
sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' '));
642+
$xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')).
643+
sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' '));
638644

639-
return $this->filterXPath($xpath);
645+
return $this->filterRelativeXPath($xpath);
640646
}
641647

642648
/**
@@ -651,11 +657,11 @@ public function selectLink($value)
651657
public function selectButton($value)
652658
{
653659
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
654-
$xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
660+
$xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
655661
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value).
656-
sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
662+
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
657663

658-
return $this->filterXPath($xpath);
664+
return $this->filterRelativeXPath($xpath);
659665
}
660666

661667
/**
@@ -771,6 +777,88 @@ public static function xpathLiteral($s)
771777
return sprintf("concat(%s)", implode($parts, ', '));
772778
}
773779

780+
/**
 * Runs an already-relativized XPath expression against every node of this crawler.
 *
 * The expression is evaluated once per node, using that node as the XPath
 * context node, and all results are collected into a new crawler that keeps
 * the URI of the current one.
 *
 * @param string $xpath An XPath expression that is already relative to each node
 *
 * @return Crawler A new crawler holding the nodes matched from every context node
 */
private function filterRelativeXPath($xpath)
{
    $filtered = new static(null, $this->uri);

    foreach ($this as $contextNode) {
        // One evaluator per node, bound to the document owning that node.
        $evaluator = new \DOMXPath($contextNode->ownerDocument);
        $matches = $evaluator->query($xpath, $contextNode);

        $filtered->add($matches);
    }

    return $filtered;
}
800+
801+
/**
 * Makes the XPath relative to the current context.
 *
 * Each operand of a union is rewritten so that, when the result is evaluated
 * with a node of the crawler as context node, it matches the elements the
 * original expression would match inside the current crawler.
 *
 * @param string $xpath The XPath expression to rewrite
 *
 * @return string The rewritten expression, with its operands joined by " | "
 */
private function relativize($xpath)
{
    $expressions = array();

    // Intended to match a union operator that is not inside a [...] predicate.
    $unionPattern = '/\|(?![^\[]*\])/';
    // An expression which will never match, used to replace expressions which cannot match
    // in the crawler. We cannot simply drop such an expression: any opening parenthesis
    // preserved in front of it must still be followed by a valid operand.
    $nonMatchingExpression = 'a[name() = "b"]';

    // Split any unions into individual expressions.
    foreach (preg_split($unionPattern, $xpath) as $expression) {
        $expression = trim($expression);
        $parenthesis = '';

        // If the union is inside some parentheses, we need to preserve the opening parentheses
        // and apply the change only inside them. The character class deliberately does not
        // contain "*": a leading "*" is a wildcard node test and must stay in the expression.
        if (preg_match('/^[\(\s]+/', $expression, $matches)) {
            $parenthesis = $matches[0];
            $expression = substr($expression, strlen($parenthesis));
        }

        // BC for Symfony 2.4 and lower, where elements were added to a fake _root parent
        if (0 === strpos($expression, '/_root/')) {
            $expression = './'.substr($expression, 7);
        }

        // add prefix before absolute element selector
        if (empty($expression)) {
            $expression = $nonMatchingExpression;
        } elseif (0 === strpos($expression, '//')) {
            $expression = 'descendant-or-self::' . substr($expression, 2);
        } elseif (0 === strpos($expression, './/')) {
            // must be tested before './': otherwise './/div' would be rewritten
            // to 'self::/div', which is not a valid XPath expression
            $expression = 'descendant-or-self::' . substr($expression, 3);
        } elseif (0 === strpos($expression, './')) {
            $expression = 'self::' . substr($expression, 2);
        } elseif ('/' === $expression[0]) {
            // the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
            // so let's replace the expression entirely by one which cannot match
            $expression = $nonMatchingExpression;
        } elseif ('.' === $expression[0]) {
            // '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
            $expression = $nonMatchingExpression;
        } elseif (0 === strpos($expression, 'descendant::')) {
            $expression = 'descendant-or-self::' . substr($expression, strlen('descendant::'));
        } elseif (0 !== strpos($expression, 'descendant-or-self::')) {
            $expression = 'self::' .$expression;
        }
        $expressions[] = $parenthesis.$expression;
    }

    return implode(' | ', $expressions);
}
861+
774862
/**
775863
* @param int $position
776864
*

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.