Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

[DomCrawler] Fixed filterXPath() chaining loosing the parent DOM nodes #10935

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 105 additions & 17 deletions 122 src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ public function addHtmlContent($content, $charset = 'UTF-8')

$this->addDocument($dom);

$base = $this->filterXPath('descendant-or-self::base')->extract(array('href'));
$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(array('href'));

$baseHref = current($base);
if (count($base) && !empty($baseHref)) {
Expand Down Expand Up @@ -459,7 +459,7 @@ public function parents()
$nodes = array();

while ($node = $node->parentNode) {
if (1 === $node->nodeType && '_root' !== $node->nodeName) {
if (1 === $node->nodeType) {
$nodes[] = $node;
}
}
Expand Down Expand Up @@ -596,6 +596,11 @@ public function extract($attributes)
/**
* Filters the list of nodes with an XPath expression.
*
* The XPath expression is evaluated in the context of the crawler, which
* is considered as a fake parent of the elements inside it.
* This means that a child selector "div" or "./div" will match only
* the div elements of the current crawler, not their children.
*
* @param string $xpath An XPath expression
*
* @return Crawler A new instance of Crawler with the filtered list of nodes
Expand All @@ -604,16 +609,14 @@ public function extract($attributes)
*/
public function filterXPath($xpath)
{
$document = new \DOMDocument('1.0', 'UTF-8');
$root = $document->appendChild($document->createElement('_root'));
foreach ($this as $node) {
$root->appendChild($document->importNode($node, true));
}
$xpath = $this->relativize($xpath);

$prefixes = $this->findNamespacePrefixes($xpath);
$domxpath = $this->createDOMXPath($document, $prefixes);
// If we dropped all expressions in the XPath while preparing it, there would be no match
if ('' === $xpath) {
return new static(null, $this->uri);
}

return new static($domxpath->query($xpath), $this->uri);
return $this->filterRelativeXPath($xpath);
}

/**
Expand All @@ -635,7 +638,8 @@ public function filter($selector)
throw new \RuntimeException('Unable to filter with a CSS selector as the Symfony CssSelector is not installed (you can use filterXPath instead).');
}

return $this->filterXPath(CssSelector::toXPath($selector));
// The CssSelector already prefixes the selector with descendant-or-self::
return $this->filterRelativeXPath(CssSelector::toXPath($selector));
}

/**
Expand All @@ -649,10 +653,10 @@ public function filter($selector)
*/
public function selectLink($value)
{
$xpath = sprintf('//a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s)] ', static::xpathLiteral(' '.$value.' ')).
sprintf('| //a/img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]/ancestor::a', static::xpathLiteral(' '.$value.' '));
$xpath = sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) ', static::xpathLiteral(' '.$value.' ')).
sprintf('or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)]]', static::xpathLiteral(' '.$value.' '));

return $this->filterXPath($xpath);
return $this->filterRelativeXPath($xpath);
}

/**
Expand All @@ -667,11 +671,11 @@ public function selectLink($value)
public function selectButton($value)
{
$translate = 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")';
$xpath = sprintf('//input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
$xpath = sprintf('descendant-or-self::input[((contains(%s, "submit") or contains(%s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %s)) ', $translate, $translate, static::xpathLiteral(' '.$value.' ')).
sprintf('or (contains(%s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %s)) or @id="%s" or @name="%s"] ', $translate, static::xpathLiteral(' '.$value.' '), $value, $value).
sprintf('| //button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);
sprintf('| descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %s) or @id="%s" or @name="%s"]', static::xpathLiteral(' '.$value.' '), $value, $value);

return $this->filterXPath($xpath);
return $this->filterRelativeXPath($xpath);
}

/**
Expand Down Expand Up @@ -806,6 +810,90 @@ public static function xpathLiteral($s)
return sprintf("concat(%s)", implode($parts, ', '));
}

/**
* Filters the list of nodes with an XPath expression.
*
* The XPath expression should already be processed to apply it in the context of each node.
*
* @param string $xpath
*
* @return Crawler
*/
private function filterRelativeXPath($xpath)
{
$prefixes = $this->findNamespacePrefixes($xpath);

$crawler = new static(null, $this->uri);

foreach ($this as $node) {
$domxpath = $this->createDOMXPath($node->ownerDocument, $prefixes);
$crawler->add($domxpath->query($xpath, $node));
}

return $crawler;
}

/**
* Make the XPath relative to the current context.
*
* The returned XPath will match elements matching the XPath inside the current crawler
* when running in the context of a node of the crawler.
*
* @param string $xpath
*
* @return string
*/
private function relativize($xpath)
{
$expressions = array();

$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
// We cannot simply drop
$nonMatchingExpression = 'a[name() = "b"]';

// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';

// If the union is inside some braces, we need to preserve the opening braces and apply
// the change only inside it.
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
$parenthesis = $matches[0];
$expression = substr($expression, strlen($parenthesis));
}

// BC for Symfony 2.4 and lower were elements were adding in a fake _root parent
if (0 === strpos($expression, '/_root/')) {
$expression = './'.substr($expression, 7);
}

// add prefix before absolute element selector
if (empty($expression)) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, '//')) {
$expression = 'descendant-or-self::' . substr($expression, 2);
} elseif (0 === strpos($expression, './')) {
$expression = 'self::' . substr($expression, 2);
} elseif ('/' === $expression[0]) {
// the only direct child in Symfony 2.4 and lower is _root, which is already handled previously
// so let's drop the expression entirely
$expression = $nonMatchingExpression;
} elseif ('.' === $expression[0]) {
// '.' is the fake root element in Symfony 2.4 and lower, which is excluded from results
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, 'descendant::')) {
$expression = 'descendant-or-self::' . substr($expression, strlen('descendant::'));
} elseif (0 !== strpos($expression, 'descendant-or-self::')) {
$expression = 'self::' .$expression;
}
$expressions[] = $parenthesis.$expression;
}

return implode(' | ', $expressions);
}

/**
* @param int $position
*
Expand Down
8 changes: 1 addition & 7 deletions 8 src/Symfony/Component/DomCrawler/Field/FormField.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,7 @@ public function __construct(\DOMElement $node)
{
$this->node = $node;
$this->name = $node->getAttribute('name');

$this->document = new \DOMDocument('1.0', 'UTF-8');
$this->node = $this->document->importNode($this->node, true);

$root = $this->document->appendChild($this->document->createElement('_root'));
$root->appendChild($this->node);
$this->xpath = new \DOMXPath($this->document);
$this->xpath = new \DOMXPath($node->ownerDocument);

$this->initialize();
}
Expand Down
26 changes: 9 additions & 17 deletions 26 src/Symfony/Component/DomCrawler/Form.php
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,7 @@ private function initialize()
{
$this->fields = new FormFieldRegistry();

$document = new \DOMDocument('1.0', 'UTF-8');
$xpath = new \DOMXPath($document);
$root = $document->appendChild($document->createElement('_root'));
$xpath = new \DOMXPath($this->node->ownerDocument);

// add submitted button if it has a valid name
if ('form' !== $this->button->nodeName && $this->button->hasAttribute('name') && $this->button->getAttribute('name')) {
Expand All @@ -416,38 +414,32 @@ private function initialize()

// temporarily change the name of the input node for the x coordinate
$this->button->setAttribute('name', $name.'.x');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));

// temporarily change the name of the input node for the y coordinate
$this->button->setAttribute('name', $name.'.y');
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));

// restore the original name of the input node
$this->button->setAttribute('name', $name);
} else {
$this->set(new Field\InputFormField($document->importNode($this->button, true)));
$this->set(new Field\InputFormField($this->button));
}
}

// find form elements corresponding to the current form
if ($this->node->hasAttribute('id')) {
// traverse through the whole document
$node = $document->importNode($this->node->ownerDocument->documentElement, true);
$root->appendChild($node);

// corresponding elements are either descendants or have a matching HTML5 form attribute
$formId = Crawler::xpathLiteral($this->node->getAttribute('id'));
$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId), $root);

$fieldNodes = $xpath->query(sprintf('descendant::input[@form=%s] | descendant::button[@form=%s] | descendant::textarea[@form=%s] | descendant::select[@form=%s] | //form[@id=%s]//input[not(@form)] | //form[@id=%s]//button[not(@form)] | //form[@id=%s]//textarea[not(@form)] | //form[@id=%s]//select[not(@form)]', $formId, $formId, $formId, $formId, $formId, $formId, $formId, $formId));
foreach ($fieldNodes as $node) {
$this->addField($node);
}
} else {
// parent form has no id, add descendant elements only
$node = $document->importNode($this->node, true);
$root->appendChild($node);

// descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $root);
// do the xpath query with $this->node as the context node, to only find descendant elements
// however, descendant elements with form attribute are not part of this form
$fieldNodes = $xpath->query('descendant::input[not(@form)] | descendant::button[not(@form)] | descendant::textarea[not(@form)] | descendant::select[not(@form)]', $this->node);
foreach ($fieldNodes as $node) {
$this->addField($node);
}
Expand Down
67 changes: 66 additions & 1 deletion 67 src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,31 @@ public function testFilterXPath()
$this->assertInstanceOf('Symfony\\Component\\DomCrawler\\Crawler', $crawler, '->filterXPath() returns a new instance of a crawler');

$crawler = $this->createTestCrawler()->filterXPath('//ul');

$this->assertCount(6, $crawler->filterXPath('//li'), '->filterXPath() filters the node list with the XPath expression');

$crawler = $this->createTestCrawler();
$this->assertCount(3, $crawler->filterXPath('//body')->filterXPath('//button')->parents(), '->filterXpath() preserves parents when chained');
}

public function testFilterXpathComplexQueries()
{
$crawler = $this->createTestCrawler()->filterXPath('//body');

$this->assertCount(0, $crawler->filterXPath('/input'));
$this->assertCount(0, $crawler->filterXPath('/body'));
$this->assertCount(1, $crawler->filterXPath('/_root/body'));
$this->assertCount(1, $crawler->filterXPath('./body'));
$this->assertCount(4, $crawler->filterXPath('//form')->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('body'));
$this->assertCount(6, $crawler->filterXPath('//button | //input'));
$this->assertCount(1, $crawler->filterXPath('//body'));
$this->assertCount(1, $crawler->filterXPath('descendant-or-self::body'));
$this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child');
$this->assertCount(2, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child');
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
}

public function testFilterXPathWithDefaultNamespace()
Expand Down Expand Up @@ -558,6 +581,44 @@ public function testLink()
}
}

public function testSelectLinkAndLinkFiltered()
{
$html = <<<HTML
<!DOCTYPE html>
<html lang="en">
<body>
<div id="action">
<a href="/index.php?r=site/login">Login</a>
</div>
<form id="login-form" action="/index.php?r=site/login" method="post">
<button type="submit">Submit</button>
</form>
</body>
</html>
HTML;

$crawler = new Crawler($html);
$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'login-form']");

$this->assertCount(0, $filtered->selectLink('Login'));
$this->assertCount(1, $filtered->selectButton('Submit'));

$filtered = $crawler->filterXPath("descendant-or-self::*[@id = 'action']");

$this->assertCount(1, $filtered->selectLink('Login'));
$this->assertCount(0, $filtered->selectButton('Submit'));

$this->assertCount(1, $crawler->selectLink('Login')->selectLink('Login'));
$this->assertCount(1, $crawler->selectButton('Submit')->selectButton('Submit'));
}

public function testChaining()
{
$crawler = new Crawler('<div name="a"><div name="b"><div name="c"></div></div></div>');

$this->assertEquals('a', $crawler->filterXPath('//div')->filterXPath('div')->filterXPath('div')->attr('name'));
}

public function testLinks()
{
$crawler = $this->createTestCrawler('http://example.com/bar/')->selectLink('Foo');
Expand Down Expand Up @@ -768,6 +829,10 @@ public function createTestCrawler($uri = null)
<li>Two Bis</li>
<li>Three Bis</li>
</ul>
<div id="parent">
<div id="child"></div>
</div>
<div id="sibling"><img /></div>
</body>
</html>
');
Expand Down
40 changes: 40 additions & 0 deletions 40 src/Symfony/Component/DomCrawler/Tests/FormTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,20 @@ public function testConstructorThrowsExceptionIfNoRelatedForm()
$form = new Form($nodes->item(1), 'http://example.com');
}

public function testConstructorLoadsOnlyFieldsOfTheRightForm()
{
$dom = $this->createTestMultipleForm();

$nodes = $dom->getElementsByTagName('form');
$buttonElements = $dom->getElementsByTagName('button');

$form = new Form($nodes->item(0), 'http://example.com');
$this->assertCount(3, $form->all());

$form = new Form($buttonElements->item(1), 'http://example.com');
$this->assertCount(5, $form->all());
}

public function testConstructorHandlesFormAttribute()
{
$dom = $this->createTestHtml5Form();
Expand Down Expand Up @@ -860,6 +874,32 @@ protected function createTestHtml5Form()
return $dom;
}

protected function createTestMultipleForm()
{
$dom = new \DOMDocument();
$dom->loadHTML('
<html>
<h1>Hello form</h1>
<form action="" method="POST">
<div><input type="checkbox" name="apples[]" value="1" checked /></div>
<input type="checkbox" name="oranges[]" value="1" checked />
<div><label></label><input type="hidden" name="form_name" value="form-1" /></div>
<input type="submit" name="button_1" value="Capture fields" />
<button type="submit" name="button_2">Submit form_2</button>
</form>
<form action="" method="POST">
<div><div><input type="checkbox" name="oranges[]" value="2" checked />
<input type="checkbox" name="oranges[]" value="3" checked /></div></div>
<input type="hidden" name="form_name" value="form_2" />
<input type="hidden" name="outer_field" value="success" />
<button type="submit" name="button_3">Submit from outside the form</button>
</form>
<button />
</html>');

return $dom;
}

public function testgetPhpValuesWithEmptyTextarea()
{
$dom = new \DOMDocument();
Expand Down
Morty Proxy This is a proxified and sanitized view of the page, visit original site.