-
-
Notifications
You must be signed in to change notification settings - Fork 9.6k
#20411 fix Yaml parsing for very long quoted strings #21523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,7 @@ public function __construct($offset = 0, $totalNumberOfLines = null, array $skip | |
*/ | ||
public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false, $objectForMap = false) | ||
{ | ||
if (!preg_match('//u', $value)) { | ||
if (false === preg_match('//u', $value)) { | ||
throw new ParseException('The YAML value does not appear to be valid UTF-8.'); | ||
} | ||
$this->currentLineNb = -1; | ||
|
@@ -92,13 +92,13 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = | |
} | ||
|
||
$isRef = $mergeNode = false; | ||
if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) { | ||
if (self::preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+))?$#u', rtrim($this->currentLine), $values)) { | ||
if ($context && 'mapping' == $context) { | ||
throw new ParseException('You cannot define a sequence item when in a mapping', $this->getRealCurrentLineNb() + 1, $this->currentLine); | ||
} | ||
$context = 'sequence'; | ||
|
||
if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { | ||
if (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { | ||
$isRef = $matches['ref']; | ||
$values['value'] = $matches['value']; | ||
} | ||
|
@@ -108,7 +108,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = | |
$data[] = $this->parseBlock($this->getRealCurrentLineNb() + 1, $this->getNextEmbedBlock(null, true), $exceptionOnInvalidType, $objectSupport, $objectForMap); | ||
} else { | ||
if (isset($values['leadspaces']) | ||
&& preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches) | ||
&& self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($values['value']), $matches) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. see comment on line 127 |
||
) { | ||
// this is a compact notation element, add to next block and parse | ||
$block = $values['value']; | ||
|
@@ -124,7 +124,10 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = | |
if ($isRef) { | ||
$this->refs[$isRef] = end($data); | ||
} | ||
} elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values) && (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'")))) { | ||
} elseif ( | ||
self::preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+))?$#u', rtrim($this->currentLine), $values) | ||
&& (false === strpos($values['key'], ' #') || in_array($values['key'][0], array('"', "'"))) | ||
) { | ||
if ($context && 'sequence' == $context) { | ||
throw new ParseException('You cannot define a mapping item when in a sequence', $this->currentLineNb + 1, $this->currentLine); | ||
} | ||
|
@@ -203,7 +206,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = | |
} | ||
} | ||
} | ||
} elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { | ||
} elseif (isset($values['value']) && self::preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) { | ||
$isRef = $matches['ref']; | ||
$values['value'] = $matches['value']; | ||
} | ||
|
@@ -266,27 +269,7 @@ public function parse($value, $exceptionOnInvalidType = false, $objectSupport = | |
return $value; | ||
} | ||
|
||
switch (preg_last_error()) { | ||
case PREG_INTERNAL_ERROR: | ||
$error = 'Internal PCRE error.'; | ||
break; | ||
case PREG_BACKTRACK_LIMIT_ERROR: | ||
$error = 'pcre.backtrack_limit reached.'; | ||
break; | ||
case PREG_RECURSION_LIMIT_ERROR: | ||
$error = 'pcre.recursion_limit reached.'; | ||
break; | ||
case PREG_BAD_UTF8_ERROR: | ||
$error = 'Malformed UTF-8 data.'; | ||
break; | ||
case PREG_BAD_UTF8_OFFSET_ERROR: | ||
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.'; | ||
break; | ||
default: | ||
$error = 'Unable to parse.'; | ||
} | ||
|
||
throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine); | ||
throw new ParseException('Unable to parse', $this->getRealCurrentLineNb() + 1, $this->currentLine); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will this now ever be reached anymore? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this line is covered by 3 tests:
|
||
} | ||
} | ||
|
||
|
@@ -520,7 +503,7 @@ private function parseValue($value, $exceptionOnInvalidType, $objectSupport, $ob | |
return $this->refs[$value]; | ||
} | ||
|
||
if (preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { | ||
if (self::preg_match('/^'.self::BLOCK_SCALAR_HEADER_PATTERN.'$/', $value, $matches)) { | ||
$modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : ''; | ||
|
||
return $this->parseBlockScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), (int) abs($modifiers)); | ||
|
@@ -566,7 +549,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0) | |
|
||
// determine indentation if not specified | ||
if (0 === $indentation) { | ||
if (preg_match('/^ +/', $this->currentLine, $matches)) { | ||
if (self::preg_match('/^ +/', $this->currentLine, $matches)) { | ||
$indentation = strlen($matches[0]); | ||
} | ||
} | ||
|
@@ -577,7 +560,7 @@ private function parseBlockScalar($style, $chomping = '', $indentation = 0) | |
while ( | ||
$notEOF && ( | ||
$isCurrentLineBlank || | ||
preg_match($pattern, $this->currentLine, $matches) | ||
self::preg_match($pattern, $this->currentLine, $matches) | ||
) | ||
) { | ||
if ($isCurrentLineBlank && strlen($this->currentLine) > $indentation) { | ||
|
@@ -800,6 +783,49 @@ private function isStringUnIndentedCollectionItem() | |
*/ | ||
private function isBlockScalarHeader() | ||
{ | ||
return (bool) preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine); | ||
return (bool) self::preg_match('~'.self::BLOCK_SCALAR_HEADER_PATTERN.'$~', $this->currentLine); | ||
} | ||
|
||
/** | ||
* A local wrapper for `preg_match` which will throw a ParseException if there | ||
* is an internal error in the PCRE engine. | ||
* | ||
* This avoids us needing to check for "false" every time PCRE is used | ||
* in the YAML engine | ||
* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. must be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, will do |
||
* @throws ParseException on a PCRE internal error | ||
* | ||
* @see preg_last_error() | ||
* | ||
* @internal | ||
*/ | ||
public static function preg_match($pattern, $subject, &$matches = null, $flags = 0, $offset = 0) | ||
{ | ||
$ret = preg_match($pattern, $subject, $matches, $flags, $offset); | ||
if ($ret === false) { | ||
switch (preg_last_error()) { | ||
case PREG_INTERNAL_ERROR: | ||
$error = 'Internal PCRE error.'; | ||
break; | ||
case PREG_BACKTRACK_LIMIT_ERROR: | ||
$error = 'pcre.backtrack_limit reached.'; | ||
break; | ||
case PREG_RECURSION_LIMIT_ERROR: | ||
$error = 'pcre.recursion_limit reached.'; | ||
break; | ||
case PREG_BAD_UTF8_ERROR: | ||
$error = 'Malformed UTF-8 data.'; | ||
break; | ||
case PREG_BAD_UTF8_OFFSET_ERROR: | ||
$error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.'; | ||
break; | ||
default: | ||
$error = 'Error.'; | ||
} | ||
|
||
throw new ParseException($error); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should not be reachable, but should hopefully mean that any further undetected "backtrack_limit" bugs, or any similar bugs added in the future, will result in an exception rather than an incorrect result. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this misses the location of the error in the ParseException There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The location is not always available, unless I make two wrappers -- one static, for Inline and other callers, and one non-static, for use during the parse. Do you think that's worthwhile? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could pass the needed context to the new method. Might not look nice, but will do what it should. |
||
} | ||
|
||
return $ret; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See comment on line 127 below