Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit fb0a626

Browse filesBrowse files
committed
enh(ci): Add tests for polynomial regex issues
1 parent fa46dd1 commit fb0a626
Copy full SHA for fb0a626

File tree

1 file changed

+196
-2
lines changed
Filter options

1 file changed

+196
-2
lines changed

‎test/regex/index.js

Copy file name to clipboardExpand all lines: test/regex/index.js
+196-2Lines changed: 196 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ hljs.debugMode();
1515
*/
1616
const expBacktrackingCache = {};
1717

18+
/**
19+
* A map for a regex pattern to whether or not it it vulnerable to polynomial backtracking.
20+
*
21+
* @type {Record<string, boolean>}
22+
*/
23+
const polyBacktrackingCache = {};
24+
1825
function retrieveRules(language, { name }) {
1926
// first we need to get the language compiled so we have
2027
// access to the raw regex
@@ -56,7 +63,7 @@ function testLanguage(languageName) {
5663
// });
5764
// });
5865

59-
it(`has ${rules.length} regex matchers`, () => {} );
66+
it(`have ${rules.length} regex matchers`, () => {} );
6067

6168
it('should not use octal escapes', function() {
6269
forEachPattern(rules, ({ ast, rulePath, reportError }) => {
@@ -194,7 +201,194 @@ function testLanguage(languageName) {
194201
expBacktrackingCache[patternStr] = false;
195202
});
196203
});
204+
it('should not cause polynomial backtracking', function () {
205+
forEachPattern(rules, ({ pattern, ast, rulePath, reportError }) => {
206+
const patternStr = String(pattern);
207+
if (polyBacktrackingCache[patternStr] === false) {
208+
// we know that the pattern won't cause poly backtracking because we checked before
209+
return;
210+
}
211+
212+
const EMPTY = ast.flags.unicode ? CharSet.empty(0x10FFFF) : CharSet.empty(0xFFFF);
213+
214+
/**
215+
* @param {Node} node
216+
* @returns {CharSet}
217+
*/
218+
function toCharSet(node) {
219+
switch (node.type) {
220+
case "Alternative": {
221+
if (node.elements.length === 1) {
222+
return toCharSet(node.elements[0]);
223+
}
224+
return EMPTY;
225+
}
226+
case "CapturingGroup":
227+
case "Group": {
228+
let total = EMPTY;
229+
for (const item of node.alternatives) {
230+
total = total.union(toCharSet(item));
231+
}
232+
return total;
233+
}
234+
case "Character":
235+
return JS.createCharSet([node.value], ast.flags);
236+
case "CharacterClass": {
237+
const value = JS.createCharSet(node.elements.map(x => {
238+
if (x.type === "CharacterSet") {
239+
return x;
240+
} else if (x.type === "Character") {
241+
return x.value;
242+
} else {
243+
return { min: x.min.value, max: x.max.value };
244+
}
245+
}), ast.flags);
246+
if (node.negate) {
247+
return value.negate();
248+
} else {
249+
return value;
250+
}
251+
}
252+
case "CharacterSet":
253+
return JS.createCharSet([node], ast.flags);
254+
255+
default:
256+
return EMPTY;
257+
}
258+
}
259+
260+
/**
261+
* @param {Element} from
262+
* @returns {Element | null}
263+
*/
264+
function getAfter(from) {
265+
const parent = from.parent;
266+
if (parent.type === "Quantifier") {
267+
return getAfter(parent);
268+
} else if (parent.type === "Alternative") {
269+
const index = parent.elements.indexOf(from);
270+
const after = parent.elements[index + 1];
271+
if (after) {
272+
return after;
273+
} else {
274+
const grandParent = parent.parent;
275+
if (grandParent.type === "Pattern") {
276+
return null;
277+
} else {
278+
return getAfter(grandParent);
279+
}
280+
}
281+
} else {
282+
throw Error("Unreachable");
283+
}
284+
}
285+
286+
visitRegExpAST(ast.pattern, {
287+
onQuantifierLeave(node) {
288+
if (node.max !== Infinity) {
289+
return;
290+
}
291+
const char = toCharSet(node.element);
292+
tryReachUntil(getAfter(node), char, null);
293+
294+
/**
295+
* @param {Quantifier} quantifier
296+
* @param {CharSet} char
297+
*/
298+
function assertNoPoly(quantifier, char) {
299+
if (quantifier.max === Infinity) {
300+
const qChar = toCharSet(quantifier.element);
301+
if (qChar && !qChar.isDisjointWith(char)) {
302+
const intersection = qChar.intersect(char);
303+
const literal = JS.toLiteral({
304+
type: "Concatenation",
305+
elements: [
306+
{ type: "CharacterClass", characters: intersection }
307+
]
308+
})
309+
const lang = `/${literal.source}/${literal.flags}`;
310+
311+
const rangeStr = patternStr.substring(node.start + 1, quantifier.end + 1);
312+
const rangeHighlight = `^${"~".repeat(node.end - node.start - 1)}${" ".repeat(quantifier.start - node.end)}^${"~".repeat(quantifier.end - quantifier.start - 1)}`;
313+
314+
reportError(`${rulePath}: Polynomial backtracking. By repeating any character that matches ${lang}, an attack string can be created.\n\n ${rangeStr}\n ${rangeHighlight}\n\nFull pattern:\n${patternStr}\n${" ".repeat(node.start + 1)}${rangeHighlight}`);
315+
}
316+
}
317+
}
197318

319+
/**
320+
* @param {Element | null | undefined} element
321+
* @param {CharSet} char
322+
* @param {Element | null | undefined} until
323+
* @returns {CharSet}
324+
*/
325+
function tryReachUntil(element, char, until) {
326+
if (!element || element == until || char.isEmpty) {
327+
return char;
328+
}
329+
330+
const after = getAfter(element);
331+
332+
if (element.type === "Quantifier") {
333+
assertNoPoly(element, char);
334+
}
335+
336+
return tryReachUntil(after, goInto(element, after, char), until);
337+
}
338+
339+
/**
340+
* @param {Element} element
341+
* @param {Element} after
342+
* @param {CharSet} char
343+
* @returns {CharSet}
344+
*/
345+
function goInto(element, after, char) {
346+
switch (element.type) {
347+
case "Assertion": {
348+
if (element.kind === "lookahead" || element.kind === "lookbehind") {
349+
for (const alt of element.alternatives) {
350+
if (alt.elements.length > 0) {
351+
tryReachUntil(alt.elements[0], char, after);
352+
}
353+
}
354+
}
355+
return EMPTY;
356+
}
357+
case "Group":
358+
case "CapturingGroup": {
359+
let total = EMPTY;
360+
for (const alt of element.alternatives) {
361+
if (alt.elements.length > 0) {
362+
total = total.union(tryReachUntil(alt.elements[0], char, after));
363+
} else {
364+
total = char;
365+
}
366+
}
367+
return total;
368+
}
369+
case "Character":
370+
case "CharacterClass":
371+
case "CharacterSet": {
372+
return char.intersect(toCharSet(element));
373+
}
374+
case "Quantifier": {
375+
if (element.min === 0) {
376+
goInto(element.element, after, char);
377+
return char;
378+
} else {
379+
return goInto(element.element, after, char);
380+
}
381+
}
382+
default:
383+
return EMPTY;
384+
}
385+
}
386+
},
387+
});
388+
389+
polyBacktrackingCache[patternStr] = false;
390+
});
391+
});
198392
});
199393
}
200394

@@ -209,5 +403,5 @@ for (const language of languages) {
209403
}
210404

211405
describe("COMBINED: All grammars", () => {
212-
it(`has ${count} total regex`, () => {});
406+
it(`have ${count} total regex`, () => {});
213407
});

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.