From c192a270228ebfb083c45908ab3800c80690bb93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Kuzn=C3=ADk?= Date: Thu, 2 May 2019 15:39:26 +0100 Subject: [PATCH 1/2] Fix OID test --- Tests/t_ldap_schema_tokenizer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Tests/t_ldap_schema_tokenizer.py b/Tests/t_ldap_schema_tokenizer.py index c8581771..459927d3 100644 --- a/Tests/t_ldap_schema_tokenizer.py +++ b/Tests/t_ldap_schema_tokenizer.py @@ -44,8 +44,8 @@ # broken schema of Oracle Internet Directory TESTCASES_BROKEN_OID = ( - ("BLUBB DI 'BLU B B ER'MUST 'BLAH' ", ['BLUBB', 'DI', 'BLU B B ER', 'MUST', 'BLAH']), - ("BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", ["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"]), + "BLUBB DI 'BLU B B ER'MUST 'BLAH' ", #['BLUBB', 'DI', 'BLU B B ER', 'MUST', 'BLAH'] + "BLUBBER DI 'BLU'BB ER' DA 'BLAH' ", #["BLUBBER", "DI", "BLU'BB ER", "DA", "BLAH"] ) # for quoted single quotes inside string values @@ -104,7 +104,6 @@ def test_utf8(self): """ self._run_split_tokens_tests(TESTCASES_UTF8) - @unittest.expectedFailure def test_broken_oid(self): """ run test cases specified in constant TESTCASES_BROKEN_OID From 60bc814e460df1b72b5e1672ca6adcde7ebbb5be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Kuzn=C3=ADk?= Date: Thu, 2 May 2019 15:58:11 +0100 Subject: [PATCH 2/2] Extend tokenizer to handle escapes --- Lib/ldap/schema/tokenizer.py | 10 +++++++--- Tests/t_ldap_schema_tokenizer.py | 1 - 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/ldap/schema/tokenizer.py b/Lib/ldap/schema/tokenizer.py index 20958c09..69823f2b 100644 --- a/Lib/ldap/schema/tokenizer.py +++ b/Lib/ldap/schema/tokenizer.py @@ -13,12 +13,16 @@ r"|" # or r"([^'$()\s]+)" # string of length >= 1 without '$() or whitespace r"|" # or - r"('.*?'(?!\w))" # any string or empty string surrounded by single quotes - # except if right quote is succeeded by alphanumeric char + r"('(?:[^'\\]|\\\\|\\.)*?'(?!\w))" + # any string or empty string surrounded by unescaped + # single quotes except if right quote is succeeded by + # alphanumeric char r"|" # or r"([^\s]+?)", # residue, all non-whitespace strings ).findall +UNESCAPE_PATTERN = re.compile(r"\\(.)") + def split_tokens(s): """ @@ -30,7 +34,7 @@ def split_tokens(s): if unquoted: parts.append(unquoted) elif quoted: - parts.append(quoted[1:-1]) + parts.append(UNESCAPE_PATTERN.sub(r'\1', quoted[1:-1])) elif opar: parens += 1 parts.append(opar) diff --git a/Tests/t_ldap_schema_tokenizer.py b/Tests/t_ldap_schema_tokenizer.py index 459927d3..0890379a 100644 --- a/Tests/t_ldap_schema_tokenizer.py +++ b/Tests/t_ldap_schema_tokenizer.py @@ -110,7 +110,6 @@ def test_broken_oid(self): """ self._run_failure_tests(TESTCASES_BROKEN_OID) - @unittest.expectedFailure def test_escaped_quotes(self): """ run test cases specified in constant TESTCASES_ESCAPED_QUOTES