From 4b0fd78776bbd8f852128e585fc9d305d2ca5455 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 18 Oct 2021 14:51:55 -0600 Subject: [PATCH 01/18] Resolve ".." in stdlib_dir. --- Modules/getpath.c | 89 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 83 insertions(+), 6 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 1405023b39b580..446778f519cc62 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -322,6 +322,63 @@ absolutize(wchar_t **path_p) } +/* Remove navigation elements such as "." and "..". */ +// This is similar to canonicalize() is PC/getpathp.c. +static PyStatus +normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) +{ + assert(orig && *orig != L'\0'); + // We disallow trailing dots to keep this function simpler. + assert(orig[wcslen(orig)-1] != L'.'); + if (wcslen(orig) + 1 >= buf_len) { + return PATHLEN_ERR(); + } + + int dots = -1; + wchar_t *buf_next = buf; + // The resulting filename will never be longer than orig. + for (const wchar_t *unused = orig; *unused != L'\0'; unused++) { + wchar_t c = *unused; + buf_next[0] = c; + buf_next++; + if (c == SEP) { + assert(dots <= 2); + if (dots == 2) { + buf_next -= 3; // "../" + assert(buf_next != buf); + // Turn "/x/y/../z" into "/x/z". We cap it off at the root, + // so "/../spam" becomes "/spam". + if (buf_next - 1 != buf) { + buf_next--; + assert(*buf_next == SEP); + // Move to the previous SEP in the buffer. + while (*(buf_next - 1) != SEP) { + buf_next--; + } + } + } + else if (dots == 1) { + // Turn "/./" into "/". + buf_next -= 2; // "./" + assert(*(buf_next - 1) == SEP); + } + dots = 0; + } + else if (dots >= 0) { + if (c == L'.') { + assert(dots <= 2); + dots++; + } + else { + dots = -1; + } + } + } + *buf_next = L'\0'; + return _PyStatus_OK(); +} + + /* Is module -- check for .pyc too */ static PyStatus ismodule(const wchar_t *path, int *result) @@ -519,6 +576,28 @@ search_for_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig, } +static PyStatus +calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig) +{ + // Note that, unlike calculate_set_prefix(), here we allow a negative + // prefix_found. That means the source tree Lib dir gets used. + if (!calculate->prefix_found) { + return _PyStatus_OK(); + } + wchar_t buf[MAXPATHLEN + 1]; + PyStatus status = normalize(calculate->prefix, + buf, Py_ARRAY_LENGTH(buf)); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf); + if (pathconfig->stdlib_dir == NULL) { + return _PyStatus_NO_MEMORY(); + } + return _PyStatus_OK(); +} + + static PyStatus calculate_prefix(PyCalculatePath *calculate, _PyPathConfig *pathconfig) { @@ -1494,12 +1573,10 @@ calculate_path(PyCalculatePath *calculate, _PyPathConfig *pathconfig) } if (pathconfig->stdlib_dir == NULL) { - if (calculate->prefix_found) { - /* This must be done *before* calculate_set_prefix() is called. */ - pathconfig->stdlib_dir = _PyMem_RawWcsdup(calculate->prefix); - if (pathconfig->stdlib_dir == NULL) { - return _PyStatus_NO_MEMORY(); - } + /* This must be done *before* calculate_set_prefix() is called. */ + status = calculate_set_stdlib_dir(calculate, pathconfig); + if (_PyStatus_EXCEPTION(status)) { + return status; } } From 70dd79940d506ef0ede17606914614df89e1d300 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 18 Oct 2021 17:35:38 -0600 Subject: [PATCH 02/18] Go back to not running most of test_embed in out-of-tree builds. --- Lib/test/test_embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 4b4396efb5cadb..4186f011e2388a 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -61,7 +61,7 @@ def setUp(self): else: exepath = os.path.join(builddir, 'Programs') self.test_exe = exe = os.path.join(exepath, exename) - if not os.path.exists(exe): + if exepath != support.REPO_ROOT or not os.path.exists(exe): self.skipTest("%r doesn't exist" % exe) # This is needed otherwise we get a fatal error: # "Py_Initialize: Unable to get the locale encoding From 3baa05d8210c8965fc0341afcca72c20eefc4a87 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 18 Oct 2021 18:11:42 -0600 Subject: [PATCH 03/18] Fix a typo. --- Modules/getpath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 446778f519cc62..22e95e8706b87a 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -323,7 +323,7 @@ absolutize(wchar_t **path_p) /* Remove navigation elements such as "." and "..". */ -// This is similar to canonicalize() is PC/getpathp.c. +// This is similar to canonicalize() in PC/getpathp.c. static PyStatus normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) { From 8318fdc5ef3de3e046519da07a6709a41a29f42f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 18 Oct 2021 18:52:09 -0600 Subject: [PATCH 04/18] Fix relative paths before passing to normalize(). --- Modules/getpath.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 22e95e8706b87a..2b7052795b5ed8 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -584,9 +584,23 @@ calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig) if (!calculate->prefix_found) { return _PyStatus_OK(); } + PyStatus status; + wchar_t *prefix = calculate->prefix; + if (!_Py_isabs(prefix)) { + prefix = _PyMem_RawWcsdup(prefix); + if (prefix == NULL) { + return _PyStatus_NO_MEMORY(); + } + status = absolutize(&prefix); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } wchar_t buf[MAXPATHLEN + 1]; - PyStatus status = normalize(calculate->prefix, - buf, Py_ARRAY_LENGTH(buf)); + status = normalize(prefix, buf, Py_ARRAY_LENGTH(buf)); + if (prefix != calculate->prefix) { + PyMem_RawFree(prefix); + } if (_PyStatus_EXCEPTION(status)) { return status; } From 7dceb0ed9e01c5a515ead296c342b2117229fb5f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 08:55:24 -0600 Subject: [PATCH 05/18] Do not let "dots" go past 2. --- Modules/getpath.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 2b7052795b5ed8..304c5fc5d29582 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -365,8 +365,7 @@ normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) dots = 0; } else if (dots >= 0) { - if (c == L'.') { - assert(dots <= 2); + if (c == L'.' && dots < 2) { dots++; } else { From 63700f83519b0794c2c6514822be34029fd2271a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:40:47 -0600 Subject: [PATCH 06/18] Update the description of normalize(). --- Modules/getpath.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 304c5fc5d29582..1cabf8fc8dac05 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -322,8 +322,9 @@ absolutize(wchar_t **path_p) } -/* Remove navigation elements such as "." and "..". */ -// This is similar to canonicalize() in PC/getpathp.c. +/* Remove navigation elements such as "." and "..". + + This is essentially a C implementation of posixpath.normpath(). */ static PyStatus normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) { From ce2cce996a88b540c31ec22483feef8848852aea Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:41:33 -0600 Subject: [PATCH 07/18] Collapse extra slashes. --- Modules/getpath.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Modules/getpath.c b/Modules/getpath.c index 1cabf8fc8dac05..15189f9c9b5716 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -363,6 +363,11 @@ normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) buf_next -= 2; // "./" assert(*(buf_next - 1) == SEP); } + else if (dots == 0) { + // Turn "//" into "/". + buf_next--; + assert(*(buf_next - 1) == SEP); + } dots = 0; } else if (dots >= 0) { From 2b7740677615c7016cd4a8099cc0573226c1fdd1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:44:30 -0600 Subject: [PATCH 08/18] unused -> remainder --- Modules/getpath.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index 15189f9c9b5716..f038c2b470ccc7 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -338,8 +338,8 @@ normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) int dots = -1; wchar_t *buf_next = buf; // The resulting filename will never be longer than orig. - for (const wchar_t *unused = orig; *unused != L'\0'; unused++) { - wchar_t c = *unused; + for (const wchar_t *remainder = orig; *remainder != L'\0'; remainder++) { + wchar_t c = *remainder; buf_next[0] = c; buf_next++; if (c == SEP) { From c7d148027877166b9b9752e6886be0aae1da3993 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 11:34:26 -0600 Subject: [PATCH 09/18] Resolve trailing dots and strip trailing the slash, if any. --- Modules/getpath.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/Modules/getpath.c b/Modules/getpath.c index f038c2b470ccc7..f485c4203d1a62 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -324,13 +324,12 @@ absolutize(wchar_t **path_p) /* Remove navigation elements such as "." and "..". - This is essentially a C implementation of posixpath.normpath(). */ + This is mostly a C implementation of posixpath.normpath(). */ static PyStatus normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) { assert(orig && *orig != L'\0'); - // We disallow trailing dots to keep this function simpler. - assert(orig[wcslen(orig)-1] != L'.'); + assert(*orig == SEP); // an absolute path if (wcslen(orig) + 1 >= buf_len) { return PATHLEN_ERR(); } @@ -345,15 +344,17 @@ normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) if (c == SEP) { assert(dots <= 2); if (dots == 2) { - buf_next -= 3; // "../" - assert(buf_next != buf); - // Turn "/x/y/../z" into "/x/z". We cap it off at the root, - // so "/../spam" becomes "/spam". - if (buf_next - 1 != buf) { - buf_next--; - assert(*buf_next == SEP); + // Turn "/x/y/../z" into "/x/z". + buf_next -= 4; // "/../" + assert(*buf_next == SEP); + // We cap it off at the root, so "/../spam" becomes "/spam". + if (buf_next == buf) { + buf_next++; + } + else { // Move to the previous SEP in the buffer. while (*(buf_next - 1) != SEP) { + assert(buf_next != buf); buf_next--; } } @@ -379,6 +380,24 @@ normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) } } } + if (dots >= 0) { + // Strip any trailing dots and trailing slash. + buf_next -= dots + 1; // "/" or "/." or "/.." + assert(*buf_next == SEP); + if (buf_next == buf) { + // Leave the single slash for root. + buf_next++; + } + else { + if (dots == 2) { + // Move to the previous SEP in the buffer. + do { + assert(buf_next != buf); + buf_next--; + } while (*(buf_next) != SEP); + } + } + } *buf_next = L'\0'; return _PyStatus_OK(); } From a28cca4e20d37da207189c93675ca7c4fa4034cd Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 11:37:55 -0600 Subject: [PATCH 10/18] Drop the test_embed fix from this branch. --- Lib/test/test_embed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 4186f011e2388a..4b4396efb5cadb 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -61,7 +61,7 @@ def setUp(self): else: exepath = os.path.join(builddir, 'Programs') self.test_exe = exe = os.path.join(exepath, exename) - if exepath != support.REPO_ROOT or not os.path.exists(exe): + if not os.path.exists(exe): self.skipTest("%r doesn't exist" % exe) # This is needed otherwise we get a fatal error: # "Py_Initialize: Unable to get the locale encoding From 7d6658d4034a72dc1750a449ce2cde7ddd839e4f Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:34:04 -0600 Subject: [PATCH 11/18] Move _Py_normalize_path() to the C-API (for testing). --- Include/internal/pycore_fileutils.h | 3 + Modules/getpath.c | 87 +---------------------------- Python/fileutils.c | 82 +++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 84 deletions(-) diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index ab436ae9b007ac..d1caf9c2372349 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -80,6 +80,9 @@ extern int _Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize); extern size_t _Py_find_basename(const wchar_t *filename); +PyAPI_FUNC(int) _Py_normalize_path(const wchar_t *path, + wchar_t *buf, const size_t buf_len); + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. diff --git a/Modules/getpath.c b/Modules/getpath.c index f485c4203d1a62..4dbd502ddcf045 100644 --- a/Modules/getpath.c +++ b/Modules/getpath.c @@ -322,87 +322,6 @@ absolutize(wchar_t **path_p) } -/* Remove navigation elements such as "." and "..". - - This is mostly a C implementation of posixpath.normpath(). */ -static PyStatus -normalize(const wchar_t *orig, wchar_t *buf, const size_t buf_len) -{ - assert(orig && *orig != L'\0'); - assert(*orig == SEP); // an absolute path - if (wcslen(orig) + 1 >= buf_len) { - return PATHLEN_ERR(); - } - - int dots = -1; - wchar_t *buf_next = buf; - // The resulting filename will never be longer than orig. - for (const wchar_t *remainder = orig; *remainder != L'\0'; remainder++) { - wchar_t c = *remainder; - buf_next[0] = c; - buf_next++; - if (c == SEP) { - assert(dots <= 2); - if (dots == 2) { - // Turn "/x/y/../z" into "/x/z". - buf_next -= 4; // "/../" - assert(*buf_next == SEP); - // We cap it off at the root, so "/../spam" becomes "/spam". - if (buf_next == buf) { - buf_next++; - } - else { - // Move to the previous SEP in the buffer. - while (*(buf_next - 1) != SEP) { - assert(buf_next != buf); - buf_next--; - } - } - } - else if (dots == 1) { - // Turn "/./" into "/". - buf_next -= 2; // "./" - assert(*(buf_next - 1) == SEP); - } - else if (dots == 0) { - // Turn "//" into "/". - buf_next--; - assert(*(buf_next - 1) == SEP); - } - dots = 0; - } - else if (dots >= 0) { - if (c == L'.' && dots < 2) { - dots++; - } - else { - dots = -1; - } - } - } - if (dots >= 0) { - // Strip any trailing dots and trailing slash. - buf_next -= dots + 1; // "/" or "/." or "/.." - assert(*buf_next == SEP); - if (buf_next == buf) { - // Leave the single slash for root. - buf_next++; - } - else { - if (dots == 2) { - // Move to the previous SEP in the buffer. - do { - assert(buf_next != buf); - buf_next--; - } while (*(buf_next) != SEP); - } - } - } - *buf_next = L'\0'; - return _PyStatus_OK(); -} - - /* Is module -- check for .pyc too */ static PyStatus ismodule(const wchar_t *path, int *result) @@ -621,12 +540,12 @@ calculate_set_stdlib_dir(PyCalculatePath *calculate, _PyPathConfig *pathconfig) } } wchar_t buf[MAXPATHLEN + 1]; - status = normalize(prefix, buf, Py_ARRAY_LENGTH(buf)); + int res = _Py_normalize_path(prefix, buf, Py_ARRAY_LENGTH(buf)); if (prefix != calculate->prefix) { PyMem_RawFree(prefix); } - if (_PyStatus_EXCEPTION(status)) { - return status; + if (res < 0) { + return PATHLEN_ERR(); } pathconfig->stdlib_dir = _PyMem_RawWcsdup(buf); if (pathconfig->stdlib_dir == NULL) { diff --git a/Python/fileutils.c b/Python/fileutils.c index 3d8f3a4f16326c..2b85fbadfa4210 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2181,6 +2181,88 @@ _Py_find_basename(const wchar_t *filename) } +/* Remove navigation elements such as "." and "..". + + This is mostly a C implementation of posixpath.normpath(). + Return 0 on success. Return -1 if "orig" is too big for the buffer. */ +int +_Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) +{ + assert(path && *path != L'\0'); + assert(*path == SEP); // an absolute path + if (wcslen(path) + 1 >= buf_len) { + return -1; + } + + int dots = -1; + wchar_t *buf_next = buf; + // The resulting filename will never be longer than path. + for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) { + wchar_t c = *remainder; + buf_next[0] = c; + buf_next++; + if (c == SEP) { + assert(dots <= 2); + if (dots == 2) { + // Turn "/x/y/../z" into "/x/z". + buf_next -= 4; // "/../" + assert(*buf_next == SEP); + // We cap it off at the root, so "/../spam" becomes "/spam". + if (buf_next == buf) { + buf_next++; + } + else { + // Move to the previous SEP in the buffer. + while (*(buf_next - 1) != SEP) { + assert(buf_next != buf); + buf_next--; + } + } + } + else if (dots == 1) { + // Turn "/./" into "/". + buf_next -= 2; // "./" + assert(*(buf_next - 1) == SEP); + } + else if (dots == 0) { + // Turn "//" into "/". + buf_next--; + assert(*(buf_next - 1) == SEP); + } + dots = 0; + } + else if (dots >= 0) { + if (c == L'.' && dots < 2) { + dots++; + } + else { + dots = -1; + } + } + } + if (dots >= 0) { + // Strip any trailing dots and trailing slash. + buf_next -= dots + 1; // "/" or "/." or "/.." + assert(*buf_next == SEP); + if (buf_next == buf) { + // Leave the single slash for root. + buf_next++; + } + else { + if (dots == 2) { + // Move to the previous SEP in the buffer. + do { + assert(buf_next != buf); + buf_next--; + } while (*(buf_next) != SEP); + } + } + } + *buf_next = L'\0'; + return 0; +} + + /* Get the current directory. buflen is the buffer size in wide characters including the null character. Decode the path from the locale encoding. From 718e6a9dd385cf9997105f872c6a688604628245 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 13:40:15 -0600 Subject: [PATCH 12/18] Leave a leading "//" alone. --- Python/fileutils.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/Python/fileutils.c b/Python/fileutils.c index 2b85fbadfa4210..ab9ffe4591e9ed 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2195,6 +2195,8 @@ _Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) } int dots = -1; + int check_leading = 1; + const wchar_t *buf_start = buf; wchar_t *buf_next = buf; // The resulting filename will never be longer than path. for (const wchar_t *remainder = path; *remainder != L'\0'; remainder++) { @@ -2208,13 +2210,13 @@ _Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) buf_next -= 4; // "/../" assert(*buf_next == SEP); // We cap it off at the root, so "/../spam" becomes "/spam". - if (buf_next == buf) { + if (buf_next == buf_start) { buf_next++; } else { // Move to the previous SEP in the buffer. while (*(buf_next - 1) != SEP) { - assert(buf_next != buf); + assert(buf_next != buf_start); buf_next--; } } @@ -2228,6 +2230,14 @@ _Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) // Turn "//" into "/". buf_next--; assert(*(buf_next - 1) == SEP); + if (check_leading) { + if (buf_next - 1 == buf && *(remainder + 1) != SEP) { + // Leave a leading "//" alone, unless "///...". + buf_next++; + buf_start++; + } + check_leading = 0; + } } dots = 0; } @@ -2244,15 +2254,15 @@ _Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) // Strip any trailing dots and trailing slash. buf_next -= dots + 1; // "/" or "/." or "/.." assert(*buf_next == SEP); - if (buf_next == buf) { - // Leave the single slash for root. + if (buf_next == buf_start) { + // Leave the leading slash for root. buf_next++; } else { if (dots == 2) { // Move to the previous SEP in the buffer. do { - assert(buf_next != buf); + assert(buf_next != buf_start); buf_next--; } while (*(buf_next) != SEP); } From 52265e273ddcff52bb15b4994143b9d2c692551a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:30:19 -0600 Subject: [PATCH 13/18] Add more os.path.normpath test cases. --- Lib/test/test_posixpath.py | 54 +++++++++++++++++++++++++------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 8d398ec0103544..cadb037771c81b 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -304,25 +304,43 @@ def test_expanduser_pwd(self): for path in ('~', '~/.local', '~vstinner/'): self.assertEqual(posixpath.expanduser(path), path) + NORMPATH_CASES = [ + ("", "."), + ("/", "/"), + ("/foo", "/foo"), + ("/foo/bar", "/foo/bar"), + ("//", "//"), + ("///", "/"), + ("///foo/.//bar//", "/foo/bar"), + ("///foo/.//bar//.//..//.//baz///", "/foo/baz"), + ("///..//./foo/.//bar", "/foo/bar"), + ("..", ".."), + ("../", ".."), + ("../foo", "../foo"), + ("../../foo", "../../foo"), + ("../foo/../bar", "../bar"), + ("../../foo/../bar/./baz/boom/..", "../../bar/baz"), + ("/..", "/"), + ("/..", "/"), + ("/../", "/"), + ("/../foo", "/foo"), + ("/../../foo", "/foo"), + ("/../foo/../", "/"), + ("/../foo/../bar", "/bar"), + ("/../../foo/../bar/./baz/boom/..", "/bar/baz"), + ] + def test_normpath(self): - self.assertEqual(posixpath.normpath(""), ".") - self.assertEqual(posixpath.normpath("/"), "/") - self.assertEqual(posixpath.normpath("//"), "//") - self.assertEqual(posixpath.normpath("///"), "/") - self.assertEqual(posixpath.normpath("///foo/.//bar//"), "/foo/bar") - self.assertEqual(posixpath.normpath("///foo/.//bar//.//..//.//baz"), - "/foo/baz") - self.assertEqual(posixpath.normpath("///..//./foo/.//bar"), "/foo/bar") - - self.assertEqual(posixpath.normpath(b""), b".") - self.assertEqual(posixpath.normpath(b"/"), b"/") - self.assertEqual(posixpath.normpath(b"//"), b"//") - self.assertEqual(posixpath.normpath(b"///"), b"/") - self.assertEqual(posixpath.normpath(b"///foo/.//bar//"), b"/foo/bar") - self.assertEqual(posixpath.normpath(b"///foo/.//bar//.//..//.//baz"), - b"/foo/baz") - self.assertEqual(posixpath.normpath(b"///..//./foo/.//bar"), - b"/foo/bar") + for path, expected in self.NORMPATH_CASES: + with self.subTest(path): + result = posixpath.normpath(path) + self.assertEqual(result, expected) + + path = path.encode('utf-8') + expected = expected.encode('utf-8') + with self.subTest(path): + result = posixpath.normpath(path) + self.assertEqual(result, expected) @skip_if_ABSTFN_contains_backslash def test_realpath_curdir(self): From 0a5cbee43cba1c2d80743d9960cb03f5e47f099d Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 10:34:04 -0600 Subject: [PATCH 14/18] Add tests for _Py_normalize_path(). --- Lib/test/test_fileutils.py | 32 ++++++++++++++++++++++++++++++++ Modules/_testinternalcapi.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 Lib/test/test_fileutils.py diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py new file mode 100644 index 00000000000000..41987d5de0f7cd --- /dev/null +++ b/Lib/test/test_fileutils.py @@ -0,0 +1,32 @@ +# Run tests for functions in Python/fileutils.c. + +import os +import os.path +import unittest +from test.support import import_helper +from .test_posixpath import PosixPathTest as posixdata + +# Skip this test if the _testcapi module isn't available. +_testcapi = import_helper.import_module('_testinternalcapi') + + +class PathTests(unittest.TestCase): + + if os.name == 'nt': + raise Unittest.SkipTest() + else: + NORMPATH_CASES = posixdata.NORMPATH_CASES + + def test_normalize_path(self): + for filename, expected in self.NORMPATH_CASES: + if not os.path.isabs(filename): + continue + with self.subTest(filename): + result = _testcapi.test_normalize_path(filename) + self.assertEqual(result, expected) + +del posixdata + + +if __name__ == "__main__": + unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 3ba939651a4173..f9c510de44a7c0 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,12 +14,14 @@ #include "Python.h" #include "pycore_atomic_funcs.h" // _Py_atomic_int_get() #include "pycore_bitutils.h" // _Py_bswap32() +#include "pycore_fileutils.h" // _Py_normalize_path #include "pycore_gc.h" // PyGC_Head #include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_initconfig.h" // _Py_GetConfigsAsDict() #include "pycore_interp.h" // _PyInterpreterState_GetConfigCopy() #include "pycore_pyerrors.h" // _Py_UTF8_Edit_Cost() #include "pycore_pystate.h" // _PyThreadState_GET() +#include "osdefs.h" // MAXPATHLEN static PyObject * @@ -366,6 +368,31 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) } +static PyObject * +test_normalize_path(PyObject *self, PyObject *filename) +{ +// if (!PyUnicode_Check(filename)) { +// PyErr_SetString(PyExc_TypeError, "argument must be a string"); +// return NULL; +// } + Py_ssize_t size = -1; + wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size); + if (encoded == NULL) { + return NULL; + } + + wchar_t buf[MAXPATHLEN + 1]; + int res = _Py_normalize_path(encoded, buf, Py_ARRAY_LENGTH(buf)); + PyMem_Free(encoded); + if (res != 0) { + PyErr_SetString(PyExc_ValueError, "string too long"); + return NULL; + } + + return PyUnicode_FromWideChar(buf, -1); +} + + static PyMethodDef TestMethods[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -377,6 +404,7 @@ static PyMethodDef TestMethods[] = { {"set_config", test_set_config, METH_O}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS}, + {"test_normalize_path", test_normalize_path, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; From cc44daea6fa6d848ce243e2c2e606b7c140f9ab3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 14:42:38 -0600 Subject: [PATCH 15/18] Fix the tests on Windows. --- Lib/test/test_fileutils.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py index 41987d5de0f7cd..a5dc19b90c9a0d 100644 --- a/Lib/test/test_fileutils.py +++ b/Lib/test/test_fileutils.py @@ -4,7 +4,6 @@ import os.path import unittest from test.support import import_helper -from .test_posixpath import PosixPathTest as posixdata # Skip this test if the _testcapi module isn't available. _testcapi = import_helper.import_module('_testinternalcapi') @@ -12,21 +11,19 @@ class PathTests(unittest.TestCase): - if os.name == 'nt': - raise Unittest.SkipTest() - else: - NORMPATH_CASES = posixdata.NORMPATH_CASES - def test_normalize_path(self): - for filename, expected in self.NORMPATH_CASES: + if os.name == 'nt': + raise unittest.SkipTest('Windows has its own helper for this') + else: + from .test_posixpath import PosixPathTest as posixdata + tests = posixdata.NORMPATH_CASES + for filename, expected in tests: if not os.path.isabs(filename): continue with self.subTest(filename): result = _testcapi.test_normalize_path(filename) self.assertEqual(result, expected) -del posixdata - if __name__ == "__main__": unittest.main() From 9da250bb8796810610cd7805aa6674ed3b93e45c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 14:48:00 -0600 Subject: [PATCH 16/18] test_normalize_path() -> normalize_path() --- Lib/test/test_fileutils.py | 2 +- Modules/_testinternalcapi.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py index a5dc19b90c9a0d..a1438e6ffd7a33 100644 --- a/Lib/test/test_fileutils.py +++ b/Lib/test/test_fileutils.py @@ -21,7 +21,7 @@ def test_normalize_path(self): if not os.path.isabs(filename): continue with self.subTest(filename): - result = _testcapi.test_normalize_path(filename) + result = _testcapi.normalize_path(filename) self.assertEqual(result, expected) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index f9c510de44a7c0..b28f142e49d843 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -369,7 +369,7 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) static PyObject * -test_normalize_path(PyObject *self, PyObject *filename) +normalize_path(PyObject *self, PyObject *filename) { // if (!PyUnicode_Check(filename)) { // PyErr_SetString(PyExc_TypeError, "argument must be a string"); @@ -404,7 +404,7 @@ static PyMethodDef TestMethods[] = { {"set_config", test_set_config, METH_O}, {"test_atomic_funcs", test_atomic_funcs, METH_NOARGS}, {"test_edit_cost", test_edit_cost, METH_NOARGS}, - {"test_normalize_path", test_normalize_path, METH_O, NULL}, + {"normalize_path", normalize_path, METH_O, NULL}, {NULL, NULL} /* sentinel */ }; From 099295384cc386511126f6dc72a0b58130f1a0b8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 19 Oct 2021 14:48:35 -0600 Subject: [PATCH 17/18] Drop old text. --- Modules/_testinternalcapi.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index b28f142e49d843..c146cb5d6f1081 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -371,10 +371,6 @@ test_edit_cost(PyObject *self, PyObject *Py_UNUSED(args)) static PyObject * normalize_path(PyObject *self, PyObject *filename) { -// if (!PyUnicode_Check(filename)) { -// PyErr_SetString(PyExc_TypeError, "argument must be a string"); -// return NULL; -// } Py_ssize_t size = -1; wchar_t *encoded = PyUnicode_AsWideCharString(filename, &size); if (encoded == NULL) { From 3e8f8e0b786b8083bedbf131c90faf67cca5402f Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Fri, 22 Oct 2021 15:47:39 -0700 Subject: [PATCH 18/18] Add more test cases and fix a bug with non-leading //. --- Lib/test/test_fileutils.py | 5 +++-- Lib/test/test_posixpath.py | 10 +++++++++- Python/fileutils.c | 15 +++++++++------ 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Lib/test/test_fileutils.py b/Lib/test/test_fileutils.py index a1438e6ffd7a33..45b3f3233c6171 100644 --- a/Lib/test/test_fileutils.py +++ b/Lib/test/test_fileutils.py @@ -11,7 +11,7 @@ class PathTests(unittest.TestCase): - def test_normalize_path(self): + def test_capi_normalize_path(self): if os.name == 'nt': raise unittest.SkipTest('Windows has its own helper for this') else: @@ -22,7 +22,8 @@ def test_normalize_path(self): continue with self.subTest(filename): result = _testcapi.normalize_path(filename) - self.assertEqual(result, expected) + self.assertEqual(result, expected, + msg=f'input: {filename!r} expected output: {expected!r}') if __name__ == "__main__": diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index cadb037771c81b..e4d8384ef0b4bc 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -307,6 +307,9 @@ def test_expanduser_pwd(self): NORMPATH_CASES = [ ("", "."), ("/", "/"), + ("/.", "/"), + ("/./", "/"), + ("/.//.", "/"), ("/foo", "/foo"), ("/foo/bar", "/foo/bar"), ("//", "//"), @@ -314,6 +317,8 @@ def test_expanduser_pwd(self): ("///foo/.//bar//", "/foo/bar"), ("///foo/.//bar//.//..//.//baz///", "/foo/baz"), ("///..//./foo/.//bar", "/foo/bar"), + (".", "."), + (".//.", "."), ("..", ".."), ("../", ".."), ("../foo", "../foo"), @@ -323,11 +328,14 @@ def test_expanduser_pwd(self): ("/..", "/"), ("/..", "/"), ("/../", "/"), + ("/..//", "/"), + ("//..", "//"), ("/../foo", "/foo"), ("/../../foo", "/foo"), ("/../foo/../", "/"), ("/../foo/../bar", "/bar"), ("/../../foo/../bar/./baz/boom/..", "/bar/baz"), + ("/../../foo/../bar/./baz/boom/.", "/bar/baz/boom"), ] def test_normpath(self): @@ -338,7 +346,7 @@ def test_normpath(self): path = path.encode('utf-8') expected = expected.encode('utf-8') - with self.subTest(path): + with self.subTest(path, type=bytes): result = posixpath.normpath(path) self.assertEqual(result, expected) diff --git a/Python/fileutils.c b/Python/fileutils.c index ab9ffe4591e9ed..ac0046cdac37c3 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2241,12 +2241,15 @@ _Py_normalize_path(const wchar_t *path, wchar_t *buf, const size_t buf_len) } dots = 0; } - else if (dots >= 0) { - if (c == L'.' && dots < 2) { - dots++; - } - else { - dots = -1; + else { + check_leading = 0; + if (dots >= 0) { + if (c == L'.' && dots < 2) { + dots++; + } + else { + dots = -1; + } } } }