Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 10bb90e

Browse files
authored
gh-102511: Speed up os.path.splitroot() with native helpers (GH-118089)
1 parent e38b43c commit 10bb90e
Copy full SHA for 10bb90e

File tree

Expand file tree / Collapse file tree

8 files changed

+337
-108
lines changed
Filter options
Expand file tree / Collapse file tree

8 files changed

+337
-108
lines changed

‎Include/internal/pycore_fileutils.h

Copy file name to clipboard · Expand all lines: Include/internal/pycore_fileutils.h
+2 · Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -290,6 +290,8 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t
290290
extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd);
291291
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
292292

293+
extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize);
294+
293295
// Macros to protect CRT calls against instant termination when passed an
294296
// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler.
295297
// Usage:

‎Lib/ntpath.py

Copy file name to clipboard · Expand all lines: Lib/ntpath.py
+68 -48 · Lines changed: 68 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -167,56 +167,76 @@ def splitdrive(p):
167167
return drive, root + tail
168168

169169

170-
def splitroot(p):
171-
"""Split a pathname into drive, root and tail. The drive is defined
172-
exactly as in splitdrive(). On Windows, the root may be a single path
173-
separator or an empty string. The tail contains anything after the root.
174-
For example:
175-
176-
splitroot('//server/share/') == ('//server/share', '/', '')
177-
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
178-
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
179-
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
180-
"""
181-
p = os.fspath(p)
182-
if isinstance(p, bytes):
183-
sep = b'\\'
184-
altsep = b'/'
185-
colon = b':'
186-
unc_prefix = b'\\\\?\\UNC\\'
187-
empty = b''
188-
else:
189-
sep = '\\'
190-
altsep = '/'
191-
colon = ':'
192-
unc_prefix = '\\\\?\\UNC\\'
193-
empty = ''
194-
normp = p.replace(altsep, sep)
195-
if normp[:1] == sep:
196-
if normp[1:2] == sep:
197-
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
198-
# Device drives, e.g. \\.\device or \\?\device
199-
start = 8 if normp[:8].upper() == unc_prefix else 2
200-
index = normp.find(sep, start)
201-
if index == -1:
202-
return p, empty, empty
203-
index2 = normp.find(sep, index + 1)
204-
if index2 == -1:
205-
return p, empty, empty
206-
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
170+
try:
171+
from nt import _path_splitroot_ex
172+
except ImportError:
173+
def splitroot(p):
174+
"""Split a pathname into drive, root and tail. The drive is defined
175+
exactly as in splitdrive(). On Windows, the root may be a single path
176+
separator or an empty string. The tail contains anything after the root.
177+
For example:
178+
179+
splitroot('//server/share/') == ('//server/share', '/', '')
180+
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
181+
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
182+
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
183+
"""
184+
p = os.fspath(p)
185+
if isinstance(p, bytes):
186+
sep = b'\\'
187+
altsep = b'/'
188+
colon = b':'
189+
unc_prefix = b'\\\\?\\UNC\\'
190+
empty = b''
207191
else:
208-
# Relative path with root, e.g. \Windows
209-
return empty, p[:1], p[1:]
210-
elif normp[1:2] == colon:
211-
if normp[2:3] == sep:
212-
# Absolute drive-letter path, e.g. X:\Windows
213-
return p[:2], p[2:3], p[3:]
192+
sep = '\\'
193+
altsep = '/'
194+
colon = ':'
195+
unc_prefix = '\\\\?\\UNC\\'
196+
empty = ''
197+
normp = p.replace(altsep, sep)
198+
if normp[:1] == sep:
199+
if normp[1:2] == sep:
200+
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
201+
# Device drives, e.g. \\.\device or \\?\device
202+
start = 8 if normp[:8].upper() == unc_prefix else 2
203+
index = normp.find(sep, start)
204+
if index == -1:
205+
return p, empty, empty
206+
index2 = normp.find(sep, index + 1)
207+
if index2 == -1:
208+
return p, empty, empty
209+
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
210+
else:
211+
# Relative path with root, e.g. \Windows
212+
return empty, p[:1], p[1:]
213+
elif normp[1:2] == colon:
214+
if normp[2:3] == sep:
215+
# Absolute drive-letter path, e.g. X:\Windows
216+
return p[:2], p[2:3], p[3:]
217+
else:
218+
# Relative path with drive, e.g. X:Windows
219+
return p[:2], empty, p[2:]
214220
else:
215-
# Relative path with drive, e.g. X:Windows
216-
return p[:2], empty, p[2:]
217-
else:
218-
# Relative path, e.g. Windows
219-
return empty, empty, p
221+
# Relative path, e.g. Windows
222+
return empty, empty, p
223+
else:
224+
def splitroot(p):
225+
"""Split a pathname into drive, root and tail. The drive is defined
226+
exactly as in splitdrive(). On Windows, the root may be a single path
227+
separator or an empty string. The tail contains anything after the root.
228+
For example:
229+
230+
splitroot('//server/share/') == ('//server/share', '/', '')
231+
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
232+
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
233+
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
234+
"""
235+
p = os.fspath(p)
236+
if isinstance(p, bytes):
237+
drive, root, tail = _path_splitroot_ex(os.fsdecode(p))
238+
return os.fsencode(drive), os.fsencode(root), os.fsencode(tail)
239+
return _path_splitroot_ex(p)
220240

221241

222242
# Split a path in head (everything up to the last '/') and tail (the

‎Lib/posixpath.py

Copy file name to clipboard · Expand all lines: Lib/posixpath.py
+47 -27 · Lines changed: 47 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -134,33 +134,53 @@ def splitdrive(p):
134134
return p[:0], p
135135

136136

137-
def splitroot(p):
138-
"""Split a pathname into drive, root and tail. On Posix, drive is always
139-
empty; the root may be empty, a single slash, or two slashes. The tail
140-
contains anything after the root. For example:
141-
142-
splitroot('foo/bar') == ('', '', 'foo/bar')
143-
splitroot('/foo/bar') == ('', '/', 'foo/bar')
144-
splitroot('//foo/bar') == ('', '//', 'foo/bar')
145-
splitroot('///foo/bar') == ('', '/', '//foo/bar')
146-
"""
147-
p = os.fspath(p)
148-
if isinstance(p, bytes):
149-
sep = b'/'
150-
empty = b''
151-
else:
152-
sep = '/'
153-
empty = ''
154-
if p[:1] != sep:
155-
# Relative path, e.g.: 'foo'
156-
return empty, empty, p
157-
elif p[1:2] != sep or p[2:3] == sep:
158-
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
159-
return empty, sep, p[1:]
160-
else:
161-
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
162-
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
163-
return empty, p[:2], p[2:]
137+
try:
138+
from posix import _path_splitroot_ex
139+
except ImportError:
140+
def splitroot(p):
141+
"""Split a pathname into drive, root and tail. On Posix, drive is always
142+
empty; the root may be empty, a single slash, or two slashes. The tail
143+
contains anything after the root. For example:
144+
145+
splitroot('foo/bar') == ('', '', 'foo/bar')
146+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
147+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
148+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
149+
"""
150+
p = os.fspath(p)
151+
if isinstance(p, bytes):
152+
sep = b'/'
153+
empty = b''
154+
else:
155+
sep = '/'
156+
empty = ''
157+
if p[:1] != sep:
158+
# Relative path, e.g.: 'foo'
159+
return empty, empty, p
160+
elif p[1:2] != sep or p[2:3] == sep:
161+
# Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
162+
return empty, sep, p[1:]
163+
else:
164+
# Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
165+
# https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
166+
return empty, p[:2], p[2:]
167+
else:
168+
def splitroot(p):
169+
"""Split a pathname into drive, root and tail. On Posix, drive is always
170+
empty; the root may be empty, a single slash, or two slashes. The tail
171+
contains anything after the root. For example:
172+
173+
splitroot('foo/bar') == ('', '', 'foo/bar')
174+
splitroot('/foo/bar') == ('', '/', 'foo/bar')
175+
splitroot('//foo/bar') == ('', '//', 'foo/bar')
176+
splitroot('///foo/bar') == ('', '/', '//foo/bar')
177+
"""
178+
p = os.fspath(p)
179+
if isinstance(p, bytes):
180+
# Optimisation: the drive is always empty
181+
_, root, tail = _path_splitroot_ex(os.fsdecode(p))
182+
return b'', os.fsencode(root), os.fsencode(tail)
183+
return _path_splitroot_ex(p)
164184

165185

166186
# Return the tail (basename) part of a path, same as split(path)[1].

‎Lib/test/test_ntpath.py

Copy file name to clipboard · Expand all lines: Lib/test/test_ntpath.py
+1 · Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -374,6 +374,7 @@ def test_normpath(self):
374374
tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\')
375375
tester("ntpath.normpath('\\\\foo')", '\\\\foo')
376376
tester("ntpath.normpath('\\\\')", '\\\\')
377+
tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\')
377378

378379
def test_realpath_curdir(self):
379380
expected = ntpath.normpath(os.getcwd())
+1 · Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Speed up :func:`os.path.splitroot` with a native implementation.

‎Modules/clinic/posixmodule.c.h

Copy file name to clipboard · Expand all lines: Modules/clinic/posixmodule.c.h
+59 -1 · Lines changed: 59 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎Modules/posixmodule.c

Copy file name to clipboard · Expand all lines: Modules/posixmodule.c
+44 · Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5467,6 +5467,49 @@ os__path_islink_impl(PyObject *module, PyObject *path)
54675467
#endif /* MS_WINDOWS */
54685468

54695469

5470+
/*[clinic input]
5471+
os._path_splitroot_ex
5472+
5473+
path: unicode
5474+
5475+
[clinic start generated code]*/
5476+
5477+
static PyObject *
5478+
os__path_splitroot_ex_impl(PyObject *module, PyObject *path)
5479+
/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/
5480+
{
5481+
Py_ssize_t len, drvsize, rootsize;
5482+
PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL;
5483+
5484+
wchar_t *buffer = PyUnicode_AsWideCharString(path, &len);
5485+
if (!buffer) {
5486+
goto exit;
5487+
}
5488+
5489+
_Py_skiproot(buffer, len, &drvsize, &rootsize);
5490+
drv = PyUnicode_FromWideChar(buffer, drvsize);
5491+
if (drv == NULL) {
5492+
goto exit;
5493+
}
5494+
root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize);
5495+
if (root == NULL) {
5496+
goto exit;
5497+
}
5498+
tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize],
5499+
len - drvsize - rootsize);
5500+
if (tail == NULL) {
5501+
goto exit;
5502+
}
5503+
result = Py_BuildValue("(OOO)", drv, root, tail);
5504+
exit:
5505+
PyMem_Free(buffer);
5506+
Py_XDECREF(drv);
5507+
Py_XDECREF(root);
5508+
Py_XDECREF(tail);
5509+
return result;
5510+
}
5511+
5512+
54705513
/*[clinic input]
54715514
os._path_normpath
54725515
@@ -16799,6 +16842,7 @@ static PyMethodDef posix_methods[] = {
1679916842
OS__FINDFIRSTFILE_METHODDEF
1680016843
OS__GETVOLUMEPATHNAME_METHODDEF
1680116844
OS__PATH_SPLITROOT_METHODDEF
16845+
OS__PATH_SPLITROOT_EX_METHODDEF
1680216846
OS__PATH_NORMPATH_METHODDEF
1680316847
OS_GETLOADAVG_METHODDEF
1680416848
OS_URANDOM_METHODDEF

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.