Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

GH-81079: Add case_sensitive argument to pathlib.Path.glob() #102710

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
May 4, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
GH-81079: Add case_sensitive argument to pathlib.Path.glob()
This argument allows case-sensitive matching to be enabled on Windows, and
case-insensitive matching to be enabled on POSIX.
  • Loading branch information
barneygale committed Mar 15, 2023
commit b0d836e7f40b983abf64b7dd3d50d5de1d27ab29
20 changes: 18 additions & 2 deletions 20 Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. method:: Path.glob(pattern)
.. method:: Path.glob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind)::
Expand All @@ -873,6 +873,11 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
case-sensitive on POSIX, and case-insensitive on Windows. Set
*case_sensitive* to ``True`` or ``False`` to override this behaviour.

.. note::
Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time.
Expand All @@ -883,6 +888,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *case_sensitive* parameter was added.

.. method:: Path.group()

Return the name of the group owning the file. :exc:`KeyError` is raised
Expand Down Expand Up @@ -1268,7 +1276,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict).

.. method:: Path.rglob(pattern)
.. method:: Path.rglob(pattern, *, case_sensitive=None)

Glob the given relative *pattern* recursively. This is like calling
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where
Expand All @@ -1281,12 +1289,20 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'),
PosixPath('test_pathlib.py')]

By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
case-sensitive on POSIX, and case-insensitive on Windows. Set
*case_sensitive* to ``True`` or ``False`` to override this behaviour.

.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob

.. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
The *case_sensitive* parameter was added.

.. method:: Path.rmdir()

Remove this directory. The directory must be empty.
Expand Down
90 changes: 42 additions & 48 deletions 90 Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,16 @@ def _ignore_error(exception):
return (getattr(exception, 'errno', None) in _IGNORED_ERRNOS or
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)


def _is_wildcard_pattern(pat):
# Whether this pattern needs actual matching using fnmatch, or can
# be looked up directly as a file.
return "*" in pat or "?" in pat or "[" in pat

#
# Globbing helpers
#

def _is_case_sensitive(flavour):
return flavour.normcase('Aa') == 'Aa'


@functools.lru_cache()
def _make_selector(pattern_parts, flavour):
def _make_selector(pattern_parts, case_sensitive):
pat = pattern_parts[0]
child_parts = pattern_parts[1:]
if not pat:
Expand All @@ -74,21 +72,21 @@ def _make_selector(pattern_parts, flavour):
cls = _RecursiveWildcardSelector
elif '**' in pat:
raise ValueError("Invalid pattern: '**' can only be an entire path component")
elif _is_wildcard_pattern(pat):
cls = _WildcardSelector
elif pat == '..':
cls = _ParentSelector
else:
cls = _PreciseSelector
return cls(pat, child_parts, flavour)
cls = _WildcardSelector
return cls(pat, child_parts, case_sensitive)


class _Selector:
"""A selector matches a specific glob pattern part against the children
of a given path."""

def __init__(self, child_parts, flavour):
def __init__(self, child_parts, case_sensitive):
self.child_parts = child_parts
if child_parts:
self.successor = _make_selector(child_parts, flavour)
self.successor = _make_selector(child_parts, case_sensitive)
self.dironly = True
else:
self.successor = _TerminatingSelector()
Expand All @@ -98,44 +96,36 @@ def select_from(self, parent_path):
"""Iterate over all child paths of `parent_path` matched by this
selector. This can contain parent_path itself."""
path_cls = type(parent_path)
is_dir = path_cls.is_dir
exists = path_cls.exists
scandir = path_cls._scandir
normcase = path_cls._flavour.normcase
if not is_dir(parent_path):
if not parent_path.is_dir():
return iter([])
return self._select_from(parent_path, is_dir, exists, scandir, normcase)
return self._select_from(parent_path, scandir)


class _TerminatingSelector:

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
yield parent_path


class _PreciseSelector(_Selector):
class _ParentSelector(_Selector):

def __init__(self, name, child_parts, flavour):
self.name = name
_Selector.__init__(self, child_parts, flavour)
def __init__(self, name, child_parts, case_sensitive):
_Selector.__init__(self, child_parts, case_sensitive)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
try:
path = parent_path._make_child_relpath(self.name)
if (is_dir if self.dironly else exists)(path):
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
yield p
except PermissionError:
return
def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..')
return self.successor._select_from(path, scandir)


class _WildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
self.match = re.compile(fnmatch.translate(flavour.normcase(pat))).fullmatch
_Selector.__init__(self, child_parts, flavour)
def __init__(self, pat, child_parts, case_sensitive):
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
zooba marked this conversation as resolved.
Show resolved Hide resolved
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, case_sensitive)

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
try:
# We must close the scandir() object before proceeding to
# avoid exhausting file descriptors when globbing deep trees.
Expand All @@ -154,20 +144,20 @@ def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
raise
continue
name = entry.name
if self.match(normcase(name)):
if self.match(name):
path = parent_path._make_child_relpath(name)
for p in self.successor._select_from(path, is_dir, exists, scandir, normcase):
for p in self.successor._select_from(path, scandir):
yield p
except PermissionError:
return


class _RecursiveWildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour)
def __init__(self, pat, child_parts, case_sensitive):
_Selector.__init__(self, child_parts, case_sensitive)

def _iterate_directories(self, parent_path, is_dir, scandir):
def _iterate_directories(self, parent_path, scandir):
yield parent_path
try:
# We must close the scandir() object before proceeding to
Expand All @@ -183,18 +173,18 @@ def _iterate_directories(self, parent_path, is_dir, scandir):
raise
if entry_is_dir and not entry.is_symlink():
path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, is_dir, scandir):
for p in self._iterate_directories(path, scandir):
yield p
except PermissionError:
return

def _select_from(self, parent_path, is_dir, exists, scandir, normcase):
def _select_from(self, parent_path, scandir):
try:
yielded = set()
try:
successor_select = self.successor._select_from
for starting_point in self._iterate_directories(parent_path, is_dir, scandir):
for p in successor_select(starting_point, is_dir, exists, scandir, normcase):
for starting_point in self._iterate_directories(parent_path, scandir):
for p in successor_select(starting_point, scandir):
if p not in yielded:
yield p
yielded.add(p)
Expand Down Expand Up @@ -763,7 +753,7 @@ def _scandir(self):
# includes scandir(), which is used to implement glob().
return os.scandir(self)

def glob(self, pattern):
def glob(self, pattern, *, case_sensitive=None):
"""Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern.
"""
Expand All @@ -775,11 +765,13 @@ def glob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
selector = _make_selector(tuple(pattern_parts), case_sensitive)
for p in selector.select_from(self):
yield p

def rglob(self, pattern):
def rglob(self, pattern, *, case_sensitive=None):
"""Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in
this subtree.
Expand All @@ -790,7 +782,9 @@ def rglob(self, pattern):
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour)
if case_sensitive is None:
case_sensitive = _is_case_sensitive(self._flavour)
selector = _make_selector(("**",) + tuple(pattern_parts), case_sensitive)
for p in selector.select_from(self):
yield p

Expand Down
16 changes: 14 additions & 2 deletions 16 Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1777,6 +1777,18 @@ def _check(glob, expected):
else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])

def test_glob_case_sensitive(self):
P = self.cls
def _check(path, pattern, case_sensitive, expected):
actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
expected = {str(P(BASE, q)) for q in expected}
self.assertEqual(actual, expected)
path = P(BASE)
_check(path, "DIRB/FILE*", True, [])
_check(path, "DIRB/FILE*", False, ["dirB/fileB"])
_check(path, "dirb/file*", True, [])
_check(path, "dirb/file*", False, ["dirB/fileB"])

def test_rglob_common(self):
def _check(glob, expected):
self.assertEqual(set(glob), { P(BASE, q) for q in expected })
Expand Down Expand Up @@ -3053,15 +3065,15 @@ def test_glob(self):
self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") })
self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") })
self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") })
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\FILEa"})
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})

def test_rglob(self):
P = self.cls
p = P(BASE, "dirC")
self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") })
self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") })
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\FILEd"})
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})

def test_expanduser(self):
P = self.cls
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`.
Morty Proxy This is a proxified and sanitized view of the page, visit original site.