2018-04-28 17:12:29 +10:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# patterns.py: Common wildcard searching/filtering functionality for files.
|
|
|
|
#
|
|
|
|
# Copyright (C) 2010 Yesudeep Mangalapilly <yesudeep@gmail.com>
|
|
|
|
#
|
|
|
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
# of this software and associated documentation files (the "Software"), to deal
|
|
|
|
# in the Software without restriction, including without limitation the rights
|
|
|
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
# copies of the Software, and to permit persons to whom the Software is
|
|
|
|
# furnished to do so, subject to the following conditions:
|
|
|
|
#
|
|
|
|
# The above copyright notice and this permission notice shall be included in
|
|
|
|
# all copies or substantial portions of the Software.
|
|
|
|
#
|
|
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
# THE SOFTWARE.
|
|
|
|
|
|
|
|
"""
|
|
|
|
:module: pathtools.patterns
|
|
|
|
:synopsis: Wildcard pattern matching and filtering functions for paths.
|
|
|
|
:author: Yesudeep Mangalapilly <yesudeep@gmail.com>
|
|
|
|
|
|
|
|
Functions
|
|
|
|
---------
|
|
|
|
.. autofunction:: match_path
|
|
|
|
.. autofunction:: match_path_against
|
|
|
|
.. autofunction:: filter_paths
.. autofunction:: match_any_paths
|
|
|
|
"""
|
|
|
|
|
2020-12-19 03:10:20 +11:00
|
|
|
from builtins import map
|
2018-04-28 17:12:29 +10:00
|
|
|
from fnmatch import fnmatch, fnmatchcase
|
|
|
|
|
|
|
|
# Public names exported by a star-import of this module.
__all__ = [
    'match_path',
    'match_path_against',
    'match_any_paths',
    'filter_paths',
]
|
|
|
|
|
|
|
|
|
|
|
|
def _string_lower(s):
|
|
|
|
"""
|
|
|
|
Convenience function to lowercase a string (the :mod:`string` module is
|
|
|
|
deprecated/removed in Python 3.0).
|
|
|
|
|
|
|
|
:param s:
|
|
|
|
The string which will be lowercased.
|
|
|
|
:returns:
|
|
|
|
Lowercased copy of string s.
|
|
|
|
"""
|
|
|
|
return s.lower()
|
|
|
|
|
|
|
|
|
|
|
|
def match_path_against(pathname, patterns, case_sensitive=True):
    r"""
    Tell whether ``pathname`` matches at least one wildcard pattern.

    Duplicate patterns are collapsed before matching. In case-sensitive mode
    each pattern is tested with ``fnmatchcase``. Otherwise both the pathname
    and every pattern are lowercased and then tested with ``fnmatch``.

    :param pathname:
        The path name to test.
    :param patterns:
        An iterable of wildcard patterns to test the path name against.
    :param case_sensitive:
        ``True`` if the matching should be case-sensitive; ``False`` otherwise.
    :returns:
        ``True`` if any pattern matches; ``False`` otherwise (including when
        ``patterns`` is empty).

    Doctests::
        >>> match_path_against("/home/username/foobar/blah.py", ["*.py", "*.txt"], False)
        True
        >>> match_path_against("/home/username/foobar/blah.py", ["*.PY", "*.txt"], True)
        False
        >>> match_path_against("/home/username/foobar/blah.py", ["*.PY", "*.txt"], False)
        True
        >>> match_path_against("C:\\windows\\blah\\BLAH.PY", ["*.py", "*.txt"], True)
        False
        >>> match_path_against("C:\\windows\\blah\\BLAH.PY", ["*.py", "*.txt"], False)
        True
    """
    unique_patterns = set(patterns)
    if case_sensitive:
        return any(fnmatchcase(pathname, candidate)
                   for candidate in unique_patterns)
    # Case-insensitive: fold both sides to lowercase before comparing.
    folded_path = pathname.lower()
    return any(fnmatch(folded_path, candidate.lower())
               for candidate in unique_patterns)
|
|
|
|
|
|
|
|
|
|
|
|
def _match_path(pathname,
                included_patterns,
                excluded_patterns,
                case_sensitive=True):
    """Internal counterpart of :func:`match_path` that requires both pattern
    collections to be supplied (no ``None`` defaults are filled in).

    Both collections are turned into sets, lowercased first when matching is
    case-insensitive. A pattern present in both sets is treated as an error.

    :param pathname:
        The path name to test.
    :param included_patterns:
        Iterable of wildcard patterns the path name must match.
    :param excluded_patterns:
        Iterable of wildcard patterns the path name must not match.
    :param case_sensitive:
        ``True`` if matching should be case-sensitive; ``False`` otherwise.
    :returns:
        ``True`` if the path name matches an included pattern and no excluded
        pattern; ``False`` otherwise.
    :raises:
        ValueError if the same pattern is both included and excluded.

    Doctests::
        >>> _match_path("/users/gorakhargosh/foobar.py", ["*.py"], ["*.PY"], True)
        True
        >>> _match_path("/users/gorakhargosh/FOOBAR.PY", ["*.py"], ["*.PY"], True)
        False
        >>> _match_path("/users/gorakhargosh/foobar/", ["*.py"], ["*.txt"], False)
        False
        >>> _match_path("/users/gorakhargosh/FOOBAR.PY", ["*.py"], ["*.PY"], False)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        ValueError: conflicting patterns `...` included and excluded
    """
    if case_sensitive:
        wanted = set(included_patterns)
        unwanted = set(excluded_patterns)
    else:
        wanted = {pattern.lower() for pattern in included_patterns}
        unwanted = {pattern.lower() for pattern in excluded_patterns}

    overlap = wanted & unwanted
    if overlap:
        raise ValueError('conflicting patterns `%s` included and excluded'
                         % overlap)

    # The exclusion test only runs once an inclusion pattern has matched.
    if not match_path_against(pathname, wanted, case_sensitive):
        return False
    return not match_path_against(pathname, unwanted, case_sensitive)
|
|
|
|
|
|
|
|
|
|
|
|
def match_path(pathname,
               included_patterns=None,
               excluded_patterns=None,
               case_sensitive=True):
    """
    Check a pathname against sets of accepted and ignored wildcard patterns.

    :param pathname:
        The path name to test.
    :param included_patterns:
        Wildcard patterns the path name is allowed to match. When ``None``,
        ``["*"]`` is used, so every path name is accepted by this step.
    :param excluded_patterns:
        Wildcard patterns that cause the path name to be rejected. When
        ``None``, nothing is excluded.
    :param case_sensitive:
        ``True`` if matching should be case-sensitive; ``False`` otherwise.
    :returns:
        ``True`` if the pathname matches; ``False`` otherwise.
    :raises:
        ValueError if included patterns and excluded patterns contain the
        same pattern.

    Doctests::
        >>> match_path("/Users/gorakhargosh/foobar.py")
        True
        >>> match_path("/Users/gorakhargosh/foobar.py", case_sensitive=False)
        True
        >>> match_path("/users/gorakhargosh/foobar.py", ["*.py"], ["*.PY"], True)
        True
        >>> match_path("/users/gorakhargosh/FOOBAR.PY", ["*.py"], ["*.PY"], True)
        False
        >>> match_path("/users/gorakhargosh/foobar/", ["*.py"], ["*.txt"], False)
        False
        >>> match_path("/users/gorakhargosh/FOOBAR.PY", ["*.py"], ["*.PY"], False)  # doctest: +ELLIPSIS
        Traceback (most recent call last):
            ...
        ValueError: conflicting patterns `...` included and excluded
    """
    if included_patterns is None:
        included_patterns = ["*"]
    if excluded_patterns is None:
        excluded_patterns = []
    return _match_path(pathname,
                       included_patterns,
                       excluded_patterns,
                       case_sensitive)
|
|
|
|
|
|
|
|
|
|
|
|
def filter_paths(pathnames,
                 included_patterns=None,
                 excluded_patterns=None,
                 case_sensitive=True):
    """
    Lazily yield the path names that pass the included/excluded pattern
    filters.

    This is a generator: path names are tested one at a time as the result is
    iterated, in the order ``pathnames`` produces them.

    :param pathnames:
        An iterable of path names to filter.
    :param included_patterns:
        Allow filenames matching wildcard patterns specified in this list.
        If no pattern list is specified, ["*"] is used as the default pattern,
        which matches all files.
    :param excluded_patterns:
        Ignores filenames matching wildcard patterns specified in this list.
        If no pattern list is specified, no files are ignored.
    :param case_sensitive:
        ``True`` if matching should be case-sensitive; ``False`` otherwise.
    :returns:
        A generator over the path names that matched an included pattern and
        no excluded pattern.

    Doctests::
        >>> pathnames = set(["/users/gorakhargosh/foobar.py", "/var/cache/pdnsd.status", "/etc/pdnsd.conf", "/usr/local/bin/python"])
        >>> set(filter_paths(pathnames)) == pathnames
        True
        >>> set(filter_paths(pathnames, case_sensitive=False)) == pathnames
        True
        >>> set(filter_paths(pathnames, ["*.py", "*.conf"], ["*.status"], case_sensitive=True)) == set(["/users/gorakhargosh/foobar.py", "/etc/pdnsd.conf"])
        True
    """
    if included_patterns is None:
        included_patterns = ["*"]
    if excluded_patterns is None:
        excluded_patterns = []

    for candidate in pathnames:
        # Defaults are already resolved above, so the internal matcher is
        # called directly instead of the public match_path.
        if not _match_path(candidate, included_patterns, excluded_patterns,
                           case_sensitive):
            continue
        yield candidate
|
|
|
|
|
|
|
|
def match_any_paths(pathnames,
                    included_patterns=None,
                    excluded_patterns=None,
                    case_sensitive=True):
    """
    Tell whether at least one of the given path names passes the
    included/excluded pattern filters.

    Path names are tested in iteration order and testing stops at the first
    one that matches.

    :param pathnames:
        An iterable of path names to test.
    :param included_patterns:
        Allow filenames matching wildcard patterns specified in this list.
        If no pattern list is specified, ["*"] is used as the default pattern,
        which matches all files.
    :param excluded_patterns:
        Ignores filenames matching wildcard patterns specified in this list.
        If no pattern list is specified, no files are ignored.
    :param case_sensitive:
        ``True`` if matching should be case-sensitive; ``False`` otherwise.
    :returns:
        ``True`` if any of the paths matches; ``False`` otherwise.

    Doctests::
        >>> pathnames = set(["/users/gorakhargosh/foobar.py", "/var/cache/pdnsd.status", "/etc/pdnsd.conf", "/usr/local/bin/python"])
        >>> match_any_paths(pathnames)
        True
        >>> match_any_paths(pathnames, case_sensitive=False)
        True
        >>> match_any_paths(pathnames, ["*.py", "*.conf"], ["*.status"], case_sensitive=True)
        True
        >>> match_any_paths(pathnames, ["*.txt"], case_sensitive=False)
        False
        >>> match_any_paths(pathnames, ["*.txt"], case_sensitive=True)
        False
    """
    if included_patterns is None:
        included_patterns = ["*"]
    if excluded_patterns is None:
        excluded_patterns = []

    # Defaults are already resolved above, so the internal matcher is called
    # directly instead of the public match_path.
    return any(
        _match_path(candidate, included_patterns, excluded_patterns,
                    case_sensitive)
        for candidate in pathnames
    )
|