diff --git a/resources/lib/pathvalidate/__init__.py b/resources/lib/pathvalidate/__init__.py new file mode 100644 index 00000000..425aaf9d --- /dev/null +++ b/resources/lib/pathvalidate/__init__.py @@ -0,0 +1,35 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +from .__version__ import __author__, __copyright__, __email__, __license__, __version__ +from ._common import ( + Platform, + ascii_symbols, + normalize_platform, + replace_unprintable_char, + unprintable_ascii_chars, + validate_null_string, + validate_pathtype, +) +from ._filename import FileNameSanitizer, is_valid_filename, sanitize_filename, validate_filename +from ._filepath import ( + FilePathSanitizer, + is_valid_filepath, + sanitize_file_path, + sanitize_filepath, + validate_file_path, + validate_filepath, +) +from ._ltsv import sanitize_ltsv_label, validate_ltsv_label +from ._symbol import replace_symbol, validate_symbol +from .error import ( + ErrorReason, + InvalidCharError, + InvalidLengthError, + InvalidReservedNameError, + NullNameError, + ReservedNameError, + ValidationError, + ValidReservedNameError, +) diff --git a/resources/lib/pathvalidate/__version__.py b/resources/lib/pathvalidate/__version__.py new file mode 100644 index 00000000..375dee84 --- /dev/null +++ b/resources/lib/pathvalidate/__version__.py @@ -0,0 +1,6 @@ +__author__ = "Tsuyoshi Hombashi" +__copyright__ = "Copyright 2016, {}".format(__author__) +__license__ = "MIT License" +__version__ = "2.4.1" +__maintainer__ = __author__ +__email__ = "tsuyoshi.hombashi@gmail.com" diff --git a/resources/lib/pathvalidate/_base.py b/resources/lib/pathvalidate/_base.py new file mode 100644 index 00000000..f7451a78 --- /dev/null +++ b/resources/lib/pathvalidate/_base.py @@ -0,0 +1,137 @@ +""" +.. 
class BaseFile:
    """Common state for validators and sanitizers.

    Holds the normalized target platform, the reserved-name switch and the
    effective minimum/maximum length limits.
    """

    # Unprintable ASCII characters are invalid everywhere; the other sets
    # extend that baseline for file names and for Windows targets.
    _INVALID_PATH_CHARS = "".join(unprintable_ascii_chars)
    _INVALID_FILENAME_CHARS = _INVALID_PATH_CHARS + "/"
    _INVALID_WIN_PATH_CHARS = _INVALID_PATH_CHARS + ':*?"<>|\t\n\r\x0b\x0c'
    _INVALID_WIN_FILENAME_CHARS = _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\"

    _ERROR_MSG_TEMPLATE = "invalid char found: invalids=({invalid}), value={value}"

    def __init__(
        self,
        min_len: Optional[int],
        max_len: Optional[int],
        check_reserved: bool,
        platform_max_len: Optional[int] = None,
        platform: PlatformType = None,
    ) -> None:
        """Resolve the platform and clamp the length limits.

        Args:
            min_len: Minimum length; ``None`` and values below one become ``1``.
            max_len: Maximum length; ``None`` or ``-1`` selects the platform limit.
                Larger values are clamped to the platform limit.
            check_reserved: Whether reserved names are to be checked.
            platform_max_len: Upper bound for ``max_len``; when ``None`` the
                per-platform default path length is used.
            platform: Target platform name or ``Platform`` member.

        Raises:
            ValueError: If the resulting ``max_len`` is below one or below ``min_len``.
        """
        self.__platform = normalize_platform(platform)
        self._check_reserved = check_reserved
        self._min_len = max(1 if min_len is None else min_len, 1)

        upper_bound = (
            self._get_default_max_path_len() if platform_max_len is None else platform_max_len
        )
        requested = upper_bound if max_len is None or max_len == -1 else cast(int, max_len)
        self._max_len = min(requested, upper_bound)

        self._validate_max_len()

    @property
    def platform(self) -> Platform:
        return self.__platform

    @property
    def reserved_keywords(self) -> Tuple[str, ...]:
        # The base class reserves nothing; subclasses extend this tuple.
        return ()

    @property
    def min_len(self) -> int:
        return self._min_len

    @property
    def max_len(self) -> int:
        return self._max_len

    def _is_posix(self) -> bool:
        return self.platform == Platform.POSIX

    def _is_universal(self) -> bool:
        return self.platform == Platform.UNIVERSAL

    def _is_linux(self) -> bool:
        return self.platform == Platform.LINUX

    def _is_windows(self) -> bool:
        return self.platform == Platform.WINDOWS

    def _is_macos(self) -> bool:
        return self.platform == Platform.MACOS

    def _validate_max_len(self) -> None:
        """Reject limits that can never be satisfied."""
        lower, upper = self.min_len, self.max_len

        if upper < 1:
            raise ValueError("max_len must be greater or equals to one")

        if lower > upper:
            raise ValueError("min_len must be lower than max_len")

    def _get_default_max_path_len(self) -> int:
        """Return the default maximum path length for the target platform."""
        per_platform = {
            Platform.LINUX: 4096,
            Platform.WINDOWS: 260,
            Platform.POSIX: 1024,
            Platform.MACOS: 1024,
        }

        # Anything not listed (i.e. universal) gets the most restrictive limit.
        return per_platform.get(self.platform, 260)
def is_null_string(value: Any) -> bool:
    """Return ``True`` when *value* is ``None`` or strips down to nothing.

    Objects without a ``strip`` method (e.g. numbers, ``Path`` instances)
    are never considered null strings.
    """
    try:
        # ``None`` short-circuits before ``strip`` is ever looked up.
        return value is None or not len(value.strip())
    except AttributeError:
        return False
def findall_to_str(match: List[Any]) -> str:
    """Render every item of *match* with ``repr`` and join them with ``", "``."""
    return ", ".join(map(repr, match))
class FileNameSanitizer(AbstractSanitizer):
    """Turn arbitrary text into a file name acceptable on the target platform."""

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
        platform: PlatformType = None,
        check_reserved: bool = True,
    ) -> None:
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform_max_len=_DEFAULT_MAX_FILENAME_LEN,
            platform=platform,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()
        # The validator is built from the already-normalized limits/platform so
        # it agrees with this sanitizer.
        self.__validator = FileNameValidator(
            min_len=self.min_len,
            max_len=self.max_len,
            check_reserved=check_reserved,
            platform=self.platform,
        )

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return *value* with invalid file-name characters replaced.

        Args:
            value: File name to sanitize (string or ``Path``).
            replacement_text: Text substituted for each invalid character.

        Returns:
            The sanitized name, as a ``Path`` when *value* was a ``Path``,
            otherwise as a string. An empty/blank input yields ``""``.

        Raises:
            TypeError: Propagated from ``validate_pathtype`` for non-string input.
        """
        try:
            validate_pathtype(value, allow_whitespaces=not self._is_windows())
        except ValidationError as error:
            if error.reason != ErrorReason.NULL_NAME:
                raise
            return ""

        # Replace invalid characters, then truncate to the length limit.
        candidate = self._sanitize_regexp.sub(replacement_text, str(value))[: self.max_len]

        try:
            self.__validator.validate(candidate)
        except ValidationError as error:
            reason = error.reason
            if reason == ErrorReason.RESERVED_NAME and error.reusable_name is False:
                # Make a reserved name usable by appending an underscore to it.
                candidate = re.sub(
                    re.escape(error.reserved_name),
                    "{}_".format(error.reserved_name),
                    candidate,
                )
            elif reason == ErrorReason.INVALID_CHARACTER and self.platform in (
                Platform.UNIVERSAL,
                Platform.WINDOWS,
            ):
                # Windows-style names must not end with a space or a period.
                candidate = candidate.rstrip(" .")

        return Path(candidate) if is_pathlike_obj(value) else candidate

    def _get_sanitize_regexp(self) -> Pattern:
        """Pick the invalid-character pattern matching the target platform."""
        windows_like = self.platform in (Platform.UNIVERSAL, Platform.WINDOWS)
        return _RE_INVALID_WIN_FILENAME if windows_like else _RE_INVALID_FILENAME
def validate_filename(
    filename: PathType,
    platform: Optional[str] = None,
    min_len: int = 1,
    max_len: int = _DEFAULT_MAX_FILENAME_LEN,
    check_reserved: bool = True,
) -> None:
    """Verify that ``filename`` is a valid file name; raise otherwise.

    Args:
        filename:
            Filename to validate.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        min_len:
            Minimum length of the ``filename``. Values below one are treated as one.
            Defaults to ``1``.
        max_len:
            Maximum length of the ``filename``. Values above ``255`` are clamped
            to ``255`` regardless of the ``platform``.
            Defaults to ``255``.
        check_reserved:
            If |True|, check reserved names of the ``platform``.

    Raises:
        ValidationError (ErrorReason.INVALID_LENGTH):
            If the ``filename`` is longer than ``max_len`` characters
            or shorter than ``min_len`` characters.
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If the ``filename`` includes invalid character(s) for a filename:
            |invalid_filename_chars|.
            The following characters are also invalid for Windows platform:
            |invalid_win_filename_chars|.
        ValidationError (ErrorReason.RESERVED_NAME):
            If the ``filename`` equals reserved name by OS.
            Windows reserved name is as follows:
            ``"CON"``, ``"PRN"``, ``"AUX"``, ``"CLOCK$"``, ``"NUL"``,
            ``"COM[1-9]"``, ``"LPT[1-9]"``.
        ValidationError (ErrorReason.FOUND_ABS_PATH):
            If the ``filename`` is an absolute path rather than a bare name.

    Example:
        :ref:`example-validate-filename`

    See Also:
        `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
        <https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
    """

    validator = FileNameValidator(
        platform=platform, min_len=min_len, max_len=max_len, check_reserved=check_reserved
    )
    validator.validate(filename)
def sanitize_filename(
    filename: PathType,
    replacement_text: str = "",
    platform: Optional[str] = None,
    max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
    check_reserved: bool = True,
) -> PathType:
    """Make a valid filename from a string.

    The sanitization performs the following steps:

    - Replace characters that are invalid in file names with the
      ``replacement_text``. Invalid characters are:

        - unprintable characters
        - |invalid_filename_chars|
        - for Windows (or universal) only: |invalid_win_filename_chars|

    - Append underscore (``"_"``) at the tail of the name if sanitized name
      is one of the reserved names by operating systems
      (only when ``check_reserved`` is |True|).

    Args:
        filename: Filename to sanitize.
        replacement_text:
            Replacement text for invalid characters. Defaults to ``""``.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        max_len:
            Maximum length of the ``filename``. The name is truncated when it
            exceeds this value.
            Defaults to ``255``.
        check_reserved:
            If |True|, sanitize reserved names of the ``platform``.

    Returns:
        Same type as the ``filename`` (str or PathLike object):
            Sanitized filename.

    Raises:
        TypeError:
            If the ``filename`` is neither a string nor a path-like object.
        ValidationError:
            Propagated when the input check fails for a reason other than
            an empty name.

    Example:
        :ref:`example-sanitize-filename`
    """

    sanitizer = FileNameSanitizer(
        platform=platform, max_len=max_len, check_reserved=check_reserved
    )

    return sanitizer.sanitize(filename, replacement_text)
class FilePathSanitizer(AbstractSanitizer):
    """Turn arbitrary text into a file path acceptable on the target platform."""

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = None,
        platform: PlatformType = None,
        check_reserved: bool = True,
        normalize: bool = True,
    ) -> None:
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform=platform,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()

        # Both helpers share the limits/platform already normalized by the base class.
        shared_options = dict(
            min_len=self.min_len,
            max_len=self.max_len,
            check_reserved=check_reserved,
            platform=self.platform,
        )
        self.__fpath_validator = FilePathValidator(**shared_options)
        self.__fname_sanitizer = FileNameSanitizer(**shared_options)
        self.__normalize = normalize

        windows_like = self._is_universal() or self._is_windows()
        self.__split_drive = ntpath.splitdrive if windows_like else posixpath.splitdrive

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return *value* with invalid path characters and entries repaired.

        Args:
            value: File path to sanitize (string or ``Path``).
            replacement_text: Text substituted for each invalid path character.

        Returns:
            The sanitized path, as a ``Path`` when *value* was a ``Path``,
            otherwise as a string. A falsy *value* yields ``""``.

        Raises:
            ValidationError: If *value* is an absolute path that does not fit
                the target platform (raised by the path validator).
        """
        if not value:
            return ""

        self.__fpath_validator.validate_abspath(value)

        path_text = preprocess(value)
        if self.__normalize:
            path_text = os.path.normpath(path_text)

        drive, tail = self.__split_drive(path_text)
        cleaned_tail = self._sanitize_regexp.sub(replacement_text, tail)
        separator = "\\" if self._is_windows() else "/"

        entries = [drive] if drive else []  # type: List[str]
        for entry in cleaned_tail.replace("\\", "/").split("/"):
            if entry in _NTFS_RESERVED_FILE_NAMES:
                entries.append("{}_".format(entry))
                continue

            clean_entry = str(self.__fname_sanitizer.sanitize(entry))
            if clean_entry:
                entries.append(clean_entry)
            elif not entries:
                # Keep a leading empty entry so a rooted path stays rooted after join.
                entries.append("")

        joined = separator.join(entries)

        return Path(joined) if is_pathlike_obj(value) else joined

    def _get_sanitize_regexp(self) -> Pattern:
        """Pick the invalid-character pattern matching the target platform."""
        windows_like = self.platform in (Platform.UNIVERSAL, Platform.WINDOWS)
        return _RE_INVALID_WIN_PATH if windows_like else _RE_INVALID_PATH
def validate_abspath(self, value: PathType) -> None:
    """Reject absolute paths whose style does not fit the target platform.

    Method of ``FilePathValidator``. Relative paths always pass. An
    NT-absolute path passes on the Windows platform and a POSIX-absolute
    path passes on the Linux platform.

    Args:
        value: File path to check (string or ``Path``).

    Raises:
        ValidationError (ErrorReason.MALFORMED_ABS_PATH):
            - on the universal platform, for any absolute path;
            - on the Windows platform, for a POSIX-absolute path that is
              not also NT-absolute;
            - on non-Windows platforms, for an NT-absolute path that
              carries a drive.
    """
    value = str(value)
    is_posix_abs = posixpath.isabs(value)
    is_nt_abs = ntpath.isabs(value)
    err_object = ValidationError(
        description=(
            "an invalid absolute file path ({}) for the platform ({}).".format(
                value, self.platform.value
            )
            + " to avoid the error, specify an appropriate platform correspond"
            + " with the path format, or 'auto'."
        ),
        platform=self.platform,
        reason=ErrorReason.MALFORMED_ABS_PATH,
    )

    if any([self._is_windows() and is_nt_abs, self._is_linux() and is_posix_abs]):
        return

    if self._is_universal() and any([is_posix_abs, is_nt_abs]):
        # Fix: the error used to be constructed and silently discarded here
        # (missing ``raise``), so this message was never reported.
        raise ValidationError(
            description=(
                "{}. expected a platform independent file path".format(
                    "POSIX absolute file path found"
                    if is_posix_abs
                    else "NT absolute file path found"
                )
            ),
            platform=self.platform,
            reason=ErrorReason.MALFORMED_ABS_PATH,
        )

    # The universal platform with an absolute path has already raised above,
    # so only the Windows platform can reach this check with a POSIX path.
    if self._is_windows() and is_posix_abs:
        raise err_object

    drive, _tail = ntpath.splitdrive(value)
    if not self._is_windows() and drive and is_nt_abs:
        raise err_object
def validate_file_path(file_path, platform=None, max_path_len=None):
    """Deprecated alias of ``validate_filepath``.

    Args:
        file_path: File path to validate.
        platform: Target platform name of the file path.
        max_path_len: Maximum length of the ``file_path``; forwarded as
            ``max_len``. ``None`` selects the platform default.

    Raises:
        ValidationError: If ``file_path`` is not a valid file path.
    """
    # Deprecated
    # Fix: arguments are forwarded by keyword. Positionally, ``max_path_len``
    # landed in the ``min_len`` slot of ``validate_filepath``.
    validate_filepath(file_path, platform=platform, max_len=max_path_len)
def sanitize_file_path(file_path, replacement_text="", platform=None, max_path_len=None):
    """Deprecated alias of ``sanitize_filepath``.

    Args:
        file_path: File path to sanitize.
        replacement_text: Replacement text for invalid characters.
        platform: Target platform name of the file path.
        max_path_len: Maximum length of the ``file_path``; forwarded as
            ``max_len``. ``None`` selects the platform default.

    Returns:
        The sanitized file path, same type as ``file_path``.
    """
    # Deprecated
    # Fix: arguments are forwarded by keyword. Positionally, ``replacement_text``
    # was dropped, ``platform`` was used as the replacement text and
    # ``max_path_len`` as the platform.
    return sanitize_filepath(
        file_path,
        replacement_text=replacement_text,
        platform=platform,
        max_len=max_path_len,
    )
def validate_ltsv_label(label: str) -> None:
    """
    Check that ``label`` is usable as a
    `Labeled Tab-separated Values (LTSV) <http://ltsv.org/>`__ label.

    Only the characters ``0-9``, ``A-Z``, ``a-z``, ``_``, ``.`` and ``-`` are accepted.

    :param label: Label to validate.
    :raises pathvalidate.ValidationError:
        If the ``label`` is empty, or if invalid character(s) found in the
        ``label`` for a LTSV format label.
    """

    validate_pathtype(label, allow_whitespaces=False, error_msg="label is empty")

    invalid_chars = __RE_INVALID_LTSV_LABEL.findall(preprocess(label))
    if not invalid_chars:
        return

    raise InvalidCharError(
        "invalid character found for a LTSV format label: {}".format(invalid_chars)
    )
def replace_symbol(
    text: str,
    replacement_text: str = "",
    exclude_symbols: Sequence[str] = (),
    is_replace_consecutive_chars: bool = False,
    is_strip: bool = False,
) -> str:
    """
    Replace all of the symbols in the ``text``.

    Args:
        text:
            Input text.
        replacement_text:
            Replacement text.
        exclude_symbols:
            Symbols that exclude from the replacement.
        is_replace_consecutive_chars:
            If |True|, replace consecutive multiple ``replacement_text`` characters
            to a single character.
        is_strip:
            If |True|, strip ``replacement_text`` from the beginning/end of the replacement text.

    Returns:
        A replacement string.

    Raises:
        TypeError: If ``text`` is not a string.

    Example:

        :ref:`example-sanitize-symbol`
    """

    # The default for ``exclude_symbols`` is an immutable tuple instead of a
    # shared mutable list; an empty sequence still selects the precompiled pattern.
    if exclude_symbols:
        target_chars = set(ascii_symbols + unprintable_ascii_chars) - set(exclude_symbols)
        # Sorted so the generated pattern does not depend on set iteration order.
        regexp = re.compile(
            "[{}]".format(re.escape("".join(sorted(target_chars)))),
            re.UNICODE,
        )
    else:
        regexp = __RE_SYMBOL

    try:
        new_text = regexp.sub(replacement_text, preprocess(text))
    except TypeError:
        raise TypeError("text must be a string")

    # With an empty replacement there is nothing to collapse or strip.
    if not replacement_text:
        return new_text

    if is_replace_consecutive_chars:
        new_text = re.sub("{}+".format(re.escape(replacement_text)), replacement_text, new_text)

    if is_strip:
        new_text = new_text.strip(replacement_text)

    return new_text
sanitize_filename(value) + + +def filepath(value: PathType) -> PathType: # pragma: no cover + # Deprecated + try: + validate_filepath(value) + except ValidationError as e: + raise ArgumentTypeError(e) + + return sanitize_filepath(value) diff --git a/resources/lib/pathvalidate/click.py b/resources/lib/pathvalidate/click.py new file mode 100644 index 00000000..63c095ef --- /dev/null +++ b/resources/lib/pathvalidate/click.py @@ -0,0 +1,74 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +import click + +from ._common import PathType +from ._filename import sanitize_filename, validate_filename +from ._filepath import sanitize_filepath, validate_filepath +from .error import ValidationError + + +def validate_filename_arg(ctx, param, value) -> str: + if not value: + return "" + + try: + validate_filename(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return value + + +def validate_filepath_arg(ctx, param, value) -> str: + if not value: + return "" + + try: + validate_filepath(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return value + + +def sanitize_filename_arg(ctx, param, value) -> PathType: + if not value: + return "" + + return sanitize_filename(value) + + +def sanitize_filepath_arg(ctx, param, value) -> PathType: + if not value: + return "" + + return sanitize_filepath(value) + + +def filename(ctx, param, value): # pragma: no cover + # Deprecated + if not value: + return None + + try: + validate_filename(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return sanitize_filename(value) + + +def filepath(ctx, param, value): # pragma: no cover + # Deprecated + if not value: + return None + + try: + validate_filepath(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return sanitize_filepath(value) diff --git a/resources/lib/pathvalidate/error.py b/resources/lib/pathvalidate/error.py new file mode 100644 index 00000000..331df904 --- /dev/null +++ 
# ---------------------------------------------------------------------------
# resources/lib/pathvalidate/error.py
# ---------------------------------------------------------------------------
import enum
from typing import TYPE_CHECKING, Optional, cast

if TYPE_CHECKING:
    # Platform is referenced in annotations only.
    from ._common import Platform


@enum.unique
class ErrorReason(enum.Enum):
    """
    Validation error reasons.
    """

    FOUND_ABS_PATH = "FOUND_ABS_PATH"  #: found an absolute path when expecting a file name
    NULL_NAME = "NULL_NAME"  #: empty value
    INVALID_CHARACTER = "INVALID_CHARACTER"  #: found invalid characters(s) in a value
    INVALID_LENGTH = "INVALID_LENGTH"  #: found invalid string length
    MALFORMED_ABS_PATH = "MALFORMED_ABS_PATH"  #: found invalid absolute path format
    RESERVED_NAME = "RESERVED_NAME"  #: found a reserved name by a platform


class ValidationError(ValueError):
    """
    Exception class of validation errors.

    The keyword arguments ``platform``, ``reason``, ``description``,
    ``reserved_name`` and ``reusable_name`` are consumed by this class and
    exposed through the read-only properties of the same name (``None`` when
    not supplied). All remaining arguments are forwarded to
    :py:class:`ValueError`.

    .. py:attribute:: reason

        The cause of the error.

        Returns:
            :py:class:`~pathvalidate.error.ErrorReason`:
    """

    @property
    def platform(self) -> "Platform":
        return self.__platform

    @property
    def reason(self) -> Optional[ErrorReason]:
        return self.__reason

    @property
    def description(self) -> str:
        return self.__description

    @property
    def reserved_name(self) -> str:
        return self.__reserved_name

    @property
    def reusable_name(self) -> bool:
        return self.__reusable_name

    def __init__(self, *args, **kwargs):
        self.__platform = kwargs.pop("platform", None)
        self.__reason = kwargs.pop("reason", None)
        self.__description = kwargs.pop("description", None)
        self.__reserved_name = kwargs.pop("reserved_name", None)
        self.__reusable_name = kwargs.pop("reusable_name", None)

        # Callers may hand over their positional arguments wrapped in a single
        # tuple/list (``super().__init__(args, ...)``), possibly nested once
        # per inheritance level. Unwrap those containers, but never unpack a
        # plain string: ``*"msg"`` would split the message into characters.
        while len(args) == 1 and isinstance(args[0], (tuple, list)):
            args = tuple(args[0])

        super().__init__(*args, **kwargs)

    def __str__(self) -> str:
        item_list = []

        message = Exception.__str__(self)
        if message:
            item_list.append(message)

        if self.reason:
            item_list.append("reason={}".format(cast(ErrorReason, self.reason).value))
        if self.platform:
            item_list.append("target-platform={}".format(self.platform.value))
        if self.description:
            item_list.append("description={}".format(self.description))
        # ``False`` is meaningful here, so only ``None`` is skipped.
        if self.__reusable_name is not None:
            item_list.append("reusable_name={}".format(self.reusable_name))

        return ", ".join(item_list).strip()

    def __repr__(self, *args, **kwargs):
        return self.__str__(*args, **kwargs)


class NullNameError(ValidationError):
    """
    Exception raised when a name is empty.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reason"] = ErrorReason.NULL_NAME

        super().__init__(*args, **kwargs)


class InvalidCharError(ValidationError):
    """
    Exception raised when includes invalid character(s) within a string.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reason"] = ErrorReason.INVALID_CHARACTER

        super().__init__(*args, **kwargs)


class InvalidLengthError(ValidationError):
    """
    Exception raised when a string too long/short.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reason"] = ErrorReason.INVALID_LENGTH

        super().__init__(*args, **kwargs)


class ReservedNameError(ValidationError):
    """
    Exception raised when a string matched a reserved name.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reason"] = ErrorReason.RESERVED_NAME

        super().__init__(*args, **kwargs)


class ValidReservedNameError(ReservedNameError):
    """
    Exception raised when a string matched a reserved name.
    However, it can be used as a name.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reusable_name"] = True

        super().__init__(*args, **kwargs)


class InvalidReservedNameError(ReservedNameError):
    """
    Exception raised when a string matched a reserved name.
    Moreover, the reserved name is invalid as a name.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reusable_name"] = False

        super().__init__(*args, **kwargs)


# ---------------------------------------------------------------------------
# resources/lib/utils.py (hunk @@ -422 @@, new side only)
# ---------------------------------------------------------------------------
def valid_filename(text):
    """
    Return ``text`` turned into a valid filename.

    Delegates to ``pathvalidate.sanitize_filename`` with ``'_'`` as the
    replacement text, ``platform='auto'`` and ``max_len=248``.
    """
    from . import pathvalidate

    # NOTE(review): the reason for 248 is not stated. The implementation this
    # replaces truncated to 251 characters to leave room for a dot and a
    # 3-character extension -- confirm the new limit is intended.
    return pathvalidate.sanitize_filename(text,
                                          replacement_text='_',
                                          platform='auto',
                                          max_len=248)