Merge pull request #1450 from croneter/fix-unicode

Fix UnicodeEncodeError if Plex playlist name contains illegal chars
This commit is contained in:
croneter 2021-04-17 14:19:23 +02:00 committed by GitHub
commit 7d4a144521
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 1566 additions and 24 deletions

View file

@ -0,0 +1,35 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
from .__version__ import __author__, __copyright__, __email__, __license__, __version__
from ._common import (
Platform,
ascii_symbols,
normalize_platform,
replace_unprintable_char,
unprintable_ascii_chars,
validate_null_string,
validate_pathtype,
)
from ._filename import FileNameSanitizer, is_valid_filename, sanitize_filename, validate_filename
from ._filepath import (
FilePathSanitizer,
is_valid_filepath,
sanitize_file_path,
sanitize_filepath,
validate_file_path,
validate_filepath,
)
from ._ltsv import sanitize_ltsv_label, validate_ltsv_label
from ._symbol import replace_symbol, validate_symbol
from .error import (
ErrorReason,
InvalidCharError,
InvalidLengthError,
InvalidReservedNameError,
NullNameError,
ReservedNameError,
ValidationError,
ValidReservedNameError,
)

View file

@ -0,0 +1,6 @@
# Package metadata.
__author__ = "Tsuyoshi Hombashi"
# The copyright line is built from __author__ so the two always agree.
__copyright__ = "Copyright 2016, {}".format(__author__)
__license__ = "MIT License"
__version__ = "2.4.1"
# The maintainer is recorded as the same person as the author.
__maintainer__ = __author__
__email__ = "tsuyoshi.hombashi@gmail.com"

View file

@ -0,0 +1,137 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import abc
import os
from typing import Optional, Tuple, cast
from ._common import PathType, Platform, PlatformType, normalize_platform, unprintable_ascii_chars
from .error import ReservedNameError, ValidationError
class BaseFile:
    """Common state shared by the validators and sanitizers.

    Stores the normalized target platform, the ``check_reserved`` flag and
    the effective minimum/maximum length, and defines the character sets
    that are considered invalid for paths and file names.
    """

    # Every unprintable ASCII character is invalid in any path.
    _INVALID_PATH_CHARS = "".join(unprintable_ascii_chars)
    # A single file name additionally must not contain "/".
    _INVALID_FILENAME_CHARS = _INVALID_PATH_CHARS + "/"
    _INVALID_WIN_PATH_CHARS = _INVALID_PATH_CHARS + ':*?"<>|\t\n\r\x0b\x0c'
    _INVALID_WIN_FILENAME_CHARS = _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\"

    _ERROR_MSG_TEMPLATE = "invalid char found: invalids=({invalid}), value={value}"

    def __init__(
        self,
        min_len: Optional[int],
        max_len: Optional[int],
        check_reserved: bool,
        platform_max_len: Optional[int] = None,
        platform: PlatformType = None,
    ) -> None:
        """Normalize the platform and resolve the effective length limits.

        Args:
            min_len: Minimum length; ``None`` and values below one become one.
            max_len: Maximum length; ``None`` or ``-1`` selects the platform
                limit. Larger values are clamped to the platform limit.
            check_reserved: Whether reserved names should be checked.
            platform_max_len: Upper bound for ``max_len``. When ``None`` the
                default path length of the platform is used.
            platform: Target platform (name, ``Platform`` member or ``None``).

        Raises:
            ValueError: If the resolved limits are inconsistent.
        """
        self.__platform = normalize_platform(platform)
        self._check_reserved = check_reserved

        self._min_len = max(1 if min_len is None else min_len, 1)

        if platform_max_len is None:
            upper_bound = self._get_default_max_path_len()
        else:
            upper_bound = platform_max_len

        requested = upper_bound if max_len in [None, -1] else cast(int, max_len)
        self._max_len = min(requested, upper_bound)

        self._validate_max_len()

    @property
    def platform(self) -> Platform:
        """Normalized target platform."""
        return self.__platform

    @property
    def reserved_keywords(self) -> Tuple[str, ...]:
        """Reserved names for the platform; empty at this level."""
        return tuple()

    @property
    def min_len(self) -> int:
        """Effective minimum length (at least one)."""
        return self._min_len

    @property
    def max_len(self) -> int:
        """Effective maximum length."""
        return self._max_len

    def _is_posix(self) -> bool:
        return self.platform == Platform.POSIX

    def _is_universal(self) -> bool:
        return self.platform == Platform.UNIVERSAL

    def _is_linux(self) -> bool:
        return self.platform == Platform.LINUX

    def _is_windows(self) -> bool:
        return self.platform == Platform.WINDOWS

    def _is_macos(self) -> bool:
        return self.platform == Platform.MACOS

    def _validate_max_len(self) -> None:
        """Reject a maximum below one or below the minimum length."""
        if self.max_len < 1:
            raise ValueError("max_len must be greater or equals to one")

        if self.min_len > self.max_len:
            raise ValueError("min_len must be lower than max_len")

    def _get_default_max_path_len(self) -> int:
        """Return the default maximum path length for the target platform."""
        if self._is_linux():
            limit = 4096
        elif self._is_windows():
            limit = 260
        elif self._is_posix() or self._is_macos():
            limit = 1024
        else:
            limit = 260  # universal

        return limit
class AbstractValidator(BaseFile, metaclass=abc.ABCMeta):
    """Interface for validators; concrete classes implement ``validate``."""

    @abc.abstractmethod
    def validate(self, value: PathType) -> None:  # pragma: no cover
        """Check ``value``; implementations raise when it is not acceptable."""

    def is_valid(self, value: PathType) -> bool:
        """Return whether ``validate`` accepts ``value``.

        A ``TypeError`` or ``ValidationError`` raised by ``validate`` is
        reported as ``False``; any other exception propagates.
        """
        try:
            self.validate(value)
        except (TypeError, ValidationError):
            return False
        else:
            return True

    def _is_reserved_keyword(self, value: str) -> bool:
        """Return whether ``value`` is one of ``reserved_keywords``."""
        reserved = self.reserved_keywords
        return value in reserved
class AbstractSanitizer(BaseFile, metaclass=abc.ABCMeta):
    """Interface for sanitizers; concrete classes implement ``sanitize``."""

    @abc.abstractmethod
    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:  # pragma: no cover
        """Return a sanitized form of ``value``.

        Args:
            value: Name or path to sanitize.
            replacement_text: Text substituted for invalid characters.
        """
class BaseValidator(AbstractValidator):
    """Validator base that adds the reserved-name check."""

    @staticmethod
    def __extract_root_name(path: str) -> str:
        """Return the last path component of ``path`` without its extension."""
        base_name = os.path.basename(path)
        root, _ext = os.path.splitext(base_name)
        return root

    def _validate_reserved_keywords(self, name: str) -> None:
        """Raise if the root name of ``name`` is reserved on the platform.

        The comparison is made against the upper-cased root name. Nothing is
        checked when the instance was created with ``check_reserved`` false.

        Raises:
            ReservedNameError: If the root name is a reserved keyword.
        """
        if not self._check_reserved:
            return

        root_name = self.__extract_root_name(name)
        if not self._is_reserved_keyword(root_name.upper()):
            return

        raise ReservedNameError(
            "'{}' is a reserved name".format(root_name),
            reusable_name=False,
            reserved_name=root_name,
            platform=self.platform,
        )

View file

@ -0,0 +1,147 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import enum
import platform
import re
import string
from pathlib import Path
from typing import Any, List, Optional, Union, cast
# Matches a non-empty string that consists solely of whitespace characters.
_re_whitespaces = re.compile(r"^[\s]+$")
@enum.unique
class Platform(enum.Enum):
    """Target platforms that names and paths can be checked against."""

    POSIX = "POSIX"
    # Returned by normalize_platform() when no specific platform is recognised.
    UNIVERSAL = "universal"
    LINUX = "Linux"
    WINDOWS = "Windows"
    MACOS = "macOS"
# A path may be given as a plain string or as a pathlib.Path.
PathType = Union[str, Path]
# A platform may be given by name, as a Platform member, or left as None.
PlatformType = Union[str, Platform, None]
def is_pathlike_obj(value: PathType) -> bool:
    """Return ``True`` when ``value`` is a ``pathlib.Path`` instance."""
    pathlike = isinstance(value, Path)
    return pathlike
def validate_pathtype(
    text: PathType, allow_whitespaces: bool = False, error_msg: Optional[str] = None
) -> None:
    """Ensure ``text`` is a usable string or ``Path`` value.

    Args:
        text: Value to check.
        allow_whitespaces: Accept a value made up only of whitespace.
        error_msg: Description used for the empty-value error. A default
            message is used when this is empty or ``None``.

    Raises:
        ValidationError: With ``ErrorReason.NULL_NAME`` when ``text`` is
            ``None`` or blank.
        TypeError: When ``text`` is neither a string nor a ``Path``.
    """
    # Imported here rather than at module level, as in the original code.
    from .error import ErrorReason, ValidationError

    accepted = (
        _is_not_null_string(text)
        or is_pathlike_obj(text)
        or (allow_whitespaces and _re_whitespaces.search(str(text)))
    )
    if accepted:
        return

    if not is_null_string(text):
        raise TypeError("text must be a string: actual={}".format(type(text)))

    raise ValidationError(
        description=error_msg or "the value must be a not empty",
        reason=ErrorReason.NULL_NAME,
    )
def validate_null_string(text: PathType, error_msg: Optional[str] = None) -> None:
    """Deprecated alias of ``validate_pathtype`` with whitespace-only values rejected."""
    # Deprecated: alias to validate_pathtype
    validate_pathtype(text, allow_whitespaces=False, error_msg=error_msg)
def preprocess(name: PathType) -> str:
    """Return ``name`` as text, converting ``Path`` objects with ``str()``.

    Values that are not ``Path`` instances are returned unchanged.
    """
    return str(name) if isinstance(name, Path) else cast(str, name)
def is_null_string(value: Any) -> bool:
    """Return ``True`` for ``None`` and for values that are empty once stripped.

    Objects without a ``strip`` method are never considered null.
    """
    if value is None:
        return True

    try:
        remaining = len(value.strip())
    except AttributeError:
        return False

    return remaining == 0
def _is_not_null_string(value: Any) -> bool:
try:
return len(value.strip()) > 0
except AttributeError:
return False
def _get_unprintable_ascii_chars() -> List[str]:
return [chr(c) for c in range(128) if chr(c) not in string.printable]
unprintable_ascii_chars = tuple(_get_unprintable_ascii_chars())
def _get_ascii_symbols() -> List[str]:
    """Return the ASCII characters that are neither unprintable, digits nor letters."""
    alphanumerics = string.digits + string.ascii_letters

    return [
        char
        for char in map(chr, range(128))
        if not (char in unprintable_ascii_chars or char in alphanumerics)
    ]


# Computed once at import time and shared by the other modules.
ascii_symbols = tuple(_get_ascii_symbols())
# Character class matching any single unprintable ASCII character.
__RE_UNPRINTABLE_CHARS = re.compile(
    "[{}]".format(re.escape("".join(unprintable_ascii_chars))), re.UNICODE
)


def replace_unprintable_char(text: str, replacement_text: str = "") -> str:
    """Replace every unprintable ASCII character in ``text``.

    Args:
        text: Input string.
        replacement_text: Text substituted for each unprintable character.

    Raises:
        TypeError: If the substitution fails with ``TypeError`` or
            ``AttributeError``.
    """
    try:
        replaced = __RE_UNPRINTABLE_CHARS.sub(replacement_text, text)
    except (TypeError, AttributeError):
        raise TypeError("text must be a string")

    return replaced
def normalize_platform(name: PlatformType) -> Platform:
    """Convert a platform name into a ``Platform`` member.

    ``Platform`` members are returned unchanged. Names are matched
    case-insensitively after stripping whitespace; ``"auto"`` is resolved
    with ``platform.system()``. Empty or unrecognised names map to
    ``Platform.UNIVERSAL``.
    """
    if isinstance(name, Platform):
        return name

    key = name.strip().lower() if name else name

    if key == "posix":
        return Platform.POSIX

    if key == "auto":
        key = platform.system().lower()

    if not key:
        return Platform.UNIVERSAL

    if key == "linux":
        return Platform.LINUX

    # Any name beginning with "win" is treated as Windows.
    if key.startswith("win"):
        return Platform.WINDOWS

    if key in ("mac", "macos", "darwin"):
        return Platform.MACOS

    return Platform.UNIVERSAL
def findall_to_str(match: List[Any]) -> str:
    """Join the ``repr()`` of each item in ``match`` with ``", "``."""
    quoted = map(repr, match)
    return ", ".join(quoted)

View file

@ -0,0 +1,16 @@
# File names reserved by NTFS.
_NTFS_RESERVED_FILE_NAMES = (
    "$Mft",
    "$MftMirr",
    "$LogFile",
    "$Volume",
    "$AttrDef",
    "$Bitmap",
    "$Boot",
    "$BadClus",
    "$Secure",
    "$Upcase",
    "$Extend",
    "$Quota",
    "$ObjId",
    "$Reparse",
)  # Only in root directory

View file

@ -0,0 +1,341 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import itertools
import ntpath
import posixpath
import re
from pathlib import Path
from typing import Optional, Pattern, Tuple
from ._base import AbstractSanitizer, BaseFile, BaseValidator
from ._common import (
PathType,
Platform,
PlatformType,
findall_to_str,
is_pathlike_obj,
preprocess,
validate_pathtype,
)
from .error import ErrorReason, InvalidCharError, InvalidLengthError, ValidationError
# Default and upper bound for file name lengths in this module.
_DEFAULT_MAX_FILENAME_LEN = 255
# Matches any single character listed in BaseFile._INVALID_FILENAME_CHARS.
_RE_INVALID_FILENAME = re.compile(
    "[{:s}]".format(re.escape(BaseFile._INVALID_FILENAME_CHARS)), re.UNICODE
)
# Matches any single character listed in BaseFile._INVALID_WIN_FILENAME_CHARS.
_RE_INVALID_WIN_FILENAME = re.compile(
    "[{:s}]".format(re.escape(BaseFile._INVALID_WIN_FILENAME_CHARS)), re.UNICODE
)
class FileNameSanitizer(AbstractSanitizer):
    """Turn an arbitrary string or ``Path`` into an acceptable file name."""

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
        platform: PlatformType = None,
        check_reserved: bool = True,
    ) -> None:
        """Set up the length limits, the replacement regexp and a validator.

        Args:
            min_len: Minimum file name length.
            max_len: Maximum file name length; never more than 255.
            platform: Target platform.
            check_reserved: Whether reserved names are rewritten.
        """
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform_max_len=_DEFAULT_MAX_FILENAME_LEN,
            platform=platform,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()
        # Used after replacement to detect reserved names and bad endings.
        self.__validator = FileNameValidator(
            min_len=self.min_len,
            max_len=self.max_len,
            check_reserved=check_reserved,
            platform=self.platform,
        )

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return ``value`` with invalid characters replaced and length capped.

        Args:
            value: File name to sanitize.
            replacement_text: Text substituted for each invalid character.

        Returns:
            ``""`` for a null name, a ``Path`` when ``value`` is a ``Path``,
            otherwise a string.

        Raises:
            ValidationError: If ``value`` fails the type check for a reason
                other than being a null name.
            TypeError: If ``value`` is neither a string nor a ``Path``.
        """
        try:
            validate_pathtype(value, allow_whitespaces=not self._is_windows())
        except ValidationError as e:
            if e.reason == ErrorReason.NULL_NAME:
                return ""
            raise

        sanitized_filename = self._sanitize_regexp.sub(replacement_text, str(value))
        sanitized_filename = sanitized_filename[: self.max_len]

        try:
            self.__validator.validate(sanitized_filename)
        except ValidationError as e:
            if e.reason == ErrorReason.RESERVED_NAME and e.reusable_name is False:
                # Rewrite only the first occurrence of the reserved root name.
                # Without ``count=1`` a later occurrence is rewritten as well,
                # e.g. "CON.CONFIG" became "CON_.CON_FIG" instead of
                # "CON_.CONFIG".
                sanitized_filename = re.sub(
                    re.escape(e.reserved_name),
                    "{}_".format(e.reserved_name),
                    sanitized_filename,
                    count=1,
                )
            elif e.reason == ErrorReason.INVALID_CHARACTER:
                # The validator rejects names ending in a space or a period
                # for the Windows and universal platforms.
                if self.platform in [Platform.UNIVERSAL, Platform.WINDOWS]:
                    sanitized_filename = sanitized_filename.rstrip(" .")
            # Other validation failures are deliberately left untouched.

        if is_pathlike_obj(value):
            return Path(sanitized_filename)

        return sanitized_filename

    def _get_sanitize_regexp(self) -> Pattern:
        """Return the invalid-character pattern for the target platform."""
        if self.platform in [Platform.UNIVERSAL, Platform.WINDOWS]:
            return _RE_INVALID_WIN_FILENAME

        return _RE_INVALID_FILENAME
class FileNameValidator(BaseValidator):
    """Check that a value is an acceptable file name for a platform."""

    # CON, PRN, AUX, CLOCK$, NUL, then COM1-COM9 and LPT1-LPT9.
    _WINDOWS_RESERVED_FILE_NAMES = ("CON", "PRN", "AUX", "CLOCK$", "NUL") + tuple(
        "{:s}{:d}".format(device, index) for device in ("COM", "LPT") for index in range(1, 10)
    )
    _MACOS_RESERVED_FILE_NAMES = (":",)

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
        platform: PlatformType = None,
        check_reserved: bool = True,
    ) -> None:
        """Configure the limits; ``max_len`` is never more than 255."""
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform_max_len=_DEFAULT_MAX_FILENAME_LEN,
            platform=platform,
        )

    @property
    def reserved_keywords(self) -> Tuple[str, ...]:
        """Reserved file names for the target platform."""
        keywords = super().reserved_keywords

        if self._is_universal():
            keywords = (
                keywords + self._WINDOWS_RESERVED_FILE_NAMES + self._MACOS_RESERVED_FILE_NAMES
            )
        elif self._is_windows():
            keywords = keywords + self._WINDOWS_RESERVED_FILE_NAMES
        elif self._is_posix() or self._is_macos():
            keywords = keywords + self._MACOS_RESERVED_FILE_NAMES

        return keywords

    def validate(self, value: PathType) -> None:
        """Raise if ``value`` is not a valid file name.

        Checks run in this order: value type, absolute path, length,
        reserved name, invalid characters.

        Raises:
            ValidationError: Or one of the more specific errors raised by
                the individual checks.
            TypeError: If ``value`` is neither a string nor a ``Path``.
        """
        # Whitespace-only names are rejected only for Windows and universal.
        validate_pathtype(
            value,
            allow_whitespaces=self.platform not in [Platform.UNIVERSAL, Platform.WINDOWS],
        )

        unicode_filename = preprocess(value)
        value_len = len(unicode_filename)

        self.validate_abspath(unicode_filename)

        if value_len > self.max_len:
            raise InvalidLengthError(
                "filename is too long: expected<={:d}, actual={:d}".format(self.max_len, value_len)
            )
        if value_len < self.min_len:
            raise InvalidLengthError(
                "filename is too short: expected>={:d}, actual={:d}".format(self.min_len, value_len)
            )

        self._validate_reserved_keywords(unicode_filename)

        if self._is_universal() or self._is_windows():
            self.__validate_win_filename(unicode_filename)
        else:
            self.__validate_unix_filename(unicode_filename)

    def validate_abspath(self, value: str) -> None:
        """Raise ``ValidationError`` (``FOUND_ABS_PATH``) for an absolute path.

        NT-style absolute paths are only checked for the Windows and
        universal platforms; POSIX-style ones are checked for every platform.
        """
        err = ValidationError(
            description="found an absolute path ({}), expected a filename".format(value),
            platform=self.platform,
            reason=ErrorReason.FOUND_ABS_PATH,
        )

        checks_nt_style = self._is_universal() or self._is_windows()
        if (checks_nt_style and ntpath.isabs(value)) or posixpath.isabs(value):
            raise err

    def __validate_unix_filename(self, unicode_filename: str) -> None:
        """Raise ``InvalidCharError`` if an invalid character is present."""
        invalid_chars = _RE_INVALID_FILENAME.findall(unicode_filename)
        if not invalid_chars:
            return

        raise InvalidCharError(
            self._ERROR_MSG_TEMPLATE.format(
                invalid=findall_to_str(invalid_chars), value=repr(unicode_filename)
            )
        )

    def __validate_win_filename(self, unicode_filename: str) -> None:
        """Apply the Windows rules: invalid characters and bad trailing char."""
        invalid_chars = _RE_INVALID_WIN_FILENAME.findall(unicode_filename)
        if invalid_chars:
            raise InvalidCharError(
                self._ERROR_MSG_TEMPLATE.format(
                    invalid=findall_to_str(invalid_chars), value=repr(unicode_filename)
                ),
                platform=Platform.WINDOWS,
            )

        # "." and ".." are exempt from the trailing-period rule below.
        if unicode_filename in (".", ".."):
            return

        last_char = unicode_filename[-1]
        if last_char in (" ", "."):
            raise InvalidCharError(
                self._ERROR_MSG_TEMPLATE.format(
                    invalid=re.escape(last_char), value=repr(unicode_filename)
                ),
                platform=Platform.WINDOWS,
                description="Do not end a file or directory name with a space or a period",
            )
def validate_filename(
    filename: PathType,
    platform: Optional[str] = None,
    min_len: int = 1,
    max_len: int = _DEFAULT_MAX_FILENAME_LEN,
    check_reserved: bool = True,
) -> None:
    """Verify that ``filename`` is a valid file name.

    Args:
        filename:
            Filename to validate.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        min_len:
            Minimum length of the ``filename``. Values below one are
            treated as one. Defaults to ``1``.
        max_len:
            Maximum length of the ``filename``. Values above ``255`` are
            reduced to ``255``. Defaults to ``255``.
        check_reserved:
            If |True|, check reserved names of the ``platform``.

    Raises:
        ValidationError (ErrorReason.INVALID_LENGTH):
            If the ``filename`` is longer than ``max_len`` characters or
            shorter than ``min_len`` characters.
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If the ``filename`` includes invalid character(s) for a filename:
            |invalid_filename_chars|.
            The following characters are also invalid for Windows platform:
            |invalid_win_filename_chars|.
        ValidationError (ErrorReason.RESERVED_NAME):
            If the ``filename`` equals a name reserved by the OS.
            Windows reserved names are:
            ``"CON"``, ``"PRN"``, ``"AUX"``, ``"CLOCK$"``, ``"NUL"``,
            ``"COM[1-9]"``, ``"LPT[1-9]"``.

    Example:
        :ref:`example-validate-filename`

    See Also:
        `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
        <https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
    """
    validator = FileNameValidator(
        platform=platform, min_len=min_len, max_len=max_len, check_reserved=check_reserved
    )
    validator.validate(filename)
def is_valid_filename(
    filename: PathType,
    platform: Optional[str] = None,
    min_len: int = 1,
    max_len: Optional[int] = None,
    check_reserved: bool = True,
) -> bool:
    """Return whether ``filename`` is a valid file name.

    Args:
        filename:
            A filename to be checked.
        platform:
            Target platform name of the filename.
        min_len:
            Minimum length of the ``filename``.
        max_len:
            Maximum length of the ``filename``; |None| selects ``255``.
        check_reserved:
            If |True|, check reserved names of the ``platform``.

    Example:
        :ref:`example-is-valid-filename`

    See Also:
        :py:func:`.validate_filename()`
    """
    validator = FileNameValidator(
        platform=platform, min_len=min_len, max_len=max_len, check_reserved=check_reserved
    )
    return validator.is_valid(filename)
def sanitize_filename(
    filename: PathType,
    replacement_text: str = "",
    platform: Optional[str] = None,
    max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN,
    check_reserved: bool = True,
) -> PathType:
    """Make a valid filename from a string.

    To make a valid filename the function does:

        - Replace invalid characters in the ``filename`` with the
          ``replacement_text``. Invalid characters are:

            - unprintable characters
            - |invalid_filename_chars|
            - for Windows (or universal) only: |invalid_win_filename_chars|

        - Truncate the result to ``max_len`` characters.
        - Append underscore (``"_"``) to the name if the sanitized name
          is one of the names reserved by operating systems
          (only when ``check_reserved`` is |True|).

    Args:
        filename: Filename to sanitize.
        replacement_text:
            Replacement text for invalid characters. Defaults to ``""``.
        platform:
            Target platform name of the filename.

            .. include:: platform.txt
        max_len:
            Maximum length of the ``filename``. Defaults to ``255``.
        check_reserved:
            If |True|, sanitize reserved names of the ``platform``.

    Returns:
        Same type as the ``filename`` (str or PathLike object):
            Sanitized filename. A null name yields ``""``.

    Raises:
        TypeError:
            If the ``filename`` is neither a string nor a ``Path``.

    Example:
        :ref:`example-sanitize-filename`
    """
    sanitizer = FileNameSanitizer(
        platform=platform, max_len=max_len, check_reserved=check_reserved
    )
    return sanitizer.sanitize(filename, replacement_text)

View file

@ -0,0 +1,427 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import ntpath
import os.path
import posixpath
import re
from pathlib import Path
from typing import List, Optional, Pattern, Tuple # noqa
from ._base import AbstractSanitizer, BaseFile, BaseValidator
from ._common import (
PathType,
Platform,
PlatformType,
findall_to_str,
is_pathlike_obj,
preprocess,
validate_pathtype,
)
from ._const import _NTFS_RESERVED_FILE_NAMES
from ._filename import FileNameSanitizer, FileNameValidator
from .error import (
ErrorReason,
InvalidCharError,
InvalidLengthError,
ReservedNameError,
ValidationError,
)
# Matches any single character listed in BaseFile._INVALID_PATH_CHARS.
_RE_INVALID_PATH = re.compile("[{:s}]".format(re.escape(BaseFile._INVALID_PATH_CHARS)), re.UNICODE)
# Matches any single character listed in BaseFile._INVALID_WIN_PATH_CHARS.
_RE_INVALID_WIN_PATH = re.compile(
    "[{:s}]".format(re.escape(BaseFile._INVALID_WIN_PATH_CHARS)), re.UNICODE
)
class FilePathSanitizer(AbstractSanitizer):
    """Turn an arbitrary string or ``Path`` into an acceptable file path."""

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = None,
        platform: PlatformType = None,
        check_reserved: bool = True,
        normalize: bool = True,
    ) -> None:
        """Prepare the regexp, helper validator/sanitizer and drive splitter.

        Args:
            min_len: Minimum path length.
            max_len: Maximum path length; |None| selects the platform default.
            platform: Target platform.
            check_reserved: Whether reserved names are rewritten.
            normalize: Whether the path is passed through ``os.path.normpath``.
        """
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform=platform,
        )

        self._sanitize_regexp = self._get_sanitize_regexp()
        self.__fpath_validator = FilePathValidator(
            min_len=self.min_len,
            max_len=self.max_len,
            check_reserved=check_reserved,
            platform=self.platform,
        )
        # Each path component is cleaned with the file name rules.
        self.__fname_sanitizer = FileNameSanitizer(
            min_len=self.min_len,
            max_len=self.max_len,
            check_reserved=check_reserved,
            platform=self.platform,
        )
        self.__normalize = normalize

        uses_nt_rules = self._is_universal() or self._is_windows()
        self.__split_drive = ntpath.splitdrive if uses_nt_rules else posixpath.splitdrive

    def sanitize(self, value: PathType, replacement_text: str = "") -> PathType:
        """Return ``value`` with invalid characters and entries cleaned up.

        Args:
            value: File path to sanitize.
            replacement_text: Text substituted for invalid path characters.

        Returns:
            ``""`` for a falsy ``value``, a ``Path`` when ``value`` is a
            ``Path``, otherwise a string. Entries are joined with a
            backslash for the Windows platform and ``"/"`` otherwise.

        Raises:
            ValidationError: If ``value`` is an absolute path that does not
                fit the target platform.
        """
        if not value:
            return ""

        self.__fpath_validator.validate_abspath(value)

        unicode_filepath = preprocess(value)
        if self.__normalize:
            unicode_filepath = os.path.normpath(unicode_filepath)

        drive, unicode_filepath = self.__split_drive(unicode_filepath)
        sanitized_path = self._sanitize_regexp.sub(replacement_text, unicode_filepath)

        path_separator = "\\" if self._is_windows() else "/"

        sanitized_entries = []  # type: List[str]
        if drive:
            sanitized_entries.append(drive)

        for entry in sanitized_path.replace("\\", "/").split("/"):
            if entry in _NTFS_RESERVED_FILE_NAMES:
                sanitized_entries.append("{}_".format(entry))
            else:
                cleaned_entry = str(self.__fname_sanitizer.sanitize(entry))
                if cleaned_entry:
                    sanitized_entries.append(cleaned_entry)
                elif not sanitized_entries:
                    # An empty first entry keeps the leading separator.
                    sanitized_entries.append("")

        sanitized_path = path_separator.join(sanitized_entries)

        return Path(sanitized_path) if is_pathlike_obj(value) else sanitized_path

    def _get_sanitize_regexp(self) -> Pattern:
        """Return the invalid-character pattern for the target platform."""
        if self.platform in [Platform.UNIVERSAL, Platform.WINDOWS]:
            return _RE_INVALID_WIN_PATH

        return _RE_INVALID_PATH
class FilePathValidator(BaseValidator):
    """Check that a value is an acceptable file path for a platform."""

    # Matches "/<name>" exactly, for each NTFS reserved name, ignoring case.
    _RE_NTFS_RESERVED = re.compile(
        "|".join("^/{}$".format(re.escape(pattern)) for pattern in _NTFS_RESERVED_FILE_NAMES),
        re.IGNORECASE,
    )
    _MACOS_RESERVED_FILE_PATHS = ("/", ":")

    def __init__(
        self,
        min_len: Optional[int] = 1,
        max_len: Optional[int] = None,
        platform: PlatformType = None,
        check_reserved: bool = True,
    ) -> None:
        """Configure the limits, a file name validator and the drive splitter."""
        super().__init__(
            min_len=min_len,
            max_len=max_len,
            check_reserved=check_reserved,
            platform=platform,
        )

        # Used to check each path entry against the reserved file names.
        self.__fname_validator = FileNameValidator(
            min_len=min_len, max_len=max_len, check_reserved=check_reserved, platform=platform
        )

        uses_nt_rules = self._is_universal() or self._is_windows()
        self.__split_drive = ntpath.splitdrive if uses_nt_rules else posixpath.splitdrive

    @property
    def reserved_keywords(self) -> Tuple[str, ...]:
        """Reserved path names for the target platform."""
        keywords = super().reserved_keywords

        if self._is_universal() or self._is_posix() or self._is_macos():
            return keywords + self._MACOS_RESERVED_FILE_PATHS

        if self._is_linux():
            return keywords + ("/",)

        return keywords

    def validate(self, value: PathType) -> None:
        """Raise if ``value`` is not a valid file path.

        Checks run in this order: value type, absolute path format, length,
        reserved names (whole path, then each entry), invalid characters.
        A value consisting only of a drive passes without further checks.

        Raises:
            ValidationError: Or one of the more specific errors raised by
                the individual checks.
            TypeError: If ``value`` is neither a string nor a ``Path``.
        """
        # Whitespace-only paths are rejected only for Windows and universal.
        validate_pathtype(
            value,
            allow_whitespaces=self.platform not in [Platform.UNIVERSAL, Platform.WINDOWS],
        )
        self.validate_abspath(value)

        _drive, tail = self.__split_drive(str(value))
        if not tail:
            return

        unicode_filepath = preprocess(os.path.normpath(tail))
        value_len = len(unicode_filepath)

        if value_len > self.max_len:
            raise InvalidLengthError(
                "file path is too long: expected<={:d}, actual={:d}".format(self.max_len, value_len)
            )
        if value_len < self.min_len:
            raise InvalidLengthError(
                "file path is too short: expected>={:d}, actual={:d}".format(
                    self.min_len, value_len
                )
            )

        self._validate_reserved_keywords(unicode_filepath)

        unicode_filepath = unicode_filepath.replace("\\", "/")
        for entry in unicode_filepath.split("/"):
            if entry and entry not in (".", ".."):
                self.__fname_validator._validate_reserved_keywords(entry)

        if self._is_universal() or self._is_windows():
            self.__validate_win_filepath(unicode_filepath)
        else:
            self.__validate_unix_filepath(unicode_filepath)

    def validate_abspath(self, value: PathType) -> None:
        """Raise if ``value`` is an absolute path in the wrong format.

        An NT absolute path on Windows and a POSIX absolute path on Linux
        are accepted. A POSIX absolute path is rejected for Windows and
        universal; an NT absolute path with a drive is rejected for every
        platform other than Windows.

        Raises:
            ValidationError: With ``ErrorReason.MALFORMED_ABS_PATH``.
        """
        value = str(value)
        is_posix_abs = posixpath.isabs(value)
        is_nt_abs = ntpath.isabs(value)
        err_object = ValidationError(
            description=(
                "an invalid absolute file path ({}) for the platform ({}).".format(
                    value, self.platform.value
                )
                + " to avoid the error, specify an appropriate platform correspond"
                + " with the path format, or 'auto'."
            ),
            platform=self.platform,
            reason=ErrorReason.MALFORMED_ABS_PATH,
        )

        if (self._is_windows() and is_nt_abs) or (self._is_linux() and is_posix_abs):
            return

        if self._is_universal() and (is_posix_abs or is_nt_abs):
            # NOTE(review): this error is constructed but never raised, so the
            # statement does not affect control flow. It is kept unchanged to
            # preserve behaviour; confirm whether ``raise`` was intended.
            ValidationError(
                description=(
                    "{}. expected a platform independent file path".format(
                        "POSIX absolute file path found"
                        if is_posix_abs
                        else "NT absolute file path found"
                    )
                ),
                platform=self.platform,
                reason=ErrorReason.MALFORMED_ABS_PATH,
            )

        if (self._is_windows() or self._is_universal()) and is_posix_abs:
            raise err_object

        drive, _tail = ntpath.splitdrive(value)
        if not self._is_windows() and drive and is_nt_abs:
            raise err_object

    def __validate_unix_filepath(self, unicode_filepath: str) -> None:
        """Raise ``InvalidCharError`` if an invalid character is present."""
        invalid_chars = _RE_INVALID_PATH.findall(unicode_filepath)
        if not invalid_chars:
            return

        raise InvalidCharError(
            self._ERROR_MSG_TEMPLATE.format(
                invalid=findall_to_str(invalid_chars), value=repr(unicode_filepath)
            )
        )

    def __validate_win_filepath(self, unicode_filepath: str) -> None:
        """Apply the Windows rules: invalid characters and NTFS reserved names."""
        invalid_chars = _RE_INVALID_WIN_PATH.findall(unicode_filepath)
        if invalid_chars:
            raise InvalidCharError(
                self._ERROR_MSG_TEMPLATE.format(
                    invalid=findall_to_str(invalid_chars), value=repr(unicode_filepath)
                ),
                platform=Platform.WINDOWS,
            )

        _drive, tail = self.__split_drive(unicode_filepath)
        if not tail:
            return

        match_reserved = self._RE_NTFS_RESERVED.search(tail)
        if match_reserved is None:
            return

        reserved_name = match_reserved.group()
        raise ReservedNameError(
            "'{}' is a reserved name".format(reserved_name),
            reusable_name=False,
            reserved_name=reserved_name,
            platform=self.platform,
        )
def validate_filepath(
    file_path: PathType,
    platform: Optional[str] = None,
    min_len: int = 1,
    max_len: Optional[int] = None,
    check_reserved: bool = True,
) -> None:
    """Verify that ``file_path`` is a valid file path.

    Args:
        file_path:
            File path to validate.
        platform:
            Target platform name of the file path.

            .. include:: platform.txt
        min_len:
            Minimum length of the ``file_path``. Values below one are
            treated as one. Defaults to ``1``.
        max_len:
            Maximum length of the ``file_path``. If the value is |None|,
            it is determined by the ``platform``:

                - ``Linux``: 4096
                - ``macOS``: 1024
                - ``Windows``: 260
                - ``universal``: 260
        check_reserved:
            If |True|, check reserved names of the ``platform``.

    Raises:
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If the ``file_path`` includes invalid char(s):
            |invalid_file_path_chars|.
            The following characters are also invalid for Windows platform:
            |invalid_win_file_path_chars|
        ValidationError (ErrorReason.INVALID_LENGTH):
            If the ``file_path`` is longer than ``max_len`` characters.
        ValidationError:
            If ``file_path`` includes invalid values.

    Example:
        :ref:`example-validate-file-path`

    See Also:
        `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs
        <https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file>`__
    """
    validator = FilePathValidator(
        platform=platform, min_len=min_len, max_len=max_len, check_reserved=check_reserved
    )
    validator.validate(file_path)
def validate_file_path(file_path, platform=None, max_path_len=None):
    """Deprecated alias of :py:func:`.validate_filepath()`.

    Args:
        file_path: File path to validate.
        platform: Target platform name of the file path.
        max_path_len: Maximum length of the ``file_path``; forwarded as
            ``max_len``.
    """
    # Deprecated
    # Keyword arguments are required here: the third positional parameter of
    # validate_filepath() is ``min_len``, so passing ``max_path_len``
    # positionally set the minimum length instead of the maximum.
    validate_filepath(file_path, platform=platform, max_len=max_path_len)
def is_valid_filepath(
    file_path: PathType,
    platform: Optional[str] = None,
    min_len: int = 1,
    max_len: Optional[int] = None,
    check_reserved: bool = True,
) -> bool:
    """Return whether ``file_path`` is a valid file path.

    Args:
        file_path:
            A filepath to be checked.
        platform:
            Target platform name of the file path.
        min_len:
            Minimum length of the ``file_path``.
        max_len:
            Maximum length of the ``file_path``; |None| selects the
            platform default.
        check_reserved:
            If |True|, check reserved names of the ``platform``.

    Example:
        :ref:`example-is-valid-filepath`

    See Also:
        :py:func:`.validate_filepath()`
    """
    validator = FilePathValidator(
        platform=platform, min_len=min_len, max_len=max_len, check_reserved=check_reserved
    )
    return validator.is_valid(file_path)
def sanitize_filepath(
    file_path: PathType,
    replacement_text: str = "",
    platform: Optional[str] = None,
    max_len: Optional[int] = None,
    check_reserved: bool = True,
    normalize: bool = True,
) -> PathType:
    """Make a valid file path from a string.

    To make a valid file path the function does:

        - Replace invalid characters for a file path within the ``file_path``
          with the ``replacement_text``. Invalid characters are as follows:

            - unprintable characters
            - |invalid_file_path_chars|
            - for Windows (or universal) only: |invalid_win_file_path_chars|

        - Append underscore (``"_"``) to a path entry if the entry
          is one of the names reserved by operating systems
          (only when ``check_reserved`` is |True|).

    Args:
        file_path:
            File path to sanitize.
        replacement_text:
            Replacement text for invalid characters.
            Defaults to ``""``.
        platform:
            Target platform name of the file path.

            .. include:: platform.txt
        max_len:
            Maximum length setting for the ``file_path``. If the value is
            |None|, ``max_len`` is determined by the ``platform``:

                - ``Linux``: 4096
                - ``macOS``: 1024
                - ``Windows``: 260
                - ``universal``: 260
        check_reserved:
            If |True|, sanitize reserved names of the ``platform``.
        normalize:
            If |True|, normalize the file path.

    Returns:
        Same type as the argument (str or PathLike object):
            Sanitized filepath. A falsy ``file_path`` yields ``""``.

    Raises:
        ValidationError:
            If the ``file_path`` is an absolute path that does not fit the
            ``platform``.

    Example:
        :ref:`example-sanitize-file-path`
    """
    sanitizer = FilePathSanitizer(
        platform=platform, max_len=max_len, check_reserved=check_reserved, normalize=normalize
    )
    return sanitizer.sanitize(file_path, replacement_text)
def sanitize_file_path(file_path, replacement_text="", platform=None, max_path_len=None):
    """Deprecated alias of :py:func:`.sanitize_filepath()`.

    Args:
        file_path: File path to sanitize.
        replacement_text: Replacement text for invalid characters.
        platform: Target platform name of the file path.
        max_path_len: Maximum length of the ``file_path``; forwarded as
            ``max_len``.

    Returns:
        The sanitized file path.
    """
    # Deprecated
    # Keyword arguments are required here: passed positionally, ``platform``
    # was used as the replacement text, ``max_path_len`` as the platform, and
    # ``replacement_text`` was dropped.
    return sanitize_filepath(
        file_path,
        replacement_text=replacement_text,
        platform=platform,
        max_len=max_path_len,
    )

View file

@ -0,0 +1,45 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import re
from ._common import preprocess, validate_pathtype
from .error import InvalidCharError
# Matches any character other than ASCII letters, digits, "_", "." and "-".
__RE_INVALID_LTSV_LABEL = re.compile("[^0-9A-Za-z_.-]", re.UNICODE)
def validate_ltsv_label(label: str) -> None:
    """
    Verify that ``label`` is a valid
    `Labeled Tab-separated Values (LTSV) <http://ltsv.org/>`__ label.

    :param label: Label to validate.
    :raises pathvalidate.ValidationError:
        If ``label`` is empty, or if character(s) other than ASCII letters,
        digits, ``_``, ``.`` and ``-`` are found in it.
    """
    validate_pathtype(label, allow_whitespaces=False, error_msg="label is empty")

    match_list = __RE_INVALID_LTSV_LABEL.findall(preprocess(label))
    if not match_list:
        return

    raise InvalidCharError(
        "invalid character found for a LTSV format label: {}".format(match_list)
    )
def sanitize_ltsv_label(label: str, replacement_text: str = "") -> str:
    """
    Replace every character of ``label`` that is not valid in an LTSV label.

    :param label: Input text.
    :param replacement_text: Replacement text.
    :return: A replacement string.
    :rtype: str
    :raises pathvalidate.ValidationError: If ``label`` is empty.
    """
    validate_pathtype(label, allow_whitespaces=False, error_msg="label is empty")

    label_text = preprocess(label)
    return __RE_INVALID_LTSV_LABEL.sub(replacement_text, label_text)

View file

@ -0,0 +1,110 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import re
from typing import Sequence
from ._common import ascii_symbols, preprocess, unprintable_ascii_chars
from .error import InvalidCharError
# Matches any single unprintable ASCII character.
__RE_UNPRINTABLE = re.compile(
    "[{}]".format(re.escape("".join(unprintable_ascii_chars))), re.UNICODE
)
# Matches any single ASCII symbol or unprintable ASCII character.
__RE_SYMBOL = re.compile(
    "[{}]".format(re.escape("".join(ascii_symbols + unprintable_ascii_chars))), re.UNICODE
)
def validate_unprintable(text: str) -> None:
    """Raise ``InvalidCharError`` if ``text`` contains an unprintable ASCII character."""
    # deprecated
    match_list = __RE_UNPRINTABLE.findall(preprocess(text))
    if not match_list:
        return

    raise InvalidCharError("unprintable character found: {}".format(match_list))
def replace_unprintable(text: str, replacement_text: str = "") -> str:
    """Replace each unprintable ASCII character in ``text`` with ``replacement_text``.

    Raises:
        TypeError: If the substitution fails with ``TypeError`` or
            ``AttributeError``.
    """
    # deprecated
    try:
        replaced = __RE_UNPRINTABLE.sub(replacement_text, preprocess(text))
    except (TypeError, AttributeError):
        raise TypeError("text must be a string")

    return replaced
def validate_symbol(text: str) -> None:
    """
    Verify that no symbol is included in the ``text``.

    Args:
        text:
            Input text to validate.

    Raises:
        ValidationError (ErrorReason.INVALID_CHARACTER):
            If ASCII symbol(s) or unprintable ASCII character(s) are
            included in the ``text``.
    """
    match_list = __RE_SYMBOL.findall(preprocess(text))
    if not match_list:
        return

    raise InvalidCharError("invalid symbols found: {}".format(match_list))
def replace_symbol(
    text: str,
    replacement_text: str = "",
    exclude_symbols: Sequence[str] = (),
    is_replace_consecutive_chars: bool = False,
    is_strip: bool = False,
) -> str:
    """
    Replace all of the symbols in the ``text``.

    Args:
        text:
            Input text.
        replacement_text:
            Replacement text.
        exclude_symbols:
            Symbols that are excluded from the replacement.
        is_replace_consecutive_chars:
            If |True|, replace consecutive multiple ``replacement_text`` characters
            to a single character. Ignored when ``replacement_text`` is empty.
        is_strip:
            If |True|, strip the characters of ``replacement_text`` from the
            beginning/end of the replacement text. Ignored when
            ``replacement_text`` is empty.

    Returns:
        A replacement string.

    Raises:
        TypeError: If ``text`` is not a string.

    Example:

        :ref:`example-sanitize-symbol`
    """
    # The default for ``exclude_symbols`` is an immutable tuple rather than a
    # shared mutable list; an empty value behaves the same as before.
    if exclude_symbols:
        regexp = re.compile(
            "[{}]".format(
                re.escape(
                    "".join(set(ascii_symbols + unprintable_ascii_chars) - set(exclude_symbols))
                )
            ),
            re.UNICODE,
        )
    else:
        regexp = __RE_SYMBOL

    try:
        new_text = regexp.sub(replacement_text, preprocess(text))
    except TypeError:
        raise TypeError("text must be a string")

    # With an empty replacement there is nothing to collapse or strip.
    if not replacement_text:
        return new_text

    if is_replace_consecutive_chars:
        new_text = re.sub("{}+".format(re.escape(replacement_text)), replacement_text, new_text)

    if is_strip:
        new_text = new_text.strip(replacement_text)

    return new_text

View file

@ -0,0 +1,68 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
from argparse import ArgumentTypeError
from ._common import PathType
from ._filename import sanitize_filename, validate_filename
from ._filepath import sanitize_filepath, validate_filepath
from .error import ValidationError
def validate_filename_arg(value: str) -> str:
    """Return ``value`` if it is a valid file name, or ``""`` when it is falsy.

    Raises:
        ArgumentTypeError: If ``validate_filename`` rejects ``value``.
    """
    if value:
        try:
            validate_filename(value)
        except ValidationError as e:
            raise ArgumentTypeError(e)

        return value

    return ""
def validate_filepath_arg(value: str) -> str:
    """Return ``value`` if it is a valid file path, or ``""`` when it is falsy.

    The path is validated with ``platform="auto"``.

    Raises:
        ArgumentTypeError: If ``validate_filepath`` rejects ``value``.
    """
    if value:
        try:
            validate_filepath(value, platform="auto")
        except ValidationError as e:
            raise ArgumentTypeError(e)

        return value

    return ""
def sanitize_filename_arg(value: str) -> PathType:
    """Return the sanitized file name, or ``""`` when ``value`` is falsy."""
    return sanitize_filename(value) if value else ""
def sanitize_filepath_arg(value: str) -> PathType:
    """Return the sanitized file path, or ``""`` when ``value`` is falsy.

    The path is sanitized with ``platform="auto"``.
    """
    return sanitize_filepath(value, platform="auto") if value else ""
def filename(value: PathType) -> PathType:  # pragma: no cover
    """
    Deprecated: validate ``value`` as a file name, then sanitize it.

    Args:
        value:
            Argument to validate and sanitize.

    Returns:
        The result of ``sanitize_filename(value)``.

    Raises:
        ArgumentTypeError:
            If ``validate_filename`` raises ``ValidationError``.
    """
    try:
        validate_filename(value)
    except ValidationError as err:
        raise ArgumentTypeError(err)
    else:
        # Only reached when validation did not raise.
        return sanitize_filename(value)
def filepath(value: PathType) -> PathType:  # pragma: no cover
    """
    Deprecated: validate ``value`` as a file path, then sanitize it.

    Args:
        value:
            Argument to validate and sanitize.

    Returns:
        The result of ``sanitize_filepath(value)``.

    Raises:
        ArgumentTypeError:
            If ``validate_filepath`` raises ``ValidationError``.
    """
    try:
        validate_filepath(value)
    except ValidationError as err:
        raise ArgumentTypeError(err)
    else:
        # Only reached when validation did not raise.
        return sanitize_filepath(value)

View file

@ -0,0 +1,74 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import click
from ._common import PathType
from ._filename import sanitize_filename, validate_filename
from ._filepath import sanitize_filepath, validate_filepath
from .error import ValidationError
def validate_filename_arg(ctx, param, value) -> str:
    """
    Check an option/argument value with ``validate_filename``.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to check.

    Returns:
        ``value`` unchanged when validation succeeds, or an empty string
        when ``value`` is falsy (validation is skipped in that case).

    Raises:
        click.BadParameter:
            If ``validate_filename`` raises ``ValidationError``; the message
            is the string form of that error.
    """
    if value:
        try:
            validate_filename(value)
        except ValidationError as err:
            raise click.BadParameter(str(err))
        return value

    # Every falsy input is normalised to an empty string.
    return ""
def validate_filepath_arg(ctx, param, value) -> str:
    """
    Check an option/argument value with ``validate_filepath``.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to check.

    Returns:
        ``value`` unchanged when validation succeeds, or an empty string
        when ``value`` is falsy (validation is skipped in that case).

    Raises:
        click.BadParameter:
            If ``validate_filepath`` raises ``ValidationError``; the message
            is the string form of that error.
    """
    if value:
        try:
            validate_filepath(value)
        except ValidationError as err:
            raise click.BadParameter(str(err))
        return value

    # Every falsy input is normalised to an empty string.
    return ""
def sanitize_filename_arg(ctx, param, value) -> PathType:
    """
    Pass an option/argument value through ``sanitize_filename``.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to sanitize.

    Returns:
        The result of ``sanitize_filename(value)``, or an empty string when
        ``value`` is falsy.
    """
    return sanitize_filename(value) if value else ""
def sanitize_filepath_arg(ctx, param, value) -> PathType:
    """
    Pass an option/argument value through ``sanitize_filepath``.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to sanitize.

    Returns:
        The result of ``sanitize_filepath(value)``, or an empty string when
        ``value`` is falsy.
    """
    return sanitize_filepath(value) if value else ""
def filename(ctx, param, value):  # pragma: no cover
    """
    Deprecated: validate ``value`` as a file name, then sanitize it.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to validate and sanitize.

    Returns:
        The result of ``sanitize_filename(value)``, or ``None`` when
        ``value`` is falsy.

    Raises:
        click.BadParameter:
            If ``validate_filename`` raises ``ValidationError``; the message
            is the string form of that error.
    """
    if value:
        try:
            validate_filename(value)
        except ValidationError as err:
            raise click.BadParameter(str(err))
        return sanitize_filename(value)

    # Falsy values are neither validated nor sanitized.
    return None
def filepath(ctx, param, value):  # pragma: no cover
    """
    Deprecated: validate ``value`` as a file path, then sanitize it.

    Args:
        ctx:
            Not used by this function.
        param:
            Not used by this function.
        value:
            Value to validate and sanitize.

    Returns:
        The result of ``sanitize_filepath(value)``, or ``None`` when
        ``value`` is falsy.

    Raises:
        click.BadParameter:
            If ``validate_filepath`` raises ``ValidationError``; the message
            is the string form of that error.
    """
    if value:
        try:
            validate_filepath(value)
        except ValidationError as err:
            raise click.BadParameter(str(err))
        return sanitize_filepath(value)

    # Falsy values are neither validated nor sanitized.
    return None

View file

@ -0,0 +1,155 @@
"""
.. codeauthor:: Tsuyoshi Hombashi <tsuyoshi.hombashi@gmail.com>
"""
import enum
from typing import Optional, cast
from ._common import Platform
@enum.unique
class ErrorReason(enum.Enum):
    """
    Reasons a validation can fail.

    Each member's value is the same string as its name; ``enum.unique``
    guarantees that no two members share a value.
    """

    #: found an absolute path when expecting a file name
    FOUND_ABS_PATH = "FOUND_ABS_PATH"

    #: empty value
    NULL_NAME = "NULL_NAME"

    #: found invalid character(s) in a value
    INVALID_CHARACTER = "INVALID_CHARACTER"

    #: found invalid string length
    INVALID_LENGTH = "INVALID_LENGTH"

    #: found invalid absolute path format
    MALFORMED_ABS_PATH = "MALFORMED_ABS_PATH"

    #: found a reserved name by a platform
    RESERVED_NAME = "RESERVED_NAME"
class ValidationError(ValueError):
    """
    Exception class of validation errors.

    Positional arguments become the exception's ``args`` (the message).
    Subclasses in this module forward their positional arguments as a single
    tuple; that wrapping is removed here. A plain string message is kept
    whole.

    Keyword arguments consumed by this class (all default to ``None``):
    ``platform``, ``reason``, ``description``, ``reserved_name`` and
    ``reusable_name``. Any other keyword argument is passed on to
    ``ValueError``.

    .. py:attribute:: reason
        The cause of the error.
        Returns:
            :py:class:`~pathvalidate.error.ErrorReason`:
    """

    def __init__(self, *args, **kwargs):
        self.__platform = kwargs.pop("platform", None)
        self.__reason = kwargs.pop("reason", None)
        self.__description = kwargs.pop("description", None)
        self.__reserved_name = kwargs.pop("reserved_name", None)
        self.__reusable_name = kwargs.pop("reusable_name", None)

        # Only unwrap list/tuple payloads. Splatting an arbitrary first
        # argument would turn a string message into one argument per
        # character and fail outright on non-iterable values.
        if args and isinstance(args[0], (list, tuple)):
            args = tuple(args[0])
            # A chain of subclasses may have wrapped the arguments more
            # than once; peel off the remaining single-tuple layers.
            while len(args) == 1 and isinstance(args[0], tuple):
                args = args[0]

        super().__init__(*args, **kwargs)

    @property
    def platform(self) -> Optional["Platform"]:
        """Value of the ``platform`` keyword, or ``None`` if not given."""
        return self.__platform

    @property
    def reason(self) -> Optional["ErrorReason"]:
        """Value of the ``reason`` keyword, or ``None`` if not given."""
        return self.__reason

    @property
    def description(self) -> Optional[str]:
        """Value of the ``description`` keyword, or ``None`` if not given."""
        return self.__description

    @property
    def reserved_name(self) -> Optional[str]:
        """Value of the ``reserved_name`` keyword, or ``None`` if not given."""
        return self.__reserved_name

    @property
    def reusable_name(self) -> Optional[bool]:
        """Value of the ``reusable_name`` keyword, or ``None`` if not given."""
        return self.__reusable_name

    def __str__(self) -> str:
        """
        Return the message followed by the set attributes, comma-separated.

        Parts appear in the order: message, ``reason``, ``target-platform``,
        ``description``, ``reusable_name``. Falsy parts are omitted, except
        ``reusable_name`` which is omitted only when it is ``None`` (so an
        explicit ``False`` is still shown).
        """
        item_list = []

        message = Exception.__str__(self)
        if message:
            item_list.append(message)
        if self.reason:
            item_list.append("reason={}".format(self.reason.value))
        if self.platform:
            item_list.append("target-platform={}".format(self.platform.value))
        if self.description:
            item_list.append("description={}".format(self.description))
        if self.__reusable_name is not None:
            item_list.append("reusable_name={}".format(self.reusable_name))

        return ", ".join(item_list).strip()

    def __repr__(self, *args, **kwargs):
        """Return the same text as ``__str__``."""
        return self.__str__(*args, **kwargs)
class NullNameError(ValidationError):
    """
    Exception raised when a name is empty.

    The ``reason`` keyword is always set to ``ErrorReason.NULL_NAME``,
    replacing any value supplied by the caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs.update(reason=ErrorReason.NULL_NAME)
        # Positional arguments are handed over as one tuple; the base-class
        # initializer unpacks it again.
        super(NullNameError, self).__init__(args, **kwargs)
class InvalidCharError(ValidationError):
    """
    Exception raised when a string includes invalid character(s).

    The ``reason`` keyword is always set to
    ``ErrorReason.INVALID_CHARACTER``, replacing any value supplied by the
    caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs.update(reason=ErrorReason.INVALID_CHARACTER)
        # Positional arguments are handed over as one tuple; the base-class
        # initializer unpacks it again.
        super(InvalidCharError, self).__init__(args, **kwargs)
class InvalidLengthError(ValidationError):
    """
    Exception raised when a string is too long or too short.

    The ``reason`` keyword is always set to ``ErrorReason.INVALID_LENGTH``,
    replacing any value supplied by the caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs.update(reason=ErrorReason.INVALID_LENGTH)
        # Positional arguments are handed over as one tuple; the base-class
        # initializer unpacks it again.
        super(InvalidLengthError, self).__init__(args, **kwargs)
class ReservedNameError(ValidationError):
    """
    Exception raised when a string matches a reserved name.

    The ``reason`` keyword is always set to ``ErrorReason.RESERVED_NAME``,
    replacing any value supplied by the caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs.update(reason=ErrorReason.RESERVED_NAME)
        # Positional arguments are handed over as one tuple; the base-class
        # initializer unpacks it again.
        super(ReservedNameError, self).__init__(args, **kwargs)
class ValidReservedNameError(ReservedNameError):
    """
    Exception raised when a string matched a reserved name.
    However, it can be used as a name.

    The ``reusable_name`` keyword is always set to ``True``, replacing any
    value supplied by the caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reusable_name"] = True
        # Forward the positional arguments unpacked: ReservedNameError
        # already bundles them into a tuple for the base class. Bundling
        # them here as well would nest the tuple twice and make the message
        # render as "('text',)" instead of "text".
        super().__init__(*args, **kwargs)
class InvalidReservedNameError(ReservedNameError):
    """
    Exception raised when a string matched a reserved name.
    Moreover, the reserved name is invalid as a name.

    The ``reusable_name`` keyword is always set to ``False``, replacing any
    value supplied by the caller.
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["reusable_name"] = False
        # Forward the positional arguments unpacked: ReservedNameError
        # already bundles them into a tuple for the base class. Bundling
        # them here as well would nest the tuple twice and make the message
        # render as "('text',)" instead of "text".
        super().__init__(*args, **kwargs)

View file

View file

@ -26,7 +26,7 @@ import xbmc
import xbmcaddon import xbmcaddon
import xbmcgui import xbmcgui
from . import path_ops, variables as v from . import pathvalidate, path_ops, variables as v
LOG = getLogger('PLEX.utils') LOG = getLogger('PLEX.utils')
@ -422,29 +422,10 @@ def valid_filename(text):
""" """
Return a valid filename after passing it in [unicode]. Return a valid filename after passing it in [unicode].
""" """
# Get rid of all whitespace except a normal space return pathvalidate.sanitize_filename(text,
text = re.sub(r'(?! )\s', '', text) replacement_text='_',
# ASCII characters 0 to 31 (non-printable, just in case) platform='auto',
text = re.sub(u'[\x00-\x1f]', '', text) max_len=248)
if v.DEVICE == 'Windows':
# Whitespace at the end of the filename is illegal
text = text.strip()
# Dot at the end of a filename is illegal
text = re.sub(r'\.+$', '', text)
# Illegal Windows characters
text = re.sub(r'[/\\:*?"<>|\^]', '', text)
elif v.DEVICE == 'MacOSX':
# Colon is illegal
text = re.sub(r':', '', text)
# Files cannot begin with a dot
text = re.sub(r'^\.+', '', text)
else:
# Linux
text = re.sub(r'/', '', text)
# Ensure that filename length is at most 255 chars (including 3 chars for
# filename extension and 1 dot to separate the extension)
text = text[:min(len(text), 251)]
return text
def escape_html(string): def escape_html(string):