2020-12-19 03:50:39 +11:00
|
|
|
# coding: utf-8
|
2018-04-28 17:12:29 +10:00
|
|
|
#
|
|
|
|
# Copyright 2011 Yesudeep Mangalapilly <yesudeep@gmail.com>
|
2020-12-19 03:50:39 +11:00
|
|
|
# Copyright 2012 Google, Inc & contributors.
|
2018-04-28 17:12:29 +10:00
|
|
|
# Copyright 2014 Thomas Amland <thomas.amland@gmail.com>
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
|
|
|
"""
|
|
|
|
:module: watchdog.utils.dirsnapshot
|
|
|
|
:synopsis: Directory snapshots and comparison.
|
|
|
|
:author: yesudeep@google.com (Yesudeep Mangalapilly)
|
2020-12-19 03:50:39 +11:00
|
|
|
:author: contact@tiger-222.fr (Mickaël Schoentgen)
|
2018-04-28 17:12:29 +10:00
|
|
|
|
|
|
|
.. ADMONITION:: Where are the moved events? They "disappeared"
|
|
|
|
|
|
|
|
This implementation does not take partition boundaries
|
|
|
|
into consideration. It will only work when the directory
|
|
|
|
tree is entirely on the same file system. More specifically,
|
|
|
|
any part of the code that depends on inode numbers can
|
|
|
|
break if partition boundaries are crossed. In these cases,
|
|
|
|
the snapshot diff will represent file/directory movement as
|
|
|
|
created and deleted events.
|
|
|
|
|
|
|
|
Classes
|
|
|
|
-------
|
|
|
|
.. autoclass:: DirectorySnapshot
|
|
|
|
:members:
|
|
|
|
:show-inheritance:
|
|
|
|
|
|
|
|
.. autoclass:: DirectorySnapshotDiff
|
|
|
|
:members:
|
|
|
|
:show-inheritance:
|
|
|
|
|
2020-12-19 03:50:39 +11:00
|
|
|
.. autoclass:: EmptyDirectorySnapshot
|
|
|
|
:members:
|
|
|
|
:show-inheritance:
|
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
"""
|
|
|
|
|
|
|
|
import errno
|
|
|
|
import os
|
|
|
|
from stat import S_ISDIR
|
|
|
|
|
|
|
|
|
2020-12-19 03:50:39 +11:00
|
|
|
class DirectorySnapshotDiff:
|
2018-04-28 17:12:29 +10:00
|
|
|
"""
|
|
|
|
Compares two directory snapshots and creates an object that represents
|
|
|
|
the difference between the two snapshots.
|
|
|
|
|
|
|
|
:param ref:
|
|
|
|
The reference directory snapshot.
|
|
|
|
:type ref:
|
|
|
|
:class:`DirectorySnapshot`
|
|
|
|
:param snapshot:
|
|
|
|
The directory snapshot which will be compared
|
|
|
|
with the reference snapshot.
|
|
|
|
:type snapshot:
|
|
|
|
:class:`DirectorySnapshot`
|
2020-12-19 03:50:39 +11:00
|
|
|
:param ignore_device:
|
|
|
|
A boolean indicating whether to ignore the device id or not.
|
|
|
|
By default, a file may be uniquely identified by a combination of its first
|
|
|
|
inode and its device id. The problem is that the device id may (or may not)
|
|
|
|
change between system boots. This problem would cause the DirectorySnapshotDiff
|
|
|
|
to think a file has been deleted and created again but it would be the
|
|
|
|
exact same file.
|
|
|
|
Set to True only if you are sure you will always use the same device.
|
|
|
|
:type ignore_device:
|
|
|
|
:class:`bool`
|
2018-04-28 17:12:29 +10:00
|
|
|
"""
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
def __init__(self, ref, snapshot, ignore_device=False):
|
2018-04-28 17:12:29 +10:00
|
|
|
created = snapshot.paths - ref.paths
|
|
|
|
deleted = ref.paths - snapshot.paths
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
if ignore_device:
|
|
|
|
def get_inode(directory, full_path):
|
|
|
|
return directory.inode(full_path)[0]
|
|
|
|
else:
|
|
|
|
def get_inode(directory, full_path):
|
|
|
|
return directory.inode(full_path)
|
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
# check that all unchanged paths have the same inode
|
|
|
|
for path in ref.paths & snapshot.paths:
|
2020-12-19 03:50:39 +11:00
|
|
|
if get_inode(ref, path) != get_inode(snapshot, path):
|
2018-04-28 17:12:29 +10:00
|
|
|
created.add(path)
|
|
|
|
deleted.add(path)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
# find moved paths
|
|
|
|
moved = set()
|
|
|
|
for path in set(deleted):
|
|
|
|
inode = ref.inode(path)
|
|
|
|
new_path = snapshot.path(inode)
|
|
|
|
if new_path:
|
|
|
|
# file is not deleted but moved
|
|
|
|
deleted.remove(path)
|
|
|
|
moved.add((path, new_path))
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
for path in set(created):
|
|
|
|
inode = snapshot.inode(path)
|
|
|
|
old_path = ref.path(inode)
|
|
|
|
if old_path:
|
|
|
|
created.remove(path)
|
|
|
|
moved.add((old_path, path))
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
# find modified paths
|
|
|
|
# first check paths that have not moved
|
|
|
|
modified = set()
|
|
|
|
for path in ref.paths & snapshot.paths:
|
2020-12-19 03:50:39 +11:00
|
|
|
if get_inode(ref, path) == get_inode(snapshot, path):
|
|
|
|
if ref.mtime(path) != snapshot.mtime(path) or ref.size(path) != snapshot.size(path):
|
2018-04-28 17:12:29 +10:00
|
|
|
modified.add(path)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
for (old_path, new_path) in moved:
|
2020-12-19 03:50:39 +11:00
|
|
|
if ref.mtime(old_path) != snapshot.mtime(new_path) or ref.size(old_path) != snapshot.size(new_path):
|
2018-04-28 17:12:29 +10:00
|
|
|
modified.add(old_path)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
self._dirs_created = [path for path in created if snapshot.isdir(path)]
|
|
|
|
self._dirs_deleted = [path for path in deleted if ref.isdir(path)]
|
|
|
|
self._dirs_modified = [path for path in modified if ref.isdir(path)]
|
|
|
|
self._dirs_moved = [(frm, to) for (frm, to) in moved if ref.isdir(frm)]
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
self._files_created = list(created - set(self._dirs_created))
|
|
|
|
self._files_deleted = list(deleted - set(self._dirs_deleted))
|
|
|
|
self._files_modified = list(modified - set(self._dirs_modified))
|
|
|
|
self._files_moved = list(moved - set(self._dirs_moved))
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
return self.__repr__()
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
fmt = (
|
|
|
|
'<{0} files(created={1}, deleted={2}, modified={3}, moved={4}),'
|
|
|
|
' folders(created={5}, deleted={6}, modified={7}, moved={8})>'
|
|
|
|
)
|
|
|
|
return fmt.format(
|
|
|
|
type(self).__name__,
|
|
|
|
len(self._files_created),
|
|
|
|
len(self._files_deleted),
|
|
|
|
len(self._files_modified),
|
|
|
|
len(self._files_moved),
|
|
|
|
len(self._dirs_created),
|
|
|
|
len(self._dirs_deleted),
|
|
|
|
len(self._dirs_modified),
|
|
|
|
len(self._dirs_moved)
|
|
|
|
)
|
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
@property
|
|
|
|
def files_created(self):
|
|
|
|
"""List of files that were created."""
|
|
|
|
return self._files_created
|
|
|
|
|
|
|
|
@property
|
|
|
|
def files_deleted(self):
|
|
|
|
"""List of files that were deleted."""
|
|
|
|
return self._files_deleted
|
|
|
|
|
|
|
|
@property
|
|
|
|
def files_modified(self):
|
|
|
|
"""List of files that were modified."""
|
|
|
|
return self._files_modified
|
|
|
|
|
|
|
|
@property
|
|
|
|
def files_moved(self):
|
|
|
|
"""
|
|
|
|
List of files that were moved.
|
|
|
|
|
|
|
|
Each event is a two-tuple the first item of which is the path
|
|
|
|
that has been renamed to the second item in the tuple.
|
|
|
|
"""
|
|
|
|
return self._files_moved
|
|
|
|
|
|
|
|
@property
|
|
|
|
def dirs_modified(self):
|
|
|
|
"""
|
|
|
|
List of directories that were modified.
|
|
|
|
"""
|
|
|
|
return self._dirs_modified
|
|
|
|
|
|
|
|
@property
|
|
|
|
def dirs_moved(self):
|
|
|
|
"""
|
|
|
|
List of directories that were moved.
|
|
|
|
|
|
|
|
Each event is a two-tuple the first item of which is the path
|
|
|
|
that has been renamed to the second item in the tuple.
|
|
|
|
"""
|
|
|
|
return self._dirs_moved
|
|
|
|
|
|
|
|
@property
|
|
|
|
def dirs_deleted(self):
|
|
|
|
"""
|
|
|
|
List of directories that were deleted.
|
|
|
|
"""
|
|
|
|
return self._dirs_deleted
|
|
|
|
|
|
|
|
@property
|
|
|
|
def dirs_created(self):
|
|
|
|
"""
|
|
|
|
List of directories that were created.
|
|
|
|
"""
|
|
|
|
return self._dirs_created
|
|
|
|
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
class DirectorySnapshot:
|
2018-04-28 17:12:29 +10:00
|
|
|
"""
|
|
|
|
A snapshot of stat information of files in a directory.
|
|
|
|
|
|
|
|
:param path:
|
|
|
|
The directory path for which a snapshot should be taken.
|
|
|
|
:type path:
|
|
|
|
``str``
|
|
|
|
:param recursive:
|
|
|
|
``True`` if the entire directory tree should be included in the
|
|
|
|
snapshot; ``False`` otherwise.
|
|
|
|
:type recursive:
|
|
|
|
``bool``
|
|
|
|
:param stat:
|
|
|
|
Use custom stat function that returns a stat structure for path.
|
|
|
|
Currently only st_dev, st_ino, st_mode and st_mtime are needed.
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
A function taking a ``path`` as argument which will be called
|
|
|
|
for every entry in the directory tree.
|
2018-04-28 17:12:29 +10:00
|
|
|
:param listdir:
|
2020-12-19 03:50:39 +11:00
|
|
|
Use custom listdir function. For details see ``os.scandir``.
|
2018-04-28 17:12:29 +10:00
|
|
|
"""
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def __init__(self, path, recursive=True,
|
2020-12-19 03:50:39 +11:00
|
|
|
stat=os.stat, listdir=os.scandir):
|
|
|
|
self.recursive = recursive
|
|
|
|
self.stat = stat
|
|
|
|
self.listdir = listdir
|
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
self._stat_info = {}
|
|
|
|
self._inode_to_path = {}
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
st = self.stat(path)
|
2018-04-28 17:12:29 +10:00
|
|
|
self._stat_info[path] = st
|
|
|
|
self._inode_to_path[(st.st_ino, st.st_dev)] = path
|
|
|
|
|
2020-12-19 03:50:39 +11:00
|
|
|
for p, st in self.walk(path):
|
2018-04-28 17:12:29 +10:00
|
|
|
i = (st.st_ino, st.st_dev)
|
|
|
|
self._inode_to_path[i] = p
|
|
|
|
self._stat_info[p] = st
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
def walk(self, root):
|
|
|
|
try:
|
|
|
|
paths = [os.path.join(root, entry if isinstance(entry, str) else entry.name)
|
|
|
|
for entry in self.listdir(root)]
|
|
|
|
except OSError as e:
|
|
|
|
# Directory may have been deleted between finding it in the directory
|
|
|
|
# list of its parent and trying to delete its contents. If this
|
|
|
|
# happens we treat it as empty. Likewise if the directory was replaced
|
|
|
|
# with a file of the same name (less likely, but possible).
|
|
|
|
if e.errno in (errno.ENOENT, errno.ENOTDIR, errno.EINVAL):
|
|
|
|
return
|
|
|
|
else:
|
|
|
|
raise
|
|
|
|
|
|
|
|
entries = []
|
|
|
|
for p in paths:
|
|
|
|
try:
|
|
|
|
entry = (p, self.stat(p))
|
|
|
|
entries.append(entry)
|
|
|
|
yield entry
|
|
|
|
except OSError:
|
|
|
|
continue
|
|
|
|
|
|
|
|
if self.recursive:
|
|
|
|
for path, st in entries:
|
|
|
|
try:
|
|
|
|
if S_ISDIR(st.st_mode):
|
|
|
|
for entry in self.walk(path):
|
|
|
|
yield entry
|
|
|
|
except PermissionError:
|
|
|
|
pass
|
2018-04-28 17:12:29 +10:00
|
|
|
|
|
|
|
@property
|
|
|
|
def paths(self):
|
|
|
|
"""
|
|
|
|
Set of file/directory paths in the snapshot.
|
|
|
|
"""
|
|
|
|
return set(self._stat_info.keys())
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def path(self, id):
|
|
|
|
"""
|
|
|
|
Returns path for id. None if id is unknown to this snapshot.
|
|
|
|
"""
|
|
|
|
return self._inode_to_path.get(id)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def inode(self, path):
|
|
|
|
""" Returns an id for path. """
|
|
|
|
st = self._stat_info[path]
|
|
|
|
return (st.st_ino, st.st_dev)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def isdir(self, path):
|
|
|
|
return S_ISDIR(self._stat_info[path].st_mode)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def mtime(self, path):
|
|
|
|
return self._stat_info[path].st_mtime
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
def size(self, path):
|
|
|
|
return self._stat_info[path].st_size
|
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def stat_info(self, path):
|
|
|
|
"""
|
|
|
|
Returns a stat information object for the specified path from
|
|
|
|
the snapshot.
|
|
|
|
|
|
|
|
Attached information is subject to change. Do not use unless
|
|
|
|
you specify `stat` in constructor. Use :func:`inode`, :func:`mtime`,
|
|
|
|
:func:`isdir` instead.
|
|
|
|
|
|
|
|
:param path:
|
|
|
|
The path for which stat information should be obtained
|
|
|
|
from a snapshot.
|
|
|
|
"""
|
|
|
|
return self._stat_info[path]
|
|
|
|
|
|
|
|
def __sub__(self, previous_dirsnap):
|
|
|
|
"""Allow subtracting a DirectorySnapshot object instance from
|
|
|
|
another.
|
|
|
|
|
|
|
|
:returns:
|
|
|
|
A :class:`DirectorySnapshotDiff` object.
|
|
|
|
"""
|
|
|
|
return DirectorySnapshotDiff(previous_dirsnap, self)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def __str__(self):
|
|
|
|
return self.__repr__()
|
2020-12-19 03:50:39 +11:00
|
|
|
|
2018-04-28 17:12:29 +10:00
|
|
|
def __repr__(self):
|
|
|
|
return str(self._stat_info)
|
2020-12-19 03:50:39 +11:00
|
|
|
|
|
|
|
|
|
|
|
class EmptyDirectorySnapshot:
|
|
|
|
"""Class to implement an empty snapshot. This is used together with
|
|
|
|
DirectorySnapshot and DirectorySnapshotDiff in order to get all the files/folders
|
|
|
|
in the directory as created.
|
|
|
|
"""
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
def path(_):
|
|
|
|
"""Mock up method to return the path of the received inode. As the snapshot
|
|
|
|
is intended to be empty, it always returns None.
|
|
|
|
|
|
|
|
:returns:
|
|
|
|
None.
|
|
|
|
"""
|
|
|
|
return None
|
|
|
|
|
|
|
|
@property
|
|
|
|
def paths(self):
|
|
|
|
"""Mock up method to return a set of file/directory paths in the snapshot. As
|
|
|
|
the snapshot is intended to be empty, it always returns an empty set.
|
|
|
|
|
|
|
|
:returns:
|
|
|
|
An empty set.
|
|
|
|
"""
|
|
|
|
return set()
|