Rsync probably works now. Woot.
This commit is contained in:
parent
41b8996b52
commit
f5986f068b
233
backup-diff.py
233
backup-diff.py
@ -66,14 +66,20 @@ class BackupDiff:
|
|||||||
|
|
||||||
def log(self, s, o=None):
|
def log(self, s, o=None):
|
||||||
|
|
||||||
now = self.current_time()
|
to_log = self.make_log_prefix() + str(s)
|
||||||
|
|
||||||
to_log = "[" + now + "][Mike's Backup Diff] " + str(s)
|
|
||||||
if o is not None:
|
if o is not None:
|
||||||
to_log += " " + str(o)
|
to_log += " " + str(o)
|
||||||
|
|
||||||
print(to_log)
|
print(to_log)
|
||||||
|
|
||||||
|
def make_log_prefix(self):
|
||||||
|
|
||||||
|
now = self.current_time()
|
||||||
|
|
||||||
|
prefix = "[" + now + "][Mike's Backup Diff] "
|
||||||
|
|
||||||
|
return prefix
|
||||||
|
|
||||||
def consume_arguments(self):
|
def consume_arguments(self):
|
||||||
|
|
||||||
for i in range(1, len(sys.argv)):
|
for i in range(1, len(sys.argv)):
|
||||||
@ -221,27 +227,32 @@ class BackupDiff:
|
|||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
|
|
||||||
do_test = False
|
stdout_lines, stderr_lines, return_code = self.execute_rsync()
|
||||||
|
|
||||||
stdout, stderr, return_code = self.execute_rsync()
|
# print("STDOUT LINES:")
|
||||||
|
# print(stdout_lines)
|
||||||
|
|
||||||
print("STDOUT:")
|
# print("STDERR LINES:")
|
||||||
print(stdout)
|
# print(stderr_lines)
|
||||||
|
|
||||||
#print("STDERR:")
|
|
||||||
#print(stderr)
|
|
||||||
|
|
||||||
#
|
#
|
||||||
print("Calculating difference entries ...")
|
self.log("Calculating difference entries ...")
|
||||||
|
|
||||||
# Parse normal lines (Flags and Path)
|
# Regex patterns
|
||||||
pattern_general = re.compile("""^(?P<line>(?P<flags>[^\s]{11})(?P<item>.*))$""", re.MULTILINE)
|
pattern_regular = re.compile("""^(?P<line>(?P<flags>[^\s]{11})(?P<item>.*))$""")
|
||||||
matches = pattern_general.finditer(stdout)
|
pattern_message = re.compile("""^(?P<line>\*(?P<message>[\w]+)(?P<item>.*))$""")
|
||||||
for match in matches:
|
|
||||||
|
|
||||||
line = match.group("line")
|
# Iterate over each stdout line
|
||||||
|
for line in stdout_lines:
|
||||||
|
|
||||||
flags = match.group("flags")
|
# Try to match regular expressions
|
||||||
|
match_regular = pattern_regular.match(line)
|
||||||
|
match_message = pattern_message.match(line)
|
||||||
|
|
||||||
|
# Regular line (Flags and Path)
|
||||||
|
if match_regular:
|
||||||
|
|
||||||
|
flags = match_regular.group("flags")
|
||||||
change_type_character = flags[0]
|
change_type_character = flags[0]
|
||||||
item_type = flags[1]
|
item_type = flags[1]
|
||||||
|
|
||||||
@ -267,7 +278,7 @@ class BackupDiff:
|
|||||||
or different_extended_attributes
|
or different_extended_attributes
|
||||||
)
|
)
|
||||||
|
|
||||||
item = match.group("item").strip()
|
item = match_regular.group("item").strip()
|
||||||
|
|
||||||
entry = DifferenceEntry(item)
|
entry = DifferenceEntry(item)
|
||||||
|
|
||||||
@ -277,8 +288,29 @@ class BackupDiff:
|
|||||||
elif item_type == "f":
|
elif item_type == "f":
|
||||||
entry.set_is_file()
|
entry.set_is_file()
|
||||||
|
|
||||||
|
# Different attributes
|
||||||
|
# (before 'missing' stuff, because attribute syncs show up as xfers)
|
||||||
|
if different_checksum:
|
||||||
|
entry.set_is_different_checksum()
|
||||||
|
elif different_size:
|
||||||
|
entry.set_is_different_sizes()
|
||||||
|
elif different_modification_time:
|
||||||
|
entry.set_is_different_modification_times()
|
||||||
|
elif different_permissions:
|
||||||
|
entry.set_is_different_permissions()
|
||||||
|
elif different_owner:
|
||||||
|
entry.set_is_different_owner()
|
||||||
|
elif different_group:
|
||||||
|
entry.set_is_different_group()
|
||||||
|
elif different_acl:
|
||||||
|
entry.set_is_different_acl()
|
||||||
|
elif different_extended_attributes:
|
||||||
|
entry.set_is_different_extended_attributes()
|
||||||
|
elif different_any_attribute:
|
||||||
|
entry.set_is_different_attributes()
|
||||||
|
|
||||||
# Missing from backup
|
# Missing from backup
|
||||||
if change_type_character == "<":
|
elif change_type_character == "<":
|
||||||
entry.set_is_missing_from_backup()
|
entry.set_is_missing_from_backup()
|
||||||
# Missing from ... backup? (confusing symbolstuffs)
|
# Missing from ... backup? (confusing symbolstuffs)
|
||||||
elif change_type_character == ">":
|
elif change_type_character == ">":
|
||||||
@ -288,10 +320,6 @@ class BackupDiff:
|
|||||||
elif change_type_character == "c":
|
elif change_type_character == "c":
|
||||||
entry.set_is_missing_from_backup()
|
entry.set_is_missing_from_backup()
|
||||||
|
|
||||||
# Different attributes
|
|
||||||
elif different_any_attribute:
|
|
||||||
entry.set_is_different_attributes()
|
|
||||||
|
|
||||||
# Item is a hard link
|
# Item is a hard link
|
||||||
elif change_type_character == "h":
|
elif change_type_character == "h":
|
||||||
entry.set_is_unknown("Rsync says this is a hard link")
|
entry.set_is_unknown("Rsync says this is a hard link")
|
||||||
@ -303,13 +331,11 @@ class BackupDiff:
|
|||||||
#
|
#
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
|
|
||||||
# Parse message lines
|
# Message line
|
||||||
pattern_messages = re.compile("""^(?P<line>\*(?P<message>[\w]+)(?P<item>.*))$""", re.MULTILINE)
|
elif match_message:
|
||||||
matches = pattern_messages.finditer(stdout)
|
|
||||||
for match in matches:
|
|
||||||
|
|
||||||
message = match.group("message").strip()
|
message = match_message.group("message").strip()
|
||||||
item = match.group("item").strip()
|
item = match_message.group("item").strip()
|
||||||
|
|
||||||
entry = DifferenceEntry(item)
|
entry = DifferenceEntry(item)
|
||||||
|
|
||||||
@ -319,12 +345,18 @@ class BackupDiff:
|
|||||||
entry.set_is_file(not item[-1] == "/")
|
entry.set_is_file(not item[-1] == "/")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
print("IS UNKNOWN MESSAGE:", message)
|
self.log("IS UNKNOWN MESSAGE:" + message)
|
||||||
entry.set_is_unknown("Unhandled message: " + message)
|
entry.set_is_unknown("Unhandled message: " + message)
|
||||||
|
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
|
|
||||||
print("Finished calculating difference entries")
|
# Unsupported type of line
|
||||||
|
else:
|
||||||
|
|
||||||
|
#
|
||||||
|
self.log("IS UNSUPPORTED LINE:" + line)
|
||||||
|
|
||||||
|
self.log("Finished calculating difference entries")
|
||||||
|
|
||||||
self.__difference_entries = entries
|
self.__difference_entries = entries
|
||||||
|
|
||||||
@ -358,25 +390,39 @@ class BackupDiff:
|
|||||||
args.append(self.make_rsync_path(self.__backup_ssh_host, self.__backup_ssh_user, self.__backup_path))
|
args.append(self.make_rsync_path(self.__backup_ssh_host, self.__backup_ssh_user, self.__backup_path))
|
||||||
|
|
||||||
#
|
#
|
||||||
print("Executing rsync with the following arguments:")
|
self.log("Executing rsync")
|
||||||
print(args)
|
# self.log("Executing rsync with the following arguments:")
|
||||||
|
# self.log(str(args))
|
||||||
|
# self.log(" ".join(args))
|
||||||
|
|
||||||
# Spawn SSH in shell
|
# Start the subprocess
|
||||||
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||||
stdout, stderr = process.communicate()
|
|
||||||
|
|
||||||
#
|
# Live output of stdout
|
||||||
print("Rsync has finished executing")
|
print()
|
||||||
|
stdout_lines = []
|
||||||
|
for line in iter(process.stdout.readline, b''):
|
||||||
|
line = line.decode().strip()
|
||||||
|
stdout_lines.append(line)
|
||||||
|
# print(line)
|
||||||
|
self.print_progress_message("Captured " + str(len(stdout_lines)) + " lines from Rsync")
|
||||||
|
|
||||||
#
|
# Grab all the stderr lines
|
||||||
stdout = stdout.decode()
|
stderr_lines = []
|
||||||
stderr = stderr.decode()
|
for line in iter(process.stderr.readline, b''):
|
||||||
|
line = line.decode().strip()
|
||||||
|
stderr_lines.append(line)
|
||||||
|
|
||||||
|
# Make sure it's completely finished
|
||||||
|
process.communicate()
|
||||||
|
|
||||||
|
self.log("Rsync has finished executing")
|
||||||
|
|
||||||
# Accept Success (0), and Partial Transfer Codes (23 and 24)
|
# Accept Success (0), and Partial Transfer Codes (23 and 24)
|
||||||
if process.returncode not in [0, 23, 24]:
|
if process.returncode not in [0, 23, 24]:
|
||||||
raise Exception("Failed to execute Rsync; Exited with code " + str(process.returncode))
|
raise Exception("Failed to execute Rsync; Exited with code " + str(process.returncode))
|
||||||
|
|
||||||
return stdout, stderr, process.returncode
|
return stdout_lines, stderr_lines, process.returncode
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def make_rsync_path(ssh_host, ssh_user, path):
|
def make_rsync_path(ssh_host, ssh_user, path):
|
||||||
@ -457,7 +503,7 @@ class BackupDiff:
|
|||||||
if entries is None:
|
if entries is None:
|
||||||
entries = self.__difference_entries
|
entries = self.__difference_entries
|
||||||
|
|
||||||
self.log("Cleaning difference entries")
|
self.log("Cleaning " + str(len(entries)) + " difference entries")
|
||||||
|
|
||||||
# Build a temp list of all known difference entries
|
# Build a temp list of all known difference entries
|
||||||
temp_entries = []
|
temp_entries = []
|
||||||
@ -467,8 +513,12 @@ class BackupDiff:
|
|||||||
|
|
||||||
# Loop through entries, attempting to clean for one at a time,
|
# Loop through entries, attempting to clean for one at a time,
|
||||||
# until no cleaning has been done
|
# until no cleaning has been done
|
||||||
|
print()
|
||||||
|
clean_iterations = 0
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
|
clean_iterations += 1
|
||||||
|
|
||||||
most_shallow_entry = None
|
most_shallow_entry = None
|
||||||
|
|
||||||
# Locate the most shallow entry
|
# Locate the most shallow entry
|
||||||
@ -489,12 +539,26 @@ class BackupDiff:
|
|||||||
|
|
||||||
# Finish if we haven't found anything
|
# Finish if we haven't found anything
|
||||||
if not most_shallow_entry:
|
if not most_shallow_entry:
|
||||||
|
self.print_progress_message(
|
||||||
|
"Cleaning difference entries; "
|
||||||
|
+ str(clean_iterations) + " iterations; "
|
||||||
|
+ str(len(temp_entries)) + " total"
|
||||||
|
)
|
||||||
break
|
break
|
||||||
|
|
||||||
# Remove this entry from the temp list, and clean with it as root
|
# Remove this entry from the temp list, and clean with it as root
|
||||||
temp_entries.remove(most_shallow_entry)
|
temp_entries.remove(most_shallow_entry)
|
||||||
|
self.clean_child_difference_entries(temp_entries, most_shallow_entry)
|
||||||
self.clean_child_difference_entries(entries, most_shallow_entry)
|
self.clean_child_difference_entries(entries, most_shallow_entry)
|
||||||
|
|
||||||
|
self.print_progress_message(
|
||||||
|
"Cleaning difference entries; "
|
||||||
|
+ str(clean_iterations) + " iterations; "
|
||||||
|
+ str(len(temp_entries)) + " total"
|
||||||
|
)
|
||||||
|
|
||||||
|
self.__difference_entries = entries
|
||||||
|
|
||||||
def clean_child_difference_entries(self, entries: list, root_entry):
|
def clean_child_difference_entries(self, entries: list, root_entry):
|
||||||
|
|
||||||
if entries is None:
|
if entries is None:
|
||||||
@ -504,12 +568,21 @@ class BackupDiff:
|
|||||||
# print(root_entry)
|
# print(root_entry)
|
||||||
|
|
||||||
root_entry_item = root_entry.get_item()
|
root_entry_item = root_entry.get_item()
|
||||||
|
# print("Cleaning child entries for root entry")
|
||||||
|
# print(root_entry)
|
||||||
|
# print()
|
||||||
|
# print()
|
||||||
|
|
||||||
entries_to_delete = []
|
entries_to_delete = []
|
||||||
|
|
||||||
# Check every other entry as a possible child of the root
|
# Check every other entry as a possible child of the root
|
||||||
|
child_iteration = 0
|
||||||
for child_entry in entries:
|
for child_entry in entries:
|
||||||
|
|
||||||
|
child_iteration += 1
|
||||||
|
|
||||||
|
self.print_progress_message("Looking for child entry to clean " + str(child_iteration))
|
||||||
|
|
||||||
if child_entry != root_entry:
|
if child_entry != root_entry:
|
||||||
|
|
||||||
child_entry_item = child_entry.get_item()
|
child_entry_item = child_entry.get_item()
|
||||||
@ -525,9 +598,20 @@ class BackupDiff:
|
|||||||
# print("Deleting unneeded child entry:")
|
# print("Deleting unneeded child entry:")
|
||||||
# print("> Root:", root_entry_item)
|
# print("> Root:", root_entry_item)
|
||||||
# print("> Child:", child_entry_item)
|
# print("> Child:", child_entry_item)
|
||||||
|
# print()
|
||||||
|
# print()
|
||||||
|
|
||||||
# Handle entries to delete
|
# Handle entries to delete
|
||||||
|
delete_iteration = 0
|
||||||
for entry in entries_to_delete:
|
for entry in entries_to_delete:
|
||||||
|
|
||||||
|
delete_iteration += 1
|
||||||
|
|
||||||
|
self.print_progress_message(
|
||||||
|
"Deleting child entry "
|
||||||
|
+ str(delete_iteration) + " / " + str(len(entries_to_delete))
|
||||||
|
)
|
||||||
|
|
||||||
entries.remove(entry)
|
entries.remove(entry)
|
||||||
|
|
||||||
return len(entries_to_delete) > 0
|
return len(entries_to_delete) > 0
|
||||||
@ -763,12 +847,14 @@ class BackupDiff:
|
|||||||
|
|
||||||
return report
|
return report
|
||||||
|
|
||||||
@staticmethod
|
def print_progress_message(self, s):
|
||||||
def print_progress_message(s):
|
|
||||||
|
|
||||||
sys.stdout.write("\033[F") # back to previous line
|
sys.stdout.write("\033[F") # back to previous line
|
||||||
sys.stdout.write("\033[K") # clear line
|
sys.stdout.write("\033[K") # clear line
|
||||||
print(s)
|
|
||||||
|
to_print = self.make_log_prefix() + s
|
||||||
|
|
||||||
|
print(to_print)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def print_report_heading(s, hooded: bool=False):
|
def print_report_heading(s, hooded: bool=False):
|
||||||
@ -797,6 +883,7 @@ class BackupDiff:
|
|||||||
]
|
]
|
||||||
|
|
||||||
#
|
#
|
||||||
|
print()
|
||||||
self.print_report_heading("Mike's Backup Diff Report", True)
|
self.print_report_heading("Mike's Backup Diff Report", True)
|
||||||
print("Source:", self.__source_path)
|
print("Source:", self.__source_path)
|
||||||
print("Backup:", self.__backup_path)
|
print("Backup:", self.__backup_path)
|
||||||
@ -806,6 +893,7 @@ class BackupDiff:
|
|||||||
for section_key in section_order:
|
for section_key in section_order:
|
||||||
if len(report[section_key]["entries"]):
|
if len(report[section_key]["entries"]):
|
||||||
found_anything = True
|
found_anything = True
|
||||||
|
print("")
|
||||||
self.print_report_heading(report[section_key]["label"])
|
self.print_report_heading(report[section_key]["label"])
|
||||||
for entry in report[section_key]["entries"]:
|
for entry in report[section_key]["entries"]:
|
||||||
|
|
||||||
@ -816,12 +904,22 @@ class BackupDiff:
|
|||||||
else:
|
else:
|
||||||
prefix = ""
|
prefix = ""
|
||||||
|
|
||||||
print(prefix + entry.get_item())
|
message = entry.get_message()
|
||||||
|
if message:
|
||||||
|
suffix = " (" + message + ")"
|
||||||
|
else:
|
||||||
|
suffix = ""
|
||||||
|
|
||||||
print("")
|
print(prefix + entry.get_item() + suffix)
|
||||||
|
|
||||||
|
# Lil debebuggin'
|
||||||
|
for section_key in report:
|
||||||
|
if section_key not in section_order:
|
||||||
|
raise Exception("Report key " + section_key + " wasn't found in the section_order ... whoopsies")
|
||||||
|
|
||||||
if not found_anything:
|
if not found_anything:
|
||||||
print("Everything seems to match")
|
print()
|
||||||
|
print("Everything seems to match !")
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
@ -845,6 +943,8 @@ class DifferenceEntry:
|
|||||||
self.CONST_TYPE_DIFFERENT_ATTRIBUTES = "different_attributes"
|
self.CONST_TYPE_DIFFERENT_ATTRIBUTES = "different_attributes"
|
||||||
self.CONST_TYPE_UNKNOWN = "unknown"
|
self.CONST_TYPE_UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
self.set_is_unknown("DEFAULT MESSAGE")
|
||||||
|
|
||||||
if item:
|
if item:
|
||||||
self.set_item(item)
|
self.set_item(item)
|
||||||
|
|
||||||
@ -867,6 +967,12 @@ class DifferenceEntry:
|
|||||||
|
|
||||||
return self.__item
|
return self.__item
|
||||||
|
|
||||||
|
def set_message(self, m):
|
||||||
|
self.__message = m
|
||||||
|
|
||||||
|
def get_message(self):
|
||||||
|
return self.__message
|
||||||
|
|
||||||
def set_is_dir(self, is_dir: bool=True):
|
def set_is_dir(self, is_dir: bool=True):
|
||||||
|
|
||||||
if is_dir:
|
if is_dir:
|
||||||
@ -899,14 +1005,14 @@ class DifferenceEntry:
|
|||||||
def set_is_missing_from_source(self):
|
def set_is_missing_from_source(self):
|
||||||
|
|
||||||
self.__type = self.CONST_TYPE_MISSING_IN_SOURCE
|
self.__type = self.CONST_TYPE_MISSING_IN_SOURCE
|
||||||
self.__message = "Item is in backup but not in source"
|
self.__message = None
|
||||||
|
|
||||||
def get_is_missing_from_source(self):
|
def get_is_missing_from_source(self):
|
||||||
return self.__type == self.CONST_TYPE_MISSING_IN_SOURCE
|
return self.__type == self.CONST_TYPE_MISSING_IN_SOURCE
|
||||||
|
|
||||||
def set_is_missing_from_backup(self):
|
def set_is_missing_from_backup(self):
|
||||||
self.__type = self.CONST_TYPE_MISSING_IN_BACKUP
|
self.__type = self.CONST_TYPE_MISSING_IN_BACKUP
|
||||||
self.__message = "Item is in source but not in backup"
|
self.__message = None
|
||||||
|
|
||||||
def get_is_missing_from_backup(self):
|
def get_is_missing_from_backup(self):
|
||||||
return self.__type == self.CONST_TYPE_MISSING_IN_BACKUP
|
return self.__type == self.CONST_TYPE_MISSING_IN_BACKUP
|
||||||
@ -938,11 +1044,15 @@ class DifferenceEntry:
|
|||||||
def get_backup_is_newer(self):
|
def get_backup_is_newer(self):
|
||||||
return self.__type == self.CONST_TYPE_BACKUP_IS_NEWER
|
return self.__type == self.CONST_TYPE_BACKUP_IS_NEWER
|
||||||
|
|
||||||
def set_is_different_sizes(self, source_item_size, backup_item_size):
|
def set_is_different_sizes(self, source_item_size=None, backup_item_size=None):
|
||||||
self.__type = self.CONST_TYPE_DIFFERENT_SIZES
|
self.__type = self.CONST_TYPE_DIFFERENT_SIZES
|
||||||
|
|
||||||
|
if source_item_size and backup_item_size:
|
||||||
self.__message = \
|
self.__message = \
|
||||||
"Source has a file size of " + str(source_item_size) \
|
"Source has a file size of " + str(source_item_size) \
|
||||||
+ ", but backup has a file size of " + str(backup_item_size)
|
+ ", but backup has a file size of " + str(backup_item_size)
|
||||||
|
else:
|
||||||
|
self.__message = None
|
||||||
|
|
||||||
def get_is_different_sizes(self):
|
def get_is_different_sizes(self):
|
||||||
return self.__type == self.CONST_TYPE_DIFFERENT_SIZES
|
return self.__type == self.CONST_TYPE_DIFFERENT_SIZES
|
||||||
@ -954,6 +1064,27 @@ class DifferenceEntry:
|
|||||||
def get_is_different_attributes(self):
|
def get_is_different_attributes(self):
|
||||||
return self.__type == self.CONST_TYPE_DIFFERENT_ATTRIBUTES
|
return self.__type == self.CONST_TYPE_DIFFERENT_ATTRIBUTES
|
||||||
|
|
||||||
|
def set_is_different_checksum(self):
|
||||||
|
self.set_is_different_attributes("Different checksums")
|
||||||
|
|
||||||
|
def set_is_different_modification_times(self):
|
||||||
|
self.set_is_different_attributes("Different modification times")
|
||||||
|
|
||||||
|
def set_is_different_permissions(self):
|
||||||
|
self.set_is_different_attributes("Different permissions")
|
||||||
|
|
||||||
|
def set_is_different_owner(self):
|
||||||
|
self.set_is_different_attributes("Different owners")
|
||||||
|
|
||||||
|
def set_is_different_group(self):
|
||||||
|
self.set_is_different_attributes("Different groups")
|
||||||
|
|
||||||
|
def set_is_different_acl(self):
|
||||||
|
self.set_is_different_attributes("Different ACLs")
|
||||||
|
|
||||||
|
def set_is_different_extended_attributes(self):
|
||||||
|
self.set_is_different_attributes("Different extended attributes")
|
||||||
|
|
||||||
def set_is_unknown(self, message):
|
def set_is_unknown(self, message):
|
||||||
self.__type = self.CONST_TYPE_UNKNOWN
|
self.__type = self.CONST_TYPE_UNKNOWN
|
||||||
self.__message = message
|
self.__message = message
|
||||||
|
Loading…
Reference in New Issue
Block a user