Kinda trying to add support for rsync
This commit is contained in:
		
							
								
								
									
										277
									
								
								backup-diff.py
									
									
									
									
									
								
							
							
						
						
									
										277
									
								
								backup-diff.py
									
									
									
									
									
								
							| @@ -18,6 +18,8 @@ import datetime | |||||||
| import functools | import functools | ||||||
| import humanfriendly | import humanfriendly | ||||||
| import os | import os | ||||||
|  | import re | ||||||
|  | import subprocess | ||||||
| import sys | import sys | ||||||
|  |  | ||||||
|  |  | ||||||
| @@ -27,7 +29,14 @@ class BackupDiff: | |||||||
| 	def __init__(self): | 	def __init__(self): | ||||||
| 		 | 		 | ||||||
| 		self.__source_path = None | 		self.__source_path = None | ||||||
|  | 		self.__source_ssh_host = None | ||||||
|  | 		self.__source_ssh_user = None | ||||||
|  | 		 | ||||||
| 		self.__backup_path = None | 		self.__backup_path = None | ||||||
|  | 		self.__backup_ssh_host = None | ||||||
|  | 		self.__backup_ssh_user = None | ||||||
|  | 		 | ||||||
|  | 		self.__ssh_key = None | ||||||
| 		 | 		 | ||||||
| 		self.__source_path_items = None | 		self.__source_path_items = None | ||||||
| 		self.__backup_path_items = None | 		self.__backup_path_items = None | ||||||
| @@ -35,10 +44,12 @@ class BackupDiff: | |||||||
| 		self.__difference_entries = None | 		self.__difference_entries = None | ||||||
| 		self.__do_clean_difference_entries = True | 		self.__do_clean_difference_entries = True | ||||||
| 		 | 		 | ||||||
|  | 		self.__force_rsync = False | ||||||
|  | 		 | ||||||
| 	def run(self): | 	def run(self): | ||||||
| 		 | 		 | ||||||
| 		self.consume_arguments() | 		self.consume_arguments() | ||||||
| 		self.calculate_comparison_items() | 		 | ||||||
| 		self.calculate_difference_entries() | 		self.calculate_difference_entries() | ||||||
| 		 | 		 | ||||||
| 		if self.__do_clean_difference_entries: | 		if self.__do_clean_difference_entries: | ||||||
| @@ -74,11 +85,40 @@ class BackupDiff: | |||||||
| 				self.__source_path = os.path.abspath(one_path) | 				self.__source_path = os.path.abspath(one_path) | ||||||
| 				self.log("Found source path argument:", self.__source_path) | 				self.log("Found source path argument:", self.__source_path) | ||||||
| 			 | 			 | ||||||
|  | 			elif arg == "--source-remote-host": | ||||||
|  | 				i, host = self.consume_argument_companion(i) | ||||||
|  | 				self.__source_ssh_host = host | ||||||
|  | 				self.log("Will use source remote host: " + str(self.__source_ssh_host)) | ||||||
|  | 			 | ||||||
|  | 			elif arg == "--source-remote-user": | ||||||
|  | 				i, user = self.consume_argument_companion(i) | ||||||
|  | 				self.__source_ssh_user = user | ||||||
|  | 				self.log("Will use source remote user: " + str(self.__source_ssh_user)) | ||||||
|  | 			 | ||||||
| 			elif arg == "--backup-path": | 			elif arg == "--backup-path": | ||||||
| 				i, one_path = self.consume_argument_companion(i) | 				i, one_path = self.consume_argument_companion(i) | ||||||
| 				self.__backup_path = os.path.abspath(one_path) | 				self.__backup_path = os.path.abspath(one_path) | ||||||
| 				self.log("Found backup destination path argument:", self.__backup_path) | 				self.log("Found backup destination path argument:", self.__backup_path) | ||||||
| 			 | 			 | ||||||
|  | 			elif arg == "--backup-remote-host": | ||||||
|  | 				i, host = self.consume_argument_companion(i) | ||||||
|  | 				self.__backup_ssh_host = host | ||||||
|  | 				self.log("Will use backup remote host: " + str(self.__backup_ssh_host)) | ||||||
|  | 			 | ||||||
|  | 			elif arg == "--backup-remote-user": | ||||||
|  | 				i, user = self.consume_argument_companion(i) | ||||||
|  | 				self.__backup_ssh_user = user | ||||||
|  | 				self.log("Will use backup remote user: " + str(self.__backup_ssh_user)) | ||||||
|  | 			 | ||||||
|  | 			elif arg == "--ssh-key": | ||||||
|  | 				i, key = self.consume_argument_companion(i) | ||||||
|  | 				self.__ssh_key = key | ||||||
|  | 				self.log("Will use ssh key: " + str(self.__ssh_key)) | ||||||
|  | 			 | ||||||
|  | 			elif arg == "--use-rsync" or arg == "--rsync": | ||||||
|  | 				self.__force_rsync = True | ||||||
|  | 				self.log("Forcing comparison with rsync tool") | ||||||
|  | 			 | ||||||
| 			elif arg == "--no-clean": | 			elif arg == "--no-clean": | ||||||
| 				self.__do_clean_difference_entries = False | 				self.__do_clean_difference_entries = False | ||||||
| 				self.log("Won't clean Difference entries") | 				self.log("Won't clean Difference entries") | ||||||
| @@ -113,6 +153,22 @@ class BackupDiff: | |||||||
| 		 | 		 | ||||||
| 		self.__source_path_items = source_path_items | 		self.__source_path_items = source_path_items | ||||||
| 	 | 	 | ||||||
|  | 	def should_use_rsync(self): | ||||||
|  | 		 | ||||||
|  | 		if self.__force_rsync: | ||||||
|  | 			return True | ||||||
|  | 		 | ||||||
|  | 		if self.__source_ssh_host or self.__source_ssh_user: | ||||||
|  | 			return True | ||||||
|  | 		 | ||||||
|  | 		if self.__backup_ssh_host or self.__backup_ssh_user: | ||||||
|  | 			return True | ||||||
|  | 		 | ||||||
|  | 		if self.__ssh_key: | ||||||
|  | 			return True | ||||||
|  | 		 | ||||||
|  | 		return False | ||||||
|  | 	 | ||||||
| 	def consume_backup_path(self): | 	def consume_backup_path(self): | ||||||
| 		 | 		 | ||||||
| 		if self.__backup_path is None: | 		if self.__backup_path is None: | ||||||
| @@ -156,6 +212,205 @@ class BackupDiff: | |||||||
| 	 | 	 | ||||||
| 	def calculate_difference_entries(self): | 	def calculate_difference_entries(self): | ||||||
| 	 | 	 | ||||||
|  | 		if self.should_use_rsync(): | ||||||
|  | 			self.calculate_difference_entries_with_rsync() | ||||||
|  | 		else: | ||||||
|  | 			self.calculate_difference_entries_directly() | ||||||
|  | 	 | ||||||
|  | 	def calculate_difference_entries_with_rsync(self): | ||||||
|  | 		 | ||||||
|  | 		entries = [] | ||||||
|  | 		 | ||||||
|  | 		do_test = False | ||||||
|  | 		 | ||||||
|  | 		stdout, stderr, return_code = self.execute_rsync() | ||||||
|  | 		 | ||||||
|  | 		print("STDOUT:") | ||||||
|  | 		print(stdout) | ||||||
|  | 		 | ||||||
|  | 		#print("STDERR:") | ||||||
|  | 		#print(stderr) | ||||||
|  | 		 | ||||||
|  | 		# | ||||||
|  | 		print("Calculating difference entries ...") | ||||||
|  | 		 | ||||||
|  | 		# Parse normal lines (Flags and Path) | ||||||
|  | 		pattern_general = re.compile("""^(?P<line>(?P<flags>[^\s]{11})(?P<item>.*))$""", re.MULTILINE) | ||||||
|  | 		matches = pattern_general.finditer(stdout) | ||||||
|  | 		for match in matches: | ||||||
|  | 			 | ||||||
|  | 			line = match.group("line") | ||||||
|  | 			 | ||||||
|  | 			flags = match.group("flags") | ||||||
|  | 			change_type_character = flags[0] | ||||||
|  | 			item_type = flags[1] | ||||||
|  | 			 | ||||||
|  | 			# Determine which attributes are different | ||||||
|  | 			attributes_part = flags[2:] | ||||||
|  | 			different_checksum = "c" in attributes_part | ||||||
|  | 			different_size = "s" in attributes_part | ||||||
|  | 			different_modification_time = "t" in attributes_part | ||||||
|  | 			different_permissions = "p" in attributes_part | ||||||
|  | 			different_owner = "o" in attributes_part | ||||||
|  | 			different_group = "g" in attributes_part | ||||||
|  | 			different_acl = "a" in attributes_part | ||||||
|  | 			different_extended_attributes = "x" in attributes_part | ||||||
|  | 			# | ||||||
|  | 			different_any_attribute = ( | ||||||
|  | 				different_checksum | ||||||
|  | 				or different_size | ||||||
|  | 				or different_modification_time | ||||||
|  | 				or different_permissions | ||||||
|  | 				or different_owner | ||||||
|  | 				or different_group | ||||||
|  | 				or different_acl | ||||||
|  | 				or different_extended_attributes | ||||||
|  | 			) | ||||||
|  | 			 | ||||||
|  | 			item = match.group("item").strip() | ||||||
|  | 			 | ||||||
|  | 			entry = DifferenceEntry(item) | ||||||
|  | 			 | ||||||
|  | 			# File folder, whatever | ||||||
|  | 			if item_type == "d": | ||||||
|  | 				entry.set_is_dir() | ||||||
|  | 			elif item_type == "f": | ||||||
|  | 				entry.set_is_file() | ||||||
|  | 			 | ||||||
|  | 			# Missing from backup | ||||||
|  | 			if change_type_character == "<": | ||||||
|  | 				entry.set_is_missing_from_backup() | ||||||
|  | 			 | ||||||
|  | 			# Missing from source | ||||||
|  | 			elif change_type_character == ">": | ||||||
|  | 				entry.set_is_missing_from_source() | ||||||
|  | 			 | ||||||
|  | 			# Local change is occurring | ||||||
|  | 			elif change_type_character == "c": | ||||||
|  | 				entry.set_is_unknown("Rsync says a local change is occurring") | ||||||
|  | 			 | ||||||
|  | 			# Item is a hard link | ||||||
|  | 			elif change_type_character == "h": | ||||||
|  | 				entry.set_is_unknown("Rsync says this is a hard link") | ||||||
|  | 			 | ||||||
|  | 			# "no change / transfer (could still be changing attributes)" | ||||||
|  | 			elif change_type_character == ".": | ||||||
|  | 				entry.set_is_unknown("Rsync says no change, but could be changing attributes") | ||||||
|  | 			 | ||||||
|  | 			# | ||||||
|  | 			entries.append(entry) | ||||||
|  | 		 | ||||||
|  | 		# Parse message lines | ||||||
|  | 		pattern_messages = re.compile("""^(?P<line>\*(?P<message>[\w]+)(?P<item>.*))$""", re.MULTILINE) | ||||||
|  | 		matches = pattern_messages.finditer(stdout) | ||||||
|  | 		for match in matches: | ||||||
|  | 			 | ||||||
|  | 			message = match.group("message").strip() | ||||||
|  | 			item = match.group("item").strip() | ||||||
|  | 			 | ||||||
|  | 			entry = DifferenceEntry(item) | ||||||
|  | 			 | ||||||
|  | 			if message == "deleting": | ||||||
|  | 				entry.set_is_missing_from_source() | ||||||
|  | 				entry.set_is_dir(item[-1] == "/") | ||||||
|  | 				entry.set_is_file(not item[-1] == "/") | ||||||
|  | 			 | ||||||
|  | 			else: | ||||||
|  | 				print("IS UNKNOWN MESSAGE:", message) | ||||||
|  | 				entry.set_is_unknown("Unhandled message: " + message) | ||||||
|  | 			 | ||||||
|  | 			entries.append(entry) | ||||||
|  | 		 | ||||||
|  | 		print("Finished calculating difference entries") | ||||||
|  | 		 | ||||||
|  | 		self.__difference_entries = entries | ||||||
|  | 	 | ||||||
|  | 	def execute_rsync(self): | ||||||
|  | 		 | ||||||
|  | 		# | ||||||
|  | 		args = list() | ||||||
|  | 		 | ||||||
|  | 		# Rsync | ||||||
|  | 		args.append("rsync") | ||||||
|  | 		 | ||||||
|  | 		# Dry run!! | ||||||
|  | 		args.append("--dry-run") | ||||||
|  | 		 | ||||||
|  | 		# Produces the main output we'll parse | ||||||
|  | 		args.append("--itemize-changes") | ||||||
|  | 		 | ||||||
|  | 		# Rsh command | ||||||
|  | 		rsh_command = self.make_rsync_rsh_argument(self.__ssh_key) | ||||||
|  | 		if rsh_command: | ||||||
|  | 			args.append(rsh_command) | ||||||
|  | 		 | ||||||
|  | 		# Main sync flags | ||||||
|  | 		args.append("--archive") | ||||||
|  | 		args.append("--delete") | ||||||
|  | 		 | ||||||
|  | 		# Source path | ||||||
|  | 		args.append(self.make_rsync_path(self.__source_ssh_host, self.__source_ssh_user, self.__source_path)) | ||||||
|  | 		 | ||||||
|  | 		# Backup path | ||||||
|  | 		args.append(self.make_rsync_path(self.__backup_ssh_host, self.__backup_ssh_user, self.__backup_path)) | ||||||
|  | 		 | ||||||
|  | 		# | ||||||
|  | 		print("Executing rsync with the following arguments:") | ||||||
|  | 		print(args) | ||||||
|  | 		 | ||||||
|  | 		# Spawn SSH in shell | ||||||
|  | 		process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||||
|  | 		stdout, stderr = process.communicate() | ||||||
|  | 		 | ||||||
|  | 		# | ||||||
|  | 		print("Rsync has finished executing") | ||||||
|  | 		 | ||||||
|  | 		# | ||||||
|  | 		stdout = stdout.decode() | ||||||
|  | 		stderr = stderr.decode() | ||||||
|  | 		 | ||||||
|  | 		# Accept Success (0), and Partial Transfer Codes (23 and 24) | ||||||
|  | 		if process.returncode not in [0, 23, 24]: | ||||||
|  | 			raise Exception("Failed to execute Rsync; Exited with code " + str(process.returncode)) | ||||||
|  | 		 | ||||||
|  | 		return stdout, stderr, process.returncode | ||||||
|  | 	 | ||||||
|  | 	@staticmethod | ||||||
|  | 	def make_rsync_path(ssh_host, ssh_user, path): | ||||||
|  | 		 | ||||||
|  | 		rsync_path = "" | ||||||
|  | 		 | ||||||
|  | 		if (not ssh_host) and ssh_user: | ||||||
|  | 			raise Exception("ssh_user provided (" + str(ssh_user) + ") without ssh_host") | ||||||
|  | 		 | ||||||
|  | 		if ssh_user: | ||||||
|  | 			rsync_path += ssh_user + "@" | ||||||
|  | 		 | ||||||
|  | 		if ssh_host: | ||||||
|  | 			rsync_path += ssh_host + ":" + path | ||||||
|  | 		else: | ||||||
|  | 			rsync_path += path | ||||||
|  | 		 | ||||||
|  | 		# Absolute path doesn't have trailing slash, which works well for rsync here | ||||||
|  | 		rsync_path += "/" | ||||||
|  | 		 | ||||||
|  | 		return rsync_path | ||||||
|  | 	 | ||||||
|  | 	@staticmethod | ||||||
|  | 	def make_rsync_rsh_argument(ssh_key): | ||||||
|  | 	 | ||||||
|  | 		if not ssh_key: | ||||||
|  | 			return None | ||||||
|  | 		 | ||||||
|  | 		if not os.path.isfile(ssh_key): | ||||||
|  | 			raise Exception("SSH key does not exist: " + str(ssh_key)) | ||||||
|  | 		 | ||||||
|  | 		return "--rsh=ssh -i " + ssh_key | ||||||
|  | 	 | ||||||
|  | 	def calculate_difference_entries_directly(self): | ||||||
|  | 		 | ||||||
|  | 		self.calculate_comparison_items() | ||||||
|  | 		 | ||||||
| 		entries = [] | 		entries = [] | ||||||
| 		 | 		 | ||||||
| 		# Compare everything in the source path | 		# Compare everything in the source path | ||||||
| @@ -443,6 +698,10 @@ class BackupDiff: | |||||||
| 			"size_difference": { | 			"size_difference": { | ||||||
| 				"label": "Items with different file sizes", | 				"label": "Items with different file sizes", | ||||||
| 				"entries": [] | 				"entries": [] | ||||||
|  | 			}, | ||||||
|  | 			"unknown": { | ||||||
|  | 				"label": "Differences of an unknown type", | ||||||
|  | 				"entries": [] | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		 | 		 | ||||||
| @@ -481,6 +740,11 @@ class BackupDiff: | |||||||
| 			if entry.get_is_different_sizes(): | 			if entry.get_is_different_sizes(): | ||||||
| 				report["size_difference"]["entries"].append(entry) | 				report["size_difference"]["entries"].append(entry) | ||||||
| 		 | 		 | ||||||
|  | 		# Differences of an unknown nature | ||||||
|  | 		for entry in self.__difference_entries: | ||||||
|  | 			if entry.get_is_unknown(): | ||||||
|  | 				report["unknown"]["entries"].append(entry) | ||||||
|  | 		 | ||||||
| 		# Sort all entries | 		# Sort all entries | ||||||
| 		for section_key in report: | 		for section_key in report: | ||||||
| 			self.sort_difference_entries(report[section_key]["entries"]) | 			self.sort_difference_entries(report[section_key]["entries"]) | ||||||
| @@ -515,7 +779,8 @@ class BackupDiff: | |||||||
| 			"missing_from_both", | 			"missing_from_both", | ||||||
| 			"missing_from_source", "newer_source", | 			"missing_from_source", "newer_source", | ||||||
| 			"missing_from_backup", "newer_backup", | 			"missing_from_backup", "newer_backup", | ||||||
| 			"size_difference" | 			"size_difference", | ||||||
|  | 			"unknown" | ||||||
| 		] | 		] | ||||||
| 		 | 		 | ||||||
| 		# | 		# | ||||||
| @@ -564,6 +829,7 @@ class DifferenceEntry: | |||||||
| 		self.CONST_TYPE_SOURCE_IS_NEWER = "source_is_newer" | 		self.CONST_TYPE_SOURCE_IS_NEWER = "source_is_newer" | ||||||
| 		self.CONST_TYPE_BACKUP_IS_NEWER = "backup_is_newer" | 		self.CONST_TYPE_BACKUP_IS_NEWER = "backup_is_newer" | ||||||
| 		self.CONST_TYPE_DIFFERENT_SIZES = "different_sizes" | 		self.CONST_TYPE_DIFFERENT_SIZES = "different_sizes" | ||||||
|  | 		self.CONST_TYPE_UNKNOWN = "unknown" | ||||||
| 		 | 		 | ||||||
| 		if item: | 		if item: | ||||||
| 			self.set_item(item) | 			self.set_item(item) | ||||||
| @@ -667,6 +933,13 @@ class DifferenceEntry: | |||||||
| 	def get_is_different_sizes(self): | 	def get_is_different_sizes(self): | ||||||
| 		return self.__type == self.CONST_TYPE_DIFFERENT_SIZES | 		return self.__type == self.CONST_TYPE_DIFFERENT_SIZES | ||||||
| 	 | 	 | ||||||
|  | 	def set_is_unknown(self, message): | ||||||
|  | 		self.__type = self.CONST_TYPE_UNKNOWN | ||||||
|  | 		self.__message = message | ||||||
|  | 	 | ||||||
|  | 	def get_is_unknown(self): | ||||||
|  | 		return self.__type == self.CONST_TYPE_UNKNOWN | ||||||
|  | 	 | ||||||
| 	@staticmethod | 	@staticmethod | ||||||
| 	def friendly_time_difference(stamp1, stamp2): | 	def friendly_time_difference(stamp1, stamp2): | ||||||
| 		delta = abs(stamp1 - stamp2) | 		delta = abs(stamp1 - stamp2) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user