Kinda trying to add support for rsync
This commit is contained in:
		
							
								
								
									
										277
									
								
								backup-diff.py
									
									
									
									
									
								
							
							
						
						
									
										277
									
								
								backup-diff.py
									
									
									
									
									
								
							| @@ -18,6 +18,8 @@ import datetime | ||||
| import functools | ||||
| import humanfriendly | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
|  | ||||
|  | ||||
| @@ -27,7 +29,14 @@ class BackupDiff: | ||||
| 	def __init__(self): | ||||
| 		 | ||||
| 		self.__source_path = None | ||||
| 		self.__source_ssh_host = None | ||||
| 		self.__source_ssh_user = None | ||||
| 		 | ||||
| 		self.__backup_path = None | ||||
| 		self.__backup_ssh_host = None | ||||
| 		self.__backup_ssh_user = None | ||||
| 		 | ||||
| 		self.__ssh_key = None | ||||
| 		 | ||||
| 		self.__source_path_items = None | ||||
| 		self.__backup_path_items = None | ||||
| @@ -35,10 +44,12 @@ class BackupDiff: | ||||
| 		self.__difference_entries = None | ||||
| 		self.__do_clean_difference_entries = True | ||||
| 		 | ||||
| 		self.__force_rsync = False | ||||
| 		 | ||||
| 	def run(self): | ||||
| 		 | ||||
| 		self.consume_arguments() | ||||
| 		self.calculate_comparison_items() | ||||
| 		 | ||||
| 		self.calculate_difference_entries() | ||||
| 		 | ||||
| 		if self.__do_clean_difference_entries: | ||||
| @@ -74,11 +85,40 @@ class BackupDiff: | ||||
| 				self.__source_path = os.path.abspath(one_path) | ||||
| 				self.log("Found source path argument:", self.__source_path) | ||||
| 			 | ||||
| 			elif arg == "--source-remote-host": | ||||
| 				i, host = self.consume_argument_companion(i) | ||||
| 				self.__source_ssh_host = host | ||||
| 				self.log("Will use source remote host: " + str(self.__source_ssh_host)) | ||||
| 			 | ||||
| 			elif arg == "--source-remote-user": | ||||
| 				i, user = self.consume_argument_companion(i) | ||||
| 				self.__source_ssh_user = user | ||||
| 				self.log("Will use source remote user: " + str(self.__source_ssh_user)) | ||||
| 			 | ||||
| 			elif arg == "--backup-path": | ||||
| 				i, one_path = self.consume_argument_companion(i) | ||||
| 				self.__backup_path = os.path.abspath(one_path) | ||||
| 				self.log("Found backup destination path argument:", self.__backup_path) | ||||
| 			 | ||||
| 			elif arg == "--backup-remote-host": | ||||
| 				i, host = self.consume_argument_companion(i) | ||||
| 				self.__backup_ssh_host = host | ||||
| 				self.log("Will use backup remote host: " + str(self.__backup_ssh_host)) | ||||
| 			 | ||||
| 			elif arg == "--backup-remote-user": | ||||
| 				i, user = self.consume_argument_companion(i) | ||||
| 				self.__backup_ssh_user = user | ||||
| 				self.log("Will use backup remote user: " + str(self.__backup_ssh_user)) | ||||
| 			 | ||||
| 			elif arg == "--ssh-key": | ||||
| 				i, key = self.consume_argument_companion(i) | ||||
| 				self.__ssh_key = key | ||||
| 				self.log("Will use ssh key: " + str(self.__ssh_key)) | ||||
| 			 | ||||
| 			elif arg == "--use-rsync" or arg == "--rsync": | ||||
| 				self.__force_rsync = True | ||||
| 				self.log("Forcing comparison with rsync tool") | ||||
| 			 | ||||
| 			elif arg == "--no-clean": | ||||
| 				self.__do_clean_difference_entries = False | ||||
| 				self.log("Won't clean Difference entries") | ||||
| @@ -113,6 +153,22 @@ class BackupDiff: | ||||
| 		 | ||||
| 		self.__source_path_items = source_path_items | ||||
| 	 | ||||
| 	def should_use_rsync(self): | ||||
| 		 | ||||
| 		if self.__force_rsync: | ||||
| 			return True | ||||
| 		 | ||||
| 		if self.__source_ssh_host or self.__source_ssh_user: | ||||
| 			return True | ||||
| 		 | ||||
| 		if self.__backup_ssh_host or self.__backup_ssh_user: | ||||
| 			return True | ||||
| 		 | ||||
| 		if self.__ssh_key: | ||||
| 			return True | ||||
| 		 | ||||
| 		return False | ||||
| 	 | ||||
| 	def consume_backup_path(self): | ||||
| 		 | ||||
| 		if self.__backup_path is None: | ||||
| @@ -156,6 +212,205 @@ class BackupDiff: | ||||
| 	 | ||||
| 	def calculate_difference_entries(self): | ||||
| 	 | ||||
| 		if self.should_use_rsync(): | ||||
| 			self.calculate_difference_entries_with_rsync() | ||||
| 		else: | ||||
| 			self.calculate_difference_entries_directly() | ||||
| 	 | ||||
| 	def calculate_difference_entries_with_rsync(self): | ||||
| 		 | ||||
| 		entries = [] | ||||
| 		 | ||||
| 		do_test = False | ||||
| 		 | ||||
| 		stdout, stderr, return_code = self.execute_rsync() | ||||
| 		 | ||||
| 		print("STDOUT:") | ||||
| 		print(stdout) | ||||
| 		 | ||||
| 		#print("STDERR:") | ||||
| 		#print(stderr) | ||||
| 		 | ||||
| 		# | ||||
| 		print("Calculating difference entries ...") | ||||
| 		 | ||||
| 		# Parse normal lines (Flags and Path) | ||||
| 		pattern_general = re.compile("""^(?P<line>(?P<flags>[^\s]{11})(?P<item>.*))$""", re.MULTILINE) | ||||
| 		matches = pattern_general.finditer(stdout) | ||||
| 		for match in matches: | ||||
| 			 | ||||
| 			line = match.group("line") | ||||
| 			 | ||||
| 			flags = match.group("flags") | ||||
| 			change_type_character = flags[0] | ||||
| 			item_type = flags[1] | ||||
| 			 | ||||
| 			# Determine which attributes are different | ||||
| 			attributes_part = flags[2:] | ||||
| 			different_checksum = "c" in attributes_part | ||||
| 			different_size = "s" in attributes_part | ||||
| 			different_modification_time = "t" in attributes_part | ||||
| 			different_permissions = "p" in attributes_part | ||||
| 			different_owner = "o" in attributes_part | ||||
| 			different_group = "g" in attributes_part | ||||
| 			different_acl = "a" in attributes_part | ||||
| 			different_extended_attributes = "x" in attributes_part | ||||
| 			# | ||||
| 			different_any_attribute = ( | ||||
| 				different_checksum | ||||
| 				or different_size | ||||
| 				or different_modification_time | ||||
| 				or different_permissions | ||||
| 				or different_owner | ||||
| 				or different_group | ||||
| 				or different_acl | ||||
| 				or different_extended_attributes | ||||
| 			) | ||||
| 			 | ||||
| 			item = match.group("item").strip() | ||||
| 			 | ||||
| 			entry = DifferenceEntry(item) | ||||
| 			 | ||||
| 			# File folder, whatever | ||||
| 			if item_type == "d": | ||||
| 				entry.set_is_dir() | ||||
| 			elif item_type == "f": | ||||
| 				entry.set_is_file() | ||||
| 			 | ||||
| 			# Missing from backup | ||||
| 			if change_type_character == "<": | ||||
| 				entry.set_is_missing_from_backup() | ||||
| 			 | ||||
| 			# Missing from source | ||||
| 			elif change_type_character == ">": | ||||
| 				entry.set_is_missing_from_source() | ||||
| 			 | ||||
| 			# Local change is occurring | ||||
| 			elif change_type_character == "c": | ||||
| 				entry.set_is_unknown("Rsync says a local change is occurring") | ||||
| 			 | ||||
| 			# Item is a hard link | ||||
| 			elif change_type_character == "h": | ||||
| 				entry.set_is_unknown("Rsync says this is a hard link") | ||||
| 			 | ||||
| 			# "no change / transfer (could still be changing attributes)" | ||||
| 			elif change_type_character == ".": | ||||
| 				entry.set_is_unknown("Rsync says no change, but could be changing attributes") | ||||
| 			 | ||||
| 			# | ||||
| 			entries.append(entry) | ||||
| 		 | ||||
| 		# Parse message lines | ||||
| 		pattern_messages = re.compile("""^(?P<line>\*(?P<message>[\w]+)(?P<item>.*))$""", re.MULTILINE) | ||||
| 		matches = pattern_messages.finditer(stdout) | ||||
| 		for match in matches: | ||||
| 			 | ||||
| 			message = match.group("message").strip() | ||||
| 			item = match.group("item").strip() | ||||
| 			 | ||||
| 			entry = DifferenceEntry(item) | ||||
| 			 | ||||
| 			if message == "deleting": | ||||
| 				entry.set_is_missing_from_source() | ||||
| 				entry.set_is_dir(item[-1] == "/") | ||||
| 				entry.set_is_file(not item[-1] == "/") | ||||
| 			 | ||||
| 			else: | ||||
| 				print("IS UNKNOWN MESSAGE:", message) | ||||
| 				entry.set_is_unknown("Unhandled message: " + message) | ||||
| 			 | ||||
| 			entries.append(entry) | ||||
| 		 | ||||
| 		print("Finished calculating difference entries") | ||||
| 		 | ||||
| 		self.__difference_entries = entries | ||||
| 	 | ||||
| 	def execute_rsync(self): | ||||
| 		 | ||||
| 		# | ||||
| 		args = list() | ||||
| 		 | ||||
| 		# Rsync | ||||
| 		args.append("rsync") | ||||
| 		 | ||||
| 		# Dry run!! | ||||
| 		args.append("--dry-run") | ||||
| 		 | ||||
| 		# Produces the main output we'll parse | ||||
| 		args.append("--itemize-changes") | ||||
| 		 | ||||
| 		# Rsh command | ||||
| 		rsh_command = self.make_rsync_rsh_argument(self.__ssh_key) | ||||
| 		if rsh_command: | ||||
| 			args.append(rsh_command) | ||||
| 		 | ||||
| 		# Main sync flags | ||||
| 		args.append("--archive") | ||||
| 		args.append("--delete") | ||||
| 		 | ||||
| 		# Source path | ||||
| 		args.append(self.make_rsync_path(self.__source_ssh_host, self.__source_ssh_user, self.__source_path)) | ||||
| 		 | ||||
| 		# Backup path | ||||
| 		args.append(self.make_rsync_path(self.__backup_ssh_host, self.__backup_ssh_user, self.__backup_path)) | ||||
| 		 | ||||
| 		# | ||||
| 		print("Executing rsync with the following arguments:") | ||||
| 		print(args) | ||||
| 		 | ||||
| 		# Spawn SSH in shell | ||||
| 		process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | ||||
| 		stdout, stderr = process.communicate() | ||||
| 		 | ||||
| 		# | ||||
| 		print("Rsync has finished executing") | ||||
| 		 | ||||
| 		# | ||||
| 		stdout = stdout.decode() | ||||
| 		stderr = stderr.decode() | ||||
| 		 | ||||
| 		# Accept Success (0), and Partial Transfer Codes (23 and 24) | ||||
| 		if process.returncode not in [0, 23, 24]: | ||||
| 			raise Exception("Failed to execute Rsync; Exited with code " + str(process.returncode)) | ||||
| 		 | ||||
| 		return stdout, stderr, process.returncode | ||||
| 	 | ||||
| 	@staticmethod | ||||
| 	def make_rsync_path(ssh_host, ssh_user, path): | ||||
| 		 | ||||
| 		rsync_path = "" | ||||
| 		 | ||||
| 		if (not ssh_host) and ssh_user: | ||||
| 			raise Exception("ssh_user provided (" + str(ssh_user) + ") without ssh_host") | ||||
| 		 | ||||
| 		if ssh_user: | ||||
| 			rsync_path += ssh_user + "@" | ||||
| 		 | ||||
| 		if ssh_host: | ||||
| 			rsync_path += ssh_host + ":" + path | ||||
| 		else: | ||||
| 			rsync_path += path | ||||
| 		 | ||||
| 		# Absolute path doesn't have trailing slash, which works well for rsync here | ||||
| 		rsync_path += "/" | ||||
| 		 | ||||
| 		return rsync_path | ||||
| 	 | ||||
| 	@staticmethod | ||||
| 	def make_rsync_rsh_argument(ssh_key): | ||||
| 	 | ||||
| 		if not ssh_key: | ||||
| 			return None | ||||
| 		 | ||||
| 		if not os.path.isfile(ssh_key): | ||||
| 			raise Exception("SSH key does not exist: " + str(ssh_key)) | ||||
| 		 | ||||
| 		return "--rsh=ssh -i " + ssh_key | ||||
| 	 | ||||
| 	def calculate_difference_entries_directly(self): | ||||
| 		 | ||||
| 		self.calculate_comparison_items() | ||||
| 		 | ||||
| 		entries = [] | ||||
| 		 | ||||
| 		# Compare everything in the source path | ||||
| @@ -443,6 +698,10 @@ class BackupDiff: | ||||
| 			"size_difference": { | ||||
| 				"label": "Items with different file sizes", | ||||
| 				"entries": [] | ||||
| 			}, | ||||
| 			"unknown": { | ||||
| 				"label": "Differences of an unknown type", | ||||
| 				"entries": [] | ||||
| 			} | ||||
| 		} | ||||
| 		 | ||||
| @@ -481,6 +740,11 @@ class BackupDiff: | ||||
| 			if entry.get_is_different_sizes(): | ||||
| 				report["size_difference"]["entries"].append(entry) | ||||
| 		 | ||||
| 		# Differences of an unknown nature | ||||
| 		for entry in self.__difference_entries: | ||||
| 			if entry.get_is_unknown(): | ||||
| 				report["unknown"]["entries"].append(entry) | ||||
| 		 | ||||
| 		# Sort all entries | ||||
| 		for section_key in report: | ||||
| 			self.sort_difference_entries(report[section_key]["entries"]) | ||||
| @@ -515,7 +779,8 @@ class BackupDiff: | ||||
| 			"missing_from_both", | ||||
| 			"missing_from_source", "newer_source", | ||||
| 			"missing_from_backup", "newer_backup", | ||||
| 			"size_difference" | ||||
| 			"size_difference", | ||||
| 			"unknown" | ||||
| 		] | ||||
| 		 | ||||
| 		# | ||||
| @@ -564,6 +829,7 @@ class DifferenceEntry: | ||||
| 		self.CONST_TYPE_SOURCE_IS_NEWER = "source_is_newer" | ||||
| 		self.CONST_TYPE_BACKUP_IS_NEWER = "backup_is_newer" | ||||
| 		self.CONST_TYPE_DIFFERENT_SIZES = "different_sizes" | ||||
| 		self.CONST_TYPE_UNKNOWN = "unknown" | ||||
| 		 | ||||
| 		if item: | ||||
| 			self.set_item(item) | ||||
| @@ -667,6 +933,13 @@ class DifferenceEntry: | ||||
| 	def get_is_different_sizes(self): | ||||
| 		return self.__type == self.CONST_TYPE_DIFFERENT_SIZES | ||||
| 	 | ||||
| 	def set_is_unknown(self, message): | ||||
| 		self.__type = self.CONST_TYPE_UNKNOWN | ||||
| 		self.__message = message | ||||
| 	 | ||||
| 	def get_is_unknown(self): | ||||
| 		return self.__type == self.CONST_TYPE_UNKNOWN | ||||
| 	 | ||||
| 	@staticmethod | ||||
| 	def friendly_time_difference(stamp1, stamp2): | ||||
| 		delta = abs(stamp1 - stamp2) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user