#!/usr/bin/env python3
"""Rsync-based full/differential backup driver (local or over SSH).

Command line options are parsed by MikesBackup.parse_args(); running the
module as a script parses sys.argv and performs one backup.
"""

import datetime
import os
import re
import shlex
import subprocess
import sys


class MikesBackup:
    """Run full or differential rsync backups to a local or remote directory.

    Layout under the destination base directory:

    * ``full``                      - the full backup
    * ``differential/<timestamp>``  - finished differential backups
    * ``differential/IN-PROGRESS``  - the differential currently being written

    The instance is configured entirely from ``sys.argv`` at construction.
    """

    __log_dir = None
    __log_name = None

    __remote_host = None
    __remote_user = None
    __destination_dir_base = None

    __source_dir = None
    # NOTE: these class-level lists only act as defaults; __init__ rebinds
    # fresh lists on every instance so instances never share state.
    __source_dir_includes = []
    __source_dir_excludes = []
    __source_mountpoint_demands = []

    __ssh_key = None
    __quiet_ssh = True

    __force_full = False
    __force_differential = False
    __no_incremental = False

    __log_file_handle = None

    CONST_FULL_DIRECTORY_NAME = "full"
    CONST_DIFFERENTIAL_DIRECTORY_NAME = "differential"
    CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME = "IN-PROGRESS"

    def __init__(self):
        """Create the instance and configure it from sys.argv."""
        # Give every instance its own lists; appending to the class-level
        # lists would leak includes/excludes from one instance into the next.
        self.__source_dir_includes = []
        self.__source_dir_excludes = []
        self.__source_mountpoint_demands = []

        self.parse_args()

    def __del__(self):
        """Close the log file (if one is open) when the instance goes away."""
        self.close_log_file()

    def __str__(self):
        """Return a multi-line, human-readable dump of the configuration."""
        lines = [
            "MikesBackup Class Instance",
            "Log Dir: " + str(self.__log_dir),
            "Log Name: " + str(self.__log_name),
            "Remote Host: " + str(self.__remote_host),
            "Remote User: " + str(self.__remote_user),
            "Destination Dir Base: " + str(self.__destination_dir_base),
            "Source Dir (Main): " + str(self.__source_dir),
            "Source Dirs (Includes): " + str(self.__source_dir_includes),
            "Source Dirs (Excludes): " + str(self.__source_dir_excludes),
            "Source Mountpoint Demands: " + str(self.__source_mountpoint_demands),
            "SSH Key: " + str(self.__ssh_key),
            "Quiet SSH: " + str(self.__quiet_ssh),
            "Force Full Backup: " + str(self.__force_full),
            "Force Differential: " + str(self.__force_differential),
            "Disallow Incremental: " + str(self.__no_incremental),
        ]
        return "\n".join(lines)

    def log(self, s, o=None):
        """Print a timestamped line and append it to the log file, if any.

        s: the message.
        o: optional extra object; its str() is logged line by line after s.
        """
        the_date = self.get_datetime_for_logging()
        to_log = "[MikesBackup][" + the_date + "] " + s

        # Print the log line
        print(to_log)

        # Append the log line to the log file
        f = self.open_log_file()
        if f:
            f.write(to_log + "\n")

        # Recurse in order to print whatever o is outputting, if anything
        if o is not None:
            for line in str(o).split("\n"):
                self.log(line)

    def open_log_file(self):
        """Return the open log file handle, opening it on first use.

        Returns None when no log directory has been configured.
        """
        if self.__log_file_handle:
            return self.__log_file_handle

        log_file_path = self.make_log_path()
        if log_file_path:
            # The log directory may not exist yet; without this the very
            # first log line after --log-dir would fail to open the file.
            self.ensure_local_directory(os.path.dirname(log_file_path))
            self.__log_file_handle = open(log_file_path, "w")

        return self.__log_file_handle

    def close_log_file(self):
        """Close the log file handle, if one is open."""
        if self.__log_file_handle:
            self.__log_file_handle.close()
            self.__log_file_handle = None

    @staticmethod
    def eprint(*args, **kwargs):
        """print() to stderr.

        Declared static so that calling it through an instance does not
        print the instance itself as the first argument.
        """
        print(*args, file=sys.stderr, **kwargs)

    @staticmethod
    def _option_value(index):
        """Return the value following the option at sys.argv[index].

        Raises Exception when the option is the last argument and therefore
        has no value.
        """
        if index + 1 >= len(sys.argv):
            raise Exception("Missing value for argument: " + sys.argv[index])
        return sys.argv[index + 1]

    def parse_args(self):
        """Configure the instance from sys.argv.

        Raises Exception for unknown arguments, for a repeated --source-dir,
        and for value-taking options that have no value.
        """
        print()
        self.log("Parsing arguments")

        a = 0
        while a + 1 < len(sys.argv):

            a += 1
            arg = sys.argv[a]

            if arg == "":
                # Empty arguments are tolerated and ignored
                continue

            elif arg == "--full":
                self.__force_full = True
                self.log("Forcing a full backup")

            elif arg == "--diff" or arg == "--differential":
                self.__force_differential = True
                self.log("Forcing a differential backup")

            elif arg == "--no-incremental":
                self.__no_incremental = True
                self.log("Disallowing incremental backups (differentials will only link back to full)")

            elif arg == "--log-dir":
                self.__log_dir = self._option_value(a)
                self.log("Found log dir: " + self.__log_dir)
                a += 1

            elif arg == "--log-name":
                self.__log_name = self._option_value(a)
                self.log("Found log name: " + self.__log_name)
                # The log file name depends on the log name, so force a
                # re-open on the next log line.
                self.close_log_file()
                a += 1

            elif arg == "--source-dir":
                if self.__source_dir:
                    raise Exception("--source-dir can only be used once")
                self.__source_dir = self._option_value(a)
                self.log("Found source dir: " + self.__source_dir)
                a += 1

            elif arg == "--include":
                value = self._option_value(a)
                self.__source_dir_includes.append(value)
                self.log("Found additional source dir include: " + value)
                a += 1

            elif arg == "--source-mountpoint":
                value = self._option_value(a)
                self.__source_mountpoint_demands.append(value)
                self.log("Found required source mountpoint: " + value)
                a += 1

            elif arg == "--destination-dir":
                self.__destination_dir_base = self._option_value(a)
                self.log("Found destination dir: " + self.__destination_dir_base)
                a += 1

            elif arg == "--exclude":
                value = self._option_value(a)
                self.__source_dir_excludes.append(value)
                self.log("Found exclude dir: " + value)
                a += 1

            elif arg == "--remote-host":
                self.__remote_host = self._option_value(a)
                self.log("Found remote host: " + self.__remote_host)
                a += 1

            elif arg == "--remote-user":
                self.__remote_user = self._option_value(a)
                self.log("Found remote user: " + self.__remote_user)
                a += 1

            elif arg == "--ssh-key":
                self.__ssh_key = self._option_value(a)
                self.log("Found ssh key: " + self.__ssh_key)
                a += 1

            else:
                raise Exception("Invalid argument:", arg)

    @staticmethod
    def get_datetime_for_logging():
        """Return the current local time formatted for log line prefixes."""
        return datetime.datetime.now().strftime("%b %d %Y; %I%M%p")

    @staticmethod
    def get_datetime_for_filename():
        """Return the current local time formatted for file/directory names."""
        return datetime.datetime.now().strftime('%Y-%b-%d_%I%M%p')

    def is_using_source_mountpoints(self):
        """Return True when at least one --source-mountpoint was given."""
        return len(self.__source_mountpoint_demands) > 0

    def demand_source_mountpoints(self):
        """Raise Exception unless every required mountpoint is mounted."""
        for mountpoint_path in self.__source_mountpoint_demands:
            if not os.path.ismount(mountpoint_path):
                raise Exception("Required mountpoint is not mounted: " + str(mountpoint_path))
            self.log("Verified mountpoint: " + mountpoint_path)

    def is_using_ssh(self):
        """Return True when any SSH related option (host, user, key) is set."""
        return (
            self.__remote_host is not None
            or self.__remote_user is not None
            or self.__ssh_key is not None
        )

    def demand_ssh_config(self):
        """Raise Exception when SSH is in use but host or user is missing."""
        if self.is_using_ssh():
            if self.__remote_host is None:
                raise Exception("Please provide remote host")
            if self.__remote_user is None:
                raise Exception("Please provide remote user")

    def demand_source_directory_config(self):
        """Raise Exception when no source directory was configured."""
        if self.__source_dir is None:
            raise Exception("Please provide a source directory")

    def demand_destination_directory_config(self):
        """Raise Exception when no destination directory was configured."""
        if self.__destination_dir_base is None:
            raise Exception("Please provide backup destination directory")

    def does_destination_directory_exist(self, destination_path):
        """Return True when destination_path is an existing directory.

        Checked on the local filesystem, or with a ``[ -d ... ]`` test run
        over SSH when a remote is configured.
        """
        self.log("Trying to determine if destination path exists:" + destination_path)

        # Local?
        if not self.is_using_ssh():
            self.log("Checking for local destination path")
            if os.path.isdir(destination_path):
                self.log("Local destination path exists")
                return True
            self.log("Local destination path does not exist")
            return False

        self.log("Checking for remote destination path: " + destination_path)
        # NOTE(review): the path is interpreted by the remote shell as-is
        # (so "~" expands remotely, but spaces/metacharacters are not escaped).
        command = [
            "[ -d " + destination_path + " ]"
        ]

        code, stdout, stderr = self.execute_remote_ssh_command(command)
        if code == 0:
            self.log("Remote destination dir was found: " + destination_path)
            return True

        self.log("Remote dir didn't seem to exist: " + destination_path)
        return False

    def demand_destination_base_backup_directory(self):
        """Raise Exception unless the destination base directory exists."""
        self.demand_destination_directory_config()

        destination_path = self.__destination_dir_base

        if self.does_destination_directory_exist(destination_path) is False:
            raise Exception("Backup destination directory doesn't exist: " + destination_path)

    def does_full_backup_destination_directory_exist(self):
        """Return True when the "full" backup directory already exists."""
        dir_path = self.make_full_backup_destination_path()

        self.log("Trying to determine if Full backup destination directory exists:", dir_path)
        return self.does_destination_directory_exist(dir_path)

    def do_backup(self):
        """Run one backup, choosing full or differential.

        --full / --diff force the kind; otherwise a differential is made
        when a full backup directory already exists, and a full one if not.
        """
        print()
        self.log("Enter: do_backup")

        # Source mountpoints must be mounted
        self.demand_source_mountpoints()

        # Remote base dir must exist
        self.demand_destination_base_backup_directory()

        # Forced full or differential by args? (--full wins if both given)
        if self.__force_full is True:
            self.log("Forcing full backup")
            self.do_full_backup()
            return
        if self.__force_differential is True:
            self.log("Forcing differential backup")
            self.do_differential_backup()
            return

        # Automatically choose full or differential
        if self.does_full_backup_destination_directory_exist():
            self.log("Automatically choosing differential backup, because full backup destination directory already exists")
            self.do_differential_backup()
        else:
            self.log("Automatically choosing full backup, because full backup destination directory wasn't found")
            self.do_full_backup()

    def do_full_backup(self):
        """Rsync the source into the "full" directory, then drop differentials.

        Raises Exception (from execute_rsync) when rsync fails; in that case
        the existing differentials are left untouched.
        """
        destination_dir = self.make_full_backup_destination_path()

        args = [
            self.make_rsync_source_directory_part(),
            self.make_rsync_remote_destination_part(destination_dir),
        ]

        self.log("Destination dir:" + destination_dir)

        self.execute_rsync(args)

        self.log("Rsync seems to have finished successfully")

        self.log("Because a full backup has succeeded, will now delete any differential backups")
        args_remove_differentials = [
            "rm",
            "-rfv",
            self.make_remote_differential_backup_path_base()
        ]
        self._run_on_destination(args_remove_differentials)
        self.log("Finished deleting old differentials")

    def do_differential_backup(self):
        """Rsync the source into a new differential directory.

        The backup is written to the IN-PROGRESS directory (hard-linking
        unchanged files against the newest previous backup via --link-dest)
        and renamed to its timestamped name only after rsync succeeded.
        """
        args = []

        # Get directories
        link_dest_dir = self.determine_rsync_backup_link_destination_path()
        destination_dir_in_progress = self.make_remote_differential_in_progress_backup_path()
        destination_dir_final = self.make_remote_differential_backup_path()
        self.ensure_destination_directory(destination_dir_in_progress)

        # Add link dest arg?
        if link_dest_dir:
            args.append("--link-dest")
            args.append(link_dest_dir)

        # Append source directory
        args.append(self.make_rsync_source_directory_part())

        # Append remote destination directory
        args.append(self.make_rsync_remote_destination_part(destination_dir_in_progress))

        # link_dest_dir may legitimately be None, hence str()
        self.log("Link destination dir: " + str(link_dest_dir))
        self.log("Destination dir: " + destination_dir_in_progress)

        self.execute_rsync(args)

        self.log("Rsync seems to have finished successfully")

        self.log("Renaming temporary directory")
        self.log("Old: " + destination_dir_in_progress)
        self.log("New: " + destination_dir_final)
        if self.is_using_ssh():
            return_code, stdout, stderr = self.execute_remote_ssh_command([
                "mv",
                destination_dir_in_progress,
                destination_dir_final
            ])
            if return_code != 0:
                raise Exception("Failed to move temporary diff directory to its final home")
        else:
            os.rename(destination_dir_in_progress, destination_dir_final)
        self.log("Rename was successful")

    def make_log_directory_path(self):
        """Return the configured log directory, or None (with a notice)."""
        log_dir = self.__log_dir
        if log_dir is None:
            print("No log directory specified; Won't log")
            return None

        return log_dir

    def make_log_path(self):
        """Return the log file path for the current time, or None.

        The name is ``<timestamp>[-<log name>].log`` inside the log dir.
        """
        # Log dir
        log_dir = self.make_log_directory_path()
        if not log_dir:
            return None

        # Filename
        file_name = self.get_datetime_for_filename()
        if self.__log_name:
            file_name += "-" + self.__log_name
        file_name += ".log"

        # Path
        return os.path.join(log_dir, file_name)

    def make_full_backup_destination_path(self):
        """Return ``<destination base>/full``; raise if no base is set."""
        if self.__destination_dir_base is None:
            raise Exception("No remote directory was specified")

        return os.path.join(self.__destination_dir_base, self.CONST_FULL_DIRECTORY_NAME)

    def make_remote_differential_backup_path_base(self):
        """Return ``<destination base>/differential``; raise if no base is set."""
        if self.__destination_dir_base is None:
            raise Exception("No remote directory was specified")

        return os.path.join(self.__destination_dir_base, self.CONST_DIFFERENTIAL_DIRECTORY_NAME)

    def make_remote_differential_in_progress_backup_path(self):
        """Return the path of the IN-PROGRESS differential directory."""
        diff_path_base = self.make_remote_differential_backup_path_base()

        return os.path.join(diff_path_base, self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME)

    def make_remote_differential_backup_path(self):
        """Return the timestamped path for a finished differential backup."""
        diff_path_base = self.make_remote_differential_backup_path_base()

        return os.path.join(diff_path_base, self.get_datetime_for_filename())

    def make_rsync_source_includes_part(self):
        """Return the ``--include <dir>`` argument pairs for all includes."""
        args = []
        for d in self.__source_dir_includes:
            args.append("--include")
            args.append(d)

        return args

    def make_rsync_source_directory_part(self):
        """Return the rsync source argument; raise if none is configured."""
        self.demand_source_directory_config()

        return self.__source_dir

    def make_rsync_remote_destination_part(self, destination_dir):
        """Return the rsync destination argument for destination_dir.

        ``[user@]host:dir`` when a remote host is set, plain ``dir`` otherwise.
        """
        part = ""

        if self.__remote_host is not None:
            if self.__remote_user is not None:
                part += self.__remote_user + "@"
            part += self.__remote_host + ":"

        part += destination_dir

        return part

    def _run_on_destination(self, command):
        """Run command where the destination lives (over SSH or locally).

        Returns (return code, stdout, stderr).
        """
        if self.is_using_ssh():
            return self.execute_remote_ssh_command(command)
        return self.execute_command(command)

    def _list_destination_directory(self, directory_path):
        """Run ``ls`` with full change times on a destination directory.

        Returns (return code, stdout, stderr).
        """
        return self._run_on_destination([
            "ls",
            "-l",
            "-c",
            "--all",
            "--full-time",
            directory_path
        ])

    def determine_rsync_backup_link_destination_path(self):
        """Return the newest previous backup to use as rsync --link-dest.

        Starts from the "full" backup and, unless incremental backups are
        disabled, prefers any finished differential directory with a newer
        change time. Returns None when no candidate was found.

        Raises Exception when a directory listing fails.
        """
        self.demand_destination_directory_config()

        self.log("Begin trying to determine which previous backup path to use as link")

        newest_path = None
        newest_path_date = None

        # Pattern to parse the 'ls' command: a "--full-time" timestamp with
        # its UTC offset ("date"), followed by the entry name ("name").
        pattern = re.compile(
            r""".*(?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{9} .{5}) (?P<name>.+)$""",
            re.MULTILINE
        )

        # Get listing info for the full path
        destination_path_full = self.make_full_backup_destination_path()
        return_code, stdout, stderr = self._list_destination_directory(destination_path_full)
        if return_code != 0:
            raise Exception("Failed to get listing info for base destination directory")
        for match in pattern.finditer(stdout):
            name = match.group("name")
            date = match.group("date")
            # The "." entry carries the change time of the directory itself
            if name == ".":
                self.log("Start by assuming \"Full\" is the most recent backup: " + destination_path_full)
                self.log("; With a date of " + date)
                newest_path = destination_path_full
                newest_path_date = date
                break
        if not newest_path:
            self.log("Didn't find a \"Full\" backup on remote")

        if self.__no_incremental:
            self.log("Incremental backups are disabled; Won't consider any differential directories for the link target")
            return newest_path

        # Get listing info for all differential directories
        differential_path_base = self.make_remote_differential_backup_path_base()
        self.ensure_destination_directory(differential_path_base)
        return_code, stdout, stderr = self._list_destination_directory(differential_path_base)
        if return_code != 0:
            raise Exception("Failed to get listing info for destination differential base directory")

        # Look for the most recent differential directory
        # (must be newer than the Full directory too)
        skipped_names = (".", "..", self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME)
        for match in pattern.finditer(stdout):
            name = match.group("name")
            date = match.group("date")
            if name in skipped_names:
                continue
            # Timestamps are compared as strings (ISO-like, so they sort
            # chronologically as long as the UTC offset is the same).
            if newest_path is None or date > newest_path_date:
                self.log("Found a newer differential backup: " + name + "; " + date)
                newest_path = os.path.join(differential_path_base, name)
                newest_path_date = date
            else:
                self.log("Not newer: " + name + "; " + date)

        # Both values are None when nothing was found, hence str()
        self.log("Newest backup path is: " + str(newest_path))
        self.log("; With a date of: " + str(newest_path_date))

        return newest_path

    @staticmethod
    def ensure_local_directory(d):
        """Create local directory d (and parents) when the path is missing."""
        if not os.path.exists(d):
            os.makedirs(d)

    def ensure_destination_directory(self, d):
        """Create destination directory d (locally or over SSH) if missing."""
        if not self.does_destination_directory_exist(d):

            self.log("Destination directory doesn't exist; Will create:" + d)

            if self.is_using_ssh():
                command = [
                    "mkdir",
                    "--parents",
                    d
                ]
                self.execute_remote_ssh_command(command)
            else:
                os.makedirs(d)

    def start_rsync_args(self):
        """Return the rsync command line up to (not including) source/dest."""
        args = [
            "rsync",
            "--archive",
            "--compress",
            "--progress",
            "--stats",
            "--verbose",
            "--human-readable",
            "--itemize-changes",
            "--no-links",
            "--delete",
            "--delete-excluded"
        ]

        log_dir = self.make_log_directory_path()
        log_path = self.make_log_path()
        if log_dir and log_path:
            self.ensure_local_directory(log_dir)
            args.append("--log-file")
            args.append(log_path)

        # Only allow recursion into multiple file systems
        # if any mountpoints were specified.
        # The flag must appear at most once: rsync gives a repeated
        # --one-file-system a different meaning (mount-point directories are
        # omitted entirely), and hard-coding it above would defeat the
        # mountpoint case.
        if not self.is_using_source_mountpoints():
            args.append("--one-file-system")

        for i in self.__source_dir_includes:
            args.append("--include")
            args.append(i)

        for e in self.__source_dir_excludes:
            args.append("--exclude")
            args.append(e)

        return args

    def start_rsync_environment_variables(self):
        """Return the environment variable overrides rsync should run with.

        Only the overrides are returned (currently RSYNC_RSH, selecting the
        ssh key and quiet mode); execute_rsync layers them over os.environ.
        """
        env = {}

        if self.__ssh_key is not None or self.__quiet_ssh is True:
            env["RSYNC_RSH"] = "ssh"
            if self.__ssh_key is not None:
                env["RSYNC_RSH"] += " -i " + shlex.quote(self.__ssh_key)
            if self.__quiet_ssh is True:
                env["RSYNC_RSH"] += " -q"

        return env

    @staticmethod
    def _command_to_args(command):
        """Return command (a str or a list of str) as a new argument list.

        Raises Exception for any other datatype.
        """
        if isinstance(command, str):
            return [command]
        if isinstance(command, list):
            return list(command)
        raise Exception("Unsupported command datatype")

    @staticmethod
    def _run_and_capture(args):
        """Run args and return (return code, decoded stdout, decoded stderr)."""
        process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()

        return process.returncode, stdout.decode(), stderr.decode()

    @staticmethod
    def execute_command(command):
        """Run a local command (a str or a list of str) without a shell.

        Returns (return code, stdout, stderr) with the output decoded.
        Raises Exception for unsupported command datatypes.
        """
        args = MikesBackup._command_to_args(command)

        return MikesBackup._run_and_capture(args)

    def execute_remote_ssh_command(self, command):
        """Run command (a str or a list of str) on the remote host via ssh.

        Returns (return code, stdout, stderr) with the output decoded.
        Raises Exception when the SSH configuration is incomplete or the
        command datatype is unsupported.
        """
        self.demand_ssh_config()

        # ssh command
        args = ["ssh"]

        # Quiet?
        if self.__quiet_ssh is True:
            args.append("-q")

        # ssh key
        if self.__ssh_key is not None:
            args.append("-i")
            args.append(self.__ssh_key)

        # ssh user@host
        args.append(self.__remote_user + "@" + self.__remote_host)

        # Append the command; "--" stops ssh from parsing it as options
        args.append("--")
        args.extend(self._command_to_args(command))

        return self._run_and_capture(args)

    def execute_rsync(self, _args):
        """Run rsync with the standard arguments followed by _args.

        rsync's output goes straight to this process' stdout/stderr, so the
        returned stdout and stderr strings are always empty.

        Returns (return code, "", "").
        Raises Exception when the configuration is incomplete or rsync exits
        with a code other than 0 (success) or 24 (source files vanished).
        """
        # Demand stuff
        self.demand_source_directory_config()
        self.demand_destination_directory_config()
        if self.is_using_ssh():
            self.demand_ssh_config()

        args = self.start_rsync_args()
        args.extend(_args)

        env = self.start_rsync_environment_variables()

        self.log("Executing rsync with the following arguments:", args)
        self.log("; And the following environment:", env)

        # Layer the overrides on top of the inherited environment. Passing
        # only the overrides would strip PATH, HOME, SSH_AUTH_SOCK, ... from
        # rsync (and the ssh it spawns), unlike the direct ssh calls above.
        process_env = dict(os.environ)
        process_env.update(env)

        # Spawn Rsync (no shell; output is not captured)
        process = subprocess.Popen(args, env=process_env)
        process.communicate()
        stdout = ""
        stderr = ""

        # Check return code
        # 0 = Success
        # 24 = Source files vanished
        return_code = process.returncode
        if return_code != 0 and return_code != 24:
            raise Exception("Rsync seems to have failed somehow! Got return code: " + str(return_code))

        return return_code, stdout, stderr


def main():
    """Parse the command line and run one backup."""
    b = MikesBackup()
    b.do_backup()


if __name__ == "__main__":
    main()