mikes-backup/mikes-backup

843 lines
20 KiB
Python
Executable File

#!/usr/bin/env python3
#
import datetime
import os
import re
import shlex
import subprocess
import sys
#
class MikesBackup:
    """Rsync-based backup driver configured from the command line.

    The destination base directory holds one ``full`` directory and a
    ``differential`` directory. Each differential run is written to
    ``differential/IN-PROGRESS`` and renamed to a timestamped directory once
    rsync has finished. The destination may be local or, when any of the
    remote host / remote user / ssh key options is given, reached over ssh.

    All failures are reported by raising ``Exception``.
    """

    # Configuration, populated by parse_args().
    __log_dir = None
    __log_name = None
    __remote_host = None
    __remote_user = None
    __destination_dir_base = None
    __source_dir = None
    # These class-level lists are only fallbacks for instances created without
    # __init__; __init__ gives every instance its own lists so that options
    # never leak from one instance to another.
    __source_dir_includes = []
    __source_dir_excludes = []
    __source_mountpoint_demands = []
    __ssh_key = None
    __quiet_ssh = True
    __force_full = False
    __force_differential = False
    __no_incremental = False
    __log_file_handle = None
    # Set once the "no log directory" notice has been printed.
    __warned_no_log_dir = False

    CONST_FULL_DIRECTORY_NAME = "full"
    CONST_DIFFERENTIAL_DIRECTORY_NAME = "differential"
    CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME = "IN-PROGRESS"

    def __init__(self):
        """Create the instance and configure it from ``sys.argv``."""
        # Fresh lists per instance; appending to the class-level defaults
        # would share includes/excludes/mountpoints between instances.
        self.__source_dir_includes = []
        self.__source_dir_excludes = []
        self.__source_mountpoint_demands = []
        self.parse_args()

    def __del__(self):
        """Close the log file, if one is open."""
        self.close_log_file()

    def __str__(self):
        """Return a multi-line, human-readable dump of the configuration."""
        rows = [
            "MikesBackup Class Instance",
            "Log Dir: " + str(self.__log_dir),
            "Log Name: " + str(self.__log_name),
            "Remote Host: " + str(self.__remote_host),
            "Remote User: " + str(self.__remote_user),
            "Destination Dir Base: " + str(self.__destination_dir_base),
            "Source Dir (Main): " + str(self.__source_dir),
            "Source Dirs (Includes): " + str(self.__source_dir_includes),
            "Source Dirs (Excludes): " + str(self.__source_dir_excludes),
            "Source Mountpoint Demands: " + str(self.__source_mountpoint_demands),
            "SSH Key: " + str(self.__ssh_key),
            "Quiet SSH: " + str(self.__quiet_ssh),
            "Force Full Backup: " + str(self.__force_full),
            "Force Differential: " + str(self.__force_differential),
            "Disallow Incremental: " + str(self.__no_incremental),
        ]
        return "\n".join(rows)

    def log(self, s, o=None):
        """Print a timestamped line and append it to the log file, if any.

        :param s: The message to log.
        :param o: Optional extra object; its string form is logged line by
            line after ``s``.
        """
        the_date = self.get_datetime_for_logging()
        to_log = "[MikesBackup][" + the_date + "] " + str(s)

        print(to_log)

        f = self.open_log_file()
        if f:
            f.write(to_log + "\n")
            # rsync appends to the same file via --log-file, so buffered
            # lines must reach the file before rsync starts writing.
            f.flush()

        # Recurse so every line of the extra object gets its own prefix
        if o is not None:
            for line in str(o).split("\n"):
                self.log(line)

    def open_log_file(self):
        """Return the open log file handle, opening it on first use.

        :return: The file handle, or None when no log directory is configured.
        """
        if self.__log_file_handle:
            return self.__log_file_handle

        log_file_path = self.make_log_path()
        if log_file_path:
            # The log directory may not exist yet on the first call
            self.ensure_local_directory(os.path.dirname(log_file_path))
            # Append mode: rsync writes to this same path, and "w" would let
            # the two writers overwrite each other's output.
            self.__log_file_handle = open(log_file_path, "a")

        return self.__log_file_handle

    def close_log_file(self):
        """Close the log file handle if it is open."""
        if self.__log_file_handle:
            self.__log_file_handle.close()
            self.__log_file_handle = None

    @staticmethod
    def eprint(*args, **kwargs):
        """Print to stderr; accepts the same arguments as ``print``."""
        print(*args, file=sys.stderr, **kwargs)

    def parse_args(self):
        """Populate the configuration from ``sys.argv``.

        :raises Exception: On an unknown argument, on an option that is
            missing its value, or when ``--source-dir`` is given twice.
        """
        print()
        self.log("Parsing arguments")

        # Options that consume the following argument as their value
        value_options = (
            "--log-dir",
            "--log-name",
            "--source-dir",
            "--include",
            "--source-mountpoint",
            "--destination-dir",
            "--exclude",
            "--remote-host",
            "--remote-user",
            "--ssh-key",
        )

        argv = sys.argv
        a = 0
        while a + 1 < len(argv):
            a += 1
            arg = argv[a]

            # Empty arguments are accepted and ignored
            if arg == "":
                continue

            # Switches without a value
            if arg == "--full":
                self.__force_full = True
                self.log("Forcing a full backup")
                continue
            if arg == "--diff" or arg == "--differential":
                self.__force_differential = True
                self.log("Forcing a differential backup")
                continue
            if arg == "--no-incremental":
                self.__no_incremental = True
                self.log("Disallowing incremental backups (differentials will only link back to full)")
                continue

            if arg not in value_options:
                raise Exception("Invalid argument: " + arg)

            # Fail with a clear message instead of an IndexError when the
            # option is the last thing on the command line.
            if a + 1 >= len(argv):
                raise Exception("Missing value for argument: " + arg)
            a += 1
            value = argv[a]

            if arg == "--log-dir":
                self.__log_dir = value
                self.log("Found log dir: " + self.__log_dir)
            elif arg == "--log-name":
                self.__log_name = value
                self.log("Found log name: " + self.__log_name)
                # The log name is part of the file name, so the next log
                # call must open a new file.
                self.close_log_file()
            elif arg == "--source-dir":
                if self.__source_dir:
                    raise Exception("--source-dir can only be used once")
                self.__source_dir = value
                self.log("Found source dir: " + value)
            elif arg == "--include":
                self.__source_dir_includes.append(value)
                self.log("Found additional source dir include: " + value)
            elif arg == "--source-mountpoint":
                self.__source_mountpoint_demands.append(value)
                self.log("Found required source mountpoint: " + value)
            elif arg == "--destination-dir":
                self.__destination_dir_base = value
                self.log("Found destination dir: " + self.__destination_dir_base)
            elif arg == "--exclude":
                self.__source_dir_excludes.append(value)
                self.log("Found exclude dir: " + value)
            elif arg == "--remote-host":
                self.__remote_host = value
                self.log("Found remote host: " + self.__remote_host)
            elif arg == "--remote-user":
                self.__remote_user = value
                self.log("Found remote user: " + self.__remote_user)
            elif arg == "--ssh-key":
                self.__ssh_key = value
                self.log("Found ssh key: " + self.__ssh_key)

    @staticmethod
    def get_datetime_for_logging():
        """Return the current local time formatted for log line prefixes."""
        return datetime.datetime.now().strftime("%b %d %Y; %I%M%p")

    @staticmethod
    def get_datetime_for_filename():
        """Return the current local time formatted for file/directory names."""
        return datetime.datetime.now().strftime('%Y-%b-%d_%I%M%p')

    def is_using_source_mountpoints(self):
        """Return True when at least one source mountpoint was demanded."""
        return bool(self.__source_mountpoint_demands)

    def demand_source_mountpoints(self):
        """Verify every demanded source mountpoint is mounted.

        :raises Exception: If a demanded path is not a mountpoint.
        """
        for mountpoint_path in self.__source_mountpoint_demands:
            if not os.path.ismount(mountpoint_path):
                raise Exception("Required mountpoint is not mounted: " + str(mountpoint_path))
            self.log("Verified mountpoint: " + mountpoint_path)

    def is_using_ssh(self):
        """Return True when any ssh option (host, user or key) was given."""
        ssh_settings = (self.__remote_host, self.__remote_user, self.__ssh_key)
        return any(setting is not None for setting in ssh_settings)

    def demand_ssh_config(self):
        """Require host and user when ssh is in use.

        :raises Exception: If ssh is in use but host or user is missing.
        """
        if not self.is_using_ssh():
            return
        if self.__remote_host is None:
            raise Exception("Please provide remote host")
        if self.__remote_user is None:
            raise Exception("Please provide remote user")

    def demand_source_directory_config(self):
        """Raise Exception unless a source directory was configured."""
        if self.__source_dir is None:
            raise Exception("Please provide a source directory")

    def demand_destination_directory_config(self):
        """Raise Exception unless a destination directory was configured."""
        if self.__destination_dir_base is None:
            raise Exception("Please provide backup destination directory")

    def does_destination_directory_exist(self, destination_path):
        """Return True when ``destination_path`` is an existing directory.

        The check is done locally, or through ``[ -d ... ]`` on the remote
        host when ssh is in use.
        """
        self.log("Trying to determine if destination path exists:" + destination_path)

        # Local?
        if not self.is_using_ssh():
            self.log("Checking for local destination path")
            if os.path.isdir(destination_path):
                self.log("Local destination path exists")
                return True
            self.log("Local destination path does not exist")
            return False

        self.log("Checking for remote destination path: " + destination_path)
        # Quoted because the remote shell re-parses the command line
        command = [
            "[ -d " + shlex.quote(destination_path) + " ]"
        ]
        code, stdout, stderr = self.execute_remote_ssh_command(command)
        if code == 0:
            self.log("Remote destination dir was found: " + destination_path)
            return True

        self.log("Remote dir didn't seem to exist: " + destination_path)
        return False

    def demand_destination_base_backup_directory(self):
        """Raise Exception unless the destination base directory exists."""
        self.demand_destination_directory_config()

        destination_path = self.__destination_dir_base
        if not self.does_destination_directory_exist(destination_path):
            raise Exception("Backup destination directory doesn't exist: " + destination_path)

    def does_full_backup_destination_directory_exist(self):
        """Return True when the ``full`` backup directory already exists."""
        dir_path = self.make_full_backup_destination_path()
        self.log("Trying to determine if Full backup destination directory exists:", dir_path)
        return self.does_destination_directory_exist(dir_path)

    def do_backup(self):
        """Run a full or differential backup.

        A forced mode from the command line wins (full before differential);
        otherwise a differential is chosen when the full backup directory
        already exists, and a full backup when it does not.
        """
        print()
        self.log("Enter: do_backup")

        # Source mountpoints must be mounted
        self.demand_source_mountpoints()

        # Remote base dir must exist
        self.demand_destination_base_backup_directory()

        # Forced full or differential by args?
        if self.__force_full is True:
            self.log("Forcing full backup")
            self.do_full_backup()
            return
        if self.__force_differential is True:
            self.log("Forcing differential backup")
            self.do_differential_backup()
            return

        # Automatically choose full or differential
        if self.does_full_backup_destination_directory_exist():
            self.log("Automatically choosing differential backup, because full backup destination directory already exists")
            self.do_differential_backup()
        else:
            self.log("Automatically choosing full backup, because full backup destination directory wasn't found")
            self.do_full_backup()

    def do_full_backup(self):
        """Rsync the source into ``full`` and then delete all differentials."""
        destination_dir = self.make_full_backup_destination_path()

        args = [
            self.make_rsync_source_directory_part(),
            self.make_rsync_remote_destination_part(destination_dir),
        ]

        self.log("Destination dir:" + destination_dir)
        self.execute_rsync(args)
        self.log("Rsync seems to have finished successfully")

        self.log("Because a full backup has succeeded, will now delete any differential backups")
        differential_base = self.make_remote_differential_backup_path_base()
        if self.is_using_ssh():
            # Quoted for the remote shell: an unquoted path containing a
            # space would make rm delete the wrong directories.
            return_code, stdout, stderr = self.execute_remote_ssh_command(
                ["rm", "-rfv", shlex.quote(differential_base)]
            )
        else:
            return_code, stdout, stderr = self.execute_command(
                ["rm", "-rfv", differential_base]
            )
        if return_code != 0:
            # Cleanup is best-effort; the full backup itself has succeeded
            self.log("Deleting old differentials returned code " + str(return_code), stderr)
        self.log("Finished deleting old differentials")

    def do_differential_backup(self):
        """Rsync into ``differential/IN-PROGRESS`` and rename it when done.

        Unchanged files are hard-linked against the directory chosen by
        determine_rsync_backup_link_destination_path() via ``--link-dest``.

        :raises Exception: If the remote rename fails.
        """
        link_dest_dir = self.determine_rsync_backup_link_destination_path()
        destination_dir_in_progress = self.make_remote_differential_in_progress_backup_path()
        destination_dir_final = self.make_remote_differential_backup_path()
        self.ensure_destination_directory(destination_dir_in_progress)

        args = []
        # Add link dest arg?
        if link_dest_dir:
            args.append("--link-dest")
            args.append(link_dest_dir)
        args.append(self.make_rsync_source_directory_part())
        args.append(self.make_rsync_remote_destination_part(destination_dir_in_progress))

        # str(): there may be no link destination at all
        self.log("Link destination dir: " + str(link_dest_dir))
        self.log("Destination dir: " + destination_dir_in_progress)
        self.execute_rsync(args)
        self.log("Rsync seems to have finished successfully")

        self.log("Renaming temporary directory")
        self.log("Old: " + destination_dir_in_progress)
        self.log("New: " + destination_dir_final)
        if self.is_using_ssh():
            return_code, stdout, stderr = self.execute_remote_ssh_command([
                "mv",
                shlex.quote(destination_dir_in_progress),
                shlex.quote(destination_dir_final)
            ])
            if return_code != 0:
                raise Exception("Failed to move temporary diff directory to its final home")
        else:
            os.rename(destination_dir_in_progress, destination_dir_final)
        self.log("Rename was successful")

    def make_log_directory_path(self):
        """Return the configured log directory, or None when there is none."""
        log_dir = self.__log_dir
        if log_dir is None:
            # This runs on every log call; print the notice only once
            if not self.__warned_no_log_dir:
                self.__warned_no_log_dir = True
                print("No log directory specified; Won't log")
            return None
        return log_dir

    def make_log_path(self):
        """Return the log file path for the current time, or None.

        The file name is the current timestamp, optionally followed by
        ``-<log name>``, with a ``.log`` suffix.
        """
        log_dir = self.make_log_directory_path()
        if not log_dir:
            return None

        file_name = self.get_datetime_for_filename()
        if self.__log_name:
            file_name += "-" + self.__log_name
        file_name += ".log"

        return os.path.join(log_dir, file_name)

    def make_full_backup_destination_path(self):
        """Return ``<destination base>/full``.

        :raises Exception: If no destination directory was configured.
        """
        if self.__destination_dir_base is None:
            raise Exception("No remote directory was specified")
        return os.path.join(self.__destination_dir_base, self.CONST_FULL_DIRECTORY_NAME)

    def make_remote_differential_backup_path_base(self):
        """Return ``<destination base>/differential``.

        :raises Exception: If no destination directory was configured.
        """
        if self.__destination_dir_base is None:
            raise Exception("No remote directory was specified")
        return os.path.join(self.__destination_dir_base, self.CONST_DIFFERENTIAL_DIRECTORY_NAME)

    def make_remote_differential_in_progress_backup_path(self):
        """Return ``<destination base>/differential/IN-PROGRESS``."""
        diff_path_base = self.make_remote_differential_backup_path_base()
        return os.path.join(diff_path_base, self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME)

    def make_remote_differential_backup_path(self):
        """Return ``<destination base>/differential/<current timestamp>``."""
        diff_path_base = self.make_remote_differential_backup_path_base()
        return os.path.join(diff_path_base, self.get_datetime_for_filename())

    def make_rsync_source_includes_part(self):
        """Return ``--include <dir>`` argument pairs for every include."""
        args = []
        for d in self.__source_dir_includes:
            args.extend(["--include", d])
        return args

    def make_rsync_source_directory_part(self):
        """Return the source directory; raise Exception if it is not set."""
        self.demand_source_directory_config()
        return self.__source_dir

    def make_rsync_remote_destination_part(self, destination_dir):
        """Return the rsync destination argument for ``destination_dir``.

        With a remote host configured this is ``[user@]host:dir``, otherwise
        just ``dir``.
        """
        part = ""
        if self.__remote_host is not None:
            if self.__remote_user is not None:
                part += self.__remote_user + "@"
            part += self.__remote_host + ":"
        part += destination_dir
        return part

    def _list_destination_directory(self, path, failure_message):
        """Return the ``ls --full-time`` listing of a destination directory.

        :raises Exception: With ``failure_message`` if ``ls`` fails.
        """
        ls_args = ["ls", "-l", "-c", "--all", "--full-time"]
        if self.is_using_ssh():
            # Quoted because the remote shell re-parses the command line
            return_code, stdout, stderr = self.execute_remote_ssh_command(
                ls_args + [shlex.quote(path)]
            )
        else:
            return_code, stdout, stderr = self.execute_command(ls_args + [path])
        if return_code != 0:
            raise Exception(failure_message)
        return stdout

    def determine_rsync_backup_link_destination_path(self):
        """Pick the previous backup directory to use as ``--link-dest``.

        Starts from the ``full`` directory and, unless incremental backups
        are disabled, prefers the differential directory with the latest
        timestamp in the ``ls -c --full-time`` listing.

        :return: The chosen path, or None if no candidate was found.
        :raises Exception: If a directory listing cannot be obtained.
        """
        self.demand_destination_directory_config()
        self.log("Begin trying to determine which previous backup path to use as link")

        newest_path = None
        newest_path_date = None

        # Pattern to parse the 'ls' command
        pattern = re.compile(
            r""".*(?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{9} .{5}) (?P<name>.+)$""",
            re.MULTILINE
        )

        # Get listing info for the full path
        destination_path_full = self.make_full_backup_destination_path()
        stdout = self._list_destination_directory(
            destination_path_full,
            "Failed to get listing info for base destination directory"
        )
        for match in pattern.finditer(stdout):
            # The "." entry carries the timestamp of the full directory itself
            if match.group("name") == ".":
                date = match.group("date")
                self.log("Start by assuming \"Full\" is the most recent backup: " + destination_path_full)
                self.log("; With a date of " + date)
                newest_path = destination_path_full
                newest_path_date = date
                break
        if not newest_path:
            self.log("Didn't find a \"Full\" backup on remote")

        if self.__no_incremental:
            self.log("Incremental backups are disabled; Won't consider any differential directories for the link target")
            return newest_path

        # Get listing info for all differential directories
        differential_path_base = self.make_remote_differential_backup_path_base()
        self.ensure_destination_directory(differential_path_base)
        stdout = self._list_destination_directory(
            differential_path_base,
            "Failed to get listing info for destination differential base directory"
        )

        # Look for the most recent differential directory
        # (must be newer than the Full directory too)
        skipped_names = (".", "..", self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME)
        for match in pattern.finditer(stdout):
            name = match.group("name")
            date = match.group("date")
            if name in skipped_names:
                continue
            # Timestamps are compared as strings (year-first, fixed width)
            if newest_path is None or date > newest_path_date:
                self.log("Found a newer differential backup: " + name + "; " + date)
                newest_path = os.path.join(differential_path_base, name)
                newest_path_date = date
            else:
                self.log("Not newer: " + name + "; " + date)

        # str(): both values are None when no previous backup was found
        self.log("Newest backup path is: " + str(newest_path))
        self.log("; With a date of: " + str(newest_path_date))
        return newest_path

    @staticmethod
    def ensure_local_directory(d):
        """Create local directory ``d`` (and parents) if it does not exist."""
        os.makedirs(d, exist_ok=True)

    def ensure_destination_directory(self, d):
        """Create destination directory ``d`` if it does not exist yet.

        :raises Exception: If the remote ``mkdir`` fails.
        """
        if self.does_destination_directory_exist(d):
            return

        self.log("Destination directory doesn't exist; Will create:" + d)
        if self.is_using_ssh():
            command = [
                "mkdir",
                "--parents",
                shlex.quote(d)
            ]
            return_code, stdout, stderr = self.execute_remote_ssh_command(command)
            if return_code != 0:
                raise Exception("Failed to create remote destination directory: " + d)
        else:
            os.makedirs(d)

    def start_rsync_args(self):
        """Return the rsync command line up to (not including) the paths."""
        args = [
            "rsync",
            "--archive",
            "--compress",
            "--progress",
            "--stats",
            "--verbose",
            "--human-readable",
            "--itemize-changes",
            "--no-links",
            "--delete",
            "--delete-excluded"
        ]

        log_dir = self.make_log_directory_path()
        if self.__log_file_handle:
            # Reuse the file already being written so rsync's output does not
            # land in a second file when the minute has changed.
            log_path = self.__log_file_handle.name
        else:
            log_path = self.make_log_path()
        if log_dir and log_path:
            self.ensure_local_directory(log_dir)
            args.append("--log-file")
            args.append(log_path)

        # Only allow recursion into multiple file systems
        # if any mountpoints were specified.
        # This is the only place the option is added: rsync gives a repeated
        # --one-file-system a different meaning.
        if not self.is_using_source_mountpoints():
            args.append("--one-file-system")

        for i in self.__source_dir_includes:
            args.append("--include")
            args.append(i)

        for e in self.__source_dir_excludes:
            args.append("--exclude")
            args.append(e)

        return args

    def start_rsync_environment_variables(self):
        """Return the environment variable overrides for rsync.

        Only ``RSYNC_RSH`` is set, carrying the ssh key and quiet flag.
        """
        env = {}
        if self.__ssh_key is not None or self.__quiet_ssh is True:
            remote_shell = "ssh"
            if self.__ssh_key is not None:
                remote_shell += " -i " + shlex.quote(self.__ssh_key)
            if self.__quiet_ssh is True:
                remote_shell += " -q"
            env["RSYNC_RSH"] = remote_shell
        return env

    @staticmethod
    def _command_to_args(command):
        """Return ``command`` (a str or a list) as a list of arguments."""
        if isinstance(command, str):
            return [command]
        if isinstance(command, list):
            return list(command)
        raise Exception("Unsupported command datatype")

    @staticmethod
    def _run_captured(args):
        """Run ``args`` and return (return code, stdout text, stderr text)."""
        process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout, stderr = process.communicate()
        # File names in listings are not guaranteed to be valid UTF-8
        stdout = stdout.decode(errors="replace")
        stderr = stderr.decode(errors="replace")
        return process.returncode, stdout, stderr

    @staticmethod
    def execute_command(command):
        """Run a local command without a shell.

        :param command: A single string or a list of arguments.
        :return: Tuple of (return code, stdout text, stderr text).
        :raises Exception: If ``command`` is neither a str nor a list.
        """
        args = MikesBackup._command_to_args(command)
        return MikesBackup._run_captured(args)

    def execute_remote_ssh_command(self, command):
        """Run a command on the remote host through ssh.

        The arguments are re-parsed by the remote shell, so callers must
        quote anything that may contain shell metacharacters.

        :param command: A single string or a list of arguments.
        :return: Tuple of (return code, stdout text, stderr text).
        :raises Exception: If the ssh configuration is incomplete or
            ``command`` is neither a str nor a list.
        """
        self.demand_ssh_config()

        args = ["ssh"]
        # Quiet?
        if self.__quiet_ssh is True:
            args.append("-q")
        # ssh key
        if self.__ssh_key is not None:
            args.append("-i")
            args.append(self.__ssh_key)
        # ssh user@host
        args.append(self.__remote_user + "@" + self.__remote_host)
        # Append the command
        args.append("--")
        args.extend(self._command_to_args(command))

        return self._run_captured(args)

    def execute_rsync(self, _args):
        """Run rsync with the standard arguments followed by ``_args``.

        rsync's output is not captured; it goes to this process' stdout and
        stderr.

        :param _args: Extra arguments, normally the source and destination.
        :return: Tuple of (return code, "", "").
        :raises Exception: If the configuration is incomplete, or if rsync
            exits with a code other than 0 or 24.
        """
        # Demand stuff
        self.demand_source_directory_config()
        self.demand_destination_directory_config()
        if self.is_using_ssh():
            self.demand_ssh_config()

        args = self.start_rsync_args()
        args.extend(_args)

        env = self.start_rsync_environment_variables()

        self.log("Executing rsync with the following arguments:", args)
        self.log("; And the following environment:", env)

        # Layer the overrides on top of the inherited environment: passing
        # only the overrides would strip PATH, HOME, SSH_AUTH_SOCK, etc.
        # Only the overrides are logged above, not the whole environment.
        child_env = dict(os.environ)
        child_env.update(env)

        return_code = subprocess.call(args, env=child_env)

        # Check return code
        # 0 = Success
        # 24 = Source files vanished
        if return_code != 0 and return_code != 24:
            raise Exception("Rsync seems to have failed somehow! Got return code: " + str(return_code))

        return return_code, "", ""
def main():
    """Configure a MikesBackup from the command line and run the backup."""
    backup = MikesBackup()
    backup.do_backup()
#
if __name__ == "__main__":
    # Run the backup only when this file is executed as a script.
    main()