A simple script utilizing rsync for both full and differential backups, and auto folder naming.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

843 lines
20 KiB

  1. #!/usr/bin/env python3
  2. #
  3. import datetime
  4. import os
  5. import re
  6. import shlex
  7. import subprocess
  8. import sys
  9. #
  10. class MikesBackup:
  11. #
  12. __log_dir = None
  13. __log_name = None
  14. __remote_host = None
  15. __remote_user = None
  16. __destination_dir_base = None
  17. __source_dir = None
  18. __source_dir_includes = []
  19. __source_dir_excludes = []
  20. __source_mountpoint_demands = []
  21. __ssh_key = None
  22. __quiet_ssh = True
  23. __force_full = False
  24. __force_differential = False
  25. __no_incremental = False
  26. __log_file_handle = None
  27. CONST_FULL_DIRECTORY_NAME = "full"
  28. CONST_DIFFERENTIAL_DIRECTORY_NAME = "differential"
  29. CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME = "IN-PROGRESS"
  30. #
  31. def __init__(self):
  32. self.parse_args()
  33. #
  34. def __del__(self):
  35. self.close_log_file()
  36. #
  37. def __str__(self):
  38. s = ""
  39. s += "MikesBackup Class Instance"
  40. s += "\nLog Dir: " + str(self.__log_dir)
  41. s += "\nLog Name: " + str(self.__log_name)
  42. s += "\nRemote Host: " + str(self.__remote_host)
  43. s += "\nRemote User: " + str(self.__remote_user)
  44. s += "\nDestination Dir Base: " + str(self.__destination_dir_base)
  45. s += "\nSource Dir (Main): " + str(self.__source_dir)
  46. s += "\nSource Dirs (Includes): " + str(self.__source_dir_includes)
  47. s += "\nSource Dirs (Excludes): " + str(self.__source_dir_excludes)
  48. s += "\nSource Mountpoint Demands: " + str(self.__source_mountpoint_demands)
  49. s += "\nSSH Key: " + str(self.__ssh_key)
  50. s += "\nQuiet SSH: " + str(self.__quiet_ssh)
  51. s += "\nForce Full Backup: " + str(self.__force_full)
  52. s += "\nForce Differential: " + str(self.__force_differential)
  53. s += "\nDisallow Incremental: " + str(self.__no_incremental)
  54. return s
  55. #
  56. def log(self, s, o=None):
  57. the_date = self.get_datetime_for_logging()
  58. to_log = "[MikesBackup][" + the_date + "] " + s
  59. # Print the log line
  60. print(to_log)
  61. # Append the log line to the log file
  62. f = self.open_log_file()
  63. if f:
  64. f.write(to_log + "\n")
  65. # Recurse in order to print whatever o is outputting, if anything
  66. if o is not None:
  67. o_lines = str(o).split("\n")
  68. for line in o_lines:
  69. self.log(line)
  70. #
  71. def open_log_file(self):
  72. if self.__log_file_handle:
  73. return self.__log_file_handle
  74. log_file_path = self.make_log_path()
  75. if log_file_path:
  76. self.__log_file_handle = open(log_file_path, "w")
  77. return self.__log_file_handle
  78. #
  79. def close_log_file(self):
  80. if self.__log_file_handle:
  81. self.__log_file_handle.close()
  82. self.__log_file_handle = None
  83. #
  84. def eprint(*args, **kwargs):
  85. print(*args, file=sys.stderr, **kwargs)
  86. #
  87. def parse_args(self):
  88. #
  89. print()
  90. self.log("Parsing arguments")
  91. a = 0
  92. while a + 1 < len(sys.argv):
  93. a += 1
  94. #
  95. arg = sys.argv[a]
  96. #
  97. valid_arg = False
  98. if arg == "":
  99. valid_arg = True
  100. elif arg == "--full":
  101. valid_arg = True
  102. self.__force_full = True
  103. self.log("Forcing a full backup")
  104. elif arg == "--diff" or arg == "--differential":
  105. valid_arg = True
  106. self.__force_differential = True
  107. self.log("Forcing a differential backup")
  108. elif arg == "--no-incremental":
  109. valid_arg = True
  110. self.__no_incremental = True
  111. self.log("Disallowing incremental backups (differentials will only link back to full)")
  112. elif arg == "--log-dir":
  113. valid_arg = True
  114. self.__log_dir = sys.argv[a + 1]
  115. self.log("Found log dir: " + self.__log_dir)
  116. a = a + 1
  117. elif arg == "--log-name":
  118. valid_arg = True
  119. self.__log_name = sys.argv[a + 1]
  120. self.log("Found log name: " + self.__log_name)
  121. self.close_log_file()
  122. a = a + 1
  123. elif arg == "--source-dir":
  124. valid_arg = True
  125. if self.__source_dir:
  126. raise Exception("--source-dir can only be used once")
  127. self.__source_dir = sys.argv[a + 1]
  128. self.log("Found source dir: " + sys.argv[a + 1])
  129. a = a + 1
  130. elif arg == "--include":
  131. valid_arg = True
  132. self.__source_dir_includes.append(sys.argv[a + 1])
  133. self.log("Found additional source dir include: " + sys.argv[a + 1])
  134. a = a + 1
  135. elif arg == "--source-mountpoint":
  136. valid_arg = True
  137. self.__source_mountpoint_demands.append(sys.argv[a + 1])
  138. self.log("Found required source mountpoint: " + sys.argv[a + 1])
  139. a += 1
  140. elif arg == "--destination-dir":
  141. valid_arg = True
  142. self.__destination_dir_base = sys.argv[a + 1]
  143. self.log("Found destination dir: " + self.__destination_dir_base)
  144. a = a + 1
  145. elif arg == "--exclude":
  146. valid_arg = True
  147. self.__source_dir_excludes.append(sys.argv[a + 1])
  148. self.log("Found exclude dir: " + sys.argv[a + 1])
  149. a = a + 1
  150. elif arg == "--remote-host":
  151. valid_arg = True
  152. self.__remote_host = sys.argv[a + 1]
  153. self.log("Found remote host: " + self.__remote_host)
  154. a = a + 1
  155. elif arg == "--remote-user":
  156. valid_arg = True
  157. self.__remote_user = sys.argv[a + 1]
  158. self.log("Found remote user: " + self.__remote_user)
  159. a = a + 1
  160. elif arg == "--ssh-key":
  161. valid_arg = True
  162. self.__ssh_key = sys.argv[a + 1]
  163. self.log("Found ssh key: " + self.__ssh_key)
  164. a = a + 1
  165. #
  166. if not valid_arg:
  167. raise Exception("Invalid argument:", arg)
  168. @staticmethod
  169. def get_datetime_for_logging():
  170. #
  171. return datetime.datetime.now().strftime("%b %d %Y; %I%M%p")
  172. @staticmethod
  173. def get_datetime_for_filename():
  174. #
  175. return datetime.datetime.now().strftime('%Y-%b-%d_%I%M%p')
  176. #
  177. def is_using_source_mountpoints(self):
  178. return len(self.__source_mountpoint_demands) > 0
  179. #
  180. def demand_source_mountpoints(self):
  181. for mountpoint_path in self.__source_mountpoint_demands:
  182. if not os.path.ismount(mountpoint_path):
  183. raise Exception("Required mountpoint is not mounted: " + str(mountpoint_path))
  184. self.log("Verified mountpoint: " + mountpoint_path)
  185. #
  186. def is_using_ssh(self):
  187. #
  188. if (
  189. self.__remote_host is not None
  190. or self.__remote_user is not None
  191. or self.__ssh_key is not None
  192. ):
  193. return True
  194. return False
  195. #
  196. def demand_ssh_config(self):
  197. #
  198. if self.is_using_ssh():
  199. if self.__remote_host is None:
  200. raise Exception("Please provide remote host")
  201. if self.__remote_user is None:
  202. raise Exception("Please provide remote user")
  203. #
  204. def demand_source_directory_config(self):
  205. #
  206. if self.__source_dir is None:
  207. raise Exception("Please provide a source directory")
  208. #
  209. def demand_destination_directory_config(self):
  210. #
  211. if self.__destination_dir_base is None:
  212. raise Exception("Please provide backup destination directory")
  213. #
  214. def does_destination_directory_exist(self, destination_path):
  215. #
  216. self.log("Trying to determine if destination path exists:" + destination_path)
  217. # Local?
  218. if not self.is_using_ssh():
  219. self.log("Checking for local destination path")
  220. if os.path.isdir(destination_path):
  221. self.log("Local destination path exists")
  222. return True
  223. else:
  224. self.log("Local destination path does not exist")
  225. return False
  226. #
  227. self.log("Checking for remote destination path: " + destination_path)
  228. command = [
  229. "[ -d " + destination_path + " ]"
  230. ]
  231. #
  232. code, stdout, stderr = self.execute_remote_ssh_command(command)
  233. if code == 0:
  234. self.log("Remote destination dir was found: " + destination_path)
  235. return True
  236. #
  237. self.log("Remote dir didn't seem to exist: " + destination_path)
  238. return False
  239. #
  240. def demand_destination_base_backup_directory(self):
  241. #
  242. self.demand_destination_directory_config()
  243. #
  244. destination_path = self.__destination_dir_base
  245. #
  246. if self.does_destination_directory_exist(destination_path) is False:
  247. raise Exception("Backup destination directory doesn't exist: " + destination_path)
  248. #
  249. def does_full_backup_destination_directory_exist(self):
  250. #
  251. dir_path = self.make_full_backup_destination_path()
  252. #
  253. self.log("Trying to determine if Full backup destination directory exists:", dir_path)
  254. return self.does_destination_directory_exist(dir_path)
  255. #
  256. def do_backup(self):
  257. #
  258. print()
  259. self.log("Enter: do_backup")
  260. # Source mountpoints must be mounted
  261. self.demand_source_mountpoints()
  262. # Remote base dir must exist
  263. self.demand_destination_base_backup_directory()
  264. # Forced full or differential by args?
  265. if self.__force_full is True or self.__force_differential is True:
  266. if self.__force_full is True:
  267. self.log("Forcing full backup")
  268. self.do_full_backup()
  269. else:
  270. self.log("Forcing differential backup")
  271. self.do_differential_backup()
  272. return
  273. # Automatically choose full or differential
  274. if self.does_full_backup_destination_directory_exist():
  275. self.log("Automatically choosing differential backup, because full backup destination directory already exists")
  276. self.do_differential_backup()
  277. else:
  278. self.log("Automatically choosing full backup, because full backup destination directory wasn't found")
  279. self.do_full_backup()
  280. #
  281. def do_full_backup(self):
  282. # Start args
  283. args = []
  284. # Get destination directory
  285. destination_dir = self.make_full_backup_destination_path()
  286. # Append source directory
  287. args.append(self.make_rsync_source_directory_part())
  288. # Append remote destination directory
  289. # args.append( self.__remote_user + "@" + self.__remote_host + ":" + remote_dir)
  290. args.append(self.make_rsync_remote_destination_part(destination_dir))
  291. # print("Args", str(args))
  292. self.log("Destination dir:" + destination_dir)
  293. self.execute_rsync(args)
  294. self.log("Rsync seems to have finished successfully")
  295. self.log("Because a full backup has succeeded, will now delete any differential backups")
  296. args_remove_differentials = [
  297. "rm",
  298. "-rfv",
  299. self.make_remote_differential_backup_path_base()
  300. ]
  301. if self.is_using_ssh():
  302. self.execute_remote_ssh_command(args_remove_differentials)
  303. else:
  304. self.execute_command(args_remove_differentials)
  305. self.log("Finished deleting old differentials")
  306. #
  307. def do_differential_backup(self):
  308. # Start args
  309. args = []
  310. # Get directories
  311. link_dest_dir = self.determine_rsync_backup_link_destination_path()
  312. destination_dir_in_progress = self.make_remote_differential_in_progress_backup_path()
  313. destination_dir_final = self.make_remote_differential_backup_path()
  314. self.ensure_destination_directory(destination_dir_in_progress)
  315. # Add link dest arg?
  316. if link_dest_dir:
  317. args.append("--link-dest")
  318. args.append(link_dest_dir)
  319. # Append source directory
  320. args.append(self.make_rsync_source_directory_part())
  321. # Append remote destination directory
  322. args.append(self.make_rsync_remote_destination_part(destination_dir_in_progress))
  323. self.log("Link destination dir: " + link_dest_dir)
  324. self.log("Destination dir: " + destination_dir_in_progress)
  325. self.execute_rsync(args)
  326. self.log("Rsync seems to have finished successfully")
  327. self.log("Renaming temporary directory")
  328. self.log("Old: " + destination_dir_in_progress)
  329. self.log("New: " + destination_dir_final)
  330. if self.is_using_ssh():
  331. return_code, stdout, stderr = self.execute_remote_ssh_command([
  332. "mv",
  333. destination_dir_in_progress,
  334. destination_dir_final
  335. ])
  336. if return_code != 0:
  337. raise Exception("Failed to move temporary diff directory to its final home")
  338. else:
  339. os.rename(destination_dir_in_progress, destination_dir_final)
  340. self.log("Rename was successful")
  341. #
  342. def make_log_directory_path(self):
  343. #
  344. log_dir = self.__log_dir
  345. if log_dir is None:
  346. print("No log directory specified; Won't log")
  347. return None
  348. return log_dir
  349. #
  350. def make_log_path(self):
  351. # Log dir
  352. log_dir = self.make_log_directory_path()
  353. if not log_dir:
  354. return None
  355. # Filename
  356. file_name = self.get_datetime_for_filename()
  357. if self.__log_name:
  358. file_name += "-" + self.__log_name
  359. file_name += ".log"
  360. # Path
  361. log_path = os.path.join(log_dir, file_name)
  362. return log_path
  363. #
  364. def make_full_backup_destination_path(self):
  365. #
  366. if self.__destination_dir_base is None:
  367. raise Exception("No remote directory was specified")
  368. #
  369. return os.path.join(self.__destination_dir_base, self.CONST_FULL_DIRECTORY_NAME)
  370. #
  371. def make_remote_differential_backup_path_base(self):
  372. #
  373. if self.__destination_dir_base is None:
  374. raise Exception("No remote directory was specified")
  375. return os.path.join(self.__destination_dir_base, self.CONST_DIFFERENTIAL_DIRECTORY_NAME)
  376. #
  377. def make_remote_differential_in_progress_backup_path(self):
  378. diff_path_base = self.make_remote_differential_backup_path_base()
  379. return os.path.join(diff_path_base, self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME)
  380. #
  381. def make_remote_differential_backup_path(self):
  382. diff_path_base = self.make_remote_differential_backup_path_base()
  383. #
  384. return os.path.join(diff_path_base, self.get_datetime_for_filename())
  385. #
  386. def make_rsync_source_includes_part(self):
  387. args = []
  388. for d in self.__source_dir_includes:
  389. args.append("--include")
  390. args.append(d)
  391. return args
  392. #
  393. def make_rsync_source_directory_part(self):
  394. self.demand_source_directory_config()
  395. #
  396. return self.__source_dir
  397. #
  398. def make_rsync_remote_destination_part(self, destination_dir):
  399. #
  400. part = ""
  401. #
  402. if self.__remote_host is not None:
  403. if self.__remote_user is not None:
  404. part += self.__remote_user + "@"
  405. part += self.__remote_host + ":"
  406. #
  407. part += destination_dir
  408. return part
  409. #
  410. def determine_rsync_backup_link_destination_path(self):
  411. self.demand_destination_directory_config()
  412. self.log("Begin trying to determine which previous backup path to use as link")
  413. #
  414. newest_path = None
  415. newest_path_date = None
  416. # Pattern to parse the 'ls' command
  417. pattern = re.compile(
  418. """.*(?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}\.[0-9]{9} .{5}) (?P<name>.+)$""",
  419. re.MULTILINE
  420. )
  421. # Get listing info for the full path
  422. destination_path_full = self.make_full_backup_destination_path()
  423. args_full_destination_path_ls = [
  424. "ls",
  425. "-l",
  426. "-c",
  427. "--all",
  428. "--full-time",
  429. destination_path_full
  430. ]
  431. if self.is_using_ssh():
  432. return_code, stdout, stderr = self.execute_remote_ssh_command(args_full_destination_path_ls)
  433. else:
  434. return_code, stdout, stderr = self.execute_command(args_full_destination_path_ls)
  435. if return_code != 0:
  436. raise Exception("Failed to get listing info for base destination directory")
  437. for match in pattern.finditer(stdout):
  438. name = match.group("name")
  439. date = match.group("date")
  440. if name == ".":
  441. self.log("Start by assuming \"Full\" is the most recent backup: " + destination_path_full)
  442. self.log("; With a date of " + date)
  443. newest_path = destination_path_full
  444. newest_path_date = date
  445. break
  446. if not newest_path:
  447. self.log("Didn't find a \"Full\" backup on remote")
  448. if self.__no_incremental:
  449. self.log("Incremental backups are disabled; Won't consider any differential directories for the link target")
  450. return newest_path
  451. # Get listing info for all differential directories
  452. differential_path_base = self.make_remote_differential_backup_path_base()
  453. self.ensure_destination_directory(differential_path_base)
  454. args_differential_destination_path_ls = [
  455. "ls",
  456. "-l",
  457. "-c",
  458. "--all",
  459. "--full-time",
  460. differential_path_base
  461. ]
  462. if self.is_using_ssh():
  463. return_code, stdout, stderr = self.execute_remote_ssh_command(args_differential_destination_path_ls)
  464. else:
  465. return_code, stdout, stderr = self.execute_command(args_differential_destination_path_ls)
  466. if return_code != 0:
  467. raise Exception("Failed to get listing info for destination differential base directory")
  468. # Look for the most recent differential directory
  469. # (must be newer than the Full directory too)
  470. for match in pattern.finditer(stdout):
  471. name = match.group("name")
  472. date = match.group("date")
  473. if name == "." or name == ".." or name == self.CONST_DIFFERENTIAL_IN_PROGRESS_DIRECTORY_NAME:
  474. continue
  475. if newest_path is None or date > newest_path_date:
  476. self.log("Found a newer differential backup: " + name + "; " + date)
  477. newest_path = os.path.join(differential_path_base, name)
  478. newest_path_date = date
  479. else:
  480. self.log("Not newer: " + name + "; " + date)
  481. #
  482. self.log("Newest backup path is: " + newest_path)
  483. self.log("; With a date of: " + newest_path_date)
  484. return newest_path
  485. @staticmethod
  486. def ensure_local_directory(d):
  487. #
  488. if not os.path.exists(d):
  489. os.makedirs(d)
  490. #
  491. def ensure_destination_directory(self, d):
  492. #
  493. if not self.does_destination_directory_exist(d):
  494. #
  495. self.log("Destination directory doesn't exist; Will create:" + d)
  496. #
  497. if self.is_using_ssh():
  498. command = [
  499. "mkdir",
  500. "--parents",
  501. d
  502. ]
  503. self.execute_remote_ssh_command(command)
  504. else:
  505. os.makedirs(d)
  506. #
  507. def start_rsync_args(self):
  508. #
  509. args = [
  510. "rsync",
  511. "--archive",
  512. "--compress",
  513. "--progress",
  514. "--stats",
  515. "--verbose",
  516. "--human-readable",
  517. "--itemize-changes",
  518. "--no-links",
  519. "--one-file-system",
  520. "--delete",
  521. "--delete-excluded"
  522. ]
  523. log_dir = self.make_log_directory_path()
  524. log_path = self.make_log_path()
  525. if log_dir and log_path:
  526. self.ensure_local_directory(log_dir)
  527. args.append("--log-file")
  528. args.append(log_path)
  529. # Only allow recursion into multiple file systems
  530. # if any mountpoints were specified
  531. if not self.is_using_source_mountpoints():
  532. args.append("--one-file-system")
  533. #
  534. for i in self.__source_dir_includes:
  535. args.append("--include")
  536. args.append(i)
  537. #
  538. for e in self.__source_dir_excludes:
  539. args.append("--exclude")
  540. args.append(e)
  541. #
  542. # args.append("--dry-run") # DEBUG !!!
  543. return args
  544. #
  545. def start_rsync_environment_variables(self):
  546. #
  547. env = {}
  548. #
  549. if self.__ssh_key is not None or self.__quiet_ssh is True:
  550. env["RSYNC_RSH"] = "ssh"
  551. if self.__ssh_key is not None:
  552. env["RSYNC_RSH"] += " -i " + shlex.quote(self.__ssh_key)
  553. if self.__quiet_ssh is True:
  554. env["RSYNC_RSH"] += " -q"
  555. return env
  556. @staticmethod
  557. def execute_command(command):
  558. #
  559. args = list()
  560. # Append the command
  561. if isinstance(command, str):
  562. args.append(command)
  563. elif isinstance(command, list):
  564. args.extend(command)
  565. else:
  566. raise Exception("Unsupported command datatype")
  567. # Spawn
  568. process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  569. stdout, stderr = process.communicate()
  570. stdout = stdout.decode()
  571. stderr = stderr.decode()
  572. return process.returncode, stdout, stderr
  573. #
  574. def execute_remote_ssh_command(self, command):
  575. #
  576. self.demand_ssh_config()
  577. #
  578. args = list()
  579. # ssh command
  580. args.append("ssh")
  581. # Quiet?
  582. if self.__quiet_ssh is True:
  583. args.append("-q")
  584. # ssh key
  585. if self.__ssh_key is not None:
  586. args.append("-i")
  587. args.append(self.__ssh_key)
  588. # ssh user@host
  589. args.append(self.__remote_user + "@" + self.__remote_host)
  590. # Append the command
  591. args.append("--")
  592. if isinstance(command, str):
  593. args.append(command)
  594. elif isinstance(command, list):
  595. args.extend(command)
  596. else:
  597. raise Exception("Unsupported command datatype")
  598. # Spawn
  599. # print(args)
  600. process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  601. # process = subprocess.Popen(args)
  602. stdout, stderr = process.communicate()
  603. stdout = stdout.decode()
  604. stderr = stderr.decode()
  605. # process.communicate()
  606. # stdout = ""
  607. # stderr = ""
  608. # print(stderr.decode())
  609. return process.returncode, stdout, stderr
  610. #
  611. def execute_rsync(self, _args):
  612. # Demand stuff
  613. self.demand_source_directory_config()
  614. self.demand_destination_directory_config()
  615. if self.is_using_ssh():
  616. self.demand_ssh_config()
  617. #
  618. args = self.start_rsync_args()
  619. args.extend(_args)
  620. # print(str(args))
  621. #
  622. env = self.start_rsync_environment_variables()
  623. #
  624. self.log("Executing rsync with the following arguments:", args)
  625. self.log("; And the following environment:", env)
  626. #
  627. # print("Debug -> Want to execute Rsync")
  628. # print("Args:", str(args))
  629. # print("Env:", str(env))
  630. # return (0, "", "")
  631. # Spawn Rsync in shell
  632. # process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=env)
  633. process = subprocess.Popen(args, env=env)
  634. # stdout, stderr = process.communicate()
  635. process.communicate()
  636. # self.eprint(stderr.decode())
  637. stdout = ""
  638. stderr = ""
  639. # Check return code
  640. # 0 = Success
  641. # 24 = Source files vanished
  642. return_code = process.returncode
  643. if return_code != 0 and return_code != 24:
  644. raise Exception("Rsync seems to have failed somehow! Got return code: " + str(return_code))
  645. return return_code, stdout, stderr
  646. def main():
  647. b = MikesBackup()
  648. b.do_backup()
  649. #
  650. if __name__ == "__main__":
  651. #
  652. main()