Compare a backup destination folder with its source, to see what has changed since the last backup occurred.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1131 lines
30 KiB

  1. #!/usr/bin/env python3
  2. """
  3. Mike's Backup Diff
  4. A simple script to help compare changes between a backup destination directory, and its source
  5. Copyright 2019 Mike Peralta; All rights reserved
  6. Released under the GNU GENERAL PUBLIC LICENSE v3 (See LICENSE file for more)
  7. """
  8. #
  9. import datetime
  10. import functools
  11. import humanfriendly
  12. import os
  13. import re
  14. import subprocess
  15. import sys
  16. #
  17. class BackupDiff:
  18. def __init__(self):
  19. self.__source_path = None
  20. self.__source_ssh_host = None
  21. self.__source_ssh_user = None
  22. self.__backup_path = None
  23. self.__backup_ssh_host = None
  24. self.__backup_ssh_user = None
  25. self.__ssh_key = None
  26. self.__source_path_items = None
  27. self.__backup_path_items = None
  28. self.__difference_entries = None
  29. self.__do_clean_difference_entries = True
  30. self.__force_rsync = False
  31. def run(self):
  32. self.consume_arguments()
  33. self.calculate_difference_entries()
  34. if self.__do_clean_difference_entries:
  35. self.clean_difference_entries()
  36. self.print_report()
  37. @staticmethod
  38. def current_time():
  39. now = datetime.datetime.now()
  40. now_s = now.strftime("%b-%d-%Y %I:%M%p")
  41. return str(now_s)
  42. def log(self, s, o=None):
  43. to_log = self.make_log_prefix() + str(s)
  44. if o is not None:
  45. to_log += " " + str(o)
  46. print(to_log)
  47. def make_log_prefix(self):
  48. now = self.current_time()
  49. prefix = "[" + now + "][Mike's Backup Diff] "
  50. return prefix
  51. def consume_arguments(self):
  52. i = 0
  53. while i + 1 < len(sys.argv):
  54. i += 1
  55. arg = sys.argv[i]
  56. # print("I:", i, "; arg:", arg)
  57. if arg == "--source-path":
  58. i, one_path = self.consume_argument_companion(i)
  59. self.__source_path = os.path.abspath(one_path)
  60. self.log("Found source path argument:", self.__source_path)
  61. elif arg == "--source-remote-host":
  62. i, host = self.consume_argument_companion(i)
  63. self.__source_ssh_host = host
  64. self.log("Will use source remote host: " + str(self.__source_ssh_host))
  65. elif arg == "--source-remote-user":
  66. i, user = self.consume_argument_companion(i)
  67. self.__source_ssh_user = user
  68. self.log("Will use source remote user: " + str(self.__source_ssh_user))
  69. elif arg == "--backup-path":
  70. i, one_path = self.consume_argument_companion(i)
  71. self.__backup_path = os.path.abspath(one_path)
  72. self.log("Found backup destination path argument:", self.__backup_path)
  73. elif arg == "--backup-remote-host":
  74. i, host = self.consume_argument_companion(i)
  75. self.__backup_ssh_host = host
  76. self.log("Will use backup remote host: " + str(self.__backup_ssh_host))
  77. elif arg == "--backup-remote-user":
  78. i, user = self.consume_argument_companion(i)
  79. self.__backup_ssh_user = user
  80. self.log("Will use backup remote user: " + str(self.__backup_ssh_user))
  81. elif arg == "--ssh-key":
  82. i, key = self.consume_argument_companion(i)
  83. self.__ssh_key = key
  84. self.log("Will use ssh key: " + str(self.__ssh_key))
  85. elif arg == "--use-rsync" or arg == "--rsync":
  86. self.__force_rsync = True
  87. self.log("Forcing comparison with rsync tool")
  88. elif arg == "--no-clean":
  89. self.__do_clean_difference_entries = False
  90. self.log("Won't clean Difference entries")
  91. else:
  92. self.log("The heck are you doing?")
  93. self.log("Unsupported argument: " + arg)
  94. self.log("i is: " + str(i))
  95. raise Exception("THE HECK")
  96. @staticmethod
  97. def consume_argument_companion(arg_index):
  98. companion_index = arg_index + 1
  99. if companion_index >= len(sys.argv):
  100. raise Exception("Expected argument after", sys.argv[arg_index])
  101. return_index = companion_index
  102. return return_index, sys.argv[companion_index]
  103. def calculate_comparison_items(self):
  104. self.consume_source_path()
  105. self.consume_backup_path()
  106. def consume_source_path(self):
  107. if self.__source_path is None:
  108. raise Exception("Please provide a source path")
  109. if not os.path.isdir(self.__source_path):
  110. raise Exception("Source path isn't a valid directory")
  111. self.log("Consuming source path: " + str(self.__source_path))
  112. source_path_items = self.consume_dir(self.__source_path)
  113. source_path_items = self.strip_root_dir(self.__source_path, source_path_items)
  114. self.log("Done consuming source path items: " + str(len(source_path_items)))
  115. self.__source_path_items = source_path_items
  116. def should_use_rsync(self):
  117. if self.__force_rsync:
  118. return True
  119. if self.__source_ssh_host or self.__source_ssh_user:
  120. return True
  121. if self.__backup_ssh_host or self.__backup_ssh_user:
  122. return True
  123. if self.__ssh_key:
  124. return True
  125. return False
  126. def consume_backup_path(self):
  127. if self.__backup_path is None:
  128. raise Exception("Please provide a backup destination path")
  129. if not os.path.isdir(self.__backup_path):
  130. raise Exception("Backup destination path isn't a valid directory")
  131. self.log("Consuming backup path: " + str(self.__backup_path))
  132. backup_path_items = self.consume_dir(self.__backup_path)
  133. backup_path_items = self.strip_root_dir(self.__backup_path, backup_path_items)
  134. self.log("Done consuming backup path items: " + str(len(backup_path_items)))
  135. self.__backup_path_items = backup_path_items
  136. def consume_dir(self, dir_path):
  137. #
  138. paths = set()
  139. #
  140. self.log("")
  141. for root, dirs, filenames in os.walk(dir_path):
  142. paths.add(root)
  143. for d in dirs:
  144. path = os.path.join(root, d)
  145. paths.add(path)
  146. # print(path)
  147. for f in filenames:
  148. path = os.path.join(root, f)
  149. paths.add(path)
  150. # print(path)
  151. self.print_progress_message("Consuming paths ... " + str(len(paths)))
  152. return paths
  153. def calculate_difference_entries(self):
  154. if self.should_use_rsync():
  155. self.calculate_difference_entries_with_rsync()
  156. else:
  157. self.calculate_difference_entries_directly()
  158. def calculate_difference_entries_with_rsync(self):
  159. entries = []
  160. stdout_lines, stderr_lines, return_code = self.execute_rsync()
  161. # print("STDOUT LINES:")
  162. # print(stdout_lines)
  163. # print("STDERR LINES:")
  164. # print(stderr_lines)
  165. #
  166. self.log("Calculating difference entries ...")
  167. # Regex patterns
  168. pattern_regular = re.compile("""^(?P<line>(?P<flags>[^\s]{11})(?P<item>.*))$""")
  169. pattern_message = re.compile("""^(?P<line>\*(?P<message>[\w]+)(?P<item>.*))$""")
  170. # Iterate over each stdout line
  171. for line in stdout_lines:
  172. # Try to match regular expressions
  173. match_regular = pattern_regular.match(line)
  174. match_message = pattern_message.match(line)
  175. # Regular line (Flags and Path)
  176. if match_regular:
  177. flags = match_regular.group("flags")
  178. change_type_character = flags[0]
  179. item_type = flags[1]
  180. # Determine which attributes are different
  181. attributes_part = flags[2:]
  182. different_checksum = "c" in attributes_part
  183. different_size = "s" in attributes_part
  184. different_modification_time = "t" in attributes_part
  185. different_permissions = "p" in attributes_part
  186. different_owner = "o" in attributes_part
  187. different_group = "g" in attributes_part
  188. different_acl = "a" in attributes_part
  189. different_extended_attributes = "x" in attributes_part
  190. #
  191. different_any_attribute = (
  192. different_checksum
  193. or different_size
  194. or different_modification_time
  195. or different_permissions
  196. or different_owner
  197. or different_group
  198. or different_acl
  199. or different_extended_attributes
  200. )
  201. item = match_regular.group("item").strip()
  202. entry = DifferenceEntry(item)
  203. # File folder, whatever
  204. if item_type == "d":
  205. entry.set_is_dir()
  206. elif item_type == "f":
  207. entry.set_is_file()
  208. # Different attributes
  209. # (before 'missing' stuff, because attribute syncs show up as xfers)
  210. if different_checksum:
  211. entry.set_is_different_checksum()
  212. elif different_size:
  213. entry.set_is_different_sizes()
  214. elif different_modification_time:
  215. entry.set_is_different_modification_times()
  216. elif different_permissions:
  217. entry.set_is_different_permissions()
  218. elif different_owner:
  219. entry.set_is_different_owner()
  220. elif different_group:
  221. entry.set_is_different_group()
  222. elif different_acl:
  223. entry.set_is_different_acl()
  224. elif different_extended_attributes:
  225. entry.set_is_different_extended_attributes()
  226. elif different_any_attribute:
  227. entry.set_is_different_attributes()
  228. # Missing from backup
  229. elif change_type_character == "<":
  230. entry.set_is_missing_from_backup()
  231. # Missing from ... backup? (confusing symbolstuffs)
  232. elif change_type_character == ">":
  233. entry.set_is_missing_from_backup()
  234. # Local change is occurring
  235. elif change_type_character == "c":
  236. entry.set_is_missing_from_backup()
  237. # Item is a hard link
  238. elif change_type_character == "h":
  239. entry.set_is_unknown("Rsync says this is a hard link")
  240. # "no change / transfer (could still be changing attributes)"
  241. elif change_type_character == ".":
  242. entry.set_is_unknown("Rsync says no change, but could be changing attributes")
  243. #
  244. entries.append(entry)
  245. # Message line
  246. elif match_message:
  247. message = match_message.group("message").strip()
  248. item = match_message.group("item").strip()
  249. entry = DifferenceEntry(item)
  250. if message == "deleting":
  251. entry.set_is_missing_from_source()
  252. entry.set_is_dir(item[-1] == "/")
  253. entry.set_is_file(not item[-1] == "/")
  254. else:
  255. self.log("IS UNKNOWN MESSAGE:" + message)
  256. entry.set_is_unknown("Unhandled message: " + message)
  257. entries.append(entry)
  258. # Unsupported type of line
  259. else:
  260. #
  261. self.log("Don't know how to parse this line: " + line)
  262. self.log("Finished calculating difference entries")
  263. self.__difference_entries = entries
  264. def execute_rsync(self):
  265. #
  266. args = list()
  267. # Rsync
  268. args.append("rsync")
  269. # Dry run!!
  270. args.append("--dry-run")
  271. # Produces the main output we'll parse
  272. args.append("--itemize-changes")
  273. # Rsh command
  274. rsh_command = self.make_rsync_rsh_argument(self.__ssh_key)
  275. if rsh_command:
  276. args.append(rsh_command)
  277. # Main sync flags
  278. args.append("--archive")
  279. args.append("--delete")
  280. # Source path
  281. args.append(self.make_rsync_path(self.__source_ssh_host, self.__source_ssh_user, self.__source_path))
  282. # Backup path
  283. args.append(self.make_rsync_path(self.__backup_ssh_host, self.__backup_ssh_user, self.__backup_path))
  284. #
  285. self.log("Executing rsync")
  286. # self.log("Executing rsync with the following arguments:")
  287. # self.log(str(args))
  288. # self.log(" ".join(args))
  289. # Start the subprocess
  290. process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  291. # Live output of stdout
  292. print()
  293. stdout_lines = []
  294. for line in iter(process.stdout.readline, b''):
  295. line = line.decode().strip()
  296. stdout_lines.append(line)
  297. # print(line)
  298. self.print_progress_message("Captured " + str(len(stdout_lines)) + " lines from Rsync")
  299. # Grab all the stderr lines
  300. stderr_lines = []
  301. for line in iter(process.stderr.readline, b''):
  302. line = line.decode().strip()
  303. stderr_lines.append(line)
  304. # Make sure it's completely finished
  305. process.communicate()
  306. self.log("Rsync has finished executing")
  307. # Accept Success (0), and Partial Transfer Codes (23 and 24)
  308. if process.returncode not in [0, 23, 24]:
  309. raise Exception("Failed to execute Rsync; Exited with code " + str(process.returncode))
  310. return stdout_lines, stderr_lines, process.returncode
  311. @staticmethod
  312. def make_rsync_path(ssh_host, ssh_user, path):
  313. rsync_path = ""
  314. if (not ssh_host) and ssh_user:
  315. raise Exception("ssh_user provided (" + str(ssh_user) + ") without ssh_host")
  316. if ssh_user:
  317. rsync_path += ssh_user + "@"
  318. if ssh_host:
  319. rsync_path += ssh_host + ":" + path
  320. else:
  321. rsync_path += path
  322. # Absolute path doesn't have trailing slash, which works well for rsync here
  323. rsync_path += "/"
  324. return rsync_path
  325. @staticmethod
  326. def make_rsync_rsh_argument(ssh_key):
  327. if not ssh_key:
  328. return None
  329. if not os.path.isfile(ssh_key):
  330. raise Exception("SSH key does not exist: " + str(ssh_key))
  331. return "--rsh=ssh -i " + ssh_key
  332. def calculate_difference_entries_directly(self):
  333. self.calculate_comparison_items()
  334. entries = []
  335. # Compare everything in the source path
  336. self.log("")
  337. i = 1
  338. for item in self.__source_path_items:
  339. self.print_progress_message(
  340. "Looking for differences from source to backup ... "
  341. + str(i) + " of " + str(len(self.__source_path_items))
  342. )
  343. entry = self.calculate_difference_entry(item)
  344. if entry:
  345. entries.append(entry)
  346. i += 1
  347. # Compare only things in the backup path that weren't
  348. # in the source
  349. self.log("")
  350. i = 1
  351. backup_items_not_in_source = self.__backup_path_items - self.__source_path_items
  352. for item in backup_items_not_in_source:
  353. self.print_progress_message(
  354. "Looking for differences from backup to source ... "
  355. + str(i) + " of " + str(len(backup_items_not_in_source))
  356. )
  357. entry = self.calculate_difference_entry(item)
  358. if entry:
  359. entries.append(entry)
  360. i += 1
  361. self.__difference_entries = entries
  362. def clean_difference_entries(self, entries: list=None):
  363. if entries is None:
  364. entries = self.__difference_entries
  365. self.log("Cleaning " + str(len(entries)) + " difference entries")
  366. # Build a temp list of all known difference entries
  367. # *that refer to a missing directory*
  368. temp_entries = []
  369. for entry in entries:
  370. if entry.get_is_missing_from_source() or entry.get_is_missing_from_backup():
  371. if entry.get_is_dir():
  372. temp_entries.append(entry)
  373. # print("Temp entries count:", len(temp_entries))
  374. # Loop through entries, attempting to clean for one at a time,
  375. # until no cleaning has been done
  376. print()
  377. clean_iterations = 0
  378. while True:
  379. clean_iterations += 1
  380. most_shallow_entry = None
  381. # Locate the most shallow entry
  382. for entry in temp_entries:
  383. if entry.get_is_missing_from_source() or entry.get_is_missing_from_backup():
  384. # print("Found entry of type 'missing'")
  385. # print(entry)
  386. item = entry.get_item()
  387. if entry.get_is_dir():
  388. # print("Found entry dir:", item)
  389. if most_shallow_entry is None or len(item) < len(most_shallow_entry.get_item()):
  390. most_shallow_entry = entry
  391. # print("Found shallow entry:")
  392. # print(entry)
  393. # Finish if we haven't found anything
  394. if not most_shallow_entry:
  395. self.print_progress_message(
  396. "Cleaning difference entries; "
  397. + str(clean_iterations) + " iterations; "
  398. + str(len(temp_entries)) + " in examination pool; "
  399. + str(len(entries)) + " total"
  400. )
  401. break
  402. # Remove this entry from the temp list, and clean with it as root
  403. temp_entries.remove(most_shallow_entry)
  404. self.clean_child_difference_entries(entries, most_shallow_entry)
  405. self.clean_child_difference_entries(temp_entries, most_shallow_entry)
  406. self.log(
  407. "Cleaning difference entries; "
  408. + str(clean_iterations) + " iterations; "
  409. + str(len(temp_entries)) + " in examination pool; "
  410. + str(len(entries)) + " total"
  411. )
  412. self.__difference_entries = entries
  413. def clean_child_difference_entries(self, entries: list, root_entry):
  414. if entries is None:
  415. entries = self.__difference_entries
  416. # print("Enter clean_child_difference_entries")
  417. # print(root_entry)
  418. root_entry_item = root_entry.get_item()
  419. # print("Cleaning child entries for root entry")
  420. # print(root_entry)
  421. # print()
  422. # print()
  423. entries_to_delete = []
  424. # Check every other entry as a possible child of the root
  425. print("")
  426. child_iteration = 0
  427. for child_entry in entries:
  428. child_iteration += 1
  429. self.print_progress_message("Looking for child entry to clean " + str(child_iteration))
  430. if child_entry != root_entry:
  431. child_entry_item = child_entry.get_item()
  432. # Entry must be longer than the shallow entry
  433. if len(child_entry_item) >= len(root_entry_item):
  434. # Entry must begin with the shallow entry (ie shallow must be a root path of deeper)
  435. if child_entry_item.find(root_entry_item) == 0:
  436. # We can purge the deeper entry
  437. entries_to_delete.append(child_entry)
  438. # print("Deleting unneeded child entry:")
  439. # print("> Root:", root_entry_item)
  440. # print("> Child:", child_entry_item)
  441. # print()
  442. # print()
  443. # Handle entries to delete
  444. print("")
  445. delete_iteration = 0
  446. for entry in entries_to_delete:
  447. delete_iteration += 1
  448. self.print_progress_message(
  449. "Deleting child entry "
  450. + str(delete_iteration) + " / " + str(len(entries_to_delete))
  451. )
  452. entries.remove(entry)
  453. return len(entries_to_delete) > 0
  454. def strip_root_dir(self, root_dir, paths: set):
  455. if isinstance(paths, str):
  456. return self.strip_root_dir_from_string(root_dir, paths)
  457. paths_stripped = set()
  458. for path in paths:
  459. paths_stripped.add(self.strip_root_dir_from_string(root_dir, path))
  460. return paths_stripped
  461. @staticmethod
  462. def strip_root_dir_from_string(root_dir, path):
  463. #
  464. pos = path.find(root_dir)
  465. if pos == -1:
  466. raise Exception("Couldn't find root dir in path", str(root_dir), str(path))
  467. #
  468. if pos > 0:
  469. raise Exception("Root dir wasn't found at the beginning of path", str(root_dir), str(path))
  470. #
  471. path_stripped = path[len(root_dir) + 1:]
  472. # print(path, "===>", path_stripped)
  473. return path_stripped
  474. #
  475. def calculate_difference_entry(self, comparison_item):
  476. entry = DifferenceEntry(comparison_item)
  477. path_source = os.path.join(self.__source_path, comparison_item)
  478. path_source_exists = False
  479. path_source_is_dir = None
  480. path_source_mtime = None
  481. try:
  482. path_source_is_dir = os.path.isdir(path_source)
  483. path_source_mtime = int(os.path.getmtime(path_source))
  484. path_source_exists = True
  485. except FileNotFoundError:
  486. pass
  487. path_backup = os.path.join(self.__backup_path, comparison_item)
  488. path_backup_exists = False
  489. path_backup_is_dir = None
  490. path_backup_mtime = None
  491. try:
  492. path_backup_is_dir = os.path.isdir(path_backup)
  493. path_backup_mtime = int(os.path.getmtime(path_backup))
  494. path_backup_exists = True
  495. except FileNotFoundError:
  496. pass
  497. # In source but not backup
  498. if path_source_exists and not path_backup_exists:
  499. if path_source_is_dir is not None:
  500. entry.set_is_dir(path_source_is_dir)
  501. entry.set_is_missing_from_backup()
  502. # In backup but not source
  503. elif path_backup_exists and not path_source_exists:
  504. entry.set_is_dir(path_backup_is_dir)
  505. entry.set_is_missing_from_source()
  506. # In neither
  507. # Possible if a bad symlink is present
  508. elif not path_source_exists and not path_backup_exists:
  509. entry.set_is_missing_from_both()
  510. # Type mismatch
  511. elif os.path.isdir(path_source) and os.path.isfile(path_backup):
  512. entry.set_is_type_mismatch("Source is a directory, but backup is a file")
  513. elif os.path.isfile(path_source) and os.path.isdir(path_backup):
  514. entry.set_is_type_mismatch("Source is a file, but backup is a directory")
  515. # Compare props
  516. else:
  517. # print("Received item:", comparison_item)
  518. # print("Comparing props with:", path_source)
  519. # print("Comparing props with:", path_backup)
  520. path_source_size = os.path.getsize(path_source)
  521. path_backup_size = os.path.getsize(path_backup)
  522. entry.set_is_dir(os.path.isdir(path_source))
  523. # Different file sizes
  524. if os.path.isfile(path_source) \
  525. and os.path.isfile(path_backup) \
  526. and (path_source_size != path_backup_size):
  527. entry.set_is_different_sizes(path_source_size, path_backup_size)
  528. # Source modification time is newer
  529. elif path_source_mtime > path_backup_mtime:
  530. entry.set_source_is_newer(path_source_mtime, path_backup_mtime)
  531. # Backup modification time is newer
  532. elif path_backup_mtime > path_source_mtime:
  533. entry.set_backup_is_newer(path_source_mtime, path_backup_mtime)
  534. # No difference
  535. else:
  536. entry = None
  537. return entry
  538. @staticmethod
  539. def sort_difference_entries(entries):
  540. entries.sort(
  541. key=functools.cmp_to_key(
  542. lambda entry_a, entry_b: BackupDiff.sort_difference_entries_key_callback(entry_a, entry_b)
  543. )
  544. )
  545. @staticmethod
  546. def sort_difference_entries_key_callback(entry_a, entry_b):
  547. if entry_a.get_is_dir() and not entry_b.get_is_dir():
  548. return -1
  549. if not entry_a.get_is_dir() and entry_b.get_is_dir():
  550. return 1
  551. item_a = entry_a.get_item()
  552. item_b = entry_b.get_item()
  553. if item_a > item_b:
  554. return -1
  555. elif item_b > item_b:
  556. return 1
  557. return 0
  558. def generate_report(self):
  559. # Start report structure
  560. report = {
  561. "missing_from_source": {
  562. "label": "Items missing from the source",
  563. "entries": []
  564. },
  565. "missing_from_backup": {
  566. "label": "Items missing from the backup",
  567. "entries": []
  568. },
  569. "missing_from_both": {
  570. "label": "Items missing from both source and backup (bad link?)",
  571. "entries": []
  572. },
  573. "newer_source": {
  574. "label": "Items newer in the source",
  575. "entries": []
  576. },
  577. "newer_backup": {
  578. "label": "Items newer in the backup",
  579. "entries": []
  580. },
  581. "type_mismatch": {
  582. "label": "Directory/File type mismatch",
  583. "entries": []
  584. },
  585. "size_difference": {
  586. "label": "Items with different file sizes",
  587. "entries": []
  588. },
  589. "different_attributes": {
  590. "label": "Items with different attributes",
  591. "entries": []
  592. },
  593. "unknown": {
  594. "label": "Differences of an unknown type",
  595. "entries": []
  596. }
  597. }
  598. # Find entries missing from source
  599. for entry in self.__difference_entries:
  600. if entry.get_is_missing_from_source():
  601. report["missing_from_source"]["entries"].append(entry)
  602. # Find entries missing from backup
  603. for entry in self.__difference_entries:
  604. if entry.get_is_missing_from_backup():
  605. report["missing_from_backup"]["entries"].append(entry)
  606. # Find entries missing from both
  607. for entry in self.__difference_entries:
  608. if entry.get_is_missing_from_both():
  609. report["missing_from_both"]["entries"].append(entry)
  610. # Find directory/file type mismatches
  611. for entry in self.__difference_entries:
  612. if entry.get_is_type_mismatch():
  613. report["type_mismatch"]["entries"].append(entry)
  614. # Find newer in source
  615. for entry in self.__difference_entries:
  616. if entry.get_source_is_newer():
  617. report["newer_source"]["entries"].append(entry)
  618. # Find newer in backup
  619. for entry in self.__difference_entries:
  620. if entry.get_backup_is_newer():
  621. report["newer_backup"]["entries"].append(entry)
  622. # Different file sizes
  623. for entry in self.__difference_entries:
  624. if entry.get_is_different_sizes():
  625. report["size_difference"]["entries"].append(entry)
  626. # Different attributes
  627. for entry in self.__difference_entries:
  628. if entry.get_is_different_attributes():
  629. report["different_attributes"]["entries"].append(entry)
  630. # Differences of an unknown nature
  631. for entry in self.__difference_entries:
  632. if entry.get_is_unknown():
  633. report["unknown"]["entries"].append(entry)
  634. # Sort all entries
  635. for section_key in report:
  636. self.sort_difference_entries(report[section_key]["entries"])
  637. return report
  638. def print_progress_message(self, s):
  639. sys.stdout.write("\033[F") # back to previous line
  640. sys.stdout.write("\033[K") # clear line
  641. to_print = self.make_log_prefix() + s
  642. print(to_print)
  643. @staticmethod
  644. def print_report_heading(s, hooded: bool=False):
  645. star_count = 5
  646. stars = "*" * star_count
  647. title = stars + " " + s + " " + stars
  648. print("")
  649. if hooded:
  650. print("*" * len(title))
  651. print(title)
  652. def print_report(self):
  653. report = self.generate_report()
  654. section_order = [
  655. "type_mismatch",
  656. "missing_from_both",
  657. "missing_from_source", "newer_source",
  658. "missing_from_backup", "newer_backup",
  659. "size_difference",
  660. "different_attributes",
  661. "unknown"
  662. ]
  663. #
  664. print()
  665. self.print_report_heading("Mike's Backup Diff Report", True)
  666. print("Source:", self.__source_path)
  667. print("Backup:", self.__backup_path)
  668. # Print each non-empty report section
  669. found_anything = False
  670. for section_key in section_order:
  671. if len(report[section_key]["entries"]):
  672. found_anything = True
  673. print("")
  674. self.print_report_heading(report[section_key]["label"])
  675. for entry in report[section_key]["entries"]:
  676. if entry.get_is_dir():
  677. prefix = "Directory: "
  678. elif entry.get_is_file():
  679. prefix = "File: "
  680. else:
  681. prefix = ""
  682. message = entry.get_message()
  683. if message:
  684. suffix = " (" + message + ")"
  685. else:
  686. suffix = ""
  687. print(prefix + entry.get_item() + suffix)
  688. # Lil debebuggin'
  689. for section_key in report:
  690. if section_key not in section_order:
  691. raise Exception("Report key " + section_key + " wasn't found in the section_order ... whoopsies")
  692. if not found_anything:
  693. print()
  694. print("Everything seems to match !")
  695. #
  696. class DifferenceEntry:
  697. def __init__(self, item):
  698. self.__item = None
  699. self.__item_is_file = None
  700. self.__item_is_dir = None
  701. self.__type = None
  702. self.__message = None
  703. self.CONST_TYPE_TYPE_MISMATCH = "type_mismatch"
  704. self.CONST_TYPE_MISSING_IN_SOURCE = "missing_in_source"
  705. self.CONST_TYPE_MISSING_IN_BACKUP = "missing_in_backup"
  706. self.CONST_TYPE_MISSING_IN_BOTH = "missing_in_both"
  707. self.CONST_TYPE_SOURCE_IS_NEWER = "source_is_newer"
  708. self.CONST_TYPE_BACKUP_IS_NEWER = "backup_is_newer"
  709. self.CONST_TYPE_DIFFERENT_SIZES = "different_sizes"
  710. self.CONST_TYPE_DIFFERENT_ATTRIBUTES = "different_attributes"
  711. self.CONST_TYPE_UNKNOWN = "unknown"
  712. self.set_is_unknown("DEFAULT MESSAGE")
  713. if item:
  714. self.set_item(item)
  715. def __str__(self):
  716. s = ""
  717. s += "--- DifferenceEntry ---"
  718. s += "\nItem: " + str(self.__item)
  719. s += "\nType: " + self.__type
  720. s += "\nMessage: " + str(self.__message)
  721. return s
  722. def set_item(self, i):
  723. self.__item = i
  724. def get_item(self):
  725. return self.__item
  726. def set_message(self, m):
  727. self.__message = m
  728. def get_message(self):
  729. return self.__message
  730. def set_is_dir(self, is_dir: bool=True):
  731. if is_dir:
  732. self.__item_is_dir = True
  733. self.__item_is_file = False
  734. else:
  735. self.__item_is_dir = False
  736. self.__item_is_file = True
  737. def get_is_dir(self):
  738. return self.__item_is_dir
  739. def set_is_file(self, is_file: bool=True):
  740. self.set_is_dir(not is_file)
  741. def get_is_file(self):
  742. return self.__item_is_file
  743. def set_is_type_mismatch(self, message):
  744. self.__type = self.CONST_TYPE_TYPE_MISMATCH
  745. self.__message = message
  746. def get_is_type_mismatch(self):
  747. return self.__type == self.CONST_TYPE_TYPE_MISMATCH
  748. def set_is_missing_from_source(self):
  749. self.__type = self.CONST_TYPE_MISSING_IN_SOURCE
  750. self.__message = None
  751. def get_is_missing_from_source(self):
  752. return self.__type == self.CONST_TYPE_MISSING_IN_SOURCE
  753. def set_is_missing_from_backup(self):
  754. self.__type = self.CONST_TYPE_MISSING_IN_BACKUP
  755. self.__message = None
  756. def get_is_missing_from_backup(self):
  757. return self.__type == self.CONST_TYPE_MISSING_IN_BACKUP
  758. def set_is_missing_from_both(self):
  759. self.__type = self.CONST_TYPE_MISSING_IN_BOTH
  760. self.__message = "Item isn't in source or backup (bad link?)"
  761. def get_is_missing_from_both(self):
  762. return self.__type == self.CONST_TYPE_MISSING_IN_BOTH
  763. def set_source_is_newer(self, stamp_source, stamp_backup):
  764. time_difference = self.friendly_time_difference(stamp_source, stamp_backup)
  765. self.__type = self.CONST_TYPE_SOURCE_IS_NEWER
  766. self.__message = "Item has been modified more recently in source (" + str(stamp_source) + ")" \
  767. + " than in backup (" + str(stamp_backup) + ")" \
  768. + "; Difference is " + str(time_difference)
  769. def get_source_is_newer(self):
  770. return self.__type == self.CONST_TYPE_SOURCE_IS_NEWER
  771. def set_backup_is_newer(self, stamp_source, stamp_backup):
  772. time_difference = self.friendly_time_difference(stamp_source, stamp_backup)
  773. self.__type = self.CONST_TYPE_BACKUP_IS_NEWER
  774. self.__message = "Item has been modified more recently in backup (" + str(stamp_backup) + ")" \
  775. + " than in source (" + str(stamp_source) + ")" \
  776. + "; Difference is " + str(time_difference)
  777. def get_backup_is_newer(self):
  778. return self.__type == self.CONST_TYPE_BACKUP_IS_NEWER
  779. def set_is_different_sizes(self, source_item_size=None, backup_item_size=None):
  780. self.__type = self.CONST_TYPE_DIFFERENT_SIZES
  781. if source_item_size and backup_item_size:
  782. self.__message = \
  783. "Source has a file size of " + str(source_item_size) \
  784. + ", but backup has a file size of " + str(backup_item_size)
  785. else:
  786. self.__message = None
  787. def get_is_different_sizes(self):
  788. return self.__type == self.CONST_TYPE_DIFFERENT_SIZES
  789. def set_is_different_attributes(self, message=None):
  790. self.__type = self.CONST_TYPE_DIFFERENT_ATTRIBUTES
  791. self.__message = message
  792. def get_is_different_attributes(self):
  793. return self.__type == self.CONST_TYPE_DIFFERENT_ATTRIBUTES
  794. def set_is_different_checksum(self):
  795. self.set_is_different_attributes("Different checksums")
  796. def set_is_different_modification_times(self):
  797. self.set_is_different_attributes("Different modification times")
  798. def set_is_different_permissions(self):
  799. self.set_is_different_attributes("Different permissions")
  800. def set_is_different_owner(self):
  801. self.set_is_different_attributes("Different owners")
  802. def set_is_different_group(self):
  803. self.set_is_different_attributes("Different groups")
  804. def set_is_different_acl(self):
  805. self.set_is_different_attributes("Different ACLs")
  806. def set_is_different_extended_attributes(self):
  807. self.set_is_different_attributes("Different extended attributes")
  808. def set_is_unknown(self, message):
  809. self.__type = self.CONST_TYPE_UNKNOWN
  810. self.__message = message
  811. def get_is_unknown(self):
  812. return self.__type == self.CONST_TYPE_UNKNOWN
  813. @staticmethod
  814. def friendly_time_difference(stamp1, stamp2):
  815. delta = abs(stamp1 - stamp2)
  816. friendly = humanfriendly.format_timespan(delta)
  817. return friendly
  818. #
  819. def main():
  820. bd = BackupDiff()
  821. bd.run()
  822. #
  823. if __name__ == "__main__":
  824. main()