|
View:
New views
2 Messages
—
Rating Filter:
Alert me
|
|
|
[Fwd: Resuming initial backups]After over a month, I finally have a patch to implement this. It detects
a failed initial backup by the following checks: * No current_mirror file * 0 or 1 error_log files * 0 or 1 mirror_metadata files * No files in the increments directory If all of the above are true, then it removes all contents of the rdiff-backup directory, allowing backup resumption. I believe that this guarantees that there will be no information lost, since there are no historical increments being lost. Andrew, what do you think of the patch? (Especially the delete_dir_no_files function.) Thanks, JoshN -------- Original Message -------- Subject: [rdiff-backup-users] Resuming initial backups Date: Fri, 22 Aug 2008 09:24:23 -0500 From: Josh Nisly <rdiffbackup@...> To: rdiff-backup <rdiff-backup-users@...> I have clients that back up large data sets over the internet. The initial backup is always difficult, since rdiff-backup does not handle it well when the initial backup fails. If it fails, subsequent backups display an error saying that the initial backup probably failed, and it should be removed. My question is this: couldn't rdiff-backup detect this and handle it automatically? I've added code to my private branch that checks if there is no current mirror and one or fewer error_log and mirror_metadata files. If so, it removes them and runs normally. Does this seem like a reasonable way to handle it? If so, I'd be happy to submit a patch implementing it. Thanks, JoshN --- rdiff_backup/Security.py 20 Aug 2008 17:43:25 -0000 1.36 +++ rdiff_backup/Security.py 3 Oct 2008 16:14:15 -0000 @@ -45,7 +45,8 @@ 'os.chown':0, 'os.remove':0, 'os.removedirs':0, 'os.rename':0, 'os.renames':0, 'os.rmdir':0, 'os.unlink':0, 'os.utime':0, 'os.lchown':0, 'os.link':1, 'os.symlink':1, - 'os.mkdir':0, 'os.makedirs':0} + 'os.mkdir':0, 'os.makedirs':0, + 'rpath.delete_dir_no_files':0} def initialize(action, cmdpairs): """Initialize allowable request list and chroot""" @@ -180,6 +181,7 @@ if sec_level == "all": l.extend(["os.mkdir", "os.chown", "os.lchown", "os.rename", "os.unlink", "os.remove", "os.chmod", "os.makedirs", + "rpath.delete_dir_no_files", "backup.DestinationStruct.patch", "restore.TargetStruct.get_initial_iter", "restore.TargetStruct.patch", --- rdiff_backup/rpath.py 22 Jul 2008 17:46:59 -0000 1.126 +++ rdiff_backup/rpath.py 3 Oct 2008 16:14:07 -0000 @@ -358,6 +358,14 @@ else: basestr = ".".join(dotsplit[:-2]) return (compressed, timestring, ext, basestr) +def delete_dir_no_files(rp): + """Deletes the directory at self.path if empty. Raises if the + directory contains files.""" + log.Log("Determining if directory contains files: %s" % rp.path, 7) + if rp.contains_files(): + raise RPathException("Directory contains files.") + rp.delete() + class RORPath: """Read Only RPath - carry information about a path @@ -1047,6 +1055,18 @@ else: self.conn.os.unlink(self.path) self.setdata() + def contains_files(self): + log.Log("Determining if directory contains files: %s" % self.path, 7) + dir_entries = self.listdir() + for entry in dir_entries: + child_rp = self.append_path(entry) + if not child_rp.isdir(): + return False + else: + if not child_rp.contains_files(): + return False + return True + def quote(self): """Return quoted self.path for use with os.system()""" return '"%s"' % self.regex_chars_to_quote.sub( --- rdiff_backup/Main.py 20 Aug 2008 17:43:25 -0000 1.119 +++ rdiff_backup/Main.py 3 Oct 2008 14:34:11 -0000 @@ -378,6 +378,46 @@ Log.FatalError("Source %s is not a directory" % rpin.path) Globals.rbdir = rpout.append_path("rdiff-backup-data") +def fix_failed_initial_backup(rpout): + if Globals.rbdir.lstat(): + rbdir_files = Globals.rbdir.listdir() + mirror_markers = filter(lambda x: x.startswith("current_mirror"), + rbdir_files) + error_logs = filter(lambda x: x.startswith("error_log"), + rbdir_files) + metadata_mirrors = filter(lambda x: x.startswith("mirror_metadata"), + rbdir_files) + # If we have no current_mirror marker, and the increments directory + # is empty, we most likely have a failed backup. At any rate, if the + # increments directory is empty, we can't lose any historical diffs, + # because there aren't any. + if not mirror_markers and len(error_logs) <= 1 and \ + len(metadata_mirrors) <= 1: + Log("Found interrupted initial backup. Removing...", 2) + # Try to delete the increments dir first + if 'increments' in rbdir_files: + rbdir_files.remove('increments') # Only try to delete once + rp = Globals.rbdir.append('increments') + try: + rp.conn.rpath.delete_dir_no_files(rp) + except rpath.RPathException: + Log("Increments dir contains files.", 4) + return + except Security.Violation: + Log("Server doesn't support resuming.", 4) + return + for file_name in rbdir_files: + rp = Globals.rbdir.append_path(file_name) + if rp.isdir(): + try: + rp.conn.rpath.delete_dir_no_files(rp) + except rpath.RPathException: + Log("Directory contains files.", 4) + return + else: + rp.delete() + + def backup_set_rbdir(rpin, rpout): """Initialize data dir and logging""" global incdir @@ -389,6 +429,8 @@ rdiff-backup like this could mess up what is currently in it. If you want to update or overwrite it, run rdiff-backup with the --force option.""" % rpout.path) + else: # Check for interrupted initial backup + fix_failed_initial_backup(rpout) if not Globals.rbdir.lstat(): Globals.rbdir.mkdir() SetConnections.UpdateGlobal('rbdir', Globals.rbdir) _______________________________________________ rdiff-backup-users mailing list at rdiff-backup-users@... http://lists.nongnu.org/mailman/listinfo/rdiff-backup-users Wiki URL: http://rdiff-backup.solutionsfirst.com.au/index.php/RdiffBackupWiki |
|
|
Re: [Fwd: Resuming initial backups]On Oct 3, 2008, at 12:36 PM, Josh Nisly wrote:
> After over a month, I finally have a patch to implement this. It > detects a failed initial backup by the following checks: > * No current_mirror file > * 0 or 1 error_log files > * 0 or 1 mirror_metadata files > * No files in the increments directory > > If all of the above are true, then it removes all contents of the > rdiff-backup directory, allowing backup resumption. I believe that > this guarantees that there will be no information lost, since there > are no historical increments being lost. > > Andrew, what do you think of the patch? (Especially the > delete_dir_no_files function.) I think that criteria makes sense for establishing that the initial backup failed. I have other comments about the patch itself below. > --- rdiff_backup/Security.py 20 Aug 2008 17:43:25 -0000 1.36 > +++ rdiff_backup/Security.py 3 Oct 2008 16:14:15 -0000 > @@ -45,7 +45,8 @@ > 'os.chown':0, 'os.remove':0, 'os.removedirs':0, > 'os.rename':0, 'os.renames':0, 'os.rmdir':0, 'os.unlink':0, > 'os.utime':0, 'os.lchown':0, 'os.link':1, 'os.symlink':1, > - 'os.mkdir':0, 'os.makedirs':0} > + 'os.mkdir':0, 'os.makedirs':0, > + 'rpath.delete_dir_no_files':0} > > def initialize(action, cmdpairs): > """Initialize allowable request list and chroot""" > @@ -180,6 +181,7 @@ > if sec_level == "all": > l.extend(["os.mkdir", "os.chown", "os.lchown", "os.rename", > "os.unlink", "os.remove", "os.chmod", "os.makedirs", > + "rpath.delete_dir_no_files", > "backup.DestinationStruct.patch", > "restore.TargetStruct.get_initial_iter", > "restore.TargetStruct.patch", > --- rdiff_backup/rpath.py 22 Jul 2008 17:46:59 -0000 1.126 > +++ rdiff_backup/rpath.py 3 Oct 2008 16:14:07 -0000 > @@ -358,6 +358,14 @@ > else: basestr = ".".join(dotsplit[:-2]) > return (compressed, timestring, ext, basestr) > > +def delete_dir_no_files(rp): > + """Deletes the directory at self.path if empty. Raises if the > + directory contains files.""" > + log.Log("Determining if directory contains files: %s" % rp.path, 7) > + if rp.contains_files(): > + raise RPathException("Directory contains files.") > + rp.delete() > + Perhaps verify that rp is a directory? (either with assert or a simple return if not true) > class RORPath: > """Read Only RPath - carry information about a path > @@ -1047,6 +1055,18 @@ > else: self.conn.os.unlink(self.path) > self.setdata() > > + def contains_files(self): > + log.Log("Determining if directory contains files: %s" % > self.path, 7) > + dir_entries = self.listdir() > + for entry in dir_entries: > + child_rp = self.append_path(entry) > + if not child_rp.isdir(): > + return False > + else: > + if not child_rp.contains_files(): > + return False > + return True > + I think there are some issues here: 1) "child_rp = self.append_path(entry)" should be "child_rp = self.append(entry)" 2) I think there should be a check that self is a directory before the call to self.listdir() ... otherwise, if called on a file, OSError is thrown. We want to guard against potential future mistakes. 3) What should contains_files() return? Based on the name, my instinct says that it should return True if the directory contains files. It looks to me like it returns False if it contains files and True if empty.... 4) There's a logic problem here: let's say the directory self contains a file called 'bar' and a directory called 'foo', which contains files.The first time through "for entry in dir_entries" loop, we check 'if not child_rp.isdir()' on 'bar' and return False, without testing 'foo' at all ... is this something you wanted to do? Maybe I'm confused on what you intend. Note: that problem is even worse than I made it out because os.listdir() returns files in arbitrary order. See iterate_in_dir() and listdir() functions in selection.py to see how rdiff-backup handles recursing through directories. 5) Lastly, why is the function recursing? If it contains a directory (even an empty one), then it technically contains a file -- no recursion necessary. > def quote(self): > """Return quoted self.path for use with os.system()""" > return '"%s"' % self.regex_chars_to_quote.sub( > --- rdiff_backup/Main.py 20 Aug 2008 17:43:25 -0000 1.119 > +++ rdiff_backup/Main.py 3 Oct 2008 14:34:11 -0000 > @@ -378,6 +378,46 @@ > Log.FatalError("Source %s is not a directory" % rpin.path) > Globals.rbdir = rpout.append_path("rdiff-backup-data") > > +def fix_failed_initial_backup(rpout): > + if Globals.rbdir.lstat(): > + rbdir_files = Globals.rbdir.listdir() > + mirror_markers = filter(lambda x: x.startswith("current_mirror"), > + rbdir_files) > + error_logs = filter(lambda x: x.startswith("error_log"), > + rbdir_files) > + metadata_mirrors = filter(lambda x: > x.startswith("mirror_metadata"), > + rbdir_files) > + # If we have no current_mirror marker, and the increments directory > + # is empty, we most likely have a failed backup. At any rate, if > the > + # increments directory is empty, we can't lose any historical > diffs, > + # because there aren't any. > + if not mirror_markers and len(error_logs) <= 1 and \ > + len(metadata_mirrors) <= 1: > + Log("Found interrupted initial backup. Removing...", 2) > + # Try to delete the increments dir first > + if 'increments' in rbdir_files: > + rbdir_files.remove('increments') # Only try to delete once > + rp = Globals.rbdir.append('increments') > + try: > + rp.conn.rpath.delete_dir_no_files(rp) > + except rpath.RPathException: > + Log("Increments dir contains files.", 4) > + return > + except Security.Violation: > + Log("Server doesn't support resuming.", 4) > + return > + for file_name in rbdir_files: > + rp = Globals.rbdir.append_path(file_name) > + if rp.isdir(): > + try: > + rp.conn.rpath.delete_dir_no_files(rp) > + except rpath.RPathException: > + Log("Directory contains files.", 4) > + return > + else: > + rp.delete() > + > + In the rdiff-backup-data directory, I believe the only directories which could potentially exist besides increments/ are the temporary directories created by the FS abilities tests -- those would be safe to delete without stopping with the "Directory contains files" message. Oh, there's also the long_filename_data/ directory ... not sure how you want to deal with that. I suppose we should keep all of the entries in there. > > def backup_set_rbdir(rpin, rpout): > """Initialize data dir and logging""" > global incdir > @@ -389,6 +429,8 @@ > rdiff-backup like this could mess up what is currently in it. If you > want to update or overwrite it, run rdiff-backup with the --force > option.""" % rpout.path) > + else: # Check for interrupted initial backup > + fix_failed_initial_backup(rpout) > > if not Globals.rbdir.lstat(): Globals.rbdir.mkdir() > SetConnections.UpdateGlobal('rbdir', Globals.rbdir) > I think it might be cleaner to separate this code into two steps: elif check_failed_initial_backup(rpout): fix_failed_initial_backup() so that we can work on the test and the fix separately. Andrew _______________________________________________ rdiff-backup-users mailing list at rdiff-backup-users@... http://lists.nongnu.org/mailman/listinfo/rdiff-backup-users Wiki URL: http://rdiff-backup.solutionsfirst.com.au/index.php/RdiffBackupWiki |
| Free Forum Powered by Nabble | Forum Help |