İlker Emre Koç has proposed merging ~ilkeremrekoc/launchpad:librarian-gc-file-dryrun into launchpad:master.
Commit message: Add dry-run to librarian-gc file deletions Requested reviews: Launchpad code reviewers (launchpad-reviewers) For more details, see: https://code.launchpad.net/~ilkeremrekoc/launchpad/+git/launchpad/+merge/491422 To test a change to the librarian-gc in a safe manner, we need a dry-run to ensure nothing is deleted when we run the file deletion script. -- Your team Launchpad code reviewers is requested to review the proposed merge of ~ilkeremrekoc/launchpad:librarian-gc-file-dryrun into launchpad:master.
diff --git a/cronscripts/librarian-gc.py b/cronscripts/librarian-gc.py index 7f8d39e..b5aa082 100755 --- a/cronscripts/librarian-gc.py +++ b/cronscripts/librarian-gc.py @@ -24,6 +24,14 @@ class LibrarianGC(LaunchpadCronScript): def add_my_options(self): self.parser.add_option( "", + "--dry-run-files", + action="store_true", + default=False, + dest="dry_run_files", + help="Run over the file deletions without deleting any files", + ) + self.parser.add_option( + "", "--skip-duplicates", action="store_true", default=False, @@ -103,7 +111,7 @@ class LibrarianGC(LaunchpadCronScript): # Second sweep. librariangc.delete_unreferenced_content(conn) if not self.options.skip_files: - librariangc.delete_unwanted_files(conn) + librariangc.delete_unwanted_files(conn, self.options.dry_run_files) if __name__ == "__main__": diff --git a/lib/lp/services/librarianserver/librariangc.py b/lib/lp/services/librarianserver/librariangc.py index 5c04c55..1b82f11 100644 --- a/lib/lp/services/librarianserver/librariangc.py +++ b/lib/lp/services/librarianserver/librariangc.py @@ -669,22 +669,22 @@ def delete_unreferenced_content(con): loop_tuner.run() -def delete_unwanted_files(con): +def delete_unwanted_files(con, dry_run=False): con.rollback() orig_autocommit = con.autocommit try: # Disable autocommit so that we can use named cursors. con.autocommit = False - delete_unwanted_disk_files(con) + delete_unwanted_disk_files(con, dry_run) swift_enabled = getFeatureFlag("librarian.swift.enabled") or False if swift_enabled: - delete_unwanted_swift_files(con) + delete_unwanted_swift_files(con, dry_run) finally: con.rollback() con.autocommit = orig_autocommit -def delete_unwanted_disk_files(con): +def delete_unwanted_disk_files(con, dry_run=False): """Delete files found on disk that have no corresponding record in the database. @@ -693,7 +693,10 @@ def delete_unwanted_disk_files(con): the database records committed. """ - log.info("Deleting unwanted files from disk.") + if dry_run: + log.info("Dry run - not deleting any files from disk.") + else: + log.info("Deleting unwanted files from disk.") swift_enabled = getFeatureFlag("librarian.swift.enabled") or False @@ -819,8 +822,11 @@ def delete_unwanted_disk_files(con): ) else: # File uploaded a while ago but no longer wanted. - os.unlink(path) - log.debug3("Deleted %s" % path) + if dry_run: + log.info("Would delete %s" % path) + else: + os.unlink(path) + log.debug3("Deleted %s" % path) removed_count += 1 # Report any remaining LibraryFileContent that the database says @@ -837,10 +843,16 @@ def delete_unwanted_disk_files(con): next_wanted_content_id = next(content_id_iter, None) cur.close() - log.info( - "Deleted %d files from disk that were no longer referenced " - "in the db." % removed_count - ) + if dry_run: + log.info( + "Would have deleted %d files from disk that were no longer " + "referenced in the db." % removed_count + ) + else: + log.info( + "Deleted %d files from disk that were no longer referenced " + "in the db." % removed_count + ) def swift_files(max_lfc_id): @@ -896,11 +908,14 @@ def swift_files(max_lfc_id): seen_names.add((obj["name"], pool_index)) -def delete_unwanted_swift_files(con): +def delete_unwanted_swift_files(con, dry_run=False): """Delete files found in Swift that have no corresponding db record.""" assert getFeatureFlag("librarian.swift.enabled") - log.info("Deleting unwanted files from Swift.") + if dry_run: + log.info("Dry run - not deleting any files from Swift.") + else: + log.info("Deleting unwanted files from Swift.") # Get the largest LibraryFileContent id in the database. This lets # us know when to stop looking in Swift for more files. @@ -992,13 +1007,21 @@ def delete_unwanted_swift_files(con): else: with swift.connection(connection_pool) as swift_connection: try: - swift_connection.delete_object(container, name) - log.debug3( - "Deleted (%s, %s) from Swift (%s)", - container, - name, - connection_pool.os_auth_url, - ) + if dry_run: + log.info( + "Would delete (%s, %s) from Swift (%s)", + container, + name, + connection_pool.os_auth_url, + ) + else: + swift_connection.delete_object(container, name) + log.debug3( + "Deleted (%s, %s) from Swift (%s)", + container, + name, + connection_pool.os_auth_url, + ) except swiftclient.ClientException as e: if e.http_status != 404: log.exception( @@ -1032,10 +1055,18 @@ def delete_unwanted_swift_files(con): next_wanted_content_id = next(content_id_iter, None) cur.close() - log.info( - "Deleted {} files from Swift that were no longer referenced " - "in the db.".format(removed_count) - ) + + if dry_run: + log.info( + "Dry run - would have deleted {} files from Swift.".format( + removed_count + ) + ) + else: + log.info( + "Deleted {} files from Swift that were no longer referenced " + "in the db.".format(removed_count) + ) def get_file_path(content_id): diff --git a/lib/lp/services/librarianserver/tests/test_gc.py b/lib/lp/services/librarianserver/tests/test_gc.py index 23a7eac..efec95f 100644 --- a/lib/lp/services/librarianserver/tests/test_gc.py +++ b/lib/lp/services/librarianserver/tests/test_gc.py @@ -517,6 +517,59 @@ class TestLibrarianGarbageCollectionBase: for content_id in (row[0] for row in cur.fetchall()): self.assertTrue(self.file_exists(content_id)) + def test_delete_unwanted_files_dry_run(self): + # Test the --dry-run-files option to the librarian-gc script. + + self.ztm.begin() + cur = cursor() + + # We may find files in the LibraryFileContent repository + # that do not have an corresponding LibraryFileContent row. + + # Find a content_id we can easily delete and do so. This row is + # removed from the database, leaving an orphaned file on the + # filesystem that should be removed. + cur.execute( + """ + SELECT LibraryFileContent.id + FROM LibraryFileContent + LEFT OUTER JOIN LibraryFileAlias + ON LibraryFileContent.id = content + WHERE LibraryFileAlias.id IS NULL + LIMIT 1 + """ + ) + content_id = cur.fetchone()[0] + cur.execute( + """ + DELETE FROM LibraryFileContent WHERE id=%s + """, + (content_id,), + ) + self.ztm.commit() + + self.assertTrue(self.file_exists(content_id)) + + # To test removal does occur when we want it to, we need to trick + # the garbage collector into thinking it is tomorrow. + with self.librariangc_thinking_it_is_tomorrow(): + # Add the dry_run=True argument to ensure no files are deleted. + librariangc.delete_unwanted_files(self.con, dry_run=True) + + # The file should still be there. + self.assertTrue(self.file_exists(content_id)) + + # Make sure nothing else has been removed from disk + self.ztm.begin() + cur = cursor() + cur.execute( + """ + SELECT id FROM LibraryFileContent + """ + ) + for content_id in (row[0] for row in cur.fetchall()): + self.assertTrue(self.file_exists(content_id)) + def test_delete_unwanted_files_bug437084(self): # There was a bug where delete_unwanted_files() would die # if the last file found on disk was unwanted.
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp