Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-worker-refactor into lp:launchpad with lp:~cjwatson/launchpad/codeimport-source-details-refactor as a prerequisite.
Commit message: Refactor various bits of the code import worker to be less Bazaar-specific. Requested reviews: Launchpad code reviewers (launchpad-reviewers) Related bugs: Bug #1469459 in Launchpad itself: "import external code into a LP git repo (natively)" https://bugs.launchpad.net/launchpad/+bug/1469459 For more details, see: https://code.launchpad.net/~cjwatson/launchpad/codeimport-worker-refactor/+merge/308122 -- Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/codeimport-worker-refactor into lp:launchpad.
=== modified file 'lib/lp/code/xmlrpc/codeimportscheduler.py' --- lib/lp/code/xmlrpc/codeimportscheduler.py 2016-10-11 13:46:57 +0000 +++ lib/lp/code/xmlrpc/codeimportscheduler.py 2016-10-11 13:46:57 +0000 @@ -69,10 +69,10 @@ job = self._getJob(job_id) arguments = CodeImportSourceDetails.fromCodeImportJob( job).asArguments() - branch = job.code_import.branch - branch_url = canonical_url(branch) - log_file_name = '%s.log' % branch.unique_name[1:].replace('/', '-') - return (arguments, branch_url, log_file_name) + target = job.code_import.target + target_url = canonical_url(target) + log_file_name = '%s.log' % target.unique_name[1:].replace('/', '-') + return (arguments, target_url, log_file_name) @return_fault def _updateHeartbeat(self, job_id, log_tail): === modified file 'lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py' --- lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py 2016-10-11 13:46:57 +0000 +++ lib/lp/code/xmlrpc/tests/test_codeimportscheduler.py 2016-10-11 13:46:57 +0000 @@ -57,20 +57,20 @@ self.assertEqual(code_import_job.id, job_id) def test_getImportDataForJobID(self): - # getImportDataForJobID returns the worker arguments, branch url and + # getImportDataForJobID returns the worker arguments, target url and # log file name for an import corresponding to a particular job. code_import_job = self.makeCodeImportJob(running=True) code_import = removeSecurityProxy(code_import_job).code_import - code_import_arguments, branch_url, log_file_name = \ + code_import_arguments, target_url, log_file_name = \ self.api.getImportDataForJobID(code_import_job.id) import_as_arguments = CodeImportSourceDetails.fromCodeImportJob( code_import_job).asArguments() expected_log_file_name = '%s.log' % ( - code_import.branch.unique_name[1:].replace('/', '-')) + code_import.target.unique_name[1:].replace('/', '-')) self.assertEqual( - (import_as_arguments, canonical_url(code_import.branch), + (import_as_arguments, canonical_url(code_import.target), expected_log_file_name), - (code_import_arguments, branch_url, log_file_name)) + (code_import_arguments, target_url, log_file_name)) def test_getImportDataForJobID_not_found(self): # getImportDataForJobID returns a NoSuchCodeImportJob fault when there === modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py' --- lib/lp/codehosting/codeimport/tests/test_worker.py 2016-10-11 13:46:57 +0000 +++ lib/lp/codehosting/codeimport/tests/test_worker.py 2016-10-11 13:46:57 +0000 @@ -78,7 +78,7 @@ get_default_bazaar_branch_store, GitImportWorker, ImportDataStore, - ImportWorker, + ToBzrImportWorker, ) from lp.codehosting.safe_open import ( AcceptAnythingPolicy, @@ -428,7 +428,7 @@ source_details = self.factory.makeCodeImportSourceDetails() # That the remote name is like this is part of the interface of # ImportDataStore. - remote_name = '%08x.tar.gz' % (source_details.branch_id,) + remote_name = '%08x.tar.gz' % (source_details.target_id,) local_name = '%s.tar.gz' % (self.factory.getUniqueString(),) transport = self.get_transport() transport.put_bytes(remote_name, '') @@ -442,7 +442,7 @@ source_details = self.factory.makeCodeImportSourceDetails() # That the remote name is like this is part of the interface of # ImportDataStore. - remote_name = '%08x.tar.gz' % (source_details.branch_id,) + remote_name = '%08x.tar.gz' % (source_details.target_id,) content = self.factory.getUniqueString() transport = self.get_transport() transport.put_bytes(remote_name, content) @@ -457,7 +457,7 @@ source_details = self.factory.makeCodeImportSourceDetails() # That the remote name is like this is part of the interface of # ImportDataStore. - remote_name = '%08x.tar.gz' % (source_details.branch_id,) + remote_name = '%08x.tar.gz' % (source_details.target_id,) content = self.factory.getUniqueString() transport = self.get_transport() transport.put_bytes(remote_name, content) @@ -481,7 +481,7 @@ store.put(local_name) # That the remote name is like this is part of the interface of # ImportDataStore. - remote_name = '%08x.tar.gz' % (source_details.branch_id,) + remote_name = '%08x.tar.gz' % (source_details.target_id,) self.assertEquals(content, transport.get_bytes(remote_name)) def test_put_ensures_base(self): @@ -509,7 +509,7 @@ store.put(local_name, self.get_transport(local_prefix)) # That the remote name is like this is part of the interface of # ImportDataStore. - remote_name = '%08x.tar.gz' % (source_details.branch_id,) + remote_name = '%08x.tar.gz' % (source_details.target_id,) self.assertEquals(content, transport.get_bytes(remote_name)) @@ -585,8 +585,8 @@ transport = store.import_data_store._transport source_details = store.import_data_store.source_details self.assertTrue( - transport.has('%08x.tar.gz' % source_details.branch_id), - "Couldn't find '%08x.tar.gz'" % source_details.branch_id) + transport.has('%08x.tar.gz' % source_details.target_id), + "Couldn't find '%08x.tar.gz'" % source_details.target_id) def test_fetchFromArchiveFailure(self): # If a tree has not been archived yet, but we try to retrieve it from @@ -631,7 +631,7 @@ def makeImportWorker(self): """Make an ImportWorker.""" - return ImportWorker( + return ToBzrImportWorker( self.source_details, self.get_transport('import_data'), self.makeBazaarBranchStore(), logging.getLogger("silent"), AcceptAnythingPolicy()) @@ -728,7 +728,7 @@ import_worker.import_data_store.put('git.db') # Make sure there's a Bazaar branch in the branch store. branch = self.make_branch('branch') - ImportWorker.pushBazaarBranch(import_worker, branch) + ToBzrImportWorker.pushBazaarBranch(import_worker, branch) # Finally, fetching the tree gets the git.db file too. branch = import_worker.getBazaarBranch() self.assertEqual( @@ -747,7 +747,7 @@ import_worker.import_data_store.put('git-cache.tar.gz') # Make sure there's a Bazaar branch in the branch store. branch = self.make_branch('branch') - ImportWorker.pushBazaarBranch(import_worker, branch) + ToBzrImportWorker.pushBazaarBranch(import_worker, branch) # Finally, fetching the tree gets the git.db file too. new_branch = import_worker.getBazaarBranch() self.assertEqual( @@ -767,13 +767,13 @@ :source_details: A `CodeImportSourceDetails` describing the import. """ tree_transport = get_transport(config.codeimport.foreign_tree_store) - prefix = '%08x' % source_details.branch_id + prefix = '%08x' % source_details.target_id if tree_transport.has('.'): for filename in tree_transport.list_dir('.'): if filename.startswith(prefix): tree_transport.delete(filename) branchstore = get_default_bazaar_branch_store() - branch_name = '%08x' % source_details.branch_id + branch_name = '%08x' % source_details.target_id if branchstore.transport.has(branch_name): branchstore.transport.delete_tree(branch_name) @@ -822,7 +822,7 @@ """Get the Bazaar branch 'worker' stored into its BazaarBranchStore. """ branch_url = worker.bazaar_branch_store._getMirrorURL( - worker.source_details.branch_id) + worker.source_details.target_id) return Branch.open(branch_url) def test_import(self): @@ -885,7 +885,7 @@ self.addCleanup(lambda: shutil.rmtree(tree_path)) branch_url = get_default_bazaar_branch_store()._getMirrorURL( - source_details.branch_id) + source_details.target_id) branch = Branch.open(branch_url) self.assertEqual(self.foreign_commit_count, branch.revno()) @@ -1345,7 +1345,7 @@ self.assertEqual( CodeImportWorkerExitCode.SUCCESS, worker.run()) branch_url = self.bazaar_store._getMirrorURL( - worker.source_details.branch_id) + worker.source_details.target_id) branch = Branch.open(branch_url) self.assertEquals(self.revid, branch.last_revision()) === modified file 'lib/lp/codehosting/codeimport/tests/test_workermonitor.py' --- lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-10-11 13:46:57 +0000 +++ lib/lp/codehosting/codeimport/tests/test_workermonitor.py 2016-10-11 13:46:57 +0000 @@ -241,21 +241,21 @@ return worker_monitor.getWorkerArguments().addCallback( self.assertEqual, args) - def test_getWorkerArguments_sets_branch_url_and_logfilename(self): - # getWorkerArguments sets the _branch_url (for use in oops reports) + def test_getWorkerArguments_sets_target_url_and_logfilename(self): + # getWorkerArguments sets the _target_url (for use in oops reports) # and _log_file_name (for upload to the librarian) attributes on the # WorkerMonitor from the data returned by getImportDataForJobID. - branch_url = self.factory.getUniqueString() + target_url = self.factory.getUniqueString() log_file_name = self.factory.getUniqueString() worker_monitor = self.makeWorkerMonitorWithJob( - 1, (['a'], branch_url, log_file_name)) + 1, (['a'], target_url, log_file_name)) def check_branch_log(ignored): # Looking at the _ attributes here is in slightly poor taste, but # much much easier than them by logging and parsing an oops, etc. self.assertEqual( - (branch_url, log_file_name), - (worker_monitor._branch_url, worker_monitor._log_file_name)) + (target_url, log_file_name), + (worker_monitor._target_url, worker_monitor._log_file_name)) return worker_monitor.getWorkerArguments().addCallback( check_branch_log) === modified file 'lib/lp/codehosting/codeimport/worker.py' --- lib/lp/codehosting/codeimport/worker.py 2016-10-11 13:46:57 +0000 +++ lib/lp/codehosting/codeimport/worker.py 2016-10-11 13:46:57 +0000 @@ -15,6 +15,7 @@ 'ForeignTreeStore', 'GitImportWorker', 'ImportWorker', + 'ToBzrImportWorker', 'get_default_bazaar_branch_store', ] @@ -268,18 +269,20 @@ of the information suitable for passing around on executables' command lines. - :ivar branch_id: The id of the branch associated to this code import, used - for locating the existing import and the foreign tree. + :ivar target_id: The id of the Bazaar branch associated with this code + import, used for locating the existing import and the foreign tree. :ivar rcstype: 'cvs', 'git', 'bzr-svn', 'bzr' as appropriate. :ivar url: The branch URL if rcstype in ['bzr-svn', 'git', 'bzr'], None otherwise. :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise. :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise. + :ivar stacked_on_url: The URL of the branch that the associated branch + is stacked on, if any. """ - def __init__(self, branch_id, rcstype, url=None, cvs_root=None, + def __init__(self, target_id, rcstype, url=None, cvs_root=None, cvs_module=None, stacked_on_url=None): - self.branch_id = branch_id + self.target_id = target_id self.rcstype = rcstype self.url = url self.cvs_root = cvs_root @@ -289,7 +292,7 @@ @classmethod def fromArguments(cls, arguments): """Convert command line-style arguments to an instance.""" - branch_id = int(arguments.pop(0)) + target_id = int(arguments.pop(0)) rcstype = arguments.pop(0) if rcstype in ['bzr-svn', 'git', 'bzr']: url = arguments.pop(0) @@ -305,34 +308,34 @@ else: raise AssertionError("Unknown rcstype %r." % rcstype) return cls( - branch_id, rcstype, url, cvs_root, cvs_module, stacked_on_url) + target_id, rcstype, url, cvs_root, cvs_module, stacked_on_url) @classmethod def fromCodeImportJob(cls, job): """Convert a `CodeImportJob` to an instance.""" code_import = job.code_import - branch = code_import.branch - if branch.stacked_on is not None and not branch.stacked_on.private: - stacked_path = branch_id_alias(branch.stacked_on) + target = code_import.target + if target.stacked_on is not None and not target.stacked_on.private: + stacked_path = branch_id_alias(target.stacked_on) stacked_on_url = compose_public_url('http', stacked_path) else: stacked_on_url = None if code_import.rcs_type == RevisionControlSystems.BZR_SVN: return cls( - branch.id, 'bzr-svn', str(code_import.url), + target.id, 'bzr-svn', str(code_import.url), stacked_on_url=stacked_on_url) elif code_import.rcs_type == RevisionControlSystems.CVS: return cls( - branch.id, 'cvs', + target.id, 'cvs', cvs_root=str(code_import.cvs_root), cvs_module=str(code_import.cvs_module)) elif code_import.rcs_type == RevisionControlSystems.GIT: return cls( - branch.id, 'git', str(code_import.url), + target.id, 'git', str(code_import.url), stacked_on_url=stacked_on_url) elif code_import.rcs_type == RevisionControlSystems.BZR: return cls( - branch.id, 'bzr', str(code_import.url), + target.id, 'bzr', str(code_import.url), stacked_on_url=stacked_on_url) else: raise AssertionError("Unknown rcstype %r." % code_import.rcs_type) @@ -340,7 +343,7 @@ def asArguments(self): """Return a list of arguments suitable for passing to a child process. """ - result = [str(self.branch_id), self.rcstype] + result = [str(self.target_id), self.rcstype] if self.rcstype in ['bzr-svn', 'git', 'bzr']: result.append(self.url) if self.stacked_on_url is not None: @@ -374,7 +377,7 @@ """ self.source_details = source_details self._transport = transport - self._branch_id = source_details.branch_id + self._target_id = source_details.target_id def _getRemoteName(self, local_name): """Convert `local_name` to the name used to store a file. @@ -394,7 +397,7 @@ if dot_index < 0: raise AssertionError("local_name must have an extension.") ext = local_name[dot_index:] - return '%08x%s' % (self._branch_id, ext) + return '%08x%s' % (self._target_id, ext) def fetch(self, filename, dest_transport=None): """Retrieve `filename` from the store. @@ -505,57 +508,23 @@ class ImportWorker: """Oversees the actual work of a code import.""" - # Where the Bazaar working tree will be stored. - BZR_BRANCH_PATH = 'bzr_branch' - - # Should `getBazaarBranch` create a working tree? - needs_bzr_tree = True - - required_format = BzrDirFormat.get_default_format() - - def __init__(self, source_details, import_data_transport, - bazaar_branch_store, logger, opener_policy): + def __init__(self, source_details, logger, opener_policy): """Construct an `ImportWorker`. :param source_details: A `CodeImportSourceDetails` object. - :param bazaar_branch_store: A `BazaarBranchStore`. The import worker - uses this to fetch and store the Bazaar branches that are created - and updated during the import process. :param logger: A `Logger` to pass to cscvs. :param opener_policy: Policy object that decides what branches can be imported """ self.source_details = source_details - self.bazaar_branch_store = bazaar_branch_store - self.import_data_store = ImportDataStore( - import_data_transport, self.source_details) self._logger = logger self._opener_policy = opener_policy - def getBazaarBranch(self): - """Return the Bazaar `Branch` that we are importing into.""" - if os.path.isdir(self.BZR_BRANCH_PATH): - shutil.rmtree(self.BZR_BRANCH_PATH) - return self.bazaar_branch_store.pull( - self.source_details.branch_id, self.BZR_BRANCH_PATH, - self.required_format, self.needs_bzr_tree, - stacked_on_url=self.source_details.stacked_on_url) - - def pushBazaarBranch(self, bazaar_branch): - """Push the updated Bazaar branch to the server. - - :return: True if revisions were transferred. - """ - return self.bazaar_branch_store.push( - self.source_details.branch_id, bazaar_branch, - self.required_format, - stacked_on_url=self.source_details.stacked_on_url) - def getWorkingDirectory(self): """The directory we should change to and store all scratch files in. """ base = config.codeimportworker.working_directory_root - dirname = 'worker-for-branch-%s' % self.source_details.branch_id + dirname = 'worker-for-branch-%s' % self.source_details.target_id return os.path.join(base, dirname) def run(self): @@ -594,7 +563,56 @@ raise NotImplementedError() -class CSCVSImportWorker(ImportWorker): +class ToBzrImportWorker(ImportWorker): + """Oversees the actual work of a code import to Bazaar.""" + + # Where the Bazaar working tree will be stored. + BZR_BRANCH_PATH = 'bzr_branch' + + # Should `getBazaarBranch` create a working tree? + needs_bzr_tree = True + + required_format = BzrDirFormat.get_default_format() + + def __init__(self, source_details, import_data_transport, + bazaar_branch_store, logger, opener_policy): + """Construct a `ToBzrImportWorker`. + + :param source_details: A `CodeImportSourceDetails` object. + :param bazaar_branch_store: A `BazaarBranchStore`. The import worker + uses this to fetch and store the Bazaar branches that are created + and updated during the import process. + :param logger: A `Logger` to pass to cscvs. + :param opener_policy: Policy object that decides what branches can + be imported + """ + super(ToBzrImportWorker, self).__init__( + source_details, logger, opener_policy) + self.bazaar_branch_store = bazaar_branch_store + self.import_data_store = ImportDataStore( + import_data_transport, self.source_details) + + def getBazaarBranch(self): + """Return the Bazaar `Branch` that we are importing into.""" + if os.path.isdir(self.BZR_BRANCH_PATH): + shutil.rmtree(self.BZR_BRANCH_PATH) + return self.bazaar_branch_store.pull( + self.source_details.target_id, self.BZR_BRANCH_PATH, + self.required_format, self.needs_bzr_tree, + stacked_on_url=self.source_details.stacked_on_url) + + def pushBazaarBranch(self, bazaar_branch): + """Push the updated Bazaar branch to the server. + + :return: True if revisions were transferred. + """ + return self.bazaar_branch_store.push( + self.source_details.target_id, bazaar_branch, + self.required_format, + stacked_on_url=self.source_details.stacked_on_url) + + +class CSCVSImportWorker(ToBzrImportWorker): """An ImportWorker for imports that use CSCVS. As well as invoking cscvs to do the import, this class also needs to @@ -674,7 +692,7 @@ return CodeImportWorkerExitCode.SUCCESS_NOCHANGE -class PullingImportWorker(ImportWorker): +class PullingImportWorker(ToBzrImportWorker): """An import worker for imports that can be done by a bzr plugin. Subclasses need to implement `probers`. @@ -806,7 +824,7 @@ return config.codeimport.git_revisions_import_limit def getBazaarBranch(self): - """See `ImportWorker.getBazaarBranch`. + """See `ToBzrImportWorker.getBazaarBranch`. In addition to the superclass' behaviour, we retrieve bzr-git's caches, both legacy and modern, from the import data store and put @@ -828,7 +846,7 @@ return branch def pushBazaarBranch(self, bazaar_branch): - """See `ImportWorker.pushBazaarBranch`. + """See `ToBzrImportWorker.pushBazaarBranch`. In addition to the superclass' behaviour, we store bzr-git's cache directory at .bzr/repository/git in the import data store. === modified file 'lib/lp/codehosting/codeimport/workermonitor.py' --- lib/lp/codehosting/codeimport/workermonitor.py 2013-01-07 02:40:55 +0000 +++ lib/lp/codehosting/codeimport/workermonitor.py 2016-10-11 13:46:57 +0000 @@ -134,7 +134,7 @@ self.codeimport_endpoint = codeimport_endpoint self._call_finish_job = True self._log_file = tempfile.TemporaryFile() - self._branch_url = None + self._target_url = None self._log_file_name = 'no-name-set.txt' self._access_policy = access_policy @@ -143,7 +143,7 @@ context = { 'twisted_failure': failure, 'http_request': errorlog.ScriptRequest( - [('code_import_job_id', self._job_id)], self._branch_url), + [('code_import_job_id', self._job_id)], self._target_url), } report = config.create(context) @@ -169,15 +169,15 @@ def getWorkerArguments(self): """Get arguments for the worker for the import we are working on. - This also sets the _branch_url and _log_file_name attributes for use + This also sets the _target_url and _log_file_name attributes for use in the _logOopsFromFailure and finishJob methods respectively. """ deferred = self.codeimport_endpoint.callRemote( 'getImportDataForJobID', self._job_id) def _processResult(result): - code_import_arguments, branch_url, log_file_name = result - self._branch_url = branch_url + code_import_arguments, target_url, log_file_name = result + self._target_url = target_url self._log_file_name = log_file_name self._logger.info( 'Found source details: %s', code_import_arguments) === modified file 'lib/lp/testing/factory.py' --- lib/lp/testing/factory.py 2016-10-03 17:00:56 +0000 +++ lib/lp/testing/factory.py 2016-10-11 13:46:57 +0000 @@ -492,11 +492,11 @@ epoch = datetime(2009, 1, 1, tzinfo=pytz.UTC) return epoch + timedelta(minutes=self.getUniqueInteger()) - def makeCodeImportSourceDetails(self, branch_id=None, rcstype=None, + def makeCodeImportSourceDetails(self, target_id=None, rcstype=None, url=None, cvs_root=None, cvs_module=None, stacked_on_url=None): - if branch_id is None: - branch_id = self.getUniqueInteger() + if target_id is None: + target_id = self.getUniqueInteger() if rcstype is None: rcstype = 'bzr-svn' if rcstype in ['bzr-svn', 'bzr']: @@ -516,7 +516,7 @@ else: raise AssertionError("Unknown rcstype %r." % rcstype) return CodeImportSourceDetails( - branch_id, rcstype, url, cvs_root, cvs_module, + target_id, rcstype, url, cvs_root, cvs_module, stacked_on_url=stacked_on_url)
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp