[Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad
The proposal to merge lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad has been updated. Status: Needs review => Merged For more details, see: https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142 -- Your team Launchpad code reviewers is subscribed to branch lp:launchpad. ___ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp
Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad
Review: Approve code Diff comments: > > === modified file 'lib/lp/codehosting/codeimport/worker.py' > --- lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > +++ lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > @@ -918,3 +973,67 @@ > """See `PullingImportWorker.probers`.""" > from bzrlib.bzrdir import BzrProber, RemoteBzrProber > return [BzrProber, RemoteBzrProber] > + > + > +class GitToGitImportWorker(ImportWorker): > +"""An import worker for imports from Git to Git.""" > + > +def _runGit(self, *args, **kwargs): > +"""Run git with arguments, sending output to the logger.""" > +cmd = ["git"] + list(args) > +git_process = subprocess.Popen( > +cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs) > +for line in git_process.stdout: > +line = line.decode("UTF-8", "replace").rstrip("\n") > +# Remove any user/password data from URLs. > +line = re.sub(r"://([^:]*:[^@]*@)(\S+)", r"://\2", line) > +self._logger.info(line) > +retcode = git_process.wait() > +if retcode: > +raise subprocess.CalledProcessError(retcode, cmd) > + > +def _doImport(self): > +self._logger.info("Starting job.") > +self._logger.info(config.codeimport.git_repository_store) > +try: > +self._opener_policy.checkOneURL(self.source_details.url) When we move these to prodstack we should probably set up some more sensible outbound rules, but I guess it'll do for now. Hopefully git can't be confused into emitting random HTTP things to stderr. > +except BadUrl as e: > +self._logger.info("Invalid URL: %s" % e) > +return CodeImportWorkerExitCode.FAILURE_FORBIDDEN > +unauth_target_url = urljoin( > +config.codeimport.git_repository_store, > +self.source_details.target_id) > +split = urlsplit(unauth_target_url) > +if split.hostname: > +target_netloc = ":%s@%s" % ( > +self.source_details.macaroon.serialize(), split.hostname) > +else: > +target_netloc = "" > +target_url = urlunsplit([ > +split.scheme, target_netloc, split.path, "", ""]) > +# XXX cjwatson 2016-10-11: Ideally we'd put credentials in a > +# credentials store instead. However, git only accepts credentials > +# that have both a non-empty username and a non-empty password. > +self._logger.info("Getting existing repository from hosting > service.") > +try: > +self._runGit("clone", "--bare", target_url, "repository") > +except subprocess.CalledProcessError as e: > +self._logger.info( > +"Unable to get existing repository from hosting service: " > +"%s" % e) > +return CodeImportWorkerExitCode.FAILURE > +self._logger.info("Fetching remote repository.") > +try: > +self._runGit( > +"remote", "add", "-f", "--mirror=fetch", > +"mirror", self.source_details.url, cwd="repository") HEAD handling is always atrocious... but I hadn't realised git-remote fiddled around so manually in ways that aren't exposed elsewhere. > +except subprocess.CalledProcessError as e: > +self._logger.info("Unable to fetch remote repository: %s" % e) > +return CodeImportWorkerExitCode.FAILURE_INVALID > +self._logger.info("Pushing repository to hosting service.") > +try: > +self._runGit("push", "--mirror", target_url, cwd="repository") > +except subprocess.CalledProcessError as e: > +self._logger.info("Unable to push to hosting service: %s" % e) > +return CodeImportWorkerExitCode.FAILURE > +return CodeImportWorkerExitCode.SUCCESS > > === modified file 'lib/lp/services/config/schema-lazr.conf' > --- lib/lp/services/config/schema-lazr.conf 2016-10-11 15:36:32 + > +++ lib/lp/services/config/schema-lazr.conf 2016-10-11 15:36:32 + > @@ -417,7 +421,7 @@ > svn_revisions_import_limit: 500 > > # Secret key for macaroons used to grant git push permission to workers. > -macaroon_secret_key: > +macaroon_secret_key: none I have no words, but carry on. > > [codeimportdispatcher] > # The directory where the code import worker should be directed to -- https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142 Your team Launchpad code reviewers is subscribed to branch lp:launchpad. ___ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp
Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad
I've sprinkled a bunch of extra comments around in various places. Diff comments: > > === modified file 'lib/lp/codehosting/codeimport/worker.py' > --- lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > +++ lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > @@ -918,3 +973,67 @@ > """See `PullingImportWorker.probers`.""" > from bzrlib.bzrdir import BzrProber, RemoteBzrProber > return [BzrProber, RemoteBzrProber] > + > + > +class GitToGitImportWorker(ImportWorker): > +"""An import worker for imports from Git to Git.""" > + > +def _runGit(self, *args, **kwargs): > +"""Run git with arguments, sending output to the logger.""" > +cmd = ["git"] + list(args) > +git_process = subprocess.Popen( > +cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs) > +for line in git_process.stdout: > +line = line.decode("UTF-8", "replace").rstrip("\n") > +# Remove any user/password data from URLs. > +line = re.sub(r"://([^:]*:[^@]*@)(\S+)", r"://\2", line) > +self._logger.info(line) > +retcode = git_process.wait() > +if retcode: > +raise subprocess.CalledProcessError(retcode, cmd) > + > +def _doImport(self): > +self._logger.info("Starting job.") > +self._logger.info(config.codeimport.git_repository_store) > +try: > +self._opener_policy.checkOneURL(self.source_details.url) As far as I can see from http.c, git follows up to 20 redirects as long as they're to http/https/ftp/ftps schemes, and there's no way to configure this. The only things that this would let through that checkOneURL tries to forbid would be redirects to git.launchpad.net and redirects to a blacklisted hostname, which in practice is always localhost or 127.0.0.1. Perhaps we can get away with this state for the time being? Redirects to git.launchpad.net would basically be a slightly annoying waste of resources but nothing more, while redirects to localhost should in practice be harmless given that code import workers don't run web or FTP servers. The only way I can think of to improve isolation here would be to direct all git requests through a proxy, which would be a non-trivial chunk of work. > +except BadUrl as e: > +self._logger.info("Invalid URL: %s" % e) > +return CodeImportWorkerExitCode.FAILURE_FORBIDDEN > +unauth_target_url = urljoin( > +config.codeimport.git_repository_store, > +self.source_details.target_id) > +split = urlsplit(unauth_target_url) > +if split.hostname: > +target_netloc = ":%s@%s" % ( > +self.source_details.macaroon.serialize(), split.hostname) > +else: > +target_netloc = "" > +target_url = urlunsplit([ > +split.scheme, target_netloc, split.path, "", ""]) > +# XXX cjwatson 2016-10-11: Ideally we'd put credentials in a > +# credentials store instead. However, git only accepts credentials > +# that have both a non-empty username and a non-empty password. > +self._logger.info("Getting existing repository from hosting > service.") > +try: > +self._runGit("clone", "--bare", target_url, "repository") > +except subprocess.CalledProcessError as e: > +self._logger.info( > +"Unable to get existing repository from hosting service: " > +"%s" % e) > +return CodeImportWorkerExitCode.FAILURE > +self._logger.info("Fetching remote repository.") > +try: > +self._runGit( > +"remote", "add", "-f", "--mirror=fetch", > +"mirror", self.source_details.url, cwd="repository") More or less, with the addition of --prune. Though good grief HEAD handling is horrible, but I've just added a comment about that for now. > +except subprocess.CalledProcessError as e: > +self._logger.info("Unable to fetch remote repository: %s" % e) > +return CodeImportWorkerExitCode.FAILURE_INVALID > +self._logger.info("Pushing repository to hosting service.") > +try: > +self._runGit("push", "--mirror", target_url, cwd="repository") > +except subprocess.CalledProcessError as e: > +self._logger.info("Unable to push to hosting service: %s" % e) > +return CodeImportWorkerExitCode.FAILURE > +return CodeImportWorkerExitCode.SUCCESS > > === modified file 'lib/lp/services/config/schema-lazr.conf' > --- lib/lp/services/config/schema-lazr.conf 2016-10-11 15:36:32 + > +++ lib/lp/services/config/schema-lazr.conf 2016-10-11 15:36:32 + > @@ -417,7 +421,7 @@ > svn_revisions_import_limit: 500 > > # Secret key for macaroons used to grant git push permission to workers. > -macaroon_secret_key: >
Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad
Review: Approve code Diff comments: > === modified file 'configs/development/launchpad-lazr.conf' > --- configs/development/launchpad-lazr.conf 2016-06-30 16:05:11 + > +++ configs/development/launchpad-lazr.conf 2016-10-11 15:36:32 + > @@ -55,6 +55,7 @@ > > [codeimport] > bazaar_branch_store: file:///tmp/bazaar-branches > +git_repository_store: https://git.launchpad.dev/ It's entirely confusing that git_repository_store and bazaar_branch_store are pretty radically different things. bazaar_branch_store is the intermediate tree on maple (formerly escudero), whereas git_repository_store is the actual hosting tree. The lack of puller for git imports is certainly a relief, but the chance of this not causing someone to faceplant is tiny. A comment might be sufficient, but could we also get away with using the existing git_browse_root (GitRepository.git_https_url would be ideal, but returns None for private repos which are technically a possibility for imports)? > foreign_tree_store: file:///tmp/foreign-branches > > [codeimportdispatcher] > > === modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py' > --- lib/lp/codehosting/codeimport/tests/test_worker.py2016-10-11 > 15:36:32 + > +++ lib/lp/codehosting/codeimport/tests/test_worker.py2016-10-11 > 15:36:32 + > @@ -1416,6 +1453,19 @@ > str(code_import.branch.id), 'git', > 'git://git.example.com/project.git'])) > > +def test_git_to_git_arguments(self): > +self.pushConfig('codeimport', macaroon_secret_key='some-secret') > +self.useFixture(GitHostingFixture()) > +code_import = self.factory.makeCodeImport( > +git_repo_url="git://git.example.com/project.git", > +target_rcs_type=TargetRevisionControlSystems.GIT) > +self.assertArgumentsMatch( > +code_import, MatchesListwise([ > +Equals(code_import.git_repository.unique_name), > +Equals('git:git'), > Equals('git://git.example.com/project.git'), > +CodeImportJobMacaroonVerifies(code_import.git_repository)]), > +start_job=True) A long time in the future in an editor far, far away: "Why would I want to start the job? Silly test author, I'll drop that pointlessness while I'm refactoring. Why is the macaroon verification silently failing now?" > + > def test_cvs_arguments(self): > code_import = self.factory.makeCodeImport( > cvs_root=':pserver:f...@example.com/bar', cvs_module='bar') > > === modified file 'lib/lp/codehosting/codeimport/worker.py' > --- lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > +++ lib/lp/codehosting/codeimport/worker.py 2016-10-11 15:36:32 + > @@ -269,36 +282,50 @@ > of the information suitable for passing around on executables' command > lines. > > -:ivar target_id: The id of the Bazaar branch associated with this code > -import, used for locating the existing import and the foreign tree. > +:ivar target_id: The id of the Bazaar branch or the path of the Git > +repository associated with this code import, used for locating the > +existing import and the foreign tree. > :ivar rcstype: 'cvs', 'git', 'bzr-svn', 'bzr' as appropriate. > +:ivar target_rcstype: 'bzr' or 'git' as appropriate. > :ivar url: The branch URL if rcstype in ['bzr-svn', 'git', 'bzr'], None > otherwise. > :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise. > :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise. > :ivar stacked_on_url: The URL of the branch that the associated branch > is stacked on, if any. > +:ivar macaroon: A macaroon granting authority to push to the target > +repository if target_rcstype == 'git', None otherwise. > """ > > -def __init__(self, target_id, rcstype, url=None, cvs_root=None, > - cvs_module=None, stacked_on_url=None): > +def __init__(self, target_id, rcstype, target_rcstype, url=None, > + cvs_root=None, cvs_module=None, stacked_on_url=None, > + macaroon=None): > self.target_id = target_id > self.rcstype = rcstype > +self.target_rcstype = target_rcstype > self.url = url > self.cvs_root = cvs_root > self.cvs_module = cvs_module > self.stacked_on_url = stacked_on_url > +self.macaroon = macaroon > > @classmethod > def fromArguments(cls, arguments): > """Convert command line-style arguments to an instance.""" > -target_id = int(arguments.pop(0)) > +target_id = arguments.pop(0) > rcstype = arguments.pop(0) > +if ':' in rcstype: > +rcstype, target_rcstype = rcstype.split(':', 1) > +else: > +target_rcstype = 'bzr' XXX this for cleanup post-deployment? > if
[Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad
Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad with lp:~cjwatson/launchpad/codeimport-create-hosting as a prerequisite. Commit message: Add a Git-to-Git import worker. Requested reviews: Launchpad code reviewers (launchpad-reviewers) Related bugs: Bug #1469459 in Launchpad itself: "import external code into a LP git repo (natively)" https://bugs.launchpad.net/launchpad/+bug/1469459 For more details, see: https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142 This takes the easy approach for now: clone existing imported repository, fetch-mirror remote repository into it, and push it back. Once turnip has more advanced plumbing then we can always switch over to use that (although it would involve some work setting up a turnip fixture). -- Your team Launchpad code reviewers is requested to review the proposed merge of lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad. === modified file 'configs/development/launchpad-lazr.conf' --- configs/development/launchpad-lazr.conf 2016-06-30 16:05:11 + +++ configs/development/launchpad-lazr.conf 2016-10-11 15:36:32 + @@ -55,6 +55,7 @@ [codeimport] bazaar_branch_store: file:///tmp/bazaar-branches +git_repository_store: https://git.launchpad.dev/ foreign_tree_store: file:///tmp/foreign-branches [codeimportdispatcher] === modified file 'configs/testrunner-appserver/launchpad-lazr.conf' --- configs/testrunner-appserver/launchpad-lazr.conf 2014-02-27 08:39:44 + +++ configs/testrunner-appserver/launchpad-lazr.conf 2016-10-11 15:36:32 + @@ -8,6 +8,9 @@ [codehosting] launch: False +[codeimport] +macaroon_secret_key: dev-macaroon-secret + [google_test_service] launch: False === modified file 'lib/lp/codehosting/codeimport/tests/servers.py' --- lib/lp/codehosting/codeimport/tests/servers.py 2016-02-05 16:51:12 + +++ lib/lp/codehosting/codeimport/tests/servers.py 2016-10-11 15:36:32 + @@ -253,12 +253,15 @@ else: return local_path_to_url(self.repository_path) -def createRepository(self, path): -GitRepo.init(path) +def createRepository(self, path, bare=False): +if bare: +GitRepo.init_bare(path) +else: +GitRepo.init(path) -def start_server(self): +def start_server(self, bare=False): super(GitServer, self).start_server() -self.createRepository(self.repository_path) +self.createRepository(self.repository_path, bare=bare) if self._use_server: repo = GitRepo(self.repository_path) self._server = TCPGitServerThread( === modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py' --- lib/lp/codehosting/codeimport/tests/test_worker.py 2016-10-11 15:36:32 + +++ lib/lp/codehosting/codeimport/tests/test_worker.py 2016-10-11 15:36:32 + @@ -45,16 +45,26 @@ ) from dulwich.repo import Repo as GitRepo from fixtures import FakeLogger +from pymacaroons import Macaroon import subvertpy import subvertpy.client import subvertpy.ra -from testtools.matchers import Equals +from testtools.matchers import ( +Equals, +Matcher, +MatchesListwise, +Mismatch, +) +from zope.component import getUtility from lp.app.enums import InformationType +from lp.code.enums import TargetRevisionControlSystems from lp.code.interfaces.codehosting import ( branch_id_alias, compose_public_url, ) +from lp.code.interfaces.codeimportjob import ICodeImportJobWorkflow +from lp.code.tests.helpers import GitHostingFixture import lp.codehosting from lp.codehosting.codeimport.tarball import ( create_tarball, @@ -90,7 +100,9 @@ from lp.codehosting.tests.helpers import create_branch_with_one_revision from lp.services.config import config from lp.services.log.logger import BufferLogger +from lp.services.macaroons.interfaces import IMacaroonIssuer from lp.testing import ( +celebrity_logged_in, TestCase, TestCaseWithFactory, ) @@ -1049,7 +1061,7 @@ worker = self.makeImportWorker( self.factory.makeCodeImportSourceDetails( rcstype=self.rcstype, url="file:///local/path"), -opener_policy=CodeImportBranchOpenPolicy("bzr")) +opener_policy=CodeImportBranchOpenPolicy("bzr", "bzr")) self.assertEqual( CodeImportWorkerExitCode.FAILURE_FORBIDDEN, worker.run()) @@ -1285,7 +1297,7 @@ def setUp(self): super(CodeImportBranchOpenPolicyTests, self).setUp() -self.policy = CodeImportBranchOpenPolicy("bzr") +self.policy = CodeImportBranchOpenPolicy("bzr", "bzr") def test_follows_references(self): self.assertEquals(True, self.policy.shouldFollowReferences()) @@ -1309,14 +1321,22 @@ self.assertGoodUrl("bzr://bzr.example.com/somebzrurl/") self.assertBadUrl("bzr://bazaar.launchpad.dev/example") -def