[Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad

2016-10-13 Thread noreply
The proposal to merge lp:~cjwatson/launchpad/codeimport-git-worker into 
lp:launchpad has been updated.

Status: Needs review => Merged

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142
-- 
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.

___
Mailing list: https://launchpad.net/~launchpad-reviewers
Post to : launchpad-reviewers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~launchpad-reviewers
More help   : https://help.launchpad.net/ListHelp


Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad

2016-10-12 Thread William Grant
Review: Approve code



Diff comments:

> 
> === modified file 'lib/lp/codehosting/codeimport/worker.py'
> --- lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> +++ lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> @@ -918,3 +973,67 @@
>  """See `PullingImportWorker.probers`."""
>  from bzrlib.bzrdir import BzrProber, RemoteBzrProber
>  return [BzrProber, RemoteBzrProber]
> +
> +
> +class GitToGitImportWorker(ImportWorker):
> +"""An import worker for imports from Git to Git."""
> +
> +def _runGit(self, *args, **kwargs):
> +"""Run git with arguments, sending output to the logger."""
> +cmd = ["git"] + list(args)
> +git_process = subprocess.Popen(
> +cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)
> +for line in git_process.stdout:
> +line = line.decode("UTF-8", "replace").rstrip("\n")
> +# Remove any user/password data from URLs.
> +line = re.sub(r"://([^:]*:[^@]*@)(\S+)", r"://\2", line)
> +self._logger.info(line)
> +retcode = git_process.wait()
> +if retcode:
> +raise subprocess.CalledProcessError(retcode, cmd)
> +
> +def _doImport(self):
> +self._logger.info("Starting job.")
> +self._logger.info(config.codeimport.git_repository_store)
> +try:
> +self._opener_policy.checkOneURL(self.source_details.url)

When we move these to prodstack we should probably set up some more sensible 
outbound rules, but I guess it'll do for now. Hopefully git can't be confused 
into emitting random HTTP things to stderr.

> +except BadUrl as e:
> +self._logger.info("Invalid URL: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE_FORBIDDEN
> +unauth_target_url = urljoin(
> +config.codeimport.git_repository_store,
> +self.source_details.target_id)
> +split = urlsplit(unauth_target_url)
> +if split.hostname:
> +target_netloc = ":%s@%s" % (
> +self.source_details.macaroon.serialize(), split.hostname)
> +else:
> +target_netloc = ""
> +target_url = urlunsplit([
> +split.scheme, target_netloc, split.path, "", ""])
> +# XXX cjwatson 2016-10-11: Ideally we'd put credentials in a
> +# credentials store instead.  However, git only accepts credentials
> +# that have both a non-empty username and a non-empty password.
> +self._logger.info("Getting existing repository from hosting 
> service.")
> +try:
> +self._runGit("clone", "--bare", target_url, "repository")
> +except subprocess.CalledProcessError as e:
> +self._logger.info(
> +"Unable to get existing repository from hosting service: "
> +"%s" % e)
> +return CodeImportWorkerExitCode.FAILURE
> +self._logger.info("Fetching remote repository.")
> +try:
> +self._runGit(
> +"remote", "add", "-f", "--mirror=fetch",
> +"mirror", self.source_details.url, cwd="repository")

HEAD handling is always atrocious... but I hadn't realised git-remote fiddled 
around so manually in ways that aren't exposed elsewhere.

> +except subprocess.CalledProcessError as e:
> +self._logger.info("Unable to fetch remote repository: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE_INVALID
> +self._logger.info("Pushing repository to hosting service.")
> +try:
> +self._runGit("push", "--mirror", target_url, cwd="repository")
> +except subprocess.CalledProcessError as e:
> +self._logger.info("Unable to push to hosting service: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE
> +return CodeImportWorkerExitCode.SUCCESS
> 
> === modified file 'lib/lp/services/config/schema-lazr.conf'
> --- lib/lp/services/config/schema-lazr.conf   2016-10-11 15:36:32 +
> +++ lib/lp/services/config/schema-lazr.conf   2016-10-11 15:36:32 +
> @@ -417,7 +421,7 @@
>  svn_revisions_import_limit: 500
>  
>  # Secret key for macaroons used to grant git push permission to workers.
> -macaroon_secret_key:
> +macaroon_secret_key: none

I have no words, but carry on.

>  
>  [codeimportdispatcher]
>  # The directory where the code import worker should be directed to


-- 
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142
Your team Launchpad code reviewers is subscribed to branch lp:launchpad.

___
Mailing list: https://launchpad.net/~launchpad-reviewers
Post to : launchpad-reviewers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~launchpad-reviewers
More help   : https://help.launchpad.net/ListHelp


Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad

2016-10-12 Thread Colin Watson
I've sprinkled a bunch of extra comments around in various places.

Diff comments:

> 
> === modified file 'lib/lp/codehosting/codeimport/worker.py'
> --- lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> +++ lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> @@ -918,3 +973,67 @@
>  """See `PullingImportWorker.probers`."""
>  from bzrlib.bzrdir import BzrProber, RemoteBzrProber
>  return [BzrProber, RemoteBzrProber]
> +
> +
> +class GitToGitImportWorker(ImportWorker):
> +"""An import worker for imports from Git to Git."""
> +
> +def _runGit(self, *args, **kwargs):
> +"""Run git with arguments, sending output to the logger."""
> +cmd = ["git"] + list(args)
> +git_process = subprocess.Popen(
> +cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, **kwargs)
> +for line in git_process.stdout:
> +line = line.decode("UTF-8", "replace").rstrip("\n")
> +# Remove any user/password data from URLs.
> +line = re.sub(r"://([^:]*:[^@]*@)(\S+)", r"://\2", line)
> +self._logger.info(line)
> +retcode = git_process.wait()
> +if retcode:
> +raise subprocess.CalledProcessError(retcode, cmd)
> +
> +def _doImport(self):
> +self._logger.info("Starting job.")
> +self._logger.info(config.codeimport.git_repository_store)
> +try:
> +self._opener_policy.checkOneURL(self.source_details.url)

As far as I can see from http.c, git follows up to 20 redirects as long as 
they're to http/https/ftp/ftps schemes, and there's no way to configure this.

The only things that this would let through that checkOneURL tries to forbid 
would be redirects to git.launchpad.net and redirects to a blacklisted 
hostname, which in practice is always localhost or 127.0.0.1.  Perhaps we can 
get away with this state for the time being?  Redirects to git.launchpad.net 
would basically be a slightly annoying waste of resources but nothing more, 
while redirects to localhost should in practice be harmless given that code 
import workers don't run web or FTP servers.

The only way I can think of to improve isolation here would be to direct all 
git requests through a proxy, which would be a non-trivial chunk of work.

> +except BadUrl as e:
> +self._logger.info("Invalid URL: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE_FORBIDDEN
> +unauth_target_url = urljoin(
> +config.codeimport.git_repository_store,
> +self.source_details.target_id)
> +split = urlsplit(unauth_target_url)
> +if split.hostname:
> +target_netloc = ":%s@%s" % (
> +self.source_details.macaroon.serialize(), split.hostname)
> +else:
> +target_netloc = ""
> +target_url = urlunsplit([
> +split.scheme, target_netloc, split.path, "", ""])
> +# XXX cjwatson 2016-10-11: Ideally we'd put credentials in a
> +# credentials store instead.  However, git only accepts credentials
> +# that have both a non-empty username and a non-empty password.
> +self._logger.info("Getting existing repository from hosting 
> service.")
> +try:
> +self._runGit("clone", "--bare", target_url, "repository")
> +except subprocess.CalledProcessError as e:
> +self._logger.info(
> +"Unable to get existing repository from hosting service: "
> +"%s" % e)
> +return CodeImportWorkerExitCode.FAILURE
> +self._logger.info("Fetching remote repository.")
> +try:
> +self._runGit(
> +"remote", "add", "-f", "--mirror=fetch",
> +"mirror", self.source_details.url, cwd="repository")

More or less, with the addition of --prune.  Though good grief HEAD handling is 
horrible, but I've just added a comment about that for now.

> +except subprocess.CalledProcessError as e:
> +self._logger.info("Unable to fetch remote repository: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE_INVALID
> +self._logger.info("Pushing repository to hosting service.")
> +try:
> +self._runGit("push", "--mirror", target_url, cwd="repository")
> +except subprocess.CalledProcessError as e:
> +self._logger.info("Unable to push to hosting service: %s" % e)
> +return CodeImportWorkerExitCode.FAILURE
> +return CodeImportWorkerExitCode.SUCCESS
> 
> === modified file 'lib/lp/services/config/schema-lazr.conf'
> --- lib/lp/services/config/schema-lazr.conf   2016-10-11 15:36:32 +
> +++ lib/lp/services/config/schema-lazr.conf   2016-10-11 15:36:32 +
> @@ -417,7 +421,7 @@
>  svn_revisions_import_limit: 500
>  
>  # Secret key for macaroons used to grant git push permission to workers.
> -macaroon_secret_key:
> 

Re: [Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad

2016-10-12 Thread William Grant
Review: Approve code



Diff comments:

> === modified file 'configs/development/launchpad-lazr.conf'
> --- configs/development/launchpad-lazr.conf   2016-06-30 16:05:11 +
> +++ configs/development/launchpad-lazr.conf   2016-10-11 15:36:32 +
> @@ -55,6 +55,7 @@
>  
>  [codeimport]
>  bazaar_branch_store: file:///tmp/bazaar-branches
> +git_repository_store: https://git.launchpad.dev/

It's entirely confusing that git_repository_store and bazaar_branch_store are 
pretty radically different things. bazaar_branch_store is the intermediate tree 
on maple (formerly escudero), whereas git_repository_store is the actual 
hosting tree. The lack of puller for git imports is certainly a relief, but the 
chance of this not causing someone to faceplant is tiny.

A comment might be sufficient, but could we also get away with using the 
existing git_browse_root (GitRepository.git_https_url would be ideal, but 
returns None for private repos which are technically a possibility for imports)?

>  foreign_tree_store: file:///tmp/foreign-branches
>  
>  [codeimportdispatcher]
> 
> === modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
> --- lib/lp/codehosting/codeimport/tests/test_worker.py2016-10-11 
> 15:36:32 +
> +++ lib/lp/codehosting/codeimport/tests/test_worker.py2016-10-11 
> 15:36:32 +
> @@ -1416,6 +1453,19 @@
>  str(code_import.branch.id), 'git',
>  'git://git.example.com/project.git']))
>  
> +def test_git_to_git_arguments(self):
> +self.pushConfig('codeimport', macaroon_secret_key='some-secret')
> +self.useFixture(GitHostingFixture())
> +code_import = self.factory.makeCodeImport(
> +git_repo_url="git://git.example.com/project.git",
> +target_rcs_type=TargetRevisionControlSystems.GIT)
> +self.assertArgumentsMatch(
> +code_import, MatchesListwise([
> +Equals(code_import.git_repository.unique_name),
> +Equals('git:git'), 
> Equals('git://git.example.com/project.git'),
> +CodeImportJobMacaroonVerifies(code_import.git_repository)]),
> +start_job=True)

A long time in the future in an editor far, far away: "Why would I want to 
start the job? Silly test author, I'll drop that pointlessness while I'm 
refactoring. Why is the macaroon verification silently failing now?"

> +
>  def test_cvs_arguments(self):
>  code_import = self.factory.makeCodeImport(
>  cvs_root=':pserver:f...@example.com/bar', cvs_module='bar')
> 
> === modified file 'lib/lp/codehosting/codeimport/worker.py'
> --- lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> +++ lib/lp/codehosting/codeimport/worker.py   2016-10-11 15:36:32 +
> @@ -269,36 +282,50 @@
>  of the information suitable for passing around on executables' command
>  lines.
>  
> -:ivar target_id: The id of the Bazaar branch associated with this code
> -import, used for locating the existing import and the foreign tree.
> +:ivar target_id: The id of the Bazaar branch or the path of the Git
> +repository associated with this code import, used for locating the
> +existing import and the foreign tree.
>  :ivar rcstype: 'cvs', 'git', 'bzr-svn', 'bzr' as appropriate.
> +:ivar target_rcstype: 'bzr' or 'git' as appropriate.
>  :ivar url: The branch URL if rcstype in ['bzr-svn', 'git', 'bzr'], None
>  otherwise.
>  :ivar cvs_root: The $CVSROOT if rcstype == 'cvs', None otherwise.
>  :ivar cvs_module: The CVS module if rcstype == 'cvs', None otherwise.
>  :ivar stacked_on_url: The URL of the branch that the associated branch
>  is stacked on, if any.
> +:ivar macaroon: A macaroon granting authority to push to the target
> +repository if target_rcstype == 'git', None otherwise.
>  """
>  
> -def __init__(self, target_id, rcstype, url=None, cvs_root=None,
> - cvs_module=None, stacked_on_url=None):
> +def __init__(self, target_id, rcstype, target_rcstype, url=None,
> + cvs_root=None, cvs_module=None, stacked_on_url=None,
> + macaroon=None):
>  self.target_id = target_id
>  self.rcstype = rcstype
> +self.target_rcstype = target_rcstype
>  self.url = url
>  self.cvs_root = cvs_root
>  self.cvs_module = cvs_module
>  self.stacked_on_url = stacked_on_url
> +self.macaroon = macaroon
>  
>  @classmethod
>  def fromArguments(cls, arguments):
>  """Convert command line-style arguments to an instance."""
> -target_id = int(arguments.pop(0))
> +target_id = arguments.pop(0)
>  rcstype = arguments.pop(0)
> +if ':' in rcstype:
> +rcstype, target_rcstype = rcstype.split(':', 1)
> +else:
> +target_rcstype = 'bzr'

XXX this for cleanup post-deployment?

>  if 

[Launchpad-reviewers] [Merge] lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad

2016-10-11 Thread Colin Watson
Colin Watson has proposed merging lp:~cjwatson/launchpad/codeimport-git-worker 
into lp:launchpad with lp:~cjwatson/launchpad/codeimport-create-hosting as a 
prerequisite.

Commit message:
Add a Git-to-Git import worker.

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1469459 in Launchpad itself: "import external code into a LP git repo 
(natively)"
  https://bugs.launchpad.net/launchpad/+bug/1469459

For more details, see:
https://code.launchpad.net/~cjwatson/launchpad/codeimport-git-worker/+merge/308142

This takes the easy approach for now: clone existing imported repository, 
fetch-mirror remote repository into it, and push it back.  Once turnip has more 
advanced plumbing then we can always switch over to use that (although it would 
involve some work setting up a turnip fixture).
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of 
lp:~cjwatson/launchpad/codeimport-git-worker into lp:launchpad.
=== modified file 'configs/development/launchpad-lazr.conf'
--- configs/development/launchpad-lazr.conf	2016-06-30 16:05:11 +
+++ configs/development/launchpad-lazr.conf	2016-10-11 15:36:32 +
@@ -55,6 +55,7 @@
 
 [codeimport]
 bazaar_branch_store: file:///tmp/bazaar-branches
+git_repository_store: https://git.launchpad.dev/
 foreign_tree_store: file:///tmp/foreign-branches
 
 [codeimportdispatcher]

=== modified file 'configs/testrunner-appserver/launchpad-lazr.conf'
--- configs/testrunner-appserver/launchpad-lazr.conf	2014-02-27 08:39:44 +
+++ configs/testrunner-appserver/launchpad-lazr.conf	2016-10-11 15:36:32 +
@@ -8,6 +8,9 @@
 [codehosting]
 launch: False
 
+[codeimport]
+macaroon_secret_key: dev-macaroon-secret
+
 [google_test_service]
 launch: False
 

=== modified file 'lib/lp/codehosting/codeimport/tests/servers.py'
--- lib/lp/codehosting/codeimport/tests/servers.py	2016-02-05 16:51:12 +
+++ lib/lp/codehosting/codeimport/tests/servers.py	2016-10-11 15:36:32 +
@@ -253,12 +253,15 @@
 else:
 return local_path_to_url(self.repository_path)
 
-def createRepository(self, path):
-GitRepo.init(path)
+def createRepository(self, path, bare=False):
+if bare:
+GitRepo.init_bare(path)
+else:
+GitRepo.init(path)
 
-def start_server(self):
+def start_server(self, bare=False):
 super(GitServer, self).start_server()
-self.createRepository(self.repository_path)
+self.createRepository(self.repository_path, bare=bare)
 if self._use_server:
 repo = GitRepo(self.repository_path)
 self._server = TCPGitServerThread(

=== modified file 'lib/lp/codehosting/codeimport/tests/test_worker.py'
--- lib/lp/codehosting/codeimport/tests/test_worker.py	2016-10-11 15:36:32 +
+++ lib/lp/codehosting/codeimport/tests/test_worker.py	2016-10-11 15:36:32 +
@@ -45,16 +45,26 @@
 )
 from dulwich.repo import Repo as GitRepo
 from fixtures import FakeLogger
+from pymacaroons import Macaroon
 import subvertpy
 import subvertpy.client
 import subvertpy.ra
-from testtools.matchers import Equals
+from testtools.matchers import (
+Equals,
+Matcher,
+MatchesListwise,
+Mismatch,
+)
+from zope.component import getUtility
 
 from lp.app.enums import InformationType
+from lp.code.enums import TargetRevisionControlSystems
 from lp.code.interfaces.codehosting import (
 branch_id_alias,
 compose_public_url,
 )
+from lp.code.interfaces.codeimportjob import ICodeImportJobWorkflow
+from lp.code.tests.helpers import GitHostingFixture
 import lp.codehosting
 from lp.codehosting.codeimport.tarball import (
 create_tarball,
@@ -90,7 +100,9 @@
 from lp.codehosting.tests.helpers import create_branch_with_one_revision
 from lp.services.config import config
 from lp.services.log.logger import BufferLogger
+from lp.services.macaroons.interfaces import IMacaroonIssuer
 from lp.testing import (
+celebrity_logged_in,
 TestCase,
 TestCaseWithFactory,
 )
@@ -1049,7 +1061,7 @@
 worker = self.makeImportWorker(
 self.factory.makeCodeImportSourceDetails(
 rcstype=self.rcstype, url="file:///local/path"),
-opener_policy=CodeImportBranchOpenPolicy("bzr"))
+opener_policy=CodeImportBranchOpenPolicy("bzr", "bzr"))
 self.assertEqual(
 CodeImportWorkerExitCode.FAILURE_FORBIDDEN, worker.run())
 
@@ -1285,7 +1297,7 @@
 
 def setUp(self):
 super(CodeImportBranchOpenPolicyTests, self).setUp()
-self.policy = CodeImportBranchOpenPolicy("bzr")
+self.policy = CodeImportBranchOpenPolicy("bzr", "bzr")
 
 def test_follows_references(self):
 self.assertEquals(True, self.policy.shouldFollowReferences())
@@ -1309,14 +1321,22 @@
 self.assertGoodUrl("bzr://bzr.example.com/somebzrurl/")
 self.assertBadUrl("bzr://bazaar.launchpad.dev/example")
 
-def