On Wed, 2011-03-30 at 12:10 +0800, Jeremy Kerr wrote:
> Hi Guilherme,
> 
> >  apps/patchwork/models.py                      |    1 +
> >  lib/sql/migration/008-project-source-tree.sql |    3 +++
> >  2 files changed, 4 insertions(+), 0 deletions(-)
> >  create mode 100644 lib/sql/migration/008-project-source-tree.sql
> 
> Looks good. I'd like to wait until there is a user of this field before 
> merging the change though - or are you using this for linaro-internal things?

I'm using it on a script I'm writing to fetch the git history of every
project and scan that looking for patches that have been committed. Just
like the existing patchwork-update-commits script does but this one is
fully automated, and to make it more easily testable I'm experimenting
with python-dulwich to scan the git history. 

I haven't submitted it for feedback yet because it's still very much a
work in progress, but I would certainly do so. (I'm including what I
have so far here, though)

Oh, there's one extra DB schema change I'm considering... That will
allow us to keep track of the last seen commit ref so that we can scan
the git history incrementally.  I've added it as another field to
Project, named last_seen_commit_ref.  How does that sound to you?


-- 
Guilherme Salgado <https://launchpad.net/~salgado>
commit dd10a245c50f651b38e7074d49413e91b2d82e14
Author: Guilherme Salgado <[email protected]>
Date:   Fri Mar 25 15:59:28 2011 -0300

    Adds a script which goes through all registered projects looking for patches that have been committed already
    
    It does that by checking out the project's source code from its VCS of choice
    (currently only git is supported, though), scanning the commits there and
    comparing them to the patches in Patchwork.

diff --git a/apps/patchwork/bin/update-committed-patches.py b/apps/patchwork/bin/update-committed-patches.py
new file mode 100755
index 0000000..a444783
--- /dev/null
+++ b/apps/patchwork/bin/update-committed-patches.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python
+
+import _pythonpath
+from patchwork.models import Patch, Project, State
+from patchwork.utils import (
+    ensure_source_checkout_for_project, get_hashes_for_commits)
+
+
+#for project in Project.objects.all():
+for project in Project.objects.filter(linkname='linux-kernel'):
+    if project.source_tree is None:
+        continue
+
+    repo = ensure_source_checkout_for_project(project)
+    if repo is None:
+        print ("Skipping %s as we couldn't get a source checkout" %
+               project.name)
+        continue
+
+    hashes = get_hashes_for_commits(repo,
+                                    stop_at=project.last_seen_commit_ref)
+    for commit_id, patch_hash in hashes:
+        # There may be multiple patches with the same hash. That's usually
+        # the case when a second version of a patch series is submitted
+        # and some of the patches in the series are identical in both
+        # series.
+        for patch in Patch.objects.filter(project=project, hash=patch_hash):
+            patch.state = State.objects.get(name='Accepted')
+            patch.commit_ref = commit_id
+            print patch, patch.state
+    else:
+        print "No new %s commits to parse" % project.name
+
+    project.last_seen_commit_ref = repo.head()
+    project.save()
diff --git a/apps/patchwork/tests/__init__.py b/apps/patchwork/tests/__init__.py
index 68fe563..e79331b 100644
--- a/apps/patchwork/tests/__init__.py
+++ b/apps/patchwork/tests/__init__.py
@@ -23,3 +23,4 @@ from patchwork.tests.bundles import *
 from patchwork.tests.mboxviews import *
 from patchwork.tests.updates import *
 from patchwork.tests.filters import *
+from patchwork.tests.test_utils import *
diff --git a/apps/patchwork/tests/test_utils.py b/apps/patchwork/tests/test_utils.py
new file mode 100644
index 0000000..0c60e74
--- /dev/null
+++ b/apps/patchwork/tests/test_utils.py
@@ -0,0 +1,74 @@
+
+import tempfile
+from time import time
+from unittest import TestCase
+
+from dulwich.objects import Blob, Commit, parse_timezone, Tree
+from dulwich.repo import Repo
+
+from patchwork.utils import get_hashes_for_commits
+
+
+class TestGetHashesForCommits(TestCase):
+
+    def test_one_commit(self):
+        repo = self.create_git_repo()
+        commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+        # Here there are no hashes because get_hashes_for_commits() skips the
+        # first one as it's unlikely to be of any interest to us.
+        self.assertEqual(
+            [], list(get_hashes_for_commits(repo, stop_at=None)))
+
+    def test_two_commits(self):
+        repo = self.create_git_repo()
+        commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+        commit2 = self.add_file_and_commit(repo, 'bar', 'Content2', commit)
+        self.assertEqual(
+            [(commit2.id, '5c010402c5673981ee3e1712e6a037de3ff9cae4')],
+            list(get_hashes_for_commits(repo, stop_at=None)))
+
+    def test_empty_patch(self):
+        repo = self.create_git_repo()
+        commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+        head = self.add_file_and_commit(repo, 'bar', '', commit)
+        self.assertEqual(
+            [], list(get_hashes_for_commits(repo, stop_at=None)))
+
+    def test_stop_at(self):
+        repo = self.create_git_repo()
+        commit = self.add_file_and_commit(repo, 'foo', 'Content1')
+        commit2 = self.add_file_and_commit(repo, 'bar', 'Content2', commit)
+        commit3 = self.add_file_and_commit(repo, 'baz', 'Content3', commit2)
+        self.assertEqual(
+            [(commit3.id, '11d22fa0986b3bb341baa76b8a6a757a46a2f916')],
+            list(get_hashes_for_commits(repo, stop_at=commit2.id)))
+
+    def create_git_repo(self):
+        tmpdir = tempfile.mkdtemp()
+        repo = Repo.init(tmpdir)
+        return repo
+
+    def add_file_and_commit(self, repo, filename, data, parent=None):
+        blob = Blob.from_string(data)
+        parents = []
+        tree = Tree()
+        if parent is not None:
+            tree = repo[parent.tree]
+            parents = [parent.id]
+        tree.add(0100644, filename, blob.id)
+        commit = Commit()
+        commit.tree = tree.id
+        author = 'You <[email protected]>'
+        commit.author = commit.committer = author
+        commit.commit_time = commit.author_time = int(time())
+        tz = parse_timezone('-0200')[0]
+        commit.commit_timezone = commit.author_timezone = tz
+        commit.encoding = "UTF-8"
+        commit.message = "A commit"
+        commit.parents = parents
+        object_store = repo.object_store
+        object_store.add_object(blob)
+        object_store.add_object(tree)
+        object_store.add_object(commit)
+        repo.refs['refs/heads/master'] = commit.id
+        return commit
diff --git a/apps/patchwork/utils.py b/apps/patchwork/utils.py
index e41ffb6..353147e 100644
--- a/apps/patchwork/utils.py
+++ b/apps/patchwork/utils.py
@@ -17,8 +17,15 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import os
+from StringIO import StringIO
 
-from patchwork.models import Bundle, Project, BundlePatch
+from dulwich.client import get_transport_and_path
+from dulwich.patch import write_tree_diff
+from dulwich.repo import Repo
+
+from patchwork.parser import hash_patch, parse_patch
+from patchwork.models import Bundle, BundlePatch
 from django.shortcuts import get_object_or_404
 
 def get_patch_ids(d, prefix = 'patch_id'):
@@ -137,3 +144,64 @@ def set_bundle(user, project, action, data, patches, context):
     bundle.save()
 
     return []
+
+
+def ensure_source_checkout_for_project(project):
+    forest = '/home/salgado/src' # This is where we store the trees we checkout
+    root = os.path.join(forest, project.linkname)
+    if not os.path.exists(root):
+        repo = Repo.init(root, mkdir=True)
+    else:
+        repo = Repo(root)
+
+    transport, path = get_transport_and_path(project.source_tree)
+    refs = transport.fetch(path, repo)
+    # XXX: Is this the appropriate thing to do?  will there always be a master
+    # branch?
+    repo.refs['refs/heads/master'] = refs['HEAD']
+    return repo
+
+
+def get_hashes_for_commits(repo, stop_at):
+    # We don't care about the first commit, but if needed it should be
+    # possible to diff it against an empty tree and yield its hash as well.
+    commit = repo['HEAD']
+
+    while len(commit.parents) > 0:
+        commit_id = commit.id
+        if commit_id == stop_at:
+            break
+
+        parent = repo[commit.parents[0]]
+        diff = StringIO()
+        # In the case of merges, this won't have the same behavior as 'git
+        # show', which seems to omit files not changed since any of the
+        # parents (thanks to 'git tree-diff --cc'), but I think this is not a
+        # big deal as patches in Patchwork would never be identical to the
+        # diff of a merge anyway, would they?
+        try:
+            write_tree_diff(
+                diff, repo.object_store, parent.tree, commit.tree)
+        except KeyError, e:
+            # XXX: This happens in qemu because there's a commit that is
+            # actually on a submodule
+            # (8b06c62ae48b67b320f7420dcd4854c5559e1532) and the old commit_id
+            # used for the submodule
+            # (06d0bdd9e2e20377b3180e4986b14c8549b393e4) is gone (possibly
+            # because of a rebase?), so dulwich crashes with a KeyError.
+            raise
+
+        commit = parent
+        diff.seek(0)
+        try:
+            patch, _ = parse_patch(diff.read().decode('utf-8'))
+        except UnicodeDecodeError:
+            # TODO: Need to find out why this happens and see if we really
+            # need to skip such commits.
+            print "Skipping %s" % commit.id
+            continue
+
+        # When commits just add files or change permissions the diff will be
+        # empty and thus parse_patch() will return None.
+        if patch is not None:
+            yield commit_id, hash_patch(patch).hexdigest()

Attachment: signature.asc
Description: This is a digitally signed message part

_______________________________________________
Patchwork mailing list
[email protected]
https://lists.ozlabs.org/listinfo/patchwork

Reply via email to