From: Dave Borowitz <[email protected]>

Change-Id: I54f903b90533cf821235060aa6f632ea9d4f827a
---
 dulwich/_diff_tree.c            |  168 ++++++++++++++++++++++++++++++++++++---
 dulwich/diff_tree.py            |    3 +-
 dulwich/tests/test_diff_tree.py |   31 ++++++-
 3 files changed, 185 insertions(+), 17 deletions(-)

diff --git a/dulwich/_diff_tree.c b/dulwich/_diff_tree.c
index aaefb46..063914e 100644
--- a/dulwich/_diff_tree.c
+++ b/dulwich/_diff_tree.c
@@ -28,7 +28,9 @@ typedef int Py_ssize_t;
 #define Py_SIZE(x) Py_Size(x)
 #endif
 
-static PyObject *tree_entry_cls, *null_entry;
+static PyObject *tree_entry_cls = NULL, *null_entry = NULL,
+       *defaultdict_cls = NULL, *int_cls = NULL;
+static int block_size;
 
 /**
  * Free an array of PyObject pointers, decrementing any references.
@@ -266,34 +268,178 @@ static PyObject *py_is_tree(PyObject *self, PyObject *args)
        return result;
 }
 
+/**
+ * Add n to the running byte count stored under hash(str[0:n]).
+ *
+ * get and set are callables invoked as get(key) and set(key, value);
+ * py_count_blocks passes the bound __getitem__/__setitem__ of its counts
+ * mapping. The value returned by get is read with the unchecked
+ * PyInt_AS_LONG macro, so it is expected to be a Python int.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int add_hash(PyObject *get, PyObject *set, char *str, int n) {
+       PyObject *key_str = NULL, *key = NULL, *current = NULL, *stored = NULL;
+       long hash_value;
+       int status = -1;
+
+       /* The C API offers no way to hash a raw buffer directly, so the bytes
+        * are wrapped in a temporary PyString first. */
+       key_str = PyString_FromStringAndSize(str, n);
+       if (!key_str)
+               goto done;
+       hash_value = PyObject_Hash(key_str);
+       if (hash_value == -1)
+               goto done;
+       key = PyInt_FromLong(hash_value);
+       if (!key)
+               goto done;
+
+       current = PyObject_CallFunctionObjArgs(get, key, NULL);
+       if (!current)
+               goto done;
+       stored = PyObject_CallFunction(set, "(Ol)", key,
+               PyInt_AS_LONG(current) + n);
+       if (stored)
+               status = 0;
+
+done:
+       /* Shared exit: every temporary is released whether or not we failed. */
+       Py_XDECREF(key_str);
+       Py_XDECREF(key);
+       Py_XDECREF(current);
+       Py_XDECREF(stored);
+       return status;
+}
+
+/**
+ * _count_blocks(obj): count the bytes contained in each distinct block of obj.
+ *
+ * obj must provide as_raw_chunks() returning a list of strings. The
+ * concatenated chunk data is split into blocks that end at a newline or after
+ * block_size bytes, whichever comes first; a trailing partial block is
+ * counted as well.
+ *
+ * Returns a new reference to a defaultdict(int) mapping hash(block) to the
+ * total number of bytes seen in blocks with that hash, or NULL on failure.
+ */
+static PyObject *py_count_blocks(PyObject *self, PyObject *args)
+{
+       PyObject *obj, *chunks = NULL, *chunk, *counts = NULL, *get = NULL,
+               *set = NULL;
+       char *chunk_str, *block = NULL;
+       /* Indices share the type of the sizes they are compared against. */
+       Py_ssize_t num_chunks, chunk_len, i, j;
+       int n = 0;
+       char c;
+
+       if (!PyArg_ParseTuple(args, "O", &obj))
+               goto error;
+
+       /* With a non-positive block size the n == block_size flush below could
+        * never trigger and n would run past the end of block. */
+       if (block_size <= 0) {
+               PyErr_SetString(PyExc_ValueError, "block size must be positive");
+               goto error;
+       }
+
+       counts = PyObject_CallFunctionObjArgs(defaultdict_cls, int_cls, NULL);
+       if (!counts)
+               goto error;
+       get = PyObject_GetAttrString(counts, "__getitem__");
+       if (!get)
+               goto error;
+       set = PyObject_GetAttrString(counts, "__setitem__");
+       if (!set)
+               goto error;
+
+       chunks = PyObject_CallMethod(obj, "as_raw_chunks", NULL);
+       if (!chunks)
+               goto error;
+       if (!PyList_Check(chunks)) {
+               PyErr_SetString(PyExc_TypeError,
+                       "as_raw_chunks() did not return a list");
+               goto error;
+       }
+       num_chunks = PyList_GET_SIZE(chunks);
+       block = PyMem_New(char, block_size);
+       if (!block) {
+               PyErr_NoMemory();
+               goto error;
+       }
+
+       /* n persists across chunks so a block may straddle a chunk boundary. */
+       for (i = 0; i < num_chunks; i++) {
+               chunk = PyList_GET_ITEM(chunks, i);
+               if (!PyString_Check(chunk)) {
+                       PyErr_SetString(PyExc_TypeError, "chunk is not a string");
+                       goto error;
+               }
+               if (PyString_AsStringAndSize(chunk, &chunk_str, &chunk_len) == -1)
+                       goto error;
+
+               for (j = 0; j < chunk_len; j++) {
+                       c = chunk_str[j];
+                       block[n++] = c;
+                       if (c == '\n' || n == block_size) {
+                               if (add_hash(get, set, block, n) == -1)
+                                       goto error;
+                               n = 0;
+                       }
+               }
+       }
+       /* Flush the final block when the data does not end on a boundary. */
+       if (n && add_hash(get, set, block, n) == -1)
+               goto error;
+
+       Py_DECREF(chunks);
+       Py_DECREF(get);
+       Py_DECREF(set);
+       PyMem_Free(block);
+       return counts;
+
+error:
+       Py_XDECREF(chunks);
+       Py_XDECREF(get);
+       Py_XDECREF(set);
+       Py_XDECREF(counts);
+       PyMem_Free(block);
+       return NULL;
+}
+
+/* Functions exported by the _diff_tree extension module; all of them take
+ * positional arguments only (METH_VARARGS) and carry no docstring. */
 static PyMethodDef py_diff_tree_methods[] = {
        { "_is_tree", (PyCFunction)py_is_tree, METH_VARARGS, NULL },
        { "_merge_entries", (PyCFunction)py_merge_entries, METH_VARARGS, NULL },
+       { "_count_blocks", (PyCFunction)py_count_blocks, METH_VARARGS, NULL },
        { NULL, NULL, 0, NULL }
 };
 
 PyMODINIT_FUNC
 init_diff_tree(void)
 {
-       PyObject *m, *objects_mod, *diff_mod;
+       PyObject *m, *objects_mod = NULL, *diff_tree_mod = NULL;
+        PyObject *block_size_obj = NULL;
        m = Py_InitModule("_diff_tree", py_diff_tree_methods);
        if (!m)
-               return;
+               goto error;
 
        objects_mod = PyImport_ImportModule("dulwich.objects");
        if (!objects_mod)
-               return;
+               goto error;
 
        tree_entry_cls = PyObject_GetAttrString(objects_mod, "TreeEntry");
        Py_DECREF(objects_mod);
        if (!tree_entry_cls)
-               return;
+               goto error;
+
+       diff_tree_mod = PyImport_ImportModule("dulwich.diff_tree");
+       if (!diff_tree_mod)
+               goto error;
 
-       diff_mod = PyImport_ImportModule("dulwich.diff");
-       if (!diff_mod)
-               return;
-       null_entry = PyObject_GetAttrString(diff_mod, "_NULL_ENTRY");
-       Py_DECREF(diff_mod);
+       null_entry = PyObject_GetAttrString(diff_tree_mod, "_NULL_ENTRY");
        if (!null_entry)
-               return;
+               goto error;
+
+       block_size_obj = PyObject_GetAttrString(diff_tree_mod, "_BLOCK_SIZE");
+       if (!block_size_obj)
+               goto error;
+       block_size = (int)PyInt_AsLong(block_size_obj);
+
+       if (PyErr_Occurred())
+               goto error;
+
+       defaultdict_cls = PyObject_GetAttrString(diff_tree_mod, "defaultdict");
+       if (!defaultdict_cls)
+               goto error;
+
+       /* This is kind of hacky, but I don't know of a better way to get the
+        * PyObject* version of int. */
+       int_cls = PyDict_GetItemString(PyEval_GetBuiltins(), "int");
+       if (!int_cls) {
+               PyErr_SetString(PyExc_NameError, "int");
+               goto error;
+       }
+
+       Py_DECREF(objects_mod);
+       Py_DECREF(diff_tree_mod);
+       return;
+
+error:
+       Py_XDECREF(objects_mod);
+       Py_XDECREF(diff_tree_mod);
+       Py_XDECREF(null_entry);
+       Py_XDECREF(block_size_obj);
+       Py_XDECREF(defaultdict_cls);
+       Py_XDECREF(int_cls);
+       return;
 }
diff --git a/dulwich/diff_tree.py b/dulwich/diff_tree.py
index 08f7f29..9a16298 100644
--- a/dulwich/diff_tree.py
+++ b/dulwich/diff_tree.py
@@ -456,8 +456,9 @@ class RenameDetector(object):
 # Hold on to the pure-python implementations for testing.
 _is_tree_py = _is_tree
 _merge_entries_py = _merge_entries
+_count_blocks_py = _count_blocks
 try:
     # Try to import C versions
-    from dulwich._diff_tree import _is_tree, _merge_entries
+    from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks
 except ImportError:
     pass
diff --git a/dulwich/tests/test_diff_tree.py b/dulwich/tests/test_diff_tree.py
index 4a26e1f..ca0ee18 100644
--- a/dulwich/tests/test_diff_tree.py
+++ b/dulwich/tests/test_diff_tree.py
@@ -28,6 +28,7 @@ from dulwich.diff_tree import (
     _merge_entries_py,
     tree_changes,
     _count_blocks,
+    _count_blocks_py,
     _similarity_score,
     _tree_change_key,
     RenameDetector,
@@ -291,19 +292,34 @@ class TreeChangesTest(DiffTestCase):
 
 class RenameDetectionTest(DiffTestCase):
 
-    def test_count_blocks(self):
+    def _do_test_count_blocks(self, count_blocks):
+        # Shared test body; count_blocks is the implementation under test.
+        # Each expected value is the total bytes in blocks with that hash.
         blob = make_object(Blob, data='a\nb\na\n')
-        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
+        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
+
+    test_count_blocks = functest_builder(_do_test_count_blocks,
+                                         _count_blocks_py)
+    test_count_blocks_extension = ext_functest_builder(_do_test_count_blocks,
+                                                       _count_blocks)
 
-    def test_count_blocks_no_newline(self):
+    def _do_test_count_blocks_no_newline(self, count_blocks):
+        # Data without a trailing newline: the final partial block counts too.
+        # Call the implementation passed in, not the module-level default.
         blob = make_object(Blob, data='a\na')
-        self.assertEqual({hash('a\n'): 2, hash('a'): 1}, _count_blocks(blob))
+        self.assertEqual({hash('a\n'): 2, hash('a'): 1}, count_blocks(blob))
 
-    def test_count_blocks_chunks(self):
+    test_count_blocks_no_newline = functest_builder(
+      _do_test_count_blocks_no_newline, _count_blocks_py)
+    test_count_blocks_no_newline_extension = ext_functest_builder(
+       _do_test_count_blocks_no_newline, _count_blocks)
+
+    def _do_test_count_blocks_chunks(self, count_blocks):
+        # A line split across two raw chunks must still form a single block.
+        # Call the implementation passed in, not the module-level default.
         blob = ShaFile.from_raw_chunks(Blob.type_num, ['a\nb', '\na\n'])
-        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, _count_blocks(blob))
+        self.assertEqual({hash('a\n'): 4, hash('b\n'): 2}, count_blocks(blob))
 
-    def test_count_blocks_long_lines(self):
+    test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
+                                                _count_blocks_py)
+    test_count_blocks_chunks_extension = ext_functest_builder(
+      _do_test_count_blocks_chunks, _count_blocks)
+
+    def _do_test_count_blocks_long_lines(self, count_blocks):
+        # Call the implementation passed in, not the module-level default.
         a = 'a' * 64
         data = a + 'xxx\ny\n' + a + 'zzz\n'
         blob = make_object(Blob, data=data)
@@ -311,6 +327,11 @@ class RenameDetectionTest(DiffTestCase):
                           hash('zzz\n'): 4},
-                         _count_blocks(blob))
+                         count_blocks(blob))
 
+    test_count_blocks_long_lines = functest_builder(
+      _do_test_count_blocks_long_lines, _count_blocks_py)
+    test_count_blocks_long_lines_extension = ext_functest_builder(
+      _do_test_count_blocks_long_lines, _count_blocks)
+
     def assertSimilar(self, expected_score, blob1, blob2):
         self.assertEqual(expected_score, _similarity_score(blob1, blob2))
         self.assertEqual(expected_score, _similarity_score(blob2, blob1))
-- 
1.7.3.2.168.gd6b63


_______________________________________________
Mailing list: https://launchpad.net/~dulwich-users
Post to     : [email protected]
Unsubscribe : https://launchpad.net/~dulwich-users
More help   : https://help.launchpad.net/ListHelp

Reply via email to