# HG changeset patch
# User Augie Fackler <[email protected]>
# Date 1289783885 -21600
# Node ID b469476c99d82eff40684e23ccd957815fd05282
# Parent  130bbd3c82edb682baf86c27e0fac5b9c8acb624
objects.parse_tree: more gracefully handle invalid modes in trees

Change-Id: I89c3a0095b508ad09131608e8323ecafb60f463b

diff --git a/dulwich/_objects.c b/dulwich/_objects.c
--- a/dulwich/_objects.c
+++ b/dulwich/_objects.c
@@ -36,6 +36,7 @@
 #define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
 
 static PyObject *tree_entry_cls;
+static PyObject *object_format_exception_cls;
 
 static PyObject *sha_to_pyhex(const unsigned char *sha)
 {
@@ -49,15 +50,20 @@
        return PyString_FromStringAndSize(hexsha, 40);
 }
 
-static PyObject *py_parse_tree(PyObject *self, PyObject *args)
+static PyObject *py_parse_tree(PyObject *self, PyObject *args, PyObject *kw)
 {
        char *text, *start, *end;
-       int len, namelen;
-       PyObject *ret, *item, *name;
+       int len, namelen, strict;
+       PyObject *ret, *item, *name, *py_strict = NULL;
+       static char *kwlist[] = {"text", "strict", NULL};
 
-       if (!PyArg_ParseTuple(args, "s#", &text, &len))
+       if (!PyArg_ParseTupleAndKeywords(args, kw, "s#|O", kwlist,
+                                        &text, &len, &py_strict))
                return NULL;
 
+
+       strict = py_strict ?  PyObject_IsTrue(py_strict) : 0;
+
        /* TODO: currently this returns a list; if memory usage is a concern,
         * consider rewriting as a custom iterator object */
        ret = PyList_New(0);
@@ -71,6 +77,13 @@
 
        while (text < end) {
                long mode;
+               if (strict && text[0] == '0') {
+                       PyErr_SetString(object_format_exception_cls,
+                                       "Illegal leading zero on mode");
+                       Py_DECREF(ret);
+                       return NULL;
+               }
+
                mode = strtol(text, &text, 8);
 
                if (*text != ' ') {
@@ -222,7 +235,8 @@
 }
 
 static PyMethodDef py_objects_methods[] = {
-       { "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS, NULL },
+       { "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS | METH_KEYWORDS,
+         NULL },
        { "sorted_tree_items", (PyCFunction)py_sorted_tree_items, METH_O, NULL },
        { NULL, NULL, 0, NULL }
 };
@@ -230,12 +244,23 @@
 PyMODINIT_FUNC
 init_objects(void)
 {
-       PyObject *m, *objects_mod;
+       PyObject *m, *objects_mod, *errors_mod;
 
        m = Py_InitModule3("_objects", py_objects_methods, NULL);
        if (m == NULL)
                return;
 
+
+       errors_mod = PyImport_ImportModule("dulwich.errors");
+       if (errors_mod == NULL)
+               return;
+
+       object_format_exception_cls = PyObject_GetAttrString(
+               errors_mod, "ObjectFormatException");
+       Py_DECREF(errors_mod);
+       if (object_format_exception_cls == NULL)
+               return;
+
        /* This is a circular import but should be safe since this module is
         * imported at at the very bottom of objects.py. */
        objects_mod = PyImport_ImportModule("dulwich.objects");
diff --git a/dulwich/objects.py b/dulwich/objects.py
--- a/dulwich/objects.py
+++ b/dulwich/objects.py
@@ -694,18 +694,20 @@
         return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
 
 
-def parse_tree(text):
+def parse_tree(text, strict=False):
     """Parse a tree text.
 
     :param text: Serialized text to parse
     :return: iterator of tuples of (name, mode, sha)
+    :raise ObjectFormatException: if the object was malformed in some way
     """
     count = 0
     l = len(text)
     while count < l:
         mode_end = text.index(' ', count)
         mode_text = text[count:mode_end]
-        assert mode_text[0] != '0'
+        if strict and mode_text.startswith('0'):
+            raise ObjectFormatException("Invalid mode '%s'" % mode_text)
         try:
             mode = int(mode_text, 8)
         except ValueError:
@@ -869,7 +871,8 @@
                          stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
                          # TODO: optionally exclude as in git fsck --strict
                          stat.S_IFREG | 0664)
-        for name, mode, sha in parse_tree("".join(self._chunked_text)):
+        for name, mode, sha in parse_tree("".join(self._chunked_text),
+                                          True):
             check_hexsha(sha, 'invalid sha %s' % sha)
             if '/' in name or name in ('', '.', '..'):
                 raise ObjectFormatException('invalid name %s' % name)
@@ -968,7 +971,7 @@
         self._parents = []
         self._extra = []
         self._author = None
-        for field, value in parse_commit("".join(self._chunked_text)):
+        for field, value in parse_commit(''.join(self._chunked_text)):
             if field == _TREE_HEADER:
                 self._tree = value
             elif field == _PARENT_HEADER:
diff --git a/dulwich/tests/test_objects.py b/dulwich/tests/test_objects.py
--- a/dulwich/tests/test_objects.py
+++ b/dulwich/tests/test_objects.py
@@ -23,6 +23,7 @@
 
 
 from cStringIO import StringIO
+import binascii
 import datetime
 import os
 import stat
@@ -465,6 +466,16 @@
         o = Tree.from_path(hex_to_filename(dir, tree_sha))
         self.assertEquals([('a', 0100644, a_sha), ('b', 0100644, b_sha)],
                           list(parse_tree(o.as_raw_string())))
+        # test a broken tree that has a leading 0 on the file mode
+        broken_tree = '0100644 foo\0' + hex_to_sha(a_sha)
+
+        def eval_parse_tree(*args, **kwargs):
+          return list(parse_tree(*args, **kwargs))
+
+        self.assertEquals([('foo', 0100644, a_sha)],
+                          eval_parse_tree(broken_tree))
+        self.assertRaises(ObjectFormatException,
+                          eval_parse_tree, broken_tree, strict=True)
 
     def test_parse_tree(self):
         self._do_test_parse_tree(_parse_tree_py)
@@ -520,6 +531,8 @@
         # TODO more whitelisted modes
         self.assertCheckFails(t, '123456 a\0%s' % sha)
         self.assertCheckFails(t, '123abc a\0%s' % sha)
+        # should fail check, but parses ok
+        self.assertCheckFails(t, '0100644 foo\0' + sha)
 
         # shas
         self.assertCheckFails(t, '100644 a\0%s' % ('x' * 5))

_______________________________________________
Mailing list: https://launchpad.net/~dulwich-users
Post to     : [email protected]
Unsubscribe : https://launchpad.net/~dulwich-users
More help   : https://help.launchpad.net/ListHelp

Reply via email to