Hi,

On Thu, Sep 05, 2013 at 02:19:28AM -0400, Nicolas Pitre wrote:
> Let's create another dictionary table to hold the author and committer
> entries.  We use the same table format used for tree entries where the
> 16 bit data prefix is conveniently used to store the timezone value.
> 
> In order to copy straight from a commit object buffer, dict_add_entry()
> is modified to get the string length as the provided string pointer is
> not always be null terminated.
> 
> Signed-off-by: Nicolas Pitre <n...@fluxnic.net>
> ---
>  packv4-create.c | 98 
> +++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 89 insertions(+), 9 deletions(-)
> 
> diff --git a/packv4-create.c b/packv4-create.c
> index eccd9fc..5c08871 100644
> --- a/packv4-create.c
> +++ b/packv4-create.c
> @@ -1,5 +1,5 @@
>  /*
> - * packv4-create.c: management of dictionary tables used in pack v4
> + * packv4-create.c: creation of dictionary tables and objects used in pack v4
>   *
>   * (C) Nicolas Pitre <n...@fluxnic.net>
>   *
> @@ -80,9 +80,9 @@ static void rehash_entries(struct dict_table *t)
>       }
>  }
>  
> -int dict_add_entry(struct dict_table *t, int val, const char *str)
> +int dict_add_entry(struct dict_table *t, int val, const char *str, int 
> str_len)
>  {
> -     int i, val_len = 2, str_len = strlen(str) + 1;
> +     int i, val_len = 2;
>  
>       if (t->ptr + val_len + str_len > t->size) {

We need a +1 here on the left side, i.e.

        if (t->ptr + val_len + str_len + 1 > t->size) {

The str_len variable accounted for the terminating null character
before, but this patch removes str_len = strlen(str) + 1; above, and
callers specify the length of str without the terminating null in
str_len.  Thus it can lead to memory corruption, when the new entry
happens to end at 't->ptr + val_len + str_len' and the line added in
the next hunk writes the terminating null beyond the end of the
buffer.  I couldn't create a v4 pack from a current linux repo because
of this; either glibc detected something or 'git packv4-create'
crashed.

Sidenote: couldn't we call the 'ptr' field something else, like
end_offset or end_idx?  It took me some headscratching to figure out
why is it OK to compare a pointer to an integer above, or use a
pointer without dereferencing as an index into an array below (because
ptr is, well, not a pointer after all).

>               t->size = (t->size + val_len + str_len + 1024) * 3 / 2;
> @@ -92,6 +92,7 @@ int dict_add_entry(struct dict_table *t, int val, const 
> char *str)
>       t->data[t->ptr] = val >> 8;
>       t->data[t->ptr + 1] = val;
>       memcpy(t->data + t->ptr + val_len, str, str_len);
> +     t->data[t->ptr + val_len + str_len] = 0;
>  
>       i = (t->nb_entries) ?
>               locate_entry(t, t->data + t->ptr, val_len + str_len) : -1;
> @@ -107,7 +108,7 @@ int dict_add_entry(struct dict_table *t, int val, const 
> char *str)
>       t->entry[t->nb_entries].offset = t->ptr;
>       t->entry[t->nb_entries].size = val_len + str_len;
>       t->entry[t->nb_entries].hits = 1;
> -     t->ptr += val_len + str_len;
> +     t->ptr += val_len + str_len + 1;

Good.


Best,
Gábor


>       t->nb_entries++;
>  
>       if (t->hash_size * 3 <= t->nb_entries * 4)
> @@ -135,8 +136,73 @@ static void sort_dict_entries_by_hits(struct dict_table 
> *t)
>       rehash_entries(t);
>  }
>  
> +static struct dict_table *commit_name_table;
>  static struct dict_table *tree_path_table;
>  
> +/*
> + * Parse the author/committer line from a canonical commit object.
> + * The 'from' argument points right after the "author " or "committer "
> + * string.  The time zone is parsed and stored in *tz_val.  The returned
> + * pointer is right after the end of the email address which is also just
> + * before the time value, or NULL if a parsing error is encountered.
> + */
> +static char *get_nameend_and_tz(char *from, int *tz_val)
> +{
> +     char *end, *tz;
> +
> +     tz = strchr(from, '\n');
> +     /* let's assume the smallest possible string to be "x <x> 0 +0000\n" */
> +     if (!tz || tz - from < 13)
> +             return NULL;
> +     tz -= 4;
> +     end = tz - 4;
> +     while (end - from > 5 && *end != ' ')
> +             end--;
> +     if (end[-1] != '>' || end[0] != ' ' || tz[-2] != ' ')
> +             return NULL;
> +     *tz_val = (tz[0] - '0') * 1000 +
> +               (tz[1] - '0') * 100 +
> +               (tz[2] - '0') * 10 +
> +               (tz[3] - '0');
> +     switch (tz[-1]) {
> +     default:        return NULL;
> +     case '+':       break;
> +     case '-':       *tz_val = -*tz_val;
> +     }
> +     return end;
> +}
> +
> +static int add_commit_dict_entries(void *buf, unsigned long size)
> +{
> +     char *name, *end = NULL;
> +     int tz_val;
> +
> +     if (!commit_name_table)
> +             commit_name_table = create_dict_table();
> +
> +     /* parse and add author info */
> +     name = strstr(buf, "\nauthor ");
> +     if (name) {
> +             name += 8;
> +             end = get_nameend_and_tz(name, &tz_val);
> +     }
> +     if (!name || !end)
> +             return -1;
> +     dict_add_entry(commit_name_table, tz_val, name, end - name);
> +
> +     /* parse and add committer info */
> +     name = strstr(end, "\ncommitter ");
> +     if (name) {
> +            name += 11;
> +            end = get_nameend_and_tz(name, &tz_val);
> +     }
> +     if (!name || !end)
> +             return -1;
> +     dict_add_entry(commit_name_table, tz_val, name, end - name);
> +
> +     return 0;
> +}
> +
>  static int add_tree_dict_entries(void *buf, unsigned long size)
>  {
>       struct tree_desc desc;
> @@ -146,13 +212,16 @@ static int add_tree_dict_entries(void *buf, unsigned 
> long size)
>               tree_path_table = create_dict_table();
>  
>       init_tree_desc(&desc, buf, size);
> -     while (tree_entry(&desc, &name_entry))
> +     while (tree_entry(&desc, &name_entry)) {
> +             int pathlen = tree_entry_len(&name_entry);
>               dict_add_entry(tree_path_table, name_entry.mode,
> -                            name_entry.path);
> +                             name_entry.path, pathlen);
> +     }
> +
>       return 0;
>  }
>  
> -void dict_dump(struct dict_table *t)
> +void dump_dict_table(struct dict_table *t)
>  {
>       int i;
>  
> @@ -169,6 +238,12 @@ void dict_dump(struct dict_table *t)
>       }
>  }
>  
> +static void dict_dump(void)
> +{
> +     dump_dict_table(commit_name_table);
> +     dump_dict_table(tree_path_table);
> +}
> +
>  struct idx_entry
>  {
>       off_t                offset;
> @@ -205,6 +280,7 @@ static int create_pack_dictionaries(struct packed_git *p)
>               enum object_type type;
>               unsigned long size;
>               struct object_info oi = {};
> +             int (*add_dict_entries)(void *, unsigned long);
>  
>               oi.typep = &type;
>               oi.sizep = &size;
> @@ -213,7 +289,11 @@ static int create_pack_dictionaries(struct packed_git *p)
>                           sha1_to_hex(objects[i].sha1), p->pack_name);
>  
>               switch (type) {
> +             case OBJ_COMMIT:
> +                     add_dict_entries = add_commit_dict_entries;
> +                     break;
>               case OBJ_TREE:
> +                     add_dict_entries = add_tree_dict_entries;
>                       break;
>               default:
>                       continue;
> @@ -225,7 +305,7 @@ static int create_pack_dictionaries(struct packed_git *p)
>               if (check_sha1_signature(objects[i].sha1, data, size, 
> typename(type)))
>                       die("packed %s from %s is corrupt",
>                           sha1_to_hex(objects[i].sha1), p->pack_name);
> -             if (add_tree_dict_entries(data, size) < 0)
> +             if (add_dict_entries(data, size) < 0)
>                       die("can't process %s object %s",
>                               typename(type), sha1_to_hex(objects[i].sha1));
>               free(data);
> @@ -285,6 +365,6 @@ int main(int argc, char *argv[])
>               exit(1);
>       }
>       process_one_pack(argv[1]);
> -     dict_dump(tree_path_table);
> +     dict_dump();
>       return 0;
>  }
> -- 
> 1.8.4.38.g317e65b
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to