Nicolas Pitre <n...@fluxnic.net> writes:

> This goes as follows:
>
> - Number of tree entries: variable length encoded.
>
> Then for each tree entry:
>
> - Path component reference: variable length encoded index into the path
>   string dictionary table which also covers the entry mode.  To make the
>   overall encoding efficient, the author table is already sorted by usage
>   frequency so the most used names are first and require the shortest
>   index encoding.

s/author table/tree path table/, I think. The reason why it is a
good idea to sort these tables by use count applies equally to both
the author table and the tree path table, though.

> - SHA1 reference: either variable length encoding of the index into the
>   SHA1 table or the literal SHA1 prefixed by 0 (see add_sha1_ref()).
>
> Rationale: all the tree object data is densely encoded while requiring
> no zlib inflate processing, and all SHA1 references are most likely to
> be direct indices into the pack index file requiring no SHA1 search.
> Path filtering can be accomplished on the path index directly without
> any string comparison during the tree traversal.
>
> Still lacking is some kind of delta encoding for multiple tree objects
> with only small differences between them.  But that'll come later.
>
> Signed-off-by: Nicolas Pitre <n...@fluxnic.net>
> ---
>  packv4-create.c | 63 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
>
> diff --git a/packv4-create.c b/packv4-create.c
> index cedbbd9..f46fbd9 100644
> --- a/packv4-create.c
> +++ b/packv4-create.c
> @@ -408,6 +408,69 @@ bad:
>       return NULL;
>  }
>  
> +/*
> + * This converts a canonical tree object buffer into its
> + * tightly packed representation using the already populated
> + * and sorted tree_path_table dictionary.  The parsing is
> + * strict so to ensure the canonical version may always be
> + * regenerated and produce the same hash.
> + */
> +void * conv_to_dict_tree(void *buffer, unsigned long *psize)
> +{
> +     unsigned long size = *psize;
> +     unsigned char *in, *out, *end;
> +     struct tree_desc desc;
> +     struct name_entry name_entry;
> +     int nb_entries;
> +
> +     if (!size)
> +             return NULL;
> +
> +     /*
> +      * We can't make sure the result will always be smaller.
> +      * The smallest possible entry is "0 x\0<40 byte SHA1>"
> +      * or 44 bytes.  The output entry may have a realistic path index
> +      * encoding using up to 3 bytes, and a non indexable SHA1 meaning
> +      * 41 bytes.  And the output data already has the and nb_entries
> +      * headers.  In practice the output size will be significantly
> +      * smaller but for now let's make it simple.
> +      */
> +     in = buffer;
> +     out = xmalloc(size);
> +     end = out + size;
> +     buffer = out;
> +
> +     /* let's count how many entries there are */
> +     init_tree_desc(&desc, in, size);
> +     nb_entries = 0;
> +     while (tree_entry(&desc, &name_entry))
> +             nb_entries++;
> +     out = add_number(out, nb_entries);
> +
> +     init_tree_desc(&desc, in, size);
> +     while (tree_entry(&desc, &name_entry)) {
> +             int pathlen = tree_entry_len(&name_entry);
> +             int index = dict_add_entry(tree_path_table, name_entry.mode,
> +                                        name_entry.path, pathlen);
> +             if (index < 0) {
> +                     error("missing tree dict entry");
> +                     free(buffer);
> +                     return NULL;
> +             }
> +             if (end - out < 45) {
> +                     unsigned long sofar = out - (unsigned char *)buffer;
> +                     buffer = xrealloc(buffer, sofar + 45);
> +                     out = (unsigned char *)buffer + sofar;
> +                     end = out + 45;
> +             }
> +             out = add_number(out, index);
> +             out = add_sha1_ref(out, name_entry.sha1);
> +     }
> +
> +     *psize = out - (unsigned char *)buffer;
> +     return buffer;
> +}
> +
>  static struct pack_idx_entry *get_packed_object_list(struct packed_git *p)
>  {
>       unsigned i, nr_objects = p->num_objects;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to