Re: [PATCH 01/23] pack v4: initial pack dictionary structure and code
Nicolas Pitre n...@fluxnic.net writes: Signed-off-by: Nicolas Pitre n...@fluxnic.net --- Was there a reason not to reuse the hash-table Linus did in hash.[ch]? It may not make much of a difference for something so small and isolated from the rest of the system, but if hash.[ch] can be easily fixed with a small tweak to suit the use by this subsystem better, it might be worth reusing the existing code with improvement, which may help other potential users. packv4-create.c | 137 1 file changed, 137 insertions(+) create mode 100644 packv4-create.c diff --git a/packv4-create.c b/packv4-create.c new file mode 100644 index 000..2de6d41 --- /dev/null +++ b/packv4-create.c @@ -0,0 +1,137 @@ +/* + * packv4-create.c: management of dictionary tables used in pack v4 + * + * (C) Nicolas Pitre n...@fluxnic.net + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include cache.h + +struct data_entry { + unsigned offset; + unsigned hits; +}; + +struct dict_table { + char *data; + unsigned ptr; + unsigned size; + struct data_entry *entry; + unsigned nb_entries; + unsigned max_entries; + unsigned *hash; + unsigned hash_size; +}; + +struct dict_table *create_dict_table(void) +{ + return xcalloc(sizeof(struct dict_table), 1); +} + +void destroy_dict_table(struct dict_table *t) +{ + free(t-data); + free(t-entry); + free(t-hash); + free(t); +} + +static int locate_entry(struct dict_table *t, const char *str) +{ + int i = 0; + const unsigned char *s = (const unsigned char *) str; + + while (*s) + i = i * 111 + *s++; + i = (unsigned)i % t-hash_size; + + while (t-hash[i]) { + unsigned n = t-hash[i] - 1; + if (!strcmp(str, t-data + t-entry[n].offset)) + return n; + if (++i = t-hash_size) + i = 0; + } + return -1 - i; +} + +static void rehash_entries(struct dict_table *t) +{ + unsigned n; + + t-hash_size *= 2; + if (t-hash_size 1024) + t-hash_size = 1024; + t-hash = xrealloc(t-hash, t-hash_size * sizeof(*t-hash)); + memset(t-hash, 0, t-hash_size * sizeof(*t-hash)); + + for (n = 0; n t-nb_entries; n++) { + int i = locate_entry(t, t-data + t-entry[n].offset); + if (i 0) + t-hash[-1 - i] = n + 1; + } +} + +int dict_add_entry(struct dict_table *t, const char *str) +{ + int i, len = strlen(str) + 1; + + if (t-ptr + len = t-size) { + t-size = (t-size + len + 1024) * 3 / 2; + t-data = xrealloc(t-data, t-size); + } + memcpy(t-data + t-ptr, str, len); + + i = (t-nb_entries) ? locate_entry(t, t-data + t-ptr) : -1; + if (i = 0) { + t-entry[i].hits++; + return i; + } + + if (t-nb_entries = t-max_entries) { + t-max_entries = (t-max_entries + 1024) * 3 / 2; + t-entry = xrealloc(t-entry, t-max_entries * sizeof(*t-entry)); + } + t-entry[t-nb_entries].offset = t-ptr; + t-entry[t-nb_entries].hits = 1; + t-ptr += len + 1; + t-nb_entries++; + + if (t-hash_size * 3 = t-nb_entries * 4) + rehash_entries(t); + else + t-hash[-1 - i] = t-nb_entries; + + return t-nb_entries - 1; +} + +static int cmp_dict_entries(const void *a_, const void *b_) +{ + const struct data_entry *a = a_; + const struct data_entry *b = b_; + int diff = b-hits - a-hits; + if (!diff) + diff = a-offset - b-offset; + return diff; +} + +static void sort_dict_entries_by_hits(struct dict_table *t) +{ + qsort(t-entry, t-nb_entries, sizeof(*t-entry), cmp_dict_entries); + t-hash_size = (t-nb_entries * 4 / 3) / 2; + rehash_entries(t); +} + +void dict_dump(struct dict_table *t) +{ + int i; + + sort_dict_entries_by_hits(t); + for (i = 0; i t-nb_entries; i++) + printf(%d\t%s\n, + t-entry[i].hits, + t-data + t-entry[i].offset); +} -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/23] pack v4: initial pack dictionary structure and code
On Tue, 27 Aug 2013, Junio C Hamano wrote: Nicolas Pitre n...@fluxnic.net writes: Signed-off-by: Nicolas Pitre n...@fluxnic.net --- Was there a reason not to reuse the hash-table Linus did in hash.[ch]? Well... Most likely because when I started that code (which used to be quite different initially) it might not have been served correctly by hash.c, or any other reasons I long have forgotten by now which might or might not still be valid. It may not make much of a difference for something so small and isolated from the rest of the system, but if hash.[ch] can be easily fixed with a small tweak to suit the use by this subsystem better, it might be worth reusing the existing code with improvement, which may help other potential users. Absolutely. If someone wants to give a hand in that direction I'll happily integrate patches into my series. I cannot promise I'll do the work myself as I prefer spending the time I have available on actually making pack v4 usable. Nicolas -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/23] pack v4: initial pack dictionary structure and code
Signed-off-by: Nicolas Pitre n...@fluxnic.net --- packv4-create.c | 137 1 file changed, 137 insertions(+) create mode 100644 packv4-create.c diff --git a/packv4-create.c b/packv4-create.c new file mode 100644 index 000..2de6d41 --- /dev/null +++ b/packv4-create.c @@ -0,0 +1,137 @@ +/* + * packv4-create.c: management of dictionary tables used in pack v4 + * + * (C) Nicolas Pitre n...@fluxnic.net + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include cache.h + +struct data_entry { + unsigned offset; + unsigned hits; +}; + +struct dict_table { + char *data; + unsigned ptr; + unsigned size; + struct data_entry *entry; + unsigned nb_entries; + unsigned max_entries; + unsigned *hash; + unsigned hash_size; +}; + +struct dict_table *create_dict_table(void) +{ + return xcalloc(sizeof(struct dict_table), 1); +} + +void destroy_dict_table(struct dict_table *t) +{ + free(t-data); + free(t-entry); + free(t-hash); + free(t); +} + +static int locate_entry(struct dict_table *t, const char *str) +{ + int i = 0; + const unsigned char *s = (const unsigned char *) str; + + while (*s) + i = i * 111 + *s++; + i = (unsigned)i % t-hash_size; + + while (t-hash[i]) { + unsigned n = t-hash[i] - 1; + if (!strcmp(str, t-data + t-entry[n].offset)) + return n; + if (++i = t-hash_size) + i = 0; + } + return -1 - i; +} + +static void rehash_entries(struct dict_table *t) +{ + unsigned n; + + t-hash_size *= 2; + if (t-hash_size 1024) + t-hash_size = 1024; + t-hash = xrealloc(t-hash, t-hash_size * sizeof(*t-hash)); + memset(t-hash, 0, t-hash_size * sizeof(*t-hash)); + + for (n = 0; n t-nb_entries; n++) { + int i = locate_entry(t, t-data + t-entry[n].offset); + if (i 0) + t-hash[-1 - i] = n + 1; + } +} + +int dict_add_entry(struct dict_table *t, const char *str) +{ + int i, len = strlen(str) + 1; + + if (t-ptr + len = t-size) { + t-size = (t-size + len + 1024) * 3 / 2; + t-data = xrealloc(t-data, t-size); + } + memcpy(t-data + t-ptr, str, len); + + i = (t-nb_entries) ? locate_entry(t, t-data + t-ptr) : -1; + if (i = 0) { + t-entry[i].hits++; + return i; + } + + if (t-nb_entries = t-max_entries) { + t-max_entries = (t-max_entries + 1024) * 3 / 2; + t-entry = xrealloc(t-entry, t-max_entries * sizeof(*t-entry)); + } + t-entry[t-nb_entries].offset = t-ptr; + t-entry[t-nb_entries].hits = 1; + t-ptr += len + 1; + t-nb_entries++; + + if (t-hash_size * 3 = t-nb_entries * 4) + rehash_entries(t); + else + t-hash[-1 - i] = t-nb_entries; + + return t-nb_entries - 1; +} + +static int cmp_dict_entries(const void *a_, const void *b_) +{ + const struct data_entry *a = a_; + const struct data_entry *b = b_; + int diff = b-hits - a-hits; + if (!diff) + diff = a-offset - b-offset; + return diff; +} + +static void sort_dict_entries_by_hits(struct dict_table *t) +{ + qsort(t-entry, t-nb_entries, sizeof(*t-entry), cmp_dict_entries); + t-hash_size = (t-nb_entries * 4 / 3) / 2; + rehash_entries(t); +} + +void dict_dump(struct dict_table *t) +{ + int i; + + sort_dict_entries_by_hits(t); + for (i = 0; i t-nb_entries; i++) + printf(%d\t%s\n, + t-entry[i].hits, + t-data + t-entry[i].offset); +} -- 1.8.4.22.g54757b7 -- To unsubscribe from this list: send the line unsubscribe git in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html