Re: [RFC/PATCH 1/2] config: Add cache for config value querying

2014-05-28 Thread Eric Sunshine
On Mon, May 26, 2014 at 1:33 PM, Tanay Abhra tanay...@gmail.com wrote:
 Add an internal cache with the all variable value pairs read from the usual
 config files(repo specific .git/config, user wide ~/.gitconfig and the global

s/(/ (/

 /etc/gitconfig). Also, add two external functions `git_config_get_string` and
 `git_config_get_string_multi` for querying in an non callback manner from the

s/an/a/
s/non/non-/

More below.

 cache.

 Signed-off-by: Tanay Abhra tanay...@gmail.com
 ---
  cache.h  |   2 ++
  config.c | 105 
 +++
  2 files changed, 107 insertions(+)

 diff --git a/cache.h b/cache.h
 index 107ac61..2038150 100644
 --- a/cache.h
 +++ b/cache.h
 @@ -1271,6 +1271,8 @@ extern int check_repository_format_version(const char 
 *var, const char *value, v
  extern int git_env_bool(const char *, int);
  extern int git_config_system(void);
  extern int config_error_nonbool(const char *);
 +extern const char *git_config_get_string(const char *);
 +extern const struct string_list *git_config_get_string_multi(const char *);
  #if defined(__GNUC__)  ! defined(__clang__)
  #define config_error_nonbool(s) (config_error_nonbool(s), -1)
  #endif
 diff --git a/config.c b/config.c
 index a30cb5c..f6515c2 100644
 --- a/config.c
 +++ b/config.c
 @@ -9,6 +9,8 @@
  #include exec_cmd.h
  #include strbuf.h
  #include quote.h
 +#include hashmap.h
 +#include string-list.h

  struct config_source {
 struct config_source *prev;
 @@ -37,6 +39,102 @@ static struct config_source *cf;

  static int zlib_compression_seen;

 +struct hashmap config_cache;

This should be 'static'.

 +struct config_cache_node {
 +   struct hashmap_entry ent;
 +   struct strbuf key;

Upon reading this, I had the same question as Torsten: Why isn't 'key'
a plain 'char *' allocated when the entry is created? Your answer:

To maintain consistency with config.c. config.c uses strbuf for
both key and value throughout. I found the reason by git-blaming
config.c. Key length is flexible so it would be better to use a
api construct such as strbuf for it.

doesn't make sense. The existing callback-based code re-uses the
strbuf for each key as it parses the file, however, you are just
storing each key once in the hashmap and never altering it, thus a
strbuf is overkill.

 +   struct string_list *value_list ;

Why isn't this just a plain 'struct string_list'? Why the extra
indirection of a pointer?

Drop space before semicolon.

 +};
 +
 +static int hashmap_init_v = 0;

It's not obvious what _v is meant to convey.

 +static int config_cache_node_cmp(const struct config_cache_node *e1,
 +  const struct config_cache_node *e2, const char 
 *key)
 +{
 +   return strcmp(e1-key.buf, key ? key : e2-key.buf);
 +}
 +
 +static int config_cache_node_cmp_icase(const struct config_cache_node *e1,
 +const struct config_cache_node *e2, const 
 char* key)
 +{
 +   return strcasecmp(e1-key.buf, key ? key : e2-key.buf);
 +}
 +
 +static void config_cache_init(const int icase)

'const int' as a function argument is very unusual in this code base.
(I found only a single instance of it in
log-tree.[ch]:parse_decorate_color_config.)

 +{
 +   hashmap_init(config_cache, (hashmap_cmp_fn) (icase ? 
 config_cache_node_cmp_icase
 +   : config_cache_node_cmp), 0);
 +}
 +
 +static void config_cache_free(void)
 +{
 +   hashmap_free(config_cache, 1);
 +}

Doesn't this leak the strbuf 'key' and string_list 'value_list'?

 +static struct config_cache_node *config_cache_find_entry(const char *key)
 +{
 +   struct hashmap_entry k;
 +   hashmap_entry_init(k, strihash(key));

How does this unconditional use of case-insensitive strihash()
interact with the 'icase' argument of config_cache_init()?

 +   return hashmap_get(config_cache, k, key);
 +}
 +
 +static struct string_list *config_cache_get_value(const char *key)
 +{
 +   struct config_cache_node *e = config_cache_find_entry(key);
 +   if (e == NULL)

Rephrase: if (!e)

 +   return NULL;
 +   else
 +   return (e-value_list);

Unnecessary parentheses.

 +}

A purely subject rewrite of the above:

return e ? e-value_list : NULL;

 +static void config_cache_set_value(const char *key, const char *value)
 +{
 +   struct config_cache_node *e;
 +
 +   if (!value)
 +   return;

Again, I had the same questions as Torsten. (Is this supposed to
delete the entry?, Is it a programmer error?, etc.) From a pure
reading, it's not obvious why a NULL 'value' is handled this way. The
intent might be clearer if the caller instead performs the check, and
invokes config_cache_set_value() only if 'value' is non-NULL.

 +   e = config_cache_find_entry(key);
 +   if (!e) {
 +   e = malloc(sizeof(*e));
 +   hashmap_entry_init(e, strihash(key));

Same question as above regarding 

Re: [RFC/PATCH 1/2] config: Add cache for config value querying

2014-05-27 Thread Tanay Abhra
Hi,

On 05/26/2014 01:02 PM, Torsten Bögershausen wrote:
 Add an internal cache with the all variable value pairs read from the usual
 cache: The word cache is in Git often used for index 
Okay, point noted. I thought about choosing between hashmap and cache and 
chose
the later.
 variable value can be written as key value

I  had used the term variable to be consistent with the documentation
(api-config.txt). But I think key is much clearer.

 usual: I don't think we handle unusual config files,
 (so can we drop the word usual ?)

Okay, noted.

 I think the (important) first line can be written like this:
 
 Add a hash table with the all key-value pairs read from the
 or
 Add a hash table to cache all key-value pairs read from the
 
 config files(repo specific .git/config, user wide ~/.gitconfig and the global
 /etc/gitconfig). Also, add two external functions `git_config_get_string` and
 Can we drop Also ?
 @@ -37,6 +39,102 @@ static struct config_source *cf;
  
  static int zlib_compression_seen;
  
 +struct hashmap config_cache;
 +
 +struct config_cache_node {
 +struct hashmap_entry ent;
 +struct strbuf key;
 Do we need a whole strbuf for the key?
 Or could a char *key work as well? 
 (and/or could it be const char *key ?

To maintain consistency with config.c. config.c uses strbuf for both key and 
value
throughout. I found the reason by git-blaming config.c. Key length is flexible 
so it
would be better to use a api construct such as strbuf for it.

 +struct string_list *value_list ;
 
 
 
 +static struct string_list *config_cache_get_value(const char *key)
 +{
 +struct config_cache_node *e = config_cache_find_entry(key);
 why e ? Will node be easier to read ? Or entry ? 

Noted. Entry is much better.

 +static void config_cache_set_value(const char *key, const char *value)
 +{
 +struct config_cache_node *e;
 +
 +if (!value)
 +return;
 Hm, either NULL could mean unset==remove the value, (but we don't do that, 
 do we?
 
 Or it could mean a programming or runtime error?, Should there be a warning ?

Nope. It is just a check to not save blank values for a key in the hashmap. 
Removal
functionality will come later. NULL==remove is implemented in
git_config_set_multivar_in_file(). We are not reading key value pairs from 
that, just
from git_config().
 +
 +e = config_cache_find_entry(key);
 +if (!e) {
 +e = malloc(sizeof(*e));
 +hashmap_entry_init(e, strihash(key));
 +strbuf_init((e-key), 1024);
 +strbuf_addstr((e-key),key);
 +e-value_list = malloc(sizeof(struct string_list));
 +e-value_list-strdup_strings = 1;
 +e-value_list-nr = 0;
 +e-value_list-alloc = 0;
 +e-value_list-items = NULL;
 +e-value_list-cmp = NULL;
 When malloc() is replaced by xcalloc()  the x = NULL and y = 0 can go away,
 and the code is shorter and easier to read.

Much better, thanks.

 +extern const char *git_config_get_string(const char *name)
 +{
 +struct string_list *values;
 +int num_values;
 +char *result;
 +values = config_cache_get_value(name);
 +if (!values)
 +return NULL;
 +num_values = values-nr;
 +result = values-items[num_values-1].string ;
 We could get rid of the variable  int num_values by simply writing
 result = values-items[values-nr-1].string;
 

Noted.


Cheers,
Tanay Abhra.

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC/PATCH 1/2] config: Add cache for config value querying

2014-05-26 Thread Tanay Abhra
Add an internal cache with the all variable value pairs read from the usual
config files(repo specific .git/config, user wide ~/.gitconfig and the global
/etc/gitconfig). Also, add two external functions `git_config_get_string` and
`git_config_get_string_multi` for querying in an non callback manner from the
cache.

Signed-off-by: Tanay Abhra tanay...@gmail.com
---
 cache.h  |   2 ++
 config.c | 105 +++
 2 files changed, 107 insertions(+)

diff --git a/cache.h b/cache.h
index 107ac61..2038150 100644
--- a/cache.h
+++ b/cache.h
@@ -1271,6 +1271,8 @@ extern int check_repository_format_version(const char 
*var, const char *value, v
 extern int git_env_bool(const char *, int);
 extern int git_config_system(void);
 extern int config_error_nonbool(const char *);
+extern const char *git_config_get_string(const char *);
+extern const struct string_list *git_config_get_string_multi(const char *);
 #if defined(__GNUC__)  ! defined(__clang__)
 #define config_error_nonbool(s) (config_error_nonbool(s), -1)
 #endif
diff --git a/config.c b/config.c
index a30cb5c..f6515c2 100644
--- a/config.c
+++ b/config.c
@@ -9,6 +9,8 @@
 #include exec_cmd.h
 #include strbuf.h
 #include quote.h
+#include hashmap.h
+#include string-list.h
 
 struct config_source {
struct config_source *prev;
@@ -37,6 +39,102 @@ static struct config_source *cf;
 
 static int zlib_compression_seen;
 
+struct hashmap config_cache;
+
+struct config_cache_node {
+   struct hashmap_entry ent;
+   struct strbuf key;
+   struct string_list *value_list ;
+};
+
+static int hashmap_init_v = 0;
+
+static int config_cache_node_cmp(const struct config_cache_node *e1,
+  const struct config_cache_node *e2, const char *key)
+{
+   return strcmp(e1-key.buf, key ? key : e2-key.buf);
+}
+
+static int config_cache_node_cmp_icase(const struct config_cache_node *e1,
+const struct config_cache_node *e2, const 
char* key)
+{
+   return strcasecmp(e1-key.buf, key ? key : e2-key.buf);
+}
+
+static void config_cache_init(const int icase)
+{
+   hashmap_init(config_cache, (hashmap_cmp_fn) (icase ? 
config_cache_node_cmp_icase
+   : config_cache_node_cmp), 0);
+}
+
+static void config_cache_free(void)
+{
+   hashmap_free(config_cache, 1);
+}
+
+static struct config_cache_node *config_cache_find_entry(const char *key)
+{
+   struct hashmap_entry k;
+   hashmap_entry_init(k, strihash(key));
+   return hashmap_get(config_cache, k, key);
+}
+
+static struct string_list *config_cache_get_value(const char *key)
+{
+   struct config_cache_node *e = config_cache_find_entry(key);
+   if (e == NULL)
+   return NULL;
+   else
+   return (e-value_list);
+}
+
+
+static void config_cache_set_value(const char *key, const char *value)
+{
+   struct config_cache_node *e;
+
+   if (!value)
+   return;
+
+   e = config_cache_find_entry(key);
+   if (!e) {
+   e = malloc(sizeof(*e));
+   hashmap_entry_init(e, strihash(key));
+   strbuf_init((e-key), 1024);
+   strbuf_addstr((e-key),key);
+   e-value_list = malloc(sizeof(struct string_list));
+   e-value_list-strdup_strings = 1;
+   e-value_list-nr = 0;
+   e-value_list-alloc = 0;
+   e-value_list-items = NULL;
+   e-value_list-cmp = NULL;
+   string_list_append(e-value_list, value);
+   hashmap_add(config_cache, e);
+   }
+   else {
+   if (!unsorted_string_list_has_string((e-value_list), value))
+   string_list_append((e-value_list), value);
+   }
+}
+
+extern const char *git_config_get_string(const char *name)
+{
+   struct string_list *values;
+   int num_values;
+   char *result;
+   values = config_cache_get_value(name);
+   if (!values)
+   return NULL;
+   num_values = values-nr;
+   result = values-items[num_values-1].string ;
+   return result;
+}
+
+extern const struct string_list *git_config_get_string_multi(const char *key)
+{
+   return config_cache_get_value(key);
+}
+
 static int config_file_fgetc(struct config_source *conf)
 {
return fgetc(conf-u.file);
@@ -333,6 +431,7 @@ static int get_value(config_fn_t fn, void *data, struct 
strbuf *name)
if (!value)
return -1;
}
+   config_cache_set_value(name-buf, value);
return fn(name-buf, value, data);
 }
 
@@ -1135,6 +1234,12 @@ int git_config_early(config_fn_t fn, void *data, const 
char *repo_config)
char *xdg_config = NULL;
char *user_config = NULL;
 
+   int icase = 1;
+   if (!hashmap_init_v) {
+   config_cache_init(icase);
+   hashmap_init_v = 1;
+   }
+
home_config_paths(user_config, 

Re: [RFC/PATCH 1/2] config: Add cache for config value querying

2014-05-26 Thread Torsten Bögershausen
On 2014-05-26 19.33, Tanay Abhra wrote:
I like the idea.
Please allow some minor comments (and read them as questions rather then 
answers)
 Add an internal cache with the all variable value pairs read from the usual
cache: The word cache is in Git often used for index 
variable value can be written as key value
usual: I don't think we handle unusual config files,
(so can we drop the word usual ?)
I think the (important) first line can be written like this:

Add a hash table with the all key-value pairs read from the
or
Add a hash table to cache all key-value pairs read from the

 config files(repo specific .git/config, user wide ~/.gitconfig and the global
 /etc/gitconfig). Also, add two external functions `git_config_get_string` and
Can we drop Also ?
 @@ -37,6 +39,102 @@ static struct config_source *cf;
  
  static int zlib_compression_seen;
  
 +struct hashmap config_cache;
 +
 +struct config_cache_node {
 + struct hashmap_entry ent;
 + struct strbuf key;
Do we need a whole strbuf for the key?
Or could a char *key work as well? 
(and/or could it be const char *key ?
 + struct string_list *value_list ;



 +static struct string_list *config_cache_get_value(const char *key)
 +{
 + struct config_cache_node *e = config_cache_find_entry(key);
why e ? Will node be easier to read ? Or entry ? 


 +static void config_cache_set_value(const char *key, const char *value)
 +{
 + struct config_cache_node *e;
 +
 + if (!value)
 + return;
Hm, either NULL could mean unset==remove the value, (but we don't do that, do 
we?

Or it could mean a programming or runtime error?, Should there be a warning ?

 +
 + e = config_cache_find_entry(key);
 + if (!e) {
 + e = malloc(sizeof(*e));
 + hashmap_entry_init(e, strihash(key));
 + strbuf_init((e-key), 1024);
 + strbuf_addstr((e-key),key);
 + e-value_list = malloc(sizeof(struct string_list));
 + e-value_list-strdup_strings = 1;
 + e-value_list-nr = 0;
 + e-value_list-alloc = 0;
 + e-value_list-items = NULL;
 + e-value_list-cmp = NULL;
When malloc() is replaced by xcalloc()  the x = NULL and y = 0 can go away,
and the code is shorter and easier to read.


 +extern const char *git_config_get_string(const char *name)
 +{
 + struct string_list *values;
 + int num_values;
 + char *result;
 + values = config_cache_get_value(name);
 + if (!values)
 + return NULL;
 + num_values = values-nr;
 + result = values-items[num_values-1].string ;
We could get rid of the variable  int num_values by simply writing
result = values-items[values-nr-1].string;




--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html