Re: [PATCH/WIP v2 05/14] read-cache: put some limits on file watching

2014-01-19 Thread Thomas Rast
Nguyễn Thái Ngọc Duy pclo...@gmail.com writes:

 watch_entries() is a lot of computation and could trigger a lot more
 lookups in file-watcher. Normally after the first set of watches are
 in place, we do not need to update often. Moreover if the number of
 entries is small, the overhead of file watcher may actually slow git
 down.

 This patch only allows to update watches if the number of watchable
 files is over a limit (and there are new files added if this is not
 the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
 920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
 it starts to take longer than 100ms. 2^16 is chosen at the minimum
 limit to start using file watcher.

 Recently updated files are not considered watchable because they are
 likely to be updated again soon, not worth the ping-pong game with
 file watcher. The default limit 30min is just a random value.

But then a fresh clone of a big repository would not get any benefit
from the watcher?

Not yet sure how to best handle this.

 Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
 ---
  Documentation/config.txt |  9 +
  cache.h  |  1 +
  read-cache.c | 44 
  3 files changed, 46 insertions(+), 8 deletions(-)

 diff --git a/Documentation/config.txt b/Documentation/config.txt
 index a405806..e394399 100644
 --- a/Documentation/config.txt
 +++ b/Documentation/config.txt
 @@ -1038,6 +1038,15 @@ difftool.tool.cmd::
  difftool.prompt::
   Prompt before each invocation of the diff tool.
  
 +filewatcher.minfiles::
 + Start watching files if the number of watchable files are
 + above this limit. Default value is 65536.
 +
 +filewatcher.recentlimit::
 + Files that are last updated within filewatcher.recentlimit
 + seconds from now are not considered watchable. Default value
 + is 1800 (30 minutes).
 +
  fetch.recurseSubmodules::
   This option can be either set to a boolean value or to 'on-demand'.
   Setting it to a boolean changes the behavior of fetch and pull to
 diff --git a/cache.h b/cache.h
 index 0d1..bcec29b 100644
 --- a/cache.h
 +++ b/cache.h
 @@ -278,6 +278,7 @@ struct index_state {
   struct cache_tree *cache_tree;
   struct cache_time timestamp;
   unsigned name_hash_initialized : 1,
 +  update_watches : 1,
initialized : 1;
   struct hashmap name_hash;
   struct hashmap dir_hash;
 diff --git a/read-cache.c b/read-cache.c
 index 21c3207..406834a 100644
 --- a/read-cache.c
 +++ b/read-cache.c
 @@ -38,6 +38,8 @@ static struct cache_entry *refresh_cache_entry(struct 
 cache_entry *ce, int reall
  #define CACHE_EXT_WATCH 0x57415443 /* WATC */
  
  struct index_state the_index;
 +static int watch_lowerlimit = 65536;
 +static int recent_limit = 1800;
  
  static void set_index_entry(struct index_state *istate, int nr, struct 
 cache_entry *ce)
  {
 @@ -1014,6 +1016,7 @@ int add_index_entry(struct index_state *istate, struct 
 cache_entry *ce, int opti
   (istate-cache_nr - pos - 1) * sizeof(ce));
   set_index_entry(istate, pos, ce);
   istate-cache_changed = 1;
 + istate-update_watches = 1;
   return 0;
  }
  
 @@ -1300,13 +1303,14 @@ static void read_watch_extension(struct index_state 
 *istate, uint8_t *data,
unsigned long sz)
  {
   int i;
 - if ((istate-cache_nr + 7) / 8 != sz) {
 + if ((istate-cache_nr + 7) / 8 + 1 != sz) {
   error(invalid 'WATC' extension);
   return;
   }
   for (i = 0; i  istate-cache_nr; i++)
   if (data[i / 8]  (1  (i % 8)))
   istate-cache[i]-ce_flags |= CE_WATCHED;
 + istate-update_watches = data[sz - 1];
  }
  
  static int read_index_extension(struct index_state *istate,
 @@ -1449,6 +1453,19 @@ static struct cache_entry *create_from_disk(struct 
 ondisk_cache_entry *ondisk,
   return ce;
  }
  
 +static int watcher_config(const char *var, const char *value, void *data)
 +{
 + if (!strcmp(var, filewatcher.minfiles)) {
 + watch_lowerlimit = git_config_int(var, value);
 + return 0;
 + }
 + if (!strcmp(var, filewatcher.recentlimit)) {
 + recent_limit = git_config_int(var, value);
 + return 0;
 + }
 + return 0;
 +}
 +
  static void validate_watcher(struct index_state *istate, const char *path)
  {
   int i;
 @@ -1458,6 +1475,7 @@ static void validate_watcher(struct index_state 
 *istate, const char *path)
   return;
   }
  
 + git_config(watcher_config, NULL);
   istate-watcher = connect_watcher(path);
   if (istate-watcher != -1) {
   struct strbuf sb = STRBUF_INIT;
 @@ -1478,6 +1496,7 @@ static void validate_watcher(struct index_state 
 *istate, const char *path)
   istate-cache[i]-ce_flags = ~CE_WATCHED;
   

Re: [PATCH/WIP v2 05/14] read-cache: put some limits on file watching

2014-01-19 Thread Duy Nguyen
On Mon, Jan 20, 2014 at 12:06 AM, Thomas Rast t...@thomasrast.ch wrote:
 Nguyễn Thái Ngọc Duy pclo...@gmail.com writes:

 watch_entries() is a lot of computation and could trigger a lot more
 lookups in file-watcher. Normally after the first set of watches are
 in place, we do not need to update often. Moreover if the number of
 entries is small, the overhead of file watcher may actually slow git
 down.

 This patch only allows to update watches if the number of watchable
 files is over a limit (and there are new files added if this is not
 the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
 920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
 it starts to take longer than 100ms. 2^16 is chosen at the minimum
 limit to start using file watcher.

 Recently updated files are not considered watchable because they are
 likely to be updated again soon, not worth the ping-pong game with
 file watcher. The default limit 30min is just a random value.

 But then a fresh clone of a big repository would not get any benefit
 from the watcher?

 Not yet sure how to best handle this.

Gaahh, perhaps limit the number of unwatchable recent files to a
hundred or so in addition to time limit.
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH/WIP v2 05/14] read-cache: put some limits on file watching

2014-01-17 Thread Nguyễn Thái Ngọc Duy
watch_entries() is a lot of computation and could trigger a lot more
lookups in file-watcher. Normally after the first set of watches are
in place, we do not need to update often. Moreover if the number of
entries is small, the overhead of file watcher may actually slow git
down.

This patch only allows to update watches if the number of watchable
files is over a limit (and there are new files added if this is not
the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
it starts to take longer than 100ms. 2^16 is chosen at the minimum
limit to start using file watcher.

Recently updated files are not considered watchable because they are
likely to be updated again soon, not worth the ping-pong game with
file watcher. The default limit 30min is just a random value.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 Documentation/config.txt |  9 +
 cache.h  |  1 +
 read-cache.c | 44 
 3 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index a405806..e394399 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1038,6 +1038,15 @@ difftool.tool.cmd::
 difftool.prompt::
Prompt before each invocation of the diff tool.
 
+filewatcher.minfiles::
+   Start watching files if the number of watchable files are
+   above this limit. Default value is 65536.
+
+filewatcher.recentlimit::
+   Files that are last updated within filewatcher.recentlimit
+   seconds from now are not considered watchable. Default value
+   is 1800 (30 minutes).
+
 fetch.recurseSubmodules::
This option can be either set to a boolean value or to 'on-demand'.
Setting it to a boolean changes the behavior of fetch and pull to
diff --git a/cache.h b/cache.h
index 0d1..bcec29b 100644
--- a/cache.h
+++ b/cache.h
@@ -278,6 +278,7 @@ struct index_state {
struct cache_tree *cache_tree;
struct cache_time timestamp;
unsigned name_hash_initialized : 1,
+update_watches : 1,
 initialized : 1;
struct hashmap name_hash;
struct hashmap dir_hash;
diff --git a/read-cache.c b/read-cache.c
index 21c3207..406834a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -38,6 +38,8 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce, int reall
 #define CACHE_EXT_WATCH 0x57415443   /* WATC */
 
 struct index_state the_index;
+static int watch_lowerlimit = 65536;
+static int recent_limit = 1800;
 
 static void set_index_entry(struct index_state *istate, int nr, struct 
cache_entry *ce)
 {
@@ -1014,6 +1016,7 @@ int add_index_entry(struct index_state *istate, struct 
cache_entry *ce, int opti
(istate-cache_nr - pos - 1) * sizeof(ce));
set_index_entry(istate, pos, ce);
istate-cache_changed = 1;
+   istate-update_watches = 1;
return 0;
 }
 
@@ -1300,13 +1303,14 @@ static void read_watch_extension(struct index_state 
*istate, uint8_t *data,
 unsigned long sz)
 {
int i;
-   if ((istate-cache_nr + 7) / 8 != sz) {
+   if ((istate-cache_nr + 7) / 8 + 1 != sz) {
error(invalid 'WATC' extension);
return;
}
for (i = 0; i  istate-cache_nr; i++)
if (data[i / 8]  (1  (i % 8)))
istate-cache[i]-ce_flags |= CE_WATCHED;
+   istate-update_watches = data[sz - 1];
 }
 
 static int read_index_extension(struct index_state *istate,
@@ -1449,6 +1453,19 @@ static struct cache_entry *create_from_disk(struct 
ondisk_cache_entry *ondisk,
return ce;
 }
 
+static int watcher_config(const char *var, const char *value, void *data)
+{
+   if (!strcmp(var, filewatcher.minfiles)) {
+   watch_lowerlimit = git_config_int(var, value);
+   return 0;
+   }
+   if (!strcmp(var, filewatcher.recentlimit)) {
+   recent_limit = git_config_int(var, value);
+   return 0;
+   }
+   return 0;
+}
+
 static void validate_watcher(struct index_state *istate, const char *path)
 {
int i;
@@ -1458,6 +1475,7 @@ static void validate_watcher(struct index_state *istate, 
const char *path)
return;
}
 
+   git_config(watcher_config, NULL);
istate-watcher = connect_watcher(path);
if (istate-watcher != -1) {
struct strbuf sb = STRBUF_INIT;
@@ -1478,6 +1496,7 @@ static void validate_watcher(struct index_state *istate, 
const char *path)
istate-cache[i]-ce_flags = ~CE_WATCHED;
istate-cache_changed = 1;
}
+   istate-update_watches = 1;
 }
 
 static int sort_by_date(const void *a_, const void *b_)
@@ -1524,7 +1543,7 @@ static int do_watch_entries(struct