Re: [PATCH v2 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-05-24 Thread Christian Couder
On Thu, May 18, 2017 at 10:13 PM, Ben Peart  wrote:
> When the index is read from disk, the query-fsmonitor index extension is
> used to flag the last known potentially dirty index and untracked cach

s/cach/cache/

> entries.

[...]

> diff --git a/cache.h b/cache.h
> index 188811920c..36423c77cc 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -201,6 +201,7 @@ struct cache_entry {
>  #define CE_ADDED (1 << 19)
>
>  #define CE_HASHED(1 << 20)
> +#define CE_FSMONITOR_DIRTY   (1 << 21)

It looks like the (1 << 21) value was used before (as CE_UNHASHED) and
was removed in:

419a597f64 (name-hash.c: remove cache entries instead of marking them
CE_UNHASHED, 2013-11-14)

I wondered if using this value again could make old and new versions
of git interact badly, but it looks like these are in memory only
flags, so it should be ok.

>  #define CE_WT_REMOVE (1 << 22) /* remove in work directory */
>  #define CE_CONFLICTED(1 << 23)
>
>  struct split_index;
>  struct untracked_cache;
> @@ -342,6 +344,8 @@ struct index_state {
> struct hashmap dir_hash;
> unsigned char sha1[20];
> struct untracked_cache *untracked;
> +   time_t fsmonitor_last_update;
> +   struct ewah_bitmap *fsmonitor_dirty_bitmap;

Maybe you could remove "_bitmap" at the end of the name.

>  };


> diff --git a/dir.c b/dir.c
> index 1b5558fdf9..da428489e2 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -1652,6 +1652,18 @@ static int valid_cached_dir(struct dir_struct *dir,
> if (!untracked)
> return 0;
>
> +   refresh_by_fsmonitor(_index);
> +   if (dir->untracked->use_fsmonitor) {
> +   /*
> +* With fsmonitor, we can trust the untracked cache's
> +* valid field.
> +*/
> +   if (untracked->valid)
> +   goto skip_stat;

Maybe you could avoid this goto using a valid_cached_dir_after_stat()
function that would do what is after the "skip_stat:" label below?

> +   else
> +   invalidate_directory(dir->untracked, untracked);
> +   }
> +
> if (stat(path->len ? path->buf : ".", )) {
> invalidate_directory(dir->untracked, untracked);
> memset(>stat_data, 0, 
> sizeof(untracked->stat_data));
> @@ -1665,6 +1677,7 @@ static int valid_cached_dir(struct dir_struct *dir,
> return 0;
> }
>
> +skip_stat:
> if (untracked->check_only != !!check_only) {
> invalidate_directory(dir->untracked, untracked);
> return 0;

[...]

> +void refresh_by_fsmonitor(struct index_state *istate)
> +{
> +   static has_run_once = FALSE;
> +   struct strbuf query_result = STRBUF_INIT;
> +   int query_success = 0;
> +   size_t bol = 0; /* beginning of line */
> +   time_t last_update;
> +   char *buf, *entry;
> +   int i;
> +
> +   if (!core_fsmonitor || has_run_once)
> +   return;
> +   has_run_once = TRUE;
> +
> +   /*
> +* This could be racy so save the date/time now and the hook
> +* should be inclusive to ensure we don't miss potential changes.
> +*/
> +   last_update = time(NULL);
> +
> +   /* If we have a last update time, call query-monitor for the set of 
> changes since that time */
> +   if (istate->fsmonitor_last_update) {
> +   query_success = 
> !query_fsmonitor(istate->fsmonitor_last_update, _result);
> +   }

Braces can be removed.

> +   if (query_success) {
> +   /* Mark all entries returned by the monitor as dirty */
> +   buf = entry = query_result.buf;
> +   for (i = 0; i < query_result.len; i++) {
> +   if (buf[i] != '\0')
> +   continue;
> +   mark_file_dirty(istate, buf + bol);
> +   bol = i + 1;
> +   }
> +   if (bol < query_result.len)
> +   mark_file_dirty(istate, buf + bol);
> +
> +   /* Mark all clean entries up-to-date */
> +   for (i = 0; i < istate->cache_nr; i++) {
> +   struct cache_entry *ce = istate->cache[i];
> +   if (ce_stage(ce) || (ce->ce_flags & 
> CE_FSMONITOR_DIRTY))
> +   continue;
> +   ce_mark_uptodate(ce);
> +   }
> +
> +   /*
> +* Now that we've marked the invalid entries in the
> +* untracked-cache itself, we can mark the untracked cache for
> +* fsmonitor usage.
> +*/
> +   if (istate->untracked) {
> +   istate->untracked->use_fsmonitor = 1;
> +   }

Braces can be removed.

> +   }
> +   else {
> +   /* if we can't update the cache, fall back to checking them 
> all */
> 

Re: [PATCH v2 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-05-20 Thread Junio C Hamano
Ben Peart  writes:

> After sending this, I noticed that using a different compiler
> generated a couple of warnings I should fix.  I'm assuming I'll need
> another re-roll but if not, here are the changes that will be squashed
> in.
>
> Ben

Thanks, in addition, I am missing the definition of TRUE and FALSE.


Re: [PATCH v2 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-05-19 Thread Ben Peart
After sending this, I noticed that using a different compiler generated 
a couple of warnings I should fix.  I'm assuming I'll need another 
re-roll but if not, here are the changes that will be squashed in.


Ben



diff --git a/dir.c b/dir.c
index da428489e2..37f1c580a5 100644
--- a/dir.c
+++ b/dir.c
@@ -16,6 +16,7 @@
 #include "utf8.h"
 #include "varint.h"
 #include "ewah/ewok.h"
+#include "fsmonitor.h"


diff --git a/fsmonitor.c b/fsmonitor.c
index 6356dc795e..9f08e66db9 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -162,7 +162,7 @@ void process_fsmonitor_extension(struct index_state 
*istate)


 void refresh_by_fsmonitor(struct index_state *istate)
 {
-   static has_run_once = FALSE;
+   static int has_run_once = FALSE;
struct strbuf query_result = STRBUF_INIT;
int query_success = 0;
size_t bol = 0; /* beginning of line */


On 5/18/2017 4:13 PM, Ben Peart wrote:

When the index is read from disk, the query-fsmonitor index extension is
used to flag the last known potentially dirty index and untracked cach
entries.

If git finds out some entries are 'fsmonitor-dirty', but are really
unchanged (e.g. the file was changed, then reverted back), then Git will
clear the marking in the extension. If git adds or updates an index
entry, it is marked 'fsmonitor-dirty' to ensure it is checked for
changes in the working directory.

Before the 'fsmonitor-dirty' flags are used to limit the scope of the
files to be checked, the query-fsmonitor hook proc is called with the
time the index was last updated.  The hook proc returns the list of
files changed since that last updated time and the list of
potentially dirty entries is updated to reflect the current state.

refresh_index() and valid_cached_dir() are updated so that any entry not
flagged as potentially dirty is not checked as it cannot have any
changes.

Signed-off-by: Ben Peart 
---
 Makefile   |   1 +
 builtin/update-index.c |   1 +
 cache.h|   5 ++
 config.c   |   5 ++
 dir.c  |  13 +++
 dir.h  |   2 +
 entry.c|   1 +
 environment.c  |   1 +
 fsmonitor.c| 231 +
 fsmonitor.h|   9 ++
 read-cache.c   |  28 +-
 unpack-trees.c |   1 +
 12 files changed, 296 insertions(+), 2 deletions(-)
 create mode 100644 fsmonitor.c
 create mode 100644 fsmonitor.h

diff --git a/Makefile b/Makefile
index e35542e631..488a4466cc 100644
--- a/Makefile
+++ b/Makefile
@@ -760,6 +760,7 @@ LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ebfc09faa0..32fd977b43 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -232,6 +232,7 @@ static int mark_ce_flags(const char *path, int flag, int 
mark)
else
active_cache[pos]->ce_flags &= ~flag;
active_cache[pos]->ce_flags |= CE_UPDATE_IN_BASE;
+   active_cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
cache_tree_invalidate_path(_index, path);
active_cache_changed |= CE_ENTRY_CHANGED;
return 0;
diff --git a/cache.h b/cache.h
index 188811920c..36423c77cc 100644
--- a/cache.h
+++ b/cache.h
@@ -201,6 +201,7 @@ struct cache_entry {
 #define CE_ADDED (1 << 19)

 #define CE_HASHED(1 << 20)
+#define CE_FSMONITOR_DIRTY   (1 << 21)
 #define CE_WT_REMOVE (1 << 22) /* remove in work directory */
 #define CE_CONFLICTED(1 << 23)

@@ -324,6 +325,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define FSMONITOR_CHANGED  (1 << 8)

 struct split_index;
 struct untracked_cache;
@@ -342,6 +344,8 @@ struct index_state {
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   time_t fsmonitor_last_update;
+   struct ewah_bitmap *fsmonitor_dirty_bitmap;
 };

 extern struct index_state the_index;
@@ -766,6 +770,7 @@ extern int precomposed_unicode;
 extern int protect_hfs;
 extern int protect_ntfs;
 extern int git_db_env, git_index_env, git_graft_env, git_common_dir_env;
+extern int core_fsmonitor;

 /*
  * Include broken refs in all ref iterations, which will
diff --git a/config.c b/config.c
index bb4d735701..1a8108636d 100644
--- a/config.c
+++ b/config.c
@@ -1232,6 +1232,11 @@ static int git_default_core_config(const char *var, 
const char *value)
return 0;
}

+   if (!strcmp(var, "core.fsmonitor")) {
+   core_fsmonitor = git_config_bool(var, value);
+   return 0;
+   }
+
/* Add other 

[PATCH v2 3/6] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-05-18 Thread Ben Peart
When the index is read from disk, the query-fsmonitor index extension is
used to flag the last known potentially dirty index and untracked cach
entries.

If git finds out some entries are 'fsmonitor-dirty', but are really
unchanged (e.g. the file was changed, then reverted back), then Git will
clear the marking in the extension. If git adds or updates an index
entry, it is marked 'fsmonitor-dirty' to ensure it is checked for
changes in the working directory.

Before the 'fsmonitor-dirty' flags are used to limit the scope of the
files to be checked, the query-fsmonitor hook proc is called with the
time the index was last updated.  The hook proc returns the list of
files changed since that last updated time and the list of
potentially dirty entries is updated to reflect the current state.

refresh_index() and valid_cached_dir() are updated so that any entry not
flagged as potentially dirty is not checked as it cannot have any
changes.

Signed-off-by: Ben Peart 
---
 Makefile   |   1 +
 builtin/update-index.c |   1 +
 cache.h|   5 ++
 config.c   |   5 ++
 dir.c  |  13 +++
 dir.h  |   2 +
 entry.c|   1 +
 environment.c  |   1 +
 fsmonitor.c| 231 +
 fsmonitor.h|   9 ++
 read-cache.c   |  28 +-
 unpack-trees.c |   1 +
 12 files changed, 296 insertions(+), 2 deletions(-)
 create mode 100644 fsmonitor.c
 create mode 100644 fsmonitor.h

diff --git a/Makefile b/Makefile
index e35542e631..488a4466cc 100644
--- a/Makefile
+++ b/Makefile
@@ -760,6 +760,7 @@ LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ebfc09faa0..32fd977b43 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -232,6 +232,7 @@ static int mark_ce_flags(const char *path, int flag, int 
mark)
else
active_cache[pos]->ce_flags &= ~flag;
active_cache[pos]->ce_flags |= CE_UPDATE_IN_BASE;
+   active_cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
cache_tree_invalidate_path(_index, path);
active_cache_changed |= CE_ENTRY_CHANGED;
return 0;
diff --git a/cache.h b/cache.h
index 188811920c..36423c77cc 100644
--- a/cache.h
+++ b/cache.h
@@ -201,6 +201,7 @@ struct cache_entry {
 #define CE_ADDED (1 << 19)
 
 #define CE_HASHED(1 << 20)
+#define CE_FSMONITOR_DIRTY   (1 << 21)
 #define CE_WT_REMOVE (1 << 22) /* remove in work directory */
 #define CE_CONFLICTED(1 << 23)
 
@@ -324,6 +325,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define FSMONITOR_CHANGED  (1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -342,6 +344,8 @@ struct index_state {
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   time_t fsmonitor_last_update;
+   struct ewah_bitmap *fsmonitor_dirty_bitmap;
 };
 
 extern struct index_state the_index;
@@ -766,6 +770,7 @@ extern int precomposed_unicode;
 extern int protect_hfs;
 extern int protect_ntfs;
 extern int git_db_env, git_index_env, git_graft_env, git_common_dir_env;
+extern int core_fsmonitor;
 
 /*
  * Include broken refs in all ref iterations, which will
diff --git a/config.c b/config.c
index bb4d735701..1a8108636d 100644
--- a/config.c
+++ b/config.c
@@ -1232,6 +1232,11 @@ static int git_default_core_config(const char *var, 
const char *value)
return 0;
}
 
+   if (!strcmp(var, "core.fsmonitor")) {
+   core_fsmonitor = git_config_bool(var, value);
+   return 0;
+   }
+
/* Add other config variables here and to Documentation/config.txt. */
return 0;
 }
diff --git a/dir.c b/dir.c
index 1b5558fdf9..da428489e2 100644
--- a/dir.c
+++ b/dir.c
@@ -1652,6 +1652,18 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
 
+   refresh_by_fsmonitor(_index);
+   if (dir->untracked->use_fsmonitor) {
+   /*
+* With fsmonitor, we can trust the untracked cache's
+* valid field.
+*/
+   if (untracked->valid)
+   goto skip_stat;
+   else
+   invalidate_directory(dir->untracked, untracked);
+   }
+
if (stat(path->len ? path->buf : ".", )) {
invalidate_directory(dir->untracked, untracked);
memset(>stat_data, 0, sizeof(untracked->stat_data));
@@ -1665,6 +1677,7 @@