Re: [PATCH v5 3/7] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-07-03 Thread Ben Peart



On 6/27/2017 11:43 AM, Christian Couder wrote:

On Sat, Jun 10, 2017 at 3:40 PM, Ben Peart  wrote:


+int read_fsmonitor_extension(struct index_state *istate, const void *data,
+   unsigned long sz)
+{
+   const char *index = data;
+   uint32_t hdr_version;
+   uint32_t ewah_size;
+   int ret;
+
+   if (sz < sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t))
+   return error("corrupt fsmonitor extension (too short)");
+
+   hdr_version = get_be32(index);


Here we use get_be32(), ...


+   index += sizeof(uint32_t);
+   if (hdr_version != INDEX_EXTENSION_VERSION)
+   return error("bad fsmonitor version %d", hdr_version);
+
+   istate->fsmonitor_last_update = get_be64(index);


...get_be64(), ...


+   index += sizeof(uint64_t);
+
+   ewah_size = get_be32(index);


... and get_be32 again, ...


+   index += sizeof(uint32_t);
+
+   istate->fsmonitor_dirty = ewah_new();
+   ret = ewah_read_mmap(istate->fsmonitor_dirty, index, ewah_size);
+   if (ret != ewah_size) {
+   ewah_free(istate->fsmonitor_dirty);
+   istate->fsmonitor_dirty = NULL;
+   return error("failed to parse ewah bitmap reading fsmonitor index 
extension");
+   }
+
+   return 0;
+}
+
+void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
+{
+   uint32_t hdr_version;
+   uint64_t tm;
+   struct ewah_bitmap *bitmap;
+   int i;
+   uint32_t ewah_start;
+   uint32_t ewah_size = 0;
+   int fixup = 0;
+
+   hdr_version = htonl(INDEX_EXTENSION_VERSION);


... but here we use htonl() instead of put_be32(), ...


+   strbuf_add(sb, _version, sizeof(uint32_t));
+
+   tm = htonll((uint64_t)istate->fsmonitor_last_update);


... htonll(), ...


+   strbuf_add(sb, , sizeof(uint64_t));
+   fixup = sb->len;
+   strbuf_add(sb, _size, sizeof(uint32_t)); /* we'll fix this up 
later */
+
+   ewah_start = sb->len;
+   bitmap = ewah_new();
+   for (i = 0; i < istate->cache_nr; i++)
+   if (istate->cache[i]->ce_flags & CE_FSMONITOR_DIRTY)
+   ewah_set(bitmap, i);
+   ewah_serialize_strbuf(bitmap, sb);
+   ewah_free(bitmap);
+
+   /* fix up size field */
+   ewah_size = htonl(sb->len - ewah_start);


... and htonl() again.

It would be more consistent (and perhaps more correct) to use
put_beXX() functions, instead of the htonl() and htonll() functions.


I agree that these are asymmetric.  I followed the pattern used in the 
untracked cache in which write_untracked_extension uses htonl and 
read_untracked_extension uses get_be32. I can change this to be more 
symmetric.





+   memcpy(sb->buf + fixup, _size, sizeof(uint32_t));
+}



+/*
+ * Call the query-fsmonitor hook passing the time of the last saved results.
+ */
+static int query_fsmonitor(int version, uint64_t last_update, struct strbuf 
*query_result)
+{
+   struct child_process cp = CHILD_PROCESS_INIT;
+   char ver[64];
+   char date[64];
+   const char *argv[4];
+
+   if (!(argv[0] = find_hook("query-fsmonitor")))
+   return -1;
+
+   snprintf(ver, sizeof(version), "%d", version);
+   snprintf(date, sizeof(date), "%" PRIuMAX, (uintmax_t)last_update);
+   argv[1] = ver;
+   argv[2] = date;
+   argv[3] = NULL;
+   cp.argv = argv;


Maybe it would be nicer using the argv_array_pushX() functions.


When the number of arguments is fixed and known, I prefer avoiding the 
dynamic allocations that come along with the argv_array_pushX() functions.





+   cp.out = -1;
+
+   return capture_command(, query_result, 1024);
+}
+
+static void mark_file_dirty(struct index_state *istate, const char *name)
+{
+   struct untracked_cache_dir *dir;
+   int pos;
+
+   /* find it in the index and mark that entry as dirty */
+   pos = index_name_pos(istate, name, strlen(name));
+   if (pos >= 0) {
+   if (!(istate->cache[pos]->ce_flags & CE_FSMONITOR_DIRTY)) {
+   istate->cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
+   istate->cache_changed |= FSMONITOR_CHANGED;
+   }
+   }
+
+   /*
+* Find the corresponding directory in the untracked cache
+* and mark it as invalid
+*/
+   if (!istate->untracked || !istate->untracked->root)
+   return;
+
+   dir = find_untracked_cache_dir(istate->untracked, 
istate->untracked->root, name);
+   if (dir) {
+   if (dir->valid) {
+   dir->valid = 0;
+   istate->cache_changed |= FSMONITOR_CHANGED;
+   }
+   }


The code above is quite similar as what is in mark_fsmonitor_dirty(),
so I wonder if a refactoring is possible.


I've felt the same way and looked at how to refactor it better a number 
of times but never came up with a way that 

Re: [PATCH v5 3/7] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-06-27 Thread Christian Couder
On Sat, Jun 10, 2017 at 3:40 PM, Ben Peart  wrote:

> +int read_fsmonitor_extension(struct index_state *istate, const void *data,
> +   unsigned long sz)
> +{
> +   const char *index = data;
> +   uint32_t hdr_version;
> +   uint32_t ewah_size;
> +   int ret;
> +
> +   if (sz < sizeof(uint32_t) + sizeof(uint64_t) + sizeof(uint32_t))
> +   return error("corrupt fsmonitor extension (too short)");
> +
> +   hdr_version = get_be32(index);

Here we use get_be32(), ...

> +   index += sizeof(uint32_t);
> +   if (hdr_version != INDEX_EXTENSION_VERSION)
> +   return error("bad fsmonitor version %d", hdr_version);
> +
> +   istate->fsmonitor_last_update = get_be64(index);

...get_be64(), ...

> +   index += sizeof(uint64_t);
> +
> +   ewah_size = get_be32(index);

... and get_be32 again, ...

> +   index += sizeof(uint32_t);
> +
> +   istate->fsmonitor_dirty = ewah_new();
> +   ret = ewah_read_mmap(istate->fsmonitor_dirty, index, ewah_size);
> +   if (ret != ewah_size) {
> +   ewah_free(istate->fsmonitor_dirty);
> +   istate->fsmonitor_dirty = NULL;
> +   return error("failed to parse ewah bitmap reading fsmonitor 
> index extension");
> +   }
> +
> +   return 0;
> +}
> +
> +void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
> +{
> +   uint32_t hdr_version;
> +   uint64_t tm;
> +   struct ewah_bitmap *bitmap;
> +   int i;
> +   uint32_t ewah_start;
> +   uint32_t ewah_size = 0;
> +   int fixup = 0;
> +
> +   hdr_version = htonl(INDEX_EXTENSION_VERSION);

... but here we use htonl() instead of put_be32(), ...

> +   strbuf_add(sb, _version, sizeof(uint32_t));
> +
> +   tm = htonll((uint64_t)istate->fsmonitor_last_update);

... htonll(), ...

> +   strbuf_add(sb, , sizeof(uint64_t));
> +   fixup = sb->len;
> +   strbuf_add(sb, _size, sizeof(uint32_t)); /* we'll fix this up 
> later */
> +
> +   ewah_start = sb->len;
> +   bitmap = ewah_new();
> +   for (i = 0; i < istate->cache_nr; i++)
> +   if (istate->cache[i]->ce_flags & CE_FSMONITOR_DIRTY)
> +   ewah_set(bitmap, i);
> +   ewah_serialize_strbuf(bitmap, sb);
> +   ewah_free(bitmap);
> +
> +   /* fix up size field */
> +   ewah_size = htonl(sb->len - ewah_start);

... and htonl() again.

It would be more consistent (and perhaps more correct) to use
put_beXX() functions, instead of the htonl() and htonll() functions.

> +   memcpy(sb->buf + fixup, _size, sizeof(uint32_t));
> +}

> +/*
> + * Call the query-fsmonitor hook passing the time of the last saved results.
> + */
> +static int query_fsmonitor(int version, uint64_t last_update, struct strbuf 
> *query_result)
> +{
> +   struct child_process cp = CHILD_PROCESS_INIT;
> +   char ver[64];
> +   char date[64];
> +   const char *argv[4];
> +
> +   if (!(argv[0] = find_hook("query-fsmonitor")))
> +   return -1;
> +
> +   snprintf(ver, sizeof(version), "%d", version);
> +   snprintf(date, sizeof(date), "%" PRIuMAX, (uintmax_t)last_update);
> +   argv[1] = ver;
> +   argv[2] = date;
> +   argv[3] = NULL;
> +   cp.argv = argv;

Maybe it would be nicer using the argv_array_pushX() functions.

> +   cp.out = -1;
> +
> +   return capture_command(, query_result, 1024);
> +}
> +
> +static void mark_file_dirty(struct index_state *istate, const char *name)
> +{
> +   struct untracked_cache_dir *dir;
> +   int pos;
> +
> +   /* find it in the index and mark that entry as dirty */
> +   pos = index_name_pos(istate, name, strlen(name));
> +   if (pos >= 0) {
> +   if (!(istate->cache[pos]->ce_flags & CE_FSMONITOR_DIRTY)) {
> +   istate->cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
> +   istate->cache_changed |= FSMONITOR_CHANGED;
> +   }
> +   }
> +
> +   /*
> +* Find the corresponding directory in the untracked cache
> +* and mark it as invalid
> +*/
> +   if (!istate->untracked || !istate->untracked->root)
> +   return;
> +
> +   dir = find_untracked_cache_dir(istate->untracked, 
> istate->untracked->root, name);
> +   if (dir) {
> +   if (dir->valid) {
> +   dir->valid = 0;
> +   istate->cache_changed |= FSMONITOR_CHANGED;
> +   }
> +   }

The code above is quite similar as what is in mark_fsmonitor_dirty(),
so I wonder if a refactoring is possible.

> +}
> +
> +void refresh_by_fsmonitor(struct index_state *istate)
> +{
> +   static int has_run_once = 0;
> +   struct strbuf query_result = STRBUF_INIT;
> +   int query_success = 0;
> +   size_t bol = 0; /* beginning of line */
> +   uint64_t last_update;
> +   char *buf, *entry;
> +   int i;
> +

[PATCH v5 3/7] fsmonitor: teach git to optionally utilize a file system monitor to speed up detecting new or changed files.

2017-06-10 Thread Ben Peart
When the index is read from disk, the query-fsmonitor index extension is
used to flag the last known potentially dirty index and untracked cache
entries.

If git finds out some entries are 'fsmonitor-dirty', but are really
unchanged (e.g. the file was changed, then reverted back), then Git will
clear the marking in the extension. If git adds or updates an index
entry, it is marked 'fsmonitor-dirty' to ensure it is checked for
changes in the working directory.

Before the 'fsmonitor-dirty' flags are used to limit the scope of the
files to be checked, the query-fsmonitor hook proc is called with the
time the index was last updated.  The hook proc returns the list of
files changed since that last updated time and the list of
potentially dirty entries is updated to reflect the current state.

refresh_index() and valid_cached_dir() are updated so that any entry not
flagged as potentially dirty is not checked as it cannot have any
changes.

Signed-off-by: Ben Peart 
---
 Makefile   |   1 +
 builtin/update-index.c |   1 +
 cache.h|   5 +
 config.c   |   4 +
 dir.c  |  27 +++--
 dir.h  |   2 +
 entry.c|   1 +
 environment.c  |   1 +
 fsmonitor.c| 261 +
 fsmonitor.h|   9 ++
 read-cache.c   |  28 +-
 unpack-trees.c |   1 +
 12 files changed, 329 insertions(+), 12 deletions(-)
 create mode 100644 fsmonitor.c
 create mode 100644 fsmonitor.h

diff --git a/Makefile b/Makefile
index 7c621f7f76..992dd58801 100644
--- a/Makefile
+++ b/Makefile
@@ -763,6 +763,7 @@ LIB_OBJS += ewah/ewah_rlw.o
 LIB_OBJS += exec_cmd.o
 LIB_OBJS += fetch-pack.o
 LIB_OBJS += fsck.o
+LIB_OBJS += fsmonitor.o
 LIB_OBJS += gettext.o
 LIB_OBJS += gpg-interface.o
 LIB_OBJS += graph.o
diff --git a/builtin/update-index.c b/builtin/update-index.c
index ebfc09faa0..32fd977b43 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -232,6 +232,7 @@ static int mark_ce_flags(const char *path, int flag, int 
mark)
else
active_cache[pos]->ce_flags &= ~flag;
active_cache[pos]->ce_flags |= CE_UPDATE_IN_BASE;
+   active_cache[pos]->ce_flags |= CE_FSMONITOR_DIRTY;
cache_tree_invalidate_path(_index, path);
active_cache_changed |= CE_ENTRY_CHANGED;
return 0;
diff --git a/cache.h b/cache.h
index 4d92aae0e8..220efbfde7 100644
--- a/cache.h
+++ b/cache.h
@@ -201,6 +201,7 @@ struct cache_entry {
 #define CE_ADDED (1 << 19)
 
 #define CE_HASHED(1 << 20)
+#define CE_FSMONITOR_DIRTY   (1 << 21)
 #define CE_WT_REMOVE (1 << 22) /* remove in work directory */
 #define CE_CONFLICTED(1 << 23)
 
@@ -324,6 +325,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define CACHE_TREE_CHANGED (1 << 5)
 #define SPLIT_INDEX_ORDERED(1 << 6)
 #define UNTRACKED_CHANGED  (1 << 7)
+#define FSMONITOR_CHANGED  (1 << 8)
 
 struct split_index;
 struct untracked_cache;
@@ -342,6 +344,8 @@ struct index_state {
struct hashmap dir_hash;
unsigned char sha1[20];
struct untracked_cache *untracked;
+   uint64_t fsmonitor_last_update;
+   struct ewah_bitmap *fsmonitor_dirty;
 };
 
 extern struct index_state the_index;
@@ -767,6 +771,7 @@ extern int precomposed_unicode;
 extern int protect_hfs;
 extern int protect_ntfs;
 extern int git_db_env, git_index_env, git_graft_env, git_common_dir_env;
+extern int core_fsmonitor;
 
 /*
  * Include broken refs in all ref iterations, which will
diff --git a/config.c b/config.c
index 146cb3452a..c70f667640 100644
--- a/config.c
+++ b/config.c
@@ -1245,6 +1245,10 @@ static int git_default_core_config(const char *var, 
const char *value)
hide_dotfiles = HIDE_DOTFILES_DOTGITONLY;
else
hide_dotfiles = git_config_bool(var, value);
+   }
+
+   if (!strcmp(var, "core.fsmonitor")) {
+   core_fsmonitor = git_config_bool(var, value);
return 0;
}
 
diff --git a/dir.c b/dir.c
index 5f531d9eed..1f4a43dc75 100644
--- a/dir.c
+++ b/dir.c
@@ -17,6 +17,7 @@
 #include "utf8.h"
 #include "varint.h"
 #include "ewah/ewok.h"
+#include "fsmonitor.h"
 
 /*
  * Tells read_directory_recursive how a file or directory should be treated.
@@ -1680,17 +1681,23 @@ static int valid_cached_dir(struct dir_struct *dir,
if (!untracked)
return 0;
 
-   if (stat(path->len ? path->buf : ".", )) {
-   invalidate_directory(dir->untracked, untracked);
-   memset(>stat_data, 0, sizeof(untracked->stat_data));
-   return 0;
-   }
-   if (!untracked->valid ||
-   match_stat_data_racy(istate, >stat_data, )) {
-   if (untracked->valid)
+   /*
+* With fsmonitor, we can trust