This bit is basically a "dynamic CE_VALID". It marks entries that are
being watched by the upcoming file watcher. When an index is loaded,
the file watcher is contacted and the list of updated paths is retrieved.

These paths will have CE_WATCHED cleared and lstat() will be called on
them. Entries that have CE_WATCHED set and are not in the list will have
CE_VALID turned on to skip lstat(). The setting is temporary: CE_VALID
is not saved to disk if CE_WATCHED is also set.

We keep CE_WATCHED in a new extension, separate from the entries, to
save some space, because extended ce_flags add 2 bytes per entry and
this flag would be present in the majority of entries. When stored as
a bitmap, this extension could compress very well with the EWAH algorithm.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 Documentation/technical/index-format.txt |  6 +++++
 cache.h                                  |  3 +++
 read-cache.c                             | 41 +++++++++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index f352a9b..24fd0ae 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -198,3 +198,9 @@ Git index format
   - At most three 160-bit object names of the entry in stages from 1 to 3
     (nothing is written for a missing stage).
 
+=== File watcher
+
+  The signature of this extension is { 'W', 'A', 'T', 'C' }.
+
+  - A bitmap of all entries in the index: the n-th bit of the m-th byte
+    corresponds to CE_WATCHED of the <m * 8 + n>-th index entry.
diff --git a/cache.h b/cache.h
index f14d535..a0af2a5 100644
--- a/cache.h
+++ b/cache.h
@@ -169,6 +169,9 @@ struct cache_entry {
 /* used to temporarily mark paths matched by pathspecs */
 #define CE_MATCHED           (1 << 26)
 
+/* set CE_VALID at runtime if the entry is guaranteed not updated */
+#define CE_WATCHED           (1 << 27)
+
 /*
  * Extended on-disk flags
  */
diff --git a/read-cache.c b/read-cache.c
index 3b6daf1..098d3b6 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -33,6 +33,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
 #define CACHE_EXT(s) ( (s[0]<<24)|(s[1]<<16)|(s[2]<<8)|(s[3]) )
 #define CACHE_EXT_TREE 0x54524545      /* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
+#define CACHE_EXT_WATCH 0x57415443       /* "WATC" */
 
 struct index_state the_index;
 
@@ -1289,6 +1290,19 @@ static int verify_hdr(struct cache_header *hdr,
        return 0;
 }
 
+static void read_watch_extension(struct index_state *istate, uint8_t *data,
+                                unsigned long sz)
+{
+       int i;
+       if ((istate->cache_nr + 7) / 8 != sz) {
+               error("invalid 'WATC' extension");
+               return;
+       }
+       for (i = 0; i < istate->cache_nr; i++)
+               if (data[i / 8] & (1 << (i % 8)))
+                       istate->cache[i]->ce_flags |= CE_WATCHED;
+}
+
 static int read_index_extension(struct index_state *istate,
                                const char *ext, void *data, unsigned long sz)
 {
@@ -1299,6 +1313,9 @@ static int read_index_extension(struct index_state *istate,
        case CACHE_EXT_RESOLVE_UNDO:
                istate->resolve_undo = resolve_undo_read(data, sz);
                break;
+       case CACHE_EXT_WATCH:
+               read_watch_extension(istate, data, sz);
+               break;
        default:
                if (*ext < 'A' || 'Z' < *ext)
                        return error("index uses %.4s extension, which we do 
not understand",
@@ -1777,7 +1794,7 @@ int write_index(struct index_state *istate, int newfd)
 {
        git_SHA_CTX c;
        struct cache_header hdr;
-       int i, err, removed, extended, hdr_version;
+       int i, err, removed, extended, hdr_version, has_watches = 0;
        struct cache_entry **cache = istate->cache;
        int entries = istate->cache_nr;
        struct stat st;
@@ -1786,6 +1803,8 @@ int write_index(struct index_state *istate, int newfd)
        for (i = removed = extended = 0; i < entries; i++) {
                if (cache[i]->ce_flags & CE_REMOVE)
                        removed++;
+               else if (cache[i]->ce_flags & CE_WATCHED)
+                       has_watches++;
 
                /* reduce extended entries if possible */
                cache[i]->ce_flags &= ~CE_EXTENDED;
@@ -1857,6 +1876,26 @@ int write_index(struct index_state *istate, int newfd)
                if (err)
                        return -1;
        }
+       if (has_watches) {
+               int id, sz = (entries - removed + 7) / 8;
+               uint8_t *data = xmalloc(sz);
+               memset(data, 0, sz);
+               for (i = 0, id = 0; i < entries && has_watches; i++) {
+                       struct cache_entry *ce = cache[i];
+                       if (ce->ce_flags & CE_REMOVE)
+                               continue;
+                       if (ce->ce_flags & CE_WATCHED) {
+                               data[id / 8] |= 1 << (id % 8);
+                               has_watches--;
+                       }
+                       id++;
+               }
+               err = write_index_ext_header(&c, newfd, CACHE_EXT_WATCH, sz) < 0
+                       || ce_write(&c, newfd, data, sz) < 0;
+               free(data);
+               if (err)
+                       return -1;
+       }
 
        if (ce_flush(&c, newfd) || fstat(newfd, &st))
                return -1;
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to