We want to watch files that are never changed, because lstat() on those
files is wasted effort. So we sort unwatched files by mtime and start
adding them to the file watcher until it refuses more (e.g. it hits the
inotify limit). Recently updated entries are also excluded from the
watch list. CE_VALID is used in combination with CE_WATCHED: entries
that already have CE_VALID set will never be watched.

We send as many paths as possible in one packet, in pkt-line format, to
reduce round trips. For small projects like git, all entries can be
packed in one packet. For large projects like webkit (182k entries) it
takes two packets. We may do prefix compression as well to send more
paths in fewer packets.

The file watcher replies how many entries it can watch (because at
least inotify has system limits).

Note that we still do lstat() on these newly watched files because they
could have changed before the file watcher started watching them. Watched
files may only skip lstat() at the next git run.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 file-watcher.c |  31 ++++++++++++++++
 pkt-line.c     |   2 +-
 pkt-line.h     |   2 ++
 read-cache.c   | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/file-watcher.c b/file-watcher.c
index 36a9a8d..3a54168 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -3,6 +3,7 @@
 #include "parse-options.h"
 #include "exec_cmd.h"
 #include "file-watcher-lib.h"
+#include "pkt-line.h"
 
 static const char *const file_watcher_usage[] = {
        N_("git file-watcher [options]"),
@@ -11,6 +12,34 @@ static const char *const file_watcher_usage[] = {
 
 static char index_signature[41];
 
+static int watch_path(char *path)
+{
+       /*
+        * Consider sending "wait" every 10ms or so in case there are
+        * many paths to process and that takes more than 20ms;
+        * otherwise the sender won't keep waiting. This is usually a
+        * one-time cost, so waiting a bit is ok.
+        */
+       return -1;
+}
+
+static void watch_paths(char *buf, int maxlen,
+                       int fd, struct sockaddr_un *sock)
+{
+       char *end = buf + maxlen;
+       int n, ret, len;
+       for (n = ret = 0; buf < end && !ret; buf += len) {
+               char ch;
+               len = packet_length(buf);
+               ch = buf[len];
+               buf[len] = '\0';
+               if (!(ret = watch_path(buf + 4)))
+                       n++;
+               buf[len] = ch;
+       }
+       send_watcher(fd, sock, "fine %d", n);
+}
+
 static int handle_command(int fd)
 {
        struct sockaddr_un sun;
@@ -29,6 +58,8 @@ static int handle_command(int fd)
                         * wrong. Clean up and start over.
                         */
                        index_signature[0] = '\0';
+       } else if (starts_with(msg, "watch ")) {
+               watch_paths(msg + 6, len - 6, fd, &sun);
        } else if (!strcmp(msg, "die")) {
                exit(0);
        } else {
diff --git a/pkt-line.c b/pkt-line.c
index bc63b3b..b5af84e 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -135,7 +135,7 @@ static int get_packet_data(int fd, char **src_buf, size_t *src_size,
        return ret;
 }
 
-static int packet_length(const char *linelen)
+int packet_length(const char *linelen)
 {
        int n;
        int len = 0;
diff --git a/pkt-line.h b/pkt-line.h
index 0a838d1..40470b9 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -75,6 +75,8 @@ char *packet_read_line(int fd, int *size);
  */
 char *packet_read_line_buf(char **src_buf, size_t *src_len, int *size);
 
+int packet_length(const char *linelen);
+
 #define DEFAULT_PACKET_MAX 1000
 #define LARGE_PACKET_MAX 65520
 extern char packet_buffer[LARGE_PACKET_MAX];
diff --git a/read-cache.c b/read-cache.c
index 76cf0e3..21c3207 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,7 @@
 #include "strbuf.h"
 #include "varint.h"
 #include "file-watcher-lib.h"
+#include "pkt-line.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int really);
 
@@ -1479,6 +1480,98 @@ static void validate_watcher(struct index_state *istate, const char *path)
                }
 }
 
+static int sort_by_date(const void *a_, const void *b_)
+{
+       const struct cache_entry *a = *(const struct cache_entry **)a_;
+       const struct cache_entry *b = *(const struct cache_entry **)b_;
+       uint32_t seca = a->ce_stat_data.sd_mtime.sec;
+       uint32_t secb = b->ce_stat_data.sd_mtime.sec;
+       return seca - secb;
+}
+
+static int do_watch_entries(struct index_state *istate,
+                           struct cache_entry **cache,
+                           struct strbuf *sb, int start, int now)
+{
+       char *line;
+       int i;
+       ssize_t len;
+
+       send_watcher(istate->watcher, NULL, "%s", sb->buf);
+       line = read_watcher(istate->watcher, &len, NULL);
+       if (!line) {
+               if (!len) {
+                       close(istate->watcher);
+                       istate->watcher = -1;
+               }
+               return -1;
+       }
+       if (starts_with(line, "fine ")) {
+               char *end;
+               long n = strtoul(line + 5, &end, 10);
+               if (end != line + len)
+                       return -1;
+               for (i = 0; i < n; i++)
+                       cache[start + i]->ce_flags |= CE_WATCHED;
+               istate->cache_changed = 1;
+               if (i != now)
+                       return -1;
+       } else
+               return -1;
+       start = i;
+       strbuf_reset(sb);
+       strbuf_addstr(sb, "watch ");
+       return 0;
+}
+
+static inline int ce_watchable(struct cache_entry *ce)
+{
+       return ce_uptodate(ce) && /* write_index will catch late ce_uptodate bits */
+               !(ce->ce_flags & CE_WATCHED) &&
+               !(ce->ce_flags & CE_VALID) &&
+               /*
+                * S_IFGITLINK should not be watched
+                * obviously. S_IFLNK could be problematic because
+                * inotify may follow symlinks without IN_DONT_FOLLOW
+                */
+               S_ISREG(ce->ce_mode);
+}
+
+static void watch_entries(struct index_state *istate)
+{
+       int i, start, nr;
+       struct cache_entry **sorted;
+       struct strbuf sb = STRBUF_INIT;
+       int val;
+       socklen_t vallen = sizeof(val);
+
+       if (istate->watcher <= 0)
+               return;
+       for (i = nr = 0; i < istate->cache_nr; i++)
+               if (ce_watchable(istate->cache[i]))
+                       nr++;
+       sorted = xmalloc(sizeof(*sorted) * nr);
+       for (i = nr = 0; i < istate->cache_nr; i++)
+               if (ce_watchable(istate->cache[i]))
+                       sorted[nr++] = istate->cache[i];
+
+       getsockopt(istate->watcher, SOL_SOCKET, SO_SNDBUF, &val, &vallen);
+       strbuf_grow(&sb, val);
+       strbuf_addstr(&sb, "watch ");
+
+       qsort(sorted, nr, sizeof(*sorted), sort_by_date);
+       for (i = start = 0; i < nr; i++) {
+               if (sb.len + 4 + ce_namelen(sorted[i]) >= val &&
+                   do_watch_entries(istate, sorted, &sb, start, i))
+                       break;
+               packet_buf_write(&sb, "%s", sorted[i]->name);
+       }
+       if (i == nr && start < i)
+               do_watch_entries(istate, sorted, &sb, start, i);
+       strbuf_release(&sb);
+       free(sorted);
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int read_index_from(struct index_state *istate, const char *path)
 {
@@ -1565,6 +1658,7 @@ int read_index_from(struct index_state *istate, const char *path)
        }
        munmap(mmap, mmap_size);
        validate_watcher(istate, path);
+       watch_entries(istate);
        return istate->cache_nr;
 
 unmap:
@@ -1844,8 +1938,21 @@ int write_index(struct index_state *istate, int newfd)
        for (i = removed = extended = 0; i < entries; i++) {
                if (cache[i]->ce_flags & CE_REMOVE)
                        removed++;
-               else if (cache[i]->ce_flags & CE_WATCHED)
-                       has_watches++;
+               else if (cache[i]->ce_flags & CE_WATCHED) {
+                       /*
+                        * We may set CE_WATCHED (but not CE_VALID)
+                        * early when refresh has not been done
+                        * yet. At that time we had no idea if the
+                        * entry may have been updated. If it has
+                        * been, remove CE_WATCHED so CE_VALID won't
+                        * incorrectly be set next time if the file
+                        * watcher reports no changes.
+                        */
+                       if (!ce_uptodate(cache[i]))
+                               cache[i]->ce_flags &= ~CE_WATCHED;
+                       else
+                               has_watches++;
+               }
 
                /* reduce extended entries if possible */
                cache[i]->ce_flags &= ~CE_EXTENDED;
-- 
1.8.5.1.208.g05b12ea

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to