We want to watch files that are never changed because lstat() on those
files is wasted effort. So we sort unwatched files by date and start
adding them to the file watcher until it barfs (e.g. hits the inotify
limit). Recently updated entries are also excluded from the watch list.
CE_VALID is used in combination with CE_WATCHED. Entries that already
have CE_VALID set will never be watched.

We send as many paths as possible in one packet in pkt-line
format. For small projects like git, all entries can be packed in one
packet. For large projects like webkit (182k entries) it takes two
packets. We may do prefix compression as well to send more in fewer
packets.

The file watcher replies how many entries it can watch (because at
least inotify has system limits).

Note that we still do lstat() on these new watched files because they
could have changed before the file watcher could watch them. Watched
files may only skip lstat() at the next git run.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclo...@gmail.com>
---
 file-watcher.c | 27 ++++++++++++++++
 pkt-line.c     |  2 +-
 pkt-line.h     |  2 ++
 read-cache.c   | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 127 insertions(+), 1 deletion(-)

diff --git a/file-watcher.c b/file-watcher.c
index 6aeed4d..35781fa 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -1,17 +1,41 @@
 #include "cache.h"
 #include "sigchain.h"
 #include "string-list.h"
+#include "pkt-line.h"
 
 static char index_signature[41];
 static struct string_list updated = STRING_LIST_INIT_DUP;
 static int updated_sorted;
 
+static int watch_path(char *path) /* stub: ignores path; presumably a real backend (e.g. inotify) replaces it later */
+{
+       return -1; /* non-zero: watch_paths() stops at this path and does not count it */
+}
+
 static void reset(const char *sig)
 {
        string_list_clear(&updated, 0);
        strlcpy(index_signature, sig, sizeof(index_signature));
 }
 
+/*
+ * Handle the payload of a "watch " command: a sequence of pkt-line
+ * packets, one path per packet.
+ *
+ * buf/maxlen delimit the payload (the bytes after "watch "). Paths are
+ * handed to watch_path() in order until one fails or the payload ends;
+ * the number of paths accepted is then reported to the client at
+ * sock/socklen as "fine <n>".
+ *
+ * The payload comes from a client, so every length header is checked
+ * before it is trusted: a truncated header, an unparsable or too small
+ * length, or a packet running past the end of the payload ends
+ * processing.
+ *
+ * NOTE(review): each path is NUL-terminated in place by temporarily
+ * overwriting the byte that follows its packet, so for the last packet
+ * this touches buf[maxlen]. Assumes the caller's buffer has room for
+ * that byte -- confirm in handle_command().
+ */
+static void watch_paths(char *buf, int maxlen,
+                       int fd, struct sockaddr *sock,
+                       socklen_t socklen)
+{
+       char *end = buf + maxlen;
+       int n = 0;
+
+       /* a packet needs at least its 4-byte length header */
+       while (end - buf >= 4) {
+               char ch;
+               int ret;
+               int len = packet_length(buf);
+
+               /*
+                * len < 4 (including -1 for bad hex) would not advance
+                * or would index before buf; len beyond the payload
+                * would read and write out of bounds.
+                */
+               if (len < 4 || len > end - buf)
+                       break;
+               ch = buf[len];
+               buf[len] = '\0';
+               ret = watch_path(buf + 4);
+               buf[len] = ch;
+               if (ret)
+                       break;
+               n++;
+               buf += len;
+       }
+       sendtof(fd, 0, sock, socklen, "fine %d", n);
+}
+
 static int handle_command(int fd, char *msg, int msgsize)
 {
        struct sockaddr_un sun;
@@ -41,6 +65,9 @@ static int handle_command(int fd, char *msg, int msgsize)
                               strlen(updated.items[i].string),
                               0, &sun, socklen);
                sendtof(fd, 0, &sun, socklen, "%c", 0);
+       } else if (starts_with(msg, "watch ")) {
+               watch_paths(msg + 6, len - 6,
+                           fd, (struct sockaddr *)&sun, socklen);
        } else if ((arg = skip_prefix(msg, "forget "))) {
                struct string_list_item *item;
                if (!updated_sorted) {
diff --git a/pkt-line.c b/pkt-line.c
index bc63b3b..b5af84e 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -135,7 +135,7 @@ static int get_packet_data(int fd, char **src_buf, size_t 
*src_size,
        return ret;
 }
 
-static int packet_length(const char *linelen)
+int packet_length(const char *linelen)
 {
        int n;
        int len = 0;
diff --git a/pkt-line.h b/pkt-line.h
index 0a838d1..40470b9 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -75,6 +75,8 @@ char *packet_read_line(int fd, int *size);
  */
 char *packet_read_line_buf(char **src_buf, size_t *src_len, int *size);
 
+int packet_length(const char *linelen);
+
 #define DEFAULT_PACKET_MAX 1000
 #define LARGE_PACKET_MAX 65520
 extern char packet_buffer[LARGE_PACKET_MAX];
diff --git a/read-cache.c b/read-cache.c
index caa2298..839fd7c 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "strbuf.h"
 #include "varint.h"
+#include "pkt-line.h"
 
 static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int 
really);
 
@@ -1537,6 +1538,90 @@ static void connect_watcher(struct index_state *istate, 
const char *path)
                }
 }
 
+/*
+ * qsort() comparator for an array of cache_entry pointers: orders
+ * entries by ascending mtime seconds (oldest first).
+ *
+ * The result is built from comparisons rather than "seca - secb": the
+ * difference of two uint32_t values converted to int has the wrong
+ * sign whenever the timestamps are more than INT_MAX seconds apart.
+ */
+static int sort_by_date(const void *a_, const void *b_)
+{
+       const struct cache_entry *a = *(const struct cache_entry * const *)a_;
+       const struct cache_entry *b = *(const struct cache_entry * const *)b_;
+       uint32_t seca = a->ce_stat_data.sd_mtime.sec;
+       uint32_t secb = b->ce_stat_data.sd_mtime.sec;
+
+       return (seca > secb) - (seca < secb);
+}
+
+/*
+ * Tell whether an index entry is a candidate for the file watcher.
+ * An entry qualifies only if it carries neither CE_WATCHED nor
+ * CE_VALID and its mtime is more than 1800 seconds older than "now".
+ */
+static inline int ce_watchable(struct cache_entry *ce, time_t now)
+{
+       if (ce->ce_flags & (CE_WATCHED | CE_VALID))
+               return 0;
+       return ce->ce_stat_data.sd_mtime.sec + 1800 < now;
+}
+
+/*
+ * Send one batch of paths to the file watcher and mark the entries it
+ * accepted.
+ *
+ * sb holds a complete "watch " request covering cache[start..now);
+ * "now" is the exclusive end index of the batch, not a timestamp. The
+ * watcher answers "fine <n>"; the first n entries of the batch get
+ * CE_WATCHED and the index is flagged as changed.
+ *
+ * Returns 0 if the whole batch was accepted, in which case sb is reset
+ * to a fresh "watch " prefix for the next batch. Returns -1 on I/O
+ * failure, on a malformed or out-of-range reply, or when only part of
+ * the batch was accepted (the accepted part is still marked).
+ */
+static int do_watch_entries(struct index_state *istate,
+                           struct cache_entry **cache,
+                           struct strbuf *sb, int start, int now)
+{
+       char line[1024];
+       char *end;
+       unsigned long n;
+       int i, len;
+
+       if (write(istate->watcher, sb->buf, sb->len) != (ssize_t)sb->len)
+               return -1;
+       len = read(istate->watcher, line, sizeof(line) - 1);
+       if (len <= 0)
+               return -1;
+       line[len] = '\0';
+       if (!starts_with(line, "fine "))
+               return -1;
+       n = strtoul(line + 5, &end, 10);
+       /* require at least one digit and nothing after the number */
+       if (end == line + 5 || end != line + len)
+               return -1;
+       /* never let the watcher's count index outside the batch we sent */
+       if (start > now || n > (unsigned long)(now - start))
+               return -1;
+       for (i = 0; i < (int)n; i++)
+               cache[start + i]->ce_flags |= CE_WATCHED;
+       istate->cache_changed = 1;
+       /* compare against the batch end, not the bare count */
+       if (start + i != now)
+               return -1;
+       strbuf_reset(sb);
+       strbuf_addstr(sb, "watch ");
+       return 0;
+}
+
+/*
+ * Offer not-yet-watched index entries to the file watcher.
+ *
+ * Candidates (see ce_watchable()) are sorted by mtime, oldest first,
+ * and sent as "watch " requests of pkt-line encoded paths, each
+ * request kept below the socket's send buffer size. Sending stops at
+ * the first batch the watcher does not accept completely. Does nothing
+ * when no watcher is connected, when the send buffer size cannot be
+ * determined, or when fewer than 50 entries qualify.
+ */
+static void watch_entries(struct index_state *istate)
+{
+       int i, start, nr;
+       struct cache_entry **sorted;
+       struct strbuf sb = STRBUF_INIT;
+       int val;
+       socklen_t vallen = sizeof(val);
+       time_t now = time(NULL);
+
+       if (istate->watcher == -1)
+               return;
+       /* without a usable send buffer size the requests cannot be sized */
+       if (getsockopt(istate->watcher, SOL_SOCKET, SO_SNDBUF,
+                      &val, &vallen) || val <= 0)
+               return;
+       for (i = nr = 0; i < istate->cache_nr; i++)
+               if (ce_watchable(istate->cache[i], now))
+                       nr++;
+       if (nr < 50)
+               return;
+       sorted = xmalloc(sizeof(*sorted) * nr);
+       for (i = nr = 0; i < istate->cache_nr; i++)
+               if (ce_watchable(istate->cache[i], now))
+                       sorted[nr++] = istate->cache[i];
+
+       strbuf_grow(&sb, val);
+       strbuf_addstr(&sb, "watch ");
+
+       qsort(sorted, nr, sizeof(*sorted), sort_by_date);
+       for (i = start = 0; i < nr; i++) {
+               if (sb.len + 4 + ce_namelen(sorted[i]) >= (size_t)val) {
+                       /* request is full: flush entries [start, i) first */
+                       if (do_watch_entries(istate, sorted, &sb, start, i))
+                               break;
+                       /* the next batch begins with the entry added below */
+                       start = i;
+               }
+               packet_buf_write(&sb, "%s", sorted[i]->name);
+       }
+       /* flush the final, partially filled request */
+       if (i == nr && start < i)
+               do_watch_entries(istate, sorted, &sb, start, i);
+       strbuf_release(&sb);
+       free(sorted);
+}
+
 static void farewell_watcher(struct index_state *istate,
                             const unsigned char *sha1)
 {
@@ -1637,6 +1722,7 @@ int read_index_from(struct index_state *istate, const 
char *path)
        }
        munmap(mmap, mmap_size);
        connect_watcher(istate, path);
+       watch_entries(istate);
        return istate->cache_nr;
 
 unmap:
@@ -1933,6 +2019,17 @@ int write_index(struct index_state *istate, int newfd)
                         * reinstated.
                         */
                        cache[i]->ce_flags &= ~CE_VALID;
+                       /*
+                        * We may set CE_WATCHED (but not CE_VALID)
+                        * early when refresh has not been done
+                        * yet. At that time we had no idea if the
+                        * entry may have been updated. If it has
+                        * been, remove CE_WATCHED so CE_VALID won't
+                        * incorrectly be set next time if the file
+                        * watcher reports no changes.
+                        */
+                       if (!ce_uptodate(cache[i]))
+                               cache[i]->ce_flags &= ~CE_WATCHED;
                        has_watches++;
                }
 
-- 
1.8.5.2.240.g8478abd

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to