Re: [PATCH/WIP v2 02/14] read-cache: new extension to mark what file is watched

2014-01-19 Thread Thomas Rast
Nguyễn Thái Ngọc Duy pclo...@gmail.com writes:

 If an entry is watched, git lets an external program decide if the
 entry is modified or not. It's more like --assume-unchanged, but
 designed to be controlled by machine.

 We are running out of on-disk ce_flags, so instead of extending
 on-disk entry format again, watched flags are in-core only and
 stored as extension instead.

I wonder if this would be a good use-case for EWAH bitmaps?  Presumably
most users would end up having only a few large ranges of files that are
being watched.  Quite possibly most users would watch *all* files.

-- 
Thomas Rast
t...@thomasrast.ch
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/WIP v2 02/14] read-cache: new extension to mark what file is watched

2014-01-19 Thread Duy Nguyen
On Mon, Jan 20, 2014 at 12:06 AM, Thomas Rast t...@thomasrast.ch wrote:
 Nguyễn Thái Ngọc Duy pclo...@gmail.com writes:

 If an entry is watched, git lets an external program decide if the
 entry is modified or not. It's more like --assume-unchanged, but
 designed to be controlled by machine.

 We are running out of on-disk ce_flags, so instead of extending
 on-disk entry format again, watched flags are in-core only and
 stored as extension instead.

 I wonder if this would be a good use-case for EWAH bitmaps?  Presumably
 most users would end up having only a few large ranges of files that are
 being watched.  Quite possibly most users would watch *all* files.

Oh yeah. I edited my commit message locally to this a while ago

On webkit.git with
182k entries, that's 364k more to be SHA-1'd on current index
versions, compared to 22k in this format (and even less when
jk/pack-bitmap ewah graduates and we can use ewah compression)
-- 
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH/WIP v2 02/14] read-cache: new extension to mark what file is watched

2014-01-17 Thread Nguyễn Thái Ngọc Duy
If an entry is watched, git lets an external program decide if the
entry is modified or not. It's more like --assume-unchanged, but
designed to be controlled by machine.

We are running out of on-disk ce_flags, so instead of extending
on-disk entry format again, watched flags are in-core only and
stored as extension instead.

Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
---
 cache.h  |  2 ++
 read-cache.c | 41 -
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index a09d622..069dce7 100644
--- a/cache.h
+++ b/cache.h
@@ -168,6 +168,8 @@ struct cache_entry {
 /* used to temporarily mark paths matched by pathspecs */
 #define CE_MATCHED   (1  26)
 
+#define CE_WATCHED   (1  27)
+
 /*
  * Extended on-disk flags
  */
diff --git a/read-cache.c b/read-cache.c
index fe1d153..6f21e3f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -33,6 +33,7 @@ static struct cache_entry *refresh_cache_entry(struct 
cache_entry *ce, int reall
 #define CACHE_EXT(s) ( (s[0]24)|(s[1]16)|(s[2]8)|(s[3]) )
 #define CACHE_EXT_TREE 0x54524545  /* TREE */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* REUC */
+#define CACHE_EXT_WATCH 0x57415443   /* WATC */
 
 struct index_state the_index;
 
@@ -1293,6 +1294,19 @@ static int verify_hdr(struct cache_header *hdr,
return 0;
 }
 
+static void read_watch_extension(struct index_state *istate, uint8_t *data,
+unsigned long sz)
+{
+   int i;
+   if ((istate-cache_nr + 7) / 8 != sz) {
+   error(invalid 'WATC' extension);
+   return;
+   }
+   for (i = 0; i  istate-cache_nr; i++)
+   if (data[i / 8]  (1  (i % 8)))
+   istate-cache[i]-ce_flags |= CE_WATCHED;
+}
+
 static int read_index_extension(struct index_state *istate,
const char *ext, void *data, unsigned long sz)
 {
@@ -1303,6 +1317,9 @@ static int read_index_extension(struct index_state 
*istate,
case CACHE_EXT_RESOLVE_UNDO:
istate-resolve_undo = resolve_undo_read(data, sz);
break;
+   case CACHE_EXT_WATCH:
+   read_watch_extension(istate, data, sz);
+   break;
default:
if (*ext  'A' || 'Z'  *ext)
return error(index uses %.4s extension, which we do 
not understand,
@@ -1781,7 +1798,7 @@ int write_index(struct index_state *istate, int newfd)
 {
git_SHA_CTX c;
struct cache_header hdr;
-   int i, err, removed, extended, hdr_version;
+   int i, err, removed, extended, hdr_version, has_watches = 0;
struct cache_entry **cache = istate-cache;
int entries = istate-cache_nr;
struct stat st;
@@ -1790,6 +1807,8 @@ int write_index(struct index_state *istate, int newfd)
for (i = removed = extended = 0; i  entries; i++) {
if (cache[i]-ce_flags  CE_REMOVE)
removed++;
+   else if (cache[i]-ce_flags  CE_WATCHED)
+   has_watches++;
 
/* reduce extended entries if possible */
cache[i]-ce_flags = ~CE_EXTENDED;
@@ -1861,6 +1880,26 @@ int write_index(struct index_state *istate, int newfd)
if (err)
return -1;
}
+   if (has_watches) {
+   int id, sz = (entries - removed + 7) / 8;
+   uint8_t *data = xmalloc(sz);
+   memset(data, 0, sz);
+   for (i = 0, id = 0; i  entries  has_watches; i++) {
+   struct cache_entry *ce = cache[i];
+   if (ce-ce_flags  CE_REMOVE)
+   continue;
+   if (ce-ce_flags  CE_WATCHED) {
+   data[id / 8] |= 1  (id % 8);
+   has_watches--;
+   }
+   id++;
+   }
+   err = write_index_ext_header(c, newfd, CACHE_EXT_WATCH, sz)  0
+   || ce_write(c, newfd, data, sz)  0;
+   free(data);
+   if (err)
+   return -1;
+   }
 
if (ce_flush(c, newfd) || fstat(newfd, st))
return -1;
-- 
1.8.5.1.208.g05b12ea

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/WIP v2 02/14] read-cache: new extension to mark what file is watched

2014-01-17 Thread Thomas Gummerer
Nguyễn Thái Ngọc Duy pclo...@gmail.com writes:

 If an entry is watched, git lets an external program decide if the
 entry is modified or not. It's more like --assume-unchanged, but
 designed to be controlled by machine.

 We are running out of on-disk ce_flags, so instead of extending
 on-disk entry format again, watched flags are in-core only and
 stored as extension instead.

As you said yourself in 
http://thread.gmane.org/gmane.comp.version-control.git/240339/focus=240385
this is not quite true.  As for your explanation there,

 Anyway using extended flags means 2 extra bytes per entry for
 almost every entry in this case (and for index v5 it means redoing
 crc32 for almost every entry too when the bit is updated) so it may
 still be a good idea to keep the new flag separate.

I don't think adding 2 extra bytes would be too bad, since we are
already using 62 bytes plus the bytes for the filename for each index
entry, so it would be a less than 3% increase in the index file size.
(And the extended flags may be used anyway in some cases)

As for index-v5 (if that's ever going to happen), it depends mostly on
how often the CE_WATCHED is going to be updated, to decide whether it
makes sense to store this as extension.

That said, I don't care too deeply if it's stored one way or another,
but I think it would be good to update the commit message with a better
rationale for the choice.

 Signed-off-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
 ---
  cache.h  |  2 ++
  read-cache.c | 41 -
  2 files changed, 42 insertions(+), 1 deletion(-)

 diff --git a/cache.h b/cache.h
 index a09d622..069dce7 100644
 --- a/cache.h
 +++ b/cache.h
 @@ -168,6 +168,8 @@ struct cache_entry {
  /* used to temporarily mark paths matched by pathspecs */
  #define CE_MATCHED   (1  26)

 +#define CE_WATCHED   (1  27)
 +
  /*
   * Extended on-disk flags
   */
 diff --git a/read-cache.c b/read-cache.c
 index fe1d153..6f21e3f 100644
 --- a/read-cache.c
 +++ b/read-cache.c
 @@ -33,6 +33,7 @@ static struct cache_entry *refresh_cache_entry(struct 
 cache_entry *ce, int reall
  #define CACHE_EXT(s) ( (s[0]24)|(s[1]16)|(s[2]8)|(s[3]) )
  #define CACHE_EXT_TREE 0x54524545/* TREE */
  #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* REUC */
 +#define CACHE_EXT_WATCH 0x57415443 /* WATC */

  struct index_state the_index;

 @@ -1293,6 +1294,19 @@ static int verify_hdr(struct cache_header *hdr,
   return 0;
  }

 +static void read_watch_extension(struct index_state *istate, uint8_t *data,
 +  unsigned long sz)
 +{
 + int i;
 + if ((istate-cache_nr + 7) / 8 != sz) {
 + error(invalid 'WATC' extension);
 + return;
 + }
 + for (i = 0; i  istate-cache_nr; i++)
 + if (data[i / 8]  (1  (i % 8)))
 + istate-cache[i]-ce_flags |= CE_WATCHED;
 +}
 +
  static int read_index_extension(struct index_state *istate,
   const char *ext, void *data, unsigned long sz)
  {
 @@ -1303,6 +1317,9 @@ static int read_index_extension(struct index_state 
 *istate,
   case CACHE_EXT_RESOLVE_UNDO:
   istate-resolve_undo = resolve_undo_read(data, sz);
   break;
 + case CACHE_EXT_WATCH:
 + read_watch_extension(istate, data, sz);
 + break;
   default:
   if (*ext  'A' || 'Z'  *ext)
   return error(index uses %.4s extension, which we do 
 not understand,
 @@ -1781,7 +1798,7 @@ int write_index(struct index_state *istate, int newfd)
  {
   git_SHA_CTX c;
   struct cache_header hdr;
 - int i, err, removed, extended, hdr_version;
 + int i, err, removed, extended, hdr_version, has_watches = 0;
   struct cache_entry **cache = istate-cache;
   int entries = istate-cache_nr;
   struct stat st;
 @@ -1790,6 +1807,8 @@ int write_index(struct index_state *istate, int newfd)
   for (i = removed = extended = 0; i  entries; i++) {
   if (cache[i]-ce_flags  CE_REMOVE)
   removed++;
 + else if (cache[i]-ce_flags  CE_WATCHED)
 + has_watches++;

   /* reduce extended entries if possible */
   cache[i]-ce_flags = ~CE_EXTENDED;
 @@ -1861,6 +1880,26 @@ int write_index(struct index_state *istate, int newfd)
   if (err)
   return -1;
   }
 + if (has_watches) {
 + int id, sz = (entries - removed + 7) / 8;
 + uint8_t *data = xmalloc(sz);
 + memset(data, 0, sz);
 + for (i = 0, id = 0; i  entries  has_watches; i++) {
 + struct cache_entry *ce = cache[i];
 + if (ce-ce_flags  CE_REMOVE)
 + continue;
 + if (ce-ce_flags  CE_WATCHED) {
 + data[id / 8] |= 1  (id % 8);
 + has_watches--;