Re: [PATCH v2 06/19] read-cache: add index reading api

2013-07-13 Thread Duy Nguyen
On Sat, Jul 13, 2013 at 12:26 AM, Thomas Gummerer t.gumme...@gmail.com wrote:
 @@ -238,7 +245,6 @@ static int read_index_v2(struct index_state *istate, void 
 *mmap,
 disk_ce = (struct ondisk_cache_entry *)((char *)mmap + 
 src_offset);
 ce = create_from_disk(disk_ce, consumed, previous_name);
 set_index_entry(istate, i, ce);
 -
 src_offset += consumed;
 }
 strbuf_release(previous_name_buf);

Nit pick. This is unnecessary.

 +int for_each_index_entry_v2(struct index_state *istate, each_cache_entry_fn 
 fn, void *cb_data)
 +{
 +   int i, ret = 0;
 +   struct filter_opts *opts= istate-filter_opts;

Nitpick. space before =.

 --- a/read-cache.c
 +++ b/read-cache.c
 @@ -996,6 +996,7 @@ int add_index_entry(struct index_state *istate, struct 
 cache_entry *ce, int opti
 memmove(istate-cache + pos + 1,
 istate-cache + pos,
 (istate-cache_nr - pos - 1) * sizeof(ce));
 +
 set_index_entry(istate, pos, ce);
 istate-cache_changed = 1;
 return 0;

NP: unnecessary change.

 +int set_internal_ops(struct index_state *istate)
 +{
 +   if (!istate-internal_ops  istate-cache)
 +   istate-internal_ops = v2_internal_ops;
 +   if (!istate-internal_ops)
 +   return 0;
 +   return 1;
 +}
 +
 +/*
 + * Execute fn for each index entry which is currently in istate.  Data
 + * can be given to the function using the cb_data parameter.
 + */
 +int for_each_index_entry(struct index_state *istate, each_cache_entry_fn fn, 
 void *cb_data)
 +{
 +   if (!set_internal_ops(istate))
 +   return 0;
 +   return istate-internal_ops-for_each_index_entry(istate, fn, 
 cb_data);
  }

set_internal_ops should have been called in read_index and initialize_index.

 @@ -1374,6 +1409,7 @@ int discard_index(struct index_state *istate)
 free(istate-cache);
 istate-cache = NULL;
 istate-cache_alloc = 0;
 +   istate-filter_opts = NULL;
 return 0;
  }

Why is internal_ops not reset here?

 --- a/read-cache.h
 +++ b/read-cache.h
 @@ -24,11 +24,17 @@ struct cache_version_header {
  struct index_ops {
 int (*match_stat_basic)(const struct cache_entry *ce, struct stat 
 *st, int changed);
 int (*verify_hdr)(void *mmap, unsigned long size);
 -   int (*read_index)(struct index_state *istate, void *mmap, unsigned 
 long mmap_size);
 +   int (*read_index)(struct index_state *istate, void *mmap, unsigned 
 long mmap_size,
 + struct filter_opts *opts);
 int (*write_index)(struct index_state *istate, int newfd);
  };

 +struct internal_ops {
 +   int (*for_each_index_entry)(struct index_state *istate, 
 each_cache_entry_fn fn, void *cb_data);
 +};
 +

I wonder if we do need separate internal_ops from index_ops. Why not
merge internal_ops in index_ops?
--
Duy
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 06/19] read-cache: add index reading api

2013-07-12 Thread Thomas Gummerer
Add an api for access to the index file.  Currently there is only a very
basic api for accessing the index file, which only allows a full read of
the index, and lets the users of the data filter it.  The new index api
gives the users the possibility to use only part of the index and
provides functions for iterating over and accessing cache entries.

This simplifies future improvements to the in-memory format, as changes
will be concentrated on one file, instead of the whole git source code.

Helped-by: Nguyễn Thái Ngọc Duy pclo...@gmail.com
Signed-off-by: Thomas Gummerer t.gumme...@gmail.com
---
 cache.h | 42 +-
 read-cache-v2.c | 35 +--
 read-cache.c| 44 
 read-cache.h|  8 +++-
 4 files changed, 121 insertions(+), 8 deletions(-)

diff --git a/cache.h b/cache.h
index 5082b34..d305d21 100644
--- a/cache.h
+++ b/cache.h
@@ -127,7 +127,7 @@ struct cache_entry {
unsigned int ce_flags;
unsigned int ce_namelen;
unsigned char sha1[20];
-   struct cache_entry *next;
+   struct cache_entry *next; /* used by name_hash */
char name[FLEX_ARRAY]; /* more */
 };
 
@@ -258,6 +258,29 @@ static inline unsigned int canon_mode(unsigned int mode)
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
 
+/*
+ * Options by which the index should be filtered when read partially.
+ *
+ * pathspec: The pathspec which the index entries have to match
+ * seen: Used to return the seen parameter from match_pathspec()
+ * max_prefix_len: The common prefix length of the pathspecs
+ *
+ * read_staged: used to indicate if the conflicted entries (entries
+ * with a stage) should be included
+ * read_cache_tree: used to indicate if the cache-tree should be read
+ * read_resolve_undo: used to indicate if the resolve undo data should
+ * be read
+ */
+struct filter_opts {
+   const struct pathspec *pathspec;
+   char *seen;
+   int max_prefix_len;
+
+   int read_staged;
+   int read_cache_tree;
+   int read_resolve_undo;
+};
+
 struct index_state {
struct cache_entry **cache;
unsigned int version;
@@ -270,6 +293,8 @@ struct index_state {
struct hash_table name_hash;
struct hash_table dir_hash;
struct index_ops *ops;
+   struct internal_ops *internal_ops;
+   struct filter_opts *filter_opts;
 };
 
 extern struct index_state the_index;
@@ -311,6 +336,12 @@ extern void free_name_hash(struct index_state *istate);
 #define unmerge_cache_entry_at(at) unmerge_index_entry_at(the_index, at)
 #define unmerge_cache(pathspec) unmerge_index(the_index, pathspec)
 #define read_blob_data_from_cache(path, sz) 
read_blob_data_from_index(the_index, (path), (sz))
+
+/* index api */
+#define read_cache_filtered(opts) read_index_filtered(the_index, (opts))
+#define read_cache_filtered_from(path, opts) 
read_index_filtered_from(the_index, (path), (opts))
+#define for_each_cache_entry(fn, cb_data) \
+   for_each_index_entry(the_index, (fn), (cb_data))
 #endif
 
 enum object_type {
@@ -438,6 +469,15 @@ extern int init_db(const char *template_dir, unsigned int 
flags);
} \
} while (0)
 
+/* index api */
+extern int read_index_filtered(struct index_state *, struct filter_opts *opts);
+extern int read_index_filtered_from(struct index_state *, const char *path, 
struct filter_opts *opts);
+
+typedef int each_cache_entry_fn(struct cache_entry *ce, void *);
+extern int for_each_index_entry(struct index_state *istate,
+   each_cache_entry_fn, void *);
+
+
 /* Initialize and use the cache information */
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const char **pathspec);
diff --git a/read-cache-v2.c b/read-cache-v2.c
index a6883c3..51b618f 100644
--- a/read-cache-v2.c
+++ b/read-cache-v2.c
@@ -3,6 +3,7 @@
 #include resolve-undo.h
 #include cache-tree.h
 #include varint.h
+#include dir.h
 
 /* Mask for the name length in ce_flags in the on-disk index */
 #define CE_NAMEMASK  (0x0fff)
@@ -207,8 +208,14 @@ static int read_index_extension(struct index_state *istate,
return 0;
 }
 
+/*
+ * The performance is the same if we read the whole index or only
+ * part of it, therefore we always read the whole index to avoid
+ * having to re-read it later.  The filter_opts will determine
+ * what part of the index is used when retrieving the cache-entries.
+ */
 static int read_index_v2(struct index_state *istate, void *mmap,
-unsigned long mmap_size)
+unsigned long mmap_size, struct filter_opts *opts)
 {
int i;
unsigned long src_offset;
@@ -238,7 +245,6 @@ static int read_index_v2(struct index_state *istate, void 
*mmap,
disk_ce = (struct ondisk_cache_entry *)((char *)mmap + 
src_offset);
ce =