On Sun, May 19, 2019 at 09:56:36AM +0700, Nguyễn Thái Ngọc Duy wrote:
> This patch goes with the second option, making sure that 'index' is
> always allocated after initialization. It's less effort than the first
> one, and also safer because you could still miss things during the code
> audit. The extra allocation cost is not a real concern.
I think this direction makes sense.
The patch looks good, though I wonder if we could simplify even further
by just embedding an index into the repository object. The purpose of
having it as a pointer, I think, is so that the_repository can point to
the_index. But we could possibly hide the latter behind some macro
trickery like:
#define the_index (the_repository->index)
I spent a few minutes on a proof of concept patch, but it gets a bit
hairy:
1. There are some circular dependencies in the header files. We'd need
repository.h to depend on cache.h to get the definition of
index_state, but the latter includes repository.h. We'd need to
break the index bits out of cache.h into index.h, which in turn
requires breaking out some other parts. I did a sloppy job of it in
the patch below.
2. There are hundreds of spots that need to swap out "repo->index" for
"&repo->index". In the patch below I just did enough to compile
archive-zip.o, to illustrate. :)
So it's definitely non-trivial to go that way. I'm not sure if it's
worth the effort to switch at this point, but even if it is, your patch
seems like a good thing to do in the meantime.
Either way, I think we could probably revert the non-test portion of my
581d2fd9f2 (get_oid: handle NULL repo->index, 2019-05-14) after this.
-Peff
---
diff --git a/archive-zip.c b/archive-zip.c
index 4d66b5be6e..517e203483 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -353,7 +353,7 @@ static int write_zip_entry(struct archiver_args *args,
return error(_("cannot read %s"),
oid_to_hex(oid));
crc = crc32(crc, buffer, size);
- is_binary = entry_is_binary(args->repo->index,
+ is_binary = entry_is_binary(&args->repo->index,
path_without_prefix,
buffer, size);
out = buffer;
@@ -430,7 +430,7 @@ static int write_zip_entry(struct archiver_args *args,
break;
crc = crc32(crc, buf, readlen);
if (is_binary == -1)
- is_binary = entry_is_binary(args->repo->index,
+ is_binary = entry_is_binary(&args->repo->index,
path_without_prefix,
buf, readlen);
write_or_die(1, buf, readlen);
@@ -463,7 +463,7 @@ static int write_zip_entry(struct archiver_args *args,
break;
crc = crc32(crc, buf, readlen);
if (is_binary == -1)
- is_binary = entry_is_binary(args->repo->index,
+ is_binary = entry_is_binary(&args->repo->index,
path_without_prefix,
buf, readlen);
diff --git a/cache.h b/cache.h
index b4bb2e2c11..d0450025e1 100644
--- a/cache.h
+++ b/cache.h
@@ -17,6 +17,7 @@
#include "sha1-array.h"
#include "repository.h"
#include "mem-pool.h"
+#include "oid.h"
#include <zlib.h>
typedef struct git_zstream {
@@ -43,28 +44,6 @@ int git_deflate_end_gently(git_zstream *);
int git_deflate(git_zstream *, int flush);
unsigned long git_deflate_bound(git_zstream *, unsigned long);
-/* The length in bytes and in hex digits of an object name (SHA-1 value). */
-#define GIT_SHA1_RAWSZ 20
-#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ)
-/* The block size of SHA-1. */
-#define GIT_SHA1_BLKSZ 64
-
-/* The length in bytes and in hex digits of an object name (SHA-256 value). */
-#define GIT_SHA256_RAWSZ 32
-#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ)
-/* The block size of SHA-256. */
-#define GIT_SHA256_BLKSZ 64
-
-/* The length in byte and in hex digits of the largest possible hash value. */
-#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ
-#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ
-/* The largest possible block size for any supported hash. */
-#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ
-
-struct object_id {
- unsigned char hash[GIT_MAX_RAWSZ];
-};
-
#define the_hash_algo the_repository->hash_algo
#if defined(DT_UNKNOWN) && !defined(NO_D_TYPE_IN_DIRENT)
@@ -143,16 +122,6 @@ struct cache_header {
#define INDEX_FORMAT_LB 2
#define INDEX_FORMAT_UB 4
-/*
- * The "cache_time" is just the low 32 bits of the
- * time. It doesn't matter if it overflows - we only
- * check it for equality in the 32 bits we save.
- */
-struct cache_time {
- uint32_t sec;
- uint32_t nsec;
-};
-
struct stat_data {
struct cache_time sd_ctime;
struct cache_time sd_mtime;
@@ -326,32 +295,6 @@ static inline unsigned int canon_mode(unsigned int mode)
#define UNTRACKED_CHANGED (1 << 7)
#define FSMONITOR_CHANGED (1 << 8)
-struct split_index;
-struct untracked_cache;
-
-struct index_state {
- struct cache_entry **cache;
- unsigned int version;
- unsigned int cache_nr, cache_alloc, cache_changed;
- struct string_list *resolve_undo;
- struct cache_tree *cache_tree;
- struct split_index *split_index;
- struct cache_time timestamp;
- unsigned name_hash_initialized : 1,
- initialized : 1,
- drop_cache_tree : 1,
- updated_workdir : 1,
- updated_skipworktree : 1,
- fsmonitor_has_run_once : 1;
- struct hashmap name_hash;
- struct hashmap dir_hash;
- struct object_id oid;
- struct untracked_cache *untracked;
- uint64_t fsmonitor_last_update;
- struct ewah_bitmap *fsmonitor_dirty;
- struct mem_pool *ce_mem_pool;
-};
-
/* Name hashing */
int test_lazy_init_name_hash(struct index_state *istate, int try_threaded);
void add_name_hash(struct index_state *istate, struct cache_entry *ce);
diff --git a/repository.h b/repository.h
index 4fb6a5885f..3371afceaa 100644
--- a/repository.h
+++ b/repository.h
@@ -1,11 +1,11 @@
#ifndef REPOSITORY_H
#define REPOSITORY_H
+#include "index.h"
#include "path.h"
struct config_set;
struct git_hash_algo;
-struct index_state;
struct lock_file;
struct pathspec;
struct raw_object_store;
@@ -87,7 +87,7 @@ struct repository {
* Repository's in-memory index.
* 'repo_read_index()' can be used to populate 'index'.
*/
- struct index_state *index;
+ struct index_state index;
/* Repository's current hash algorithm, as serialized on disk. */
const struct git_hash_algo *hash_algo;