Re: [WIP RFC 5/5] upload-pack: send part of packfile response as uri

2018-12-04 Thread Stefan Beller
On Mon, Dec 3, 2018 at 3:38 PM Jonathan Tan  wrote:
>
> This is a partial implementation of upload-pack sending part of its
> packfile response as URIs.

It does so by implementing a new flag `--exclude-configured-blobs`
in pack-objects, which would change the output of pack-objects to
output a list of URLs (of the excluded blobs) followed by the
pack to be asked for.

This design seems easy to implement as then upload-pack
can just parse the output and only needs to insert
"packfile" and "packfile-uris\n" at the appropriate places
of the stream, otherwise it just passes through the information
obtained from pack-objects.

The design as-is would make for hard documentation of
pack-objects (its output is not just a pack anymore when that
flag is given, but a highly optimized byte stream).

Initially I did not anticipate this to be one of the major design problems
as I assumed we'd want to use this pack feature over broadly (e.g.
eventually by offloading most of the objects into a base pack that
is just always included as the likelihood for any object in there is
very high on initial clone), but it makes total sense to only
output the URIs that we actually need.

An alternative that comes very close to the current situation
would be to either pass a file path or file descriptor (that upload-pack
listens to in parallel) to pack-objects as an argument of the new flag.
Then we would not need to splice the protocol sections into the single
output stream, but we could announce the sections, then flush
the URIs and then flush the pack afterwards.

I looked at this quickly, but that would need either extensions in
run-command.c for setting up the new fd for us, as there we already
have OS specific code for these setups, or we'd have to duplicate
some of the logic here, which doesn't enthuse me either.

So maybe we'd create a temp file via mkstemp and pass
the file name to pack-objects for writing the URIs and then
we'd just need to stream that file?

> +   # NEEDSWORK: "git clone" fails here because it ignores the URI 
> provided
> +   # instead of fetching it.
> +   test_must_fail env GIT_TRACE_PACKET="$(pwd)/log" \
> +   git -c protocol.version=2 clone \
> +   "$HTTPD_URL/smart/http_parent" http_child 2>err &&
> +   # Although "git clone" fails, we can still check that the server
> +   # provided the URI we requested and that the error message pinpoints
> +   # the object that is missing.
> +   grep "clone< uri https://example.com/a-uri; log &&
> +   test_i18ngrep "did not receive expected object $(cat h)" err

That is a nice test!


[WIP RFC 5/5] upload-pack: send part of packfile response as uri

2018-12-03 Thread Jonathan Tan
This is a partial implementation of upload-pack sending part of its
packfile response as URIs.

The client is not fully implemented - it knows to ignore the
"packfile-uris" section, but because it does not actually fetch those
URIs, the returned packfile is incomplete. A test is included to show
that the appropriate URI is indeed transmitted, and that the returned
packfile is lacking exactly the expected object.

Signed-off-by: Jonathan Tan 
---
 builtin/pack-objects.c | 48 ++
 fetch-pack.c   |  9 
 t/t5702-protocol-v2.sh | 25 ++
 upload-pack.c  | 37 
 4 files changed, 115 insertions(+), 4 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index e7ea206c08..2abbddd3cb 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -117,6 +117,15 @@ enum missing_action {
 static enum missing_action arg_missing_action;
 static show_object_fn fn_show_object;
 
+struct configured_exclusion {
+   struct oidmap_entry e;
+   char *uri;
+};
+static struct oidmap configured_exclusions;
+
+static int exclude_configured_blobs;
+static struct oidset excluded_by_config;
+
 /*
  * stats
  */
@@ -831,6 +840,23 @@ static off_t write_reused_pack(struct hashfile *f)
return reuse_packfile_offset - sizeof(struct pack_header);
 }
 
+static void write_excluded_by_configs(void)
+{
+   struct oidset_iter iter;
+   const struct object_id *oid;
+
+   oidset_iter_init(_by_config, );
+   while ((oid = oidset_iter_next())) {
+   struct configured_exclusion *ex =
+   oidmap_get(_exclusions, oid);
+
+   if (!ex)
+   BUG("configured exclusion wasn't configured");
+   write_in_full(1, ex->uri, strlen(ex->uri));
+   write_in_full(1, "\n", 1);
+   }
+}
+
 static const char no_split_warning[] = N_(
 "disabling bitmap writing, packs are split due to pack.packSizeLimit"
 );
@@ -1124,6 +1150,12 @@ static int want_object_in_pack(const struct object_id 
*oid,
}
}
 
+   if (exclude_configured_blobs &&
+   oidmap_get(_exclusions, oid)) {
+   oidset_insert(_by_config, oid);
+   return 0;
+   }
+
return 1;
 }
 
@@ -2728,6 +2760,19 @@ static int git_pack_config(const char *k, const char *v, 
void *cb)
pack_idx_opts.version);
return 0;
}
+   if (!strcmp(k, "uploadpack.blobpackfileuri")) {
+   struct configured_exclusion *ex = xmalloc(sizeof(*ex));
+   const char *end;
+
+   if (parse_oid_hex(v, >e.oid, ) || *end != ' ')
+   die(_("value of uploadpack.blobpackfileuri must be "
+ "of the form ' ' (got '%s')"), v);
+   if (oidmap_get(_exclusions, >e.oid))
+   die(_("object already configured in another "
+ "uploadpack.blobpackfileuri (got '%s')"), v);
+   ex->uri = xstrdup(end + 1);
+   oidmap_put(_exclusions, ex);
+   }
return git_default_config(k, v, cb);
 }
 
@@ -3314,6 +3359,8 @@ int cmd_pack_objects(int argc, const char **argv, const 
char *prefix)
 N_("do not pack objects in promisor packfiles")),
OPT_BOOL(0, "delta-islands", _delta_islands,
 N_("respect islands during delta compression")),
+   OPT_BOOL(0, "exclude-configured-blobs", 
_configured_blobs,
+N_("respect uploadpack.blobpackfileuri")),
OPT_END(),
};
 
@@ -3487,6 +3534,7 @@ int cmd_pack_objects(int argc, const char **argv, const 
char *prefix)
return 0;
if (nr_result)
prepare_pack(window, depth);
+   write_excluded_by_configs();
write_pack_file();
if (progress)
fprintf_ln(stderr,
diff --git a/fetch-pack.c b/fetch-pack.c
index 9691046e64..6e1985ab55 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1413,6 +1413,15 @@ static struct ref *do_fetch_pack_v2(struct 
fetch_pack_args *args,
receive_wanted_refs(, sought, nr_sought);
 
/* get the pack */
+   if (process_section_header(, "packfile-uris", 
1)) {
+   /* skip the whole section */
+   process_section_header(, 
"packfile-uris", 0);
+   while (packet_reader_read() == 
PACKET_READ_NORMAL) {
+   /* do nothing */
+   }
+   if (reader.status != PACKET_READ_DELIM)
+   die("expected DELIM");
+   }
process_section_header(, "packfile", 0);