Re: [PATCH 04/14] fetch: add object filtering for partial fetch

2017-11-16 Thread Jeff Hostetler



On 11/3/2017 4:38 PM, Jonathan Tan wrote:

@@ -1242,6 +1249,20 @@ static int fetch_multiple(struct string_list *list)
int i, result = 0;
struct argv_array argv = ARGV_ARRAY_INIT;
  
+	if (filter_options.choice) {

+   /*
+* We currently only support partial-fetches to the remote
+* used for the partial-clone because we only support 1
+* promisor remote, so we DO NOT allow explicit command
+* line filter arguments.
+*
+* Note that the loop below will spawn background fetches
+* for each remote and one of them MAY INHERIT the proper
+* partial-fetch settings, so everything is consistent.
+*/
+   die(_("partial-fetch is not supported on multiple remotes"));
+   }
+
if (!append && !dry_run) {
int errcode = truncate_fetch_head();
if (errcode)


My intention in doing the "fetch: refactor calculation of remote list"
patch is so that the interaction between the provided list of remotes
and the specification of the filter can be handled using the following
diff:

 -  if (remote)
 +  if (remote) {
 +  if (filter_options.choice &&
 +  strcmp(remote->name, 
repository_format_partial_clone_remote))
 +  die(_("--blob-max-bytes can only be used with the remote 
configured in core.partialClone"));
result = fetch_one(remote, argc, argv);
 -  else
 +  } else {
 +  if (filter_options.choice)
 +  die(_("--blob-max-bytes can only be used with the remote 
configured in core.partialClone"));
result = fetch_multiple();
 +  }

(Ignore the "blob-max-bytes" in the error message - that needs to be
updated.)

The GitHub link I provided above has this diff, and it seems to work.



I put the filter_options.choice tests inside the fetch_{one,multiple}
routines because the former needs to be able to register partial clone
with the config and/or inherit the default filter-spec for the
promisor remote and that took more code that what can neatly fit inline
here.  This will be more apparent in my next patch series.

Jeff


Re: [PATCH 04/14] fetch: add object filtering for partial fetch

2017-11-03 Thread Jonathan Tan
I did some of my own investigation and have a working (i.e. passing
tests) version of this patch here:

https://github.com/jonathantanmy/git/commits/pc20171103

If you want, you can use that, or incorporate the changes therein here.
I'll also remark on my findings inline.

On Thu,  2 Nov 2017 20:31:19 +
Jeff Hostetler  wrote:

> @@ -754,6 +757,7 @@ static int store_updated_refs(const char *raw_url, const 
> char *remote_name,
>   const char *filename = dry_run ? "/dev/null" : git_path_fetch_head();
>   int want_status;
>   int summary_width = transport_summary_width(ref_map);
> + struct check_connected_options opt = CHECK_CONNECTED_INIT;
>  
>   fp = fopen(filename, "a");
>   if (!fp)
> @@ -765,7 +769,7 @@ static int store_updated_refs(const char *raw_url, const 
> char *remote_name,
>   url = xstrdup("foreign");
>  
>   rm = ref_map;
> - if (check_connected(iterate_ref_map, , NULL)) {
> + if (check_connected(iterate_ref_map, , )) {

opt here is unchanged from CHECK_CONNECTED_INIT, so this change is unnecessary.

>   rc = error(_("%s did not send all necessary objects\n"), url);
>   goto abort;
>   }
> @@ -1044,6 +1048,9 @@ static struct transport *prepare_transport(struct 
> remote *remote, int deepen)
>   set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
>   if (update_shallow)
>   set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
> + if (filter_options.choice)
> + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
> +filter_options.raw_value);

You'll also need to set TRANS_OPT_FROM_PROMISOR.

> @@ -1242,6 +1249,20 @@ static int fetch_multiple(struct string_list *list)
>   int i, result = 0;
>   struct argv_array argv = ARGV_ARRAY_INIT;
>  
> + if (filter_options.choice) {
> + /*
> +  * We currently only support partial-fetches to the remote
> +  * used for the partial-clone because we only support 1
> +  * promisor remote, so we DO NOT allow explicit command
> +  * line filter arguments.
> +  *
> +  * Note that the loop below will spawn background fetches
> +  * for each remote and one of them MAY INHERIT the proper
> +  * partial-fetch settings, so everything is consistent.
> +  */
> + die(_("partial-fetch is not supported on multiple remotes"));
> + }
> +
>   if (!append && !dry_run) {
>   int errcode = truncate_fetch_head();
>   if (errcode)

My intention in doing the "fetch: refactor calculation of remote list"
patch is so that the interaction between the provided list of remotes
and the specification of the filter can be handled using the following
diff:

-   if (remote)
+   if (remote) {
+   if (filter_options.choice &&
+   strcmp(remote->name, 
repository_format_partial_clone_remote))
+   die(_("--blob-max-bytes can only be used with the 
remote configured in core.partialClone"));
result = fetch_one(remote, argc, argv);
-   else
+   } else {
+   if (filter_options.choice)
+   die(_("--blob-max-bytes can only be used with the 
remote configured in core.partialClone"));
result = fetch_multiple();
+   }

(Ignore the "blob-max-bytes" in the error message - that needs to be
updated.)

The GitHub link I provided above has this diff, and it seems to work.


[PATCH 04/14] fetch: add object filtering for partial fetch

2017-11-02 Thread Jeff Hostetler
From: Jeff Hostetler 

Teach fetch to use the list-objects filtering parameters
to allow a "partial fetch" following a "partial clone".

Signed-off-by: Jeff Hostetler 
---
 builtin/fetch.c | 66 -
 1 file changed, 65 insertions(+), 1 deletion(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index 1b1f039..150ca0a 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -18,6 +18,7 @@
 #include "argv-array.h"
 #include "utf8.h"
 #include "packfile.h"
+#include "list-objects-filter-options.h"
 
 static const char * const builtin_fetch_usage[] = {
N_("git fetch [] [ [...]]"),
@@ -55,6 +56,7 @@ static int recurse_submodules_default = 
RECURSE_SUBMODULES_ON_DEMAND;
 static int shown_url = 0;
 static int refmap_alloc, refmap_nr;
 static const char **refmap_array;
+static struct list_objects_filter_options filter_options;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -160,6 +162,7 @@ static struct option builtin_fetch_options[] = {
TRANSPORT_FAMILY_IPV4),
OPT_SET_INT('6', "ipv6", , N_("use IPv6 addresses only"),
TRANSPORT_FAMILY_IPV6),
+   OPT_PARSE_LIST_OBJECTS_FILTER(_options),
OPT_END()
 };
 
@@ -754,6 +757,7 @@ static int store_updated_refs(const char *raw_url, const 
char *remote_name,
const char *filename = dry_run ? "/dev/null" : git_path_fetch_head();
int want_status;
int summary_width = transport_summary_width(ref_map);
+   struct check_connected_options opt = CHECK_CONNECTED_INIT;
 
fp = fopen(filename, "a");
if (!fp)
@@ -765,7 +769,7 @@ static int store_updated_refs(const char *raw_url, const 
char *remote_name,
url = xstrdup("foreign");
 
rm = ref_map;
-   if (check_connected(iterate_ref_map, , NULL)) {
+   if (check_connected(iterate_ref_map, , )) {
rc = error(_("%s did not send all necessary objects\n"), url);
goto abort;
}
@@ -1044,6 +1048,9 @@ static struct transport *prepare_transport(struct remote 
*remote, int deepen)
set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes");
if (update_shallow)
set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes");
+   if (filter_options.choice)
+   set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER,
+  filter_options.raw_value);
return transport;
 }
 
@@ -1242,6 +1249,20 @@ static int fetch_multiple(struct string_list *list)
int i, result = 0;
struct argv_array argv = ARGV_ARRAY_INIT;
 
+   if (filter_options.choice) {
+   /*
+* We currently only support partial-fetches to the remote
+* used for the partial-clone because we only support 1
+* promisor remote, so we DO NOT allow explicit command
+* line filter arguments.
+*
+* Note that the loop below will spawn background fetches
+* for each remote and one of them MAY INHERIT the proper
+* partial-fetch settings, so everything is consistent.
+*/
+   die(_("partial-fetch is not supported on multiple remotes"));
+   }
+
if (!append && !dry_run) {
int errcode = truncate_fetch_head();
if (errcode)
@@ -1267,6 +1288,46 @@ static int fetch_multiple(struct string_list *list)
return result;
 }
 
+static inline void partial_fetch_one_setup(struct remote *remote)
+{
+#if 0 /* TODO */
+   if (filter_options.choice) {
+   /*
+* A partial-fetch was explicitly requested.
+*
+* If this is the first partial-* command on
+* this repo, we must register the partial
+* settings in the repository extension.
+*
+* If this follows a previous partial-* command
+* we must ensure the args are consistent with
+* the existing registration (because we don't
+* currently support mixing-and-matching).
+*/
+   partial_clone_utils_register(_options,
+remote->name, "fetch");
+   return;
+   }
+
+   if (is_partial_clone_registered() &&
+   !strcmp(remote->name, repository_format_partial_clone_remote)) {
+   /*
+* If a partial-* command has already been used on
+* this repo and it was to this remote, we should
+* inherit the filter settings used previously.
+* That is, if clone omitted very large blobs, then
+* fetch should too.
+*
+* Use the cached filter-spec and create the filter
+* settings.
+