Re: [PATCH 3/4] cat-file: use a single strbuf for all output

2018-08-14 Thread Jeff King
On Tue, Aug 14, 2018 at 09:30:57PM +0200, René Scharfe wrote:

> > -static void batch_object_write(const char *obj_name, struct batch_options 
> > *opt,
> > +static void batch_object_write(const char *obj_name,
> > +  struct strbuf *scratch,
> > +  struct batch_options *opt,
> >struct expand_data *data)
> >  {
> > -   struct strbuf buf = STRBUF_INIT;
> 
> We could also avoid passing that buffer around by making it static.  I
> shy away from adding static variables because the resulting code won't
> be thread-safe, but that fear might be irrational, especially with
> cat-file.

True, I didn't even think of that after your original got me in the
mindset of passing the buffer down. It's not too bad to do it this way,
and I agree with you that we are better avoiding static variables if we
can. Five years ago I might have said the opposite, but we've cleaned up
a lot of confusing hidden-static bits in that time. Let's not go in the
opposite direction. :)

-Peff


Re: [PATCH 3/4] cat-file: use a single strbuf for all output

2018-08-14 Thread René Scharfe
Am 14.08.2018 um 20:20 schrieb Jeff King:
> When we're in batch mode, we end up in batch_object_write()
> for each object, which allocates its own strbuf for each
> call. Instead, we can provide a single "scratch" buffer that
> gets reused for each output. When running:
> 
>   git cat-file --batch-all-objects --batch-check='%(objectname)'
> 
> on git.git, my best-of-five time drops from:
> 
>   real0m0.171s
>   user0m0.159s
>   sys 0m0.012s
> 
> to:
> 
>   real0m0.133s
>   user0m0.121s
>   sys 0m0.012s
> 
> Note that we could do this just by putting the "scratch"
> pointer into "struct expand_data", but I chose instead to
> add an extra parameter to the callstack. That's more
> verbose, but it makes it a bit more obvious what is going
> on, which in turn makes it easy to see where we need to be
> releasing the string in the caller (right after the loop
> which uses it in each case).
> 
> Based-on-a-patch-by: René Scharfe 
> Signed-off-by: Jeff King 
> ---
> It also made it easy to see that without the prior patch,
> we'd have been using "buf" for two parameters. :)

Good catch.

> 
>  builtin/cat-file.c | 28 +---
>  1 file changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index 3ed1d0be80..08dced2614 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -338,11 +338,11 @@ static void print_object_or_die(struct batch_options 
> *opt, struct expand_data *d
>   }
>  }
>  
> -static void batch_object_write(const char *obj_name, struct batch_options 
> *opt,
> +static void batch_object_write(const char *obj_name,
> +struct strbuf *scratch,
> +struct batch_options *opt,
>  struct expand_data *data)
>  {
> - struct strbuf buf = STRBUF_INIT;

We could also avoid passing that buffer around by making it static.  I
shy away from adding static variables because the resulting code won't
be thread-safe, but that fear might be irrational, especially with
cat-file.

> -
>   if (!data->skip_object_info &&
>   oid_object_info_extended(the_repository, >oid, >info,
>OBJECT_INFO_LOOKUP_REPLACE) < 0) {
> @@ -352,10 +352,10 @@ static void batch_object_write(const char *obj_name, 
> struct batch_options *opt,
>   return;
>   }
>  
> - strbuf_expand(, opt->format, expand_format, data);
> - strbuf_addch(, '\n');
> - batch_write(opt, buf.buf, buf.len);
> - strbuf_release();
> + strbuf_reset(scratch);
> + strbuf_expand(scratch, opt->format, expand_format, data);
> + strbuf_addch(scratch, '\n');
> + batch_write(opt, scratch->buf, scratch->len);
>  
>   if (opt->print_contents) {
>   print_object_or_die(opt, data);
> @@ -363,7 +363,9 @@ static void batch_object_write(const char *obj_name, 
> struct batch_options *opt,
>   }
>  }
>  
> -static void batch_one_object(const char *obj_name, struct batch_options *opt,
> +static void batch_one_object(const char *obj_name,
> +  struct strbuf *scratch,
> +  struct batch_options *opt,
>struct expand_data *data)
>  {
>   struct object_context ctx;
> @@ -405,20 +407,21 @@ static void batch_one_object(const char *obj_name, 
> struct batch_options *opt,
>   return;
>   }
>  
> - batch_object_write(obj_name, opt, data);
> + batch_object_write(obj_name, scratch, opt, data);
>  }
>  
>  struct object_cb_data {
>   struct batch_options *opt;
>   struct expand_data *expand;
>   struct oidset *seen;
> + struct strbuf *scratch;
>  };
>  
>  static int batch_object_cb(const struct object_id *oid, void *vdata)
>  {
>   struct object_cb_data *data = vdata;
>   oidcpy(>expand->oid, oid);
> - batch_object_write(NULL, data->opt, data->expand);
> + batch_object_write(NULL, data->scratch, data->opt, data->expand);
>   return 0;
>  }
>  
> @@ -509,6 +512,7 @@ static int batch_objects(struct batch_options *opt)
>  
>   cb.opt = opt;
>   cb.expand = 
> + cb.scratch = 
>  
>   if (opt->unordered) {
>   struct oidset seen = OIDSET_INIT;
> @@ -531,6 +535,7 @@ static int batch_objects(struct batch_options *opt)
>   oid_array_clear();
>   }
>  
> + strbuf_release();
>   return 0;
>   }
>  
> @@ -559,10 +564,11 @@ static int batch_objects(struct batch_options *opt)
>   data.rest = p;
>   }
>  
> - batch_one_object(input.buf, opt, );
> + batch_one_object(input.buf, , opt, );
>   }
>  
>   strbuf_release();
> + strbuf_release();
>   warn_on_object_refname_ambiguity = save_warning;
>   return retval;
>  }
> 


[PATCH 3/4] cat-file: use a single strbuf for all output

2018-08-14 Thread Jeff King
When we're in batch mode, we end up in batch_object_write()
for each object, which allocates its own strbuf for each
call. Instead, we can provide a single "scratch" buffer that
gets reused for each output. When running:

  git cat-file --batch-all-objects --batch-check='%(objectname)'

on git.git, my best-of-five time drops from:

  real  0m0.171s
  user  0m0.159s
  sys   0m0.012s

to:

  real  0m0.133s
  user  0m0.121s
  sys   0m0.012s

Note that we could do this just by putting the "scratch"
pointer into "struct expand_data", but I chose instead to
add an extra parameter to the callstack. That's more
verbose, but it makes it a bit more obvious what is going
on, which in turn makes it easy to see where we need to be
releasing the string in the caller (right after the loop
which uses it in each case).

Based-on-a-patch-by: René Scharfe 
Signed-off-by: Jeff King 
---
It also made it easy to see that without the prior patch,
we'd have been using "buf" for two parameters. :)

 builtin/cat-file.c | 28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 3ed1d0be80..08dced2614 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -338,11 +338,11 @@ static void print_object_or_die(struct batch_options 
*opt, struct expand_data *d
}
 }
 
-static void batch_object_write(const char *obj_name, struct batch_options *opt,
+static void batch_object_write(const char *obj_name,
+  struct strbuf *scratch,
+  struct batch_options *opt,
   struct expand_data *data)
 {
-   struct strbuf buf = STRBUF_INIT;
-
if (!data->skip_object_info &&
oid_object_info_extended(the_repository, >oid, >info,
 OBJECT_INFO_LOOKUP_REPLACE) < 0) {
@@ -352,10 +352,10 @@ static void batch_object_write(const char *obj_name, 
struct batch_options *opt,
return;
}
 
-   strbuf_expand(, opt->format, expand_format, data);
-   strbuf_addch(, '\n');
-   batch_write(opt, buf.buf, buf.len);
-   strbuf_release();
+   strbuf_reset(scratch);
+   strbuf_expand(scratch, opt->format, expand_format, data);
+   strbuf_addch(scratch, '\n');
+   batch_write(opt, scratch->buf, scratch->len);
 
if (opt->print_contents) {
print_object_or_die(opt, data);
@@ -363,7 +363,9 @@ static void batch_object_write(const char *obj_name, struct 
batch_options *opt,
}
 }
 
-static void batch_one_object(const char *obj_name, struct batch_options *opt,
+static void batch_one_object(const char *obj_name,
+struct strbuf *scratch,
+struct batch_options *opt,
 struct expand_data *data)
 {
struct object_context ctx;
@@ -405,20 +407,21 @@ static void batch_one_object(const char *obj_name, struct 
batch_options *opt,
return;
}
 
-   batch_object_write(obj_name, opt, data);
+   batch_object_write(obj_name, scratch, opt, data);
 }
 
 struct object_cb_data {
struct batch_options *opt;
struct expand_data *expand;
struct oidset *seen;
+   struct strbuf *scratch;
 };
 
 static int batch_object_cb(const struct object_id *oid, void *vdata)
 {
struct object_cb_data *data = vdata;
oidcpy(>expand->oid, oid);
-   batch_object_write(NULL, data->opt, data->expand);
+   batch_object_write(NULL, data->scratch, data->opt, data->expand);
return 0;
 }
 
@@ -509,6 +512,7 @@ static int batch_objects(struct batch_options *opt)
 
cb.opt = opt;
cb.expand = 
+   cb.scratch = 
 
if (opt->unordered) {
struct oidset seen = OIDSET_INIT;
@@ -531,6 +535,7 @@ static int batch_objects(struct batch_options *opt)
oid_array_clear();
}
 
+   strbuf_release();
return 0;
}
 
@@ -559,10 +564,11 @@ static int batch_objects(struct batch_options *opt)
data.rest = p;
}
 
-   batch_one_object(input.buf, opt, );
+   batch_one_object(input.buf, , opt, );
}
 
strbuf_release();
+   strbuf_release();
warn_on_object_refname_ambiguity = save_warning;
return retval;
 }
-- 
2.18.0.1066.g0d97f3a098