On Mon, Mar 24, 2014 at 02:43:36PM -0700, Junio C Hamano wrote:
> Kirill Smelkov <k...@mns.spb.ru> writes:
> 
> > instead of allocating it all the time for every subtree in
> > __diff_tree_sha1, let's allocate it once in diff_tree_sha1, and then all
> > callee just use it in stacking style, without memory allocations.
> >
> > This should be faster, and for me this change gives the following
> > slight speedups for
> >
> >     git log --raw --no-abbrev --no-renames --format='%H'
> >
> >                 navy.git    linux.git v3.10..v3.11
> >
> >     before      0.618s      1.903s
> >     after       0.611s      1.889s
> >     speedup     1.1%        0.7%
> >
> > Signed-off-by: Kirill Smelkov <k...@mns.spb.ru>
> > ---
> >
> > Changes since v1:
> >
> >  - don't need to touch diff.h, as the function we are changing became 
> > static.
> >
> >  tree-diff.c | 36 ++++++++++++++++++------------------
> >  1 file changed, 18 insertions(+), 18 deletions(-)
> >
> > diff --git a/tree-diff.c b/tree-diff.c
> > index aea0297..c76821d 100644
> > --- a/tree-diff.c
> > +++ b/tree-diff.c
> > @@ -115,7 +115,7 @@ static void show_path(struct strbuf *base, struct 
> > diff_options *opt,
> >     if (recurse) {
> >             strbuf_addch(base, '/');
> >             __diff_tree_sha1(t1 ? t1->entry.sha1 : NULL,
> > -                            t2 ? t2->entry.sha1 : NULL, base->buf, opt);
> > +                            t2 ? t2->entry.sha1 : NULL, base, opt);
> >     }
> >  
> >     strbuf_setlen(base, old_baselen);
> 
> I was scratching my head for a while, after seeing that there does
> not seem to be any *new* code added by this patch in order to
> store-away the original length and restore the singleton base buffer
> to the original length after using addch/addstr to extend it.
> 
> But I see that the code has already been prepared to do this
> conversion.  I wonder why we didn't do this earlier ;-)

The conversion to reusing memory started in 48932677 "diff-tree: convert
base+baselen to writable strbuf" which allowed to avoid "quite a bit of
malloc() and memcpy()", but for this to work allocation at diff_tree()
entry had to be there.

In particular it had to be there, because diff_tree() accepted base as C
string, not strbuf, and since diff_tree() was calling itself
recursively - oops - new allocation on every subtree.

I've opened the door for avoiding allocations via splitting diff_tree
into high-level and low-level parts. The high-level part still accepts
`char *base`, but low-level function operates on strbuf and recurses
into low-level self.

The high-level diff_tree_sha1() still allocates memory for every
diff(tree1,tree2), but that is significantly lower compared to
allocating memory on every subtree...

The lesson here is: better use strbuf for api unless there is a reason
not to.


> Looks good.  Thanks.

Thanks.

> > @@ -138,12 +138,10 @@ static void skip_uninteresting(struct tree_desc *t, 
> > struct strbuf *base,
> >  }
> >  
> >  static int __diff_tree_sha1(const unsigned char *old, const unsigned char 
> > *new,
> > -                       const char *base_str, struct diff_options *opt)
> > +                       struct strbuf *base, struct diff_options *opt)
> >  {
> >     struct tree_desc t1, t2;
> >     void *t1tree, *t2tree;
> > -   struct strbuf base;
> > -   int baselen = strlen(base_str);
> >  
> >     t1tree = fill_tree_descriptor(&t1, old);
> >     t2tree = fill_tree_descriptor(&t2, new);
> > @@ -151,17 +149,14 @@ static int __diff_tree_sha1(const unsigned char *old, 
> > const unsigned char *new,
> >     /* Enable recursion indefinitely */
> >     opt->pathspec.recursive = DIFF_OPT_TST(opt, RECURSIVE);
> >  
> > -   strbuf_init(&base, PATH_MAX);
> > -   strbuf_add(&base, base_str, baselen);
> > -
> >     for (;;) {
> >             int cmp;
> >  
> >             if (diff_can_quit_early(opt))
> >                     break;
> >             if (opt->pathspec.nr) {
> > -                   skip_uninteresting(&t1, &base, opt);
> > -                   skip_uninteresting(&t2, &base, opt);
> > +                   skip_uninteresting(&t1, base, opt);
> > +                   skip_uninteresting(&t2, base, opt);
> >             }
> >             if (!t1.size && !t2.size)
> >                     break;
> > @@ -173,7 +168,7 @@ static int __diff_tree_sha1(const unsigned char *old, 
> > const unsigned char *new,
> >                     if (DIFF_OPT_TST(opt, FIND_COPIES_HARDER) ||
> >                         hashcmp(t1.entry.sha1, t2.entry.sha1) ||
> >                         (t1.entry.mode != t2.entry.mode))
> > -                           show_path(&base, opt, &t1, &t2);
> > +                           show_path(base, opt, &t1, &t2);
> >  
> >                     update_tree_entry(&t1);
> >                     update_tree_entry(&t2);
> > @@ -181,18 +176,17 @@ static int __diff_tree_sha1(const unsigned char *old, 
> > const unsigned char *new,
> >  
> >             /* t1 < t2 */
> >             else if (cmp < 0) {
> > -                   show_path(&base, opt, &t1, /*t2=*/NULL);
> > +                   show_path(base, opt, &t1, /*t2=*/NULL);
> >                     update_tree_entry(&t1);
> >             }
> >  
> >             /* t1 > t2 */
> >             else {
> > -                   show_path(&base, opt, /*t1=*/NULL, &t2);
> > +                   show_path(base, opt, /*t1=*/NULL, &t2);
> >                     update_tree_entry(&t2);
> >             }
> >     }
> >  
> > -   strbuf_release(&base);
> >     free(t2tree);
> >     free(t1tree);
> >     return 0;
> > @@ -209,7 +203,7 @@ static inline int diff_might_be_rename(void)
> >             !DIFF_FILE_VALID(diff_queued_diff.queue[0]->one);
> >  }
> >  
> > -static void try_to_follow_renames(const unsigned char *old, const unsigned 
> > char *new, const char *base, struct diff_options *opt)
> > +static void try_to_follow_renames(const unsigned char *old, const unsigned 
> > char *new, struct strbuf *base, struct diff_options *opt)
> >  {
> >     struct diff_options diff_opts;
> >     struct diff_queue_struct *q = &diff_queued_diff;
> > @@ -306,13 +300,19 @@ static void try_to_follow_renames(const unsigned char 
> > *old, const unsigned char
> >     q->nr = 1;
> >  }
> >  
> > -int diff_tree_sha1(const unsigned char *old, const unsigned char *new, 
> > const char *base, struct diff_options *opt)
> > +int diff_tree_sha1(const unsigned char *old, const unsigned char *new, 
> > const char *base_str, struct diff_options *opt)
> >  {
> > +   struct strbuf base;
> >     int retval;
> >  
> > -   retval = __diff_tree_sha1(old, new, base, opt);
> > -   if (!*base && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && 
> > diff_might_be_rename())
> > -           try_to_follow_renames(old, new, base, opt);
> > +   strbuf_init(&base, PATH_MAX);
> > +   strbuf_addstr(&base, base_str);
> > +
> > +   retval = __diff_tree_sha1(old, new, &base, opt);
> > +   if (!*base_str && DIFF_OPT_TST(opt, FOLLOW_RENAMES) && 
> > diff_might_be_rename())
> > +           try_to_follow_renames(old, new, &base, opt);
> > +
> > +   strbuf_release(&base);
> >  
> >     return retval;
> >  }
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to