diff_file.c

Johan Corveleyn Mon, 14 Jan 2013 16:40:10 -0800

On Mon, Dec 31, 2012 at 9:09 PM,  <[email protected]> wrote:
> Author: rhuijben
> Date: Mon Dec 31 20:09:21 2012
> New Revision: 1427237
>
> URL: http://svn.apache.org/viewvc?rev=1427237&view=rev
> Log:
> Following up on r1427210, document some of the unaligned access magic by using
> similar pointer calculations in more cases. Document the + 1's in the pointer
> calculations.


It's all been a while, so my memory is a bit foggy on this, but just
reading this diff I have a question ...

> * subversion/libsvn_diff/diff_file.c
>   (find_identical_suffix): Make the can_read code easier to follow; add some
>      comments. Reduce scope of single-use variable.
>
> Modified:
>     subversion/trunk/subversion/libsvn_diff/diff_file.c
>
> Modified: subversion/trunk/subversion/libsvn_diff/diff_file.c
> URL: 
> http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_diff/diff_file.c?rev=1427237&r1=1427236&r2=1427237&view=diff
> ==============================================================================
> --- subversion/trunk/subversion/libsvn_diff/diff_file.c (original)
> +++ subversion/trunk/subversion/libsvn_diff/diff_file.c Mon Dec 31 20:09:21 
> 2012
> @@ -528,7 +528,6 @@ find_identical_suffix(apr_off_t *suffix_
>    apr_off_t min_file_size;
>    int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
>    svn_boolean_t is_match;
> -  svn_boolean_t reached_prefix;
>    apr_off_t lines = 0;
>    svn_boolean_t had_cr;
>    svn_boolean_t had_nl;
> @@ -590,6 +589,7 @@ find_identical_suffix(apr_off_t *suffix_
>    had_nl = FALSE;
>    while (is_match)
>      {
> +      svn_boolean_t reached_prefix;
>  #if SVN_UNALIGNED_ACCESS_IS_OK
>        /* Initialize the minimum pointer positions. */
>        const char *min_curp[4];
> @@ -616,24 +616,28 @@ find_identical_suffix(apr_off_t *suffix_
>        DECREMENT_POINTERS(file_for_suffix, file_len, pool);
>
>  #if SVN_UNALIGNED_ACCESS_IS_OK
> +      for (i = 0; i < file_len; i++)
> +        min_curp[i] = file_for_suffix[i].buffer;
>
> -      min_curp[0] = file_for_suffix[0].chunk == suffix_min_chunk0
> -                  ? file_for_suffix[0].buffer + suffix_min_offset0 + 1
> -                  : file_for_suffix[0].buffer + 1;
> -      for (i = 1; i < file_len; i++)
> -        min_curp[i] = file_for_suffix[i].buffer + 1;
> +      /* If we are in the same chunk that contains the last part of the 
> common
> +         prefix, use the min_curp[0] pointer to make sure we don't get a
> +         suffix that overlaps the already determined common prefix. */
> +      if (file_for_suffix[0].chunk == suffix_min_chunk0)
> +        min_curp[0] += suffix_min_offset0;

After your change, the min_curp's are 1 lower than before, and ...

>        /* Scan quickly by reading with machine-word granularity. */
>        for (i = 0, can_read_word = TRUE; i < file_len; i++)
>          can_read_word = can_read_word
> -                        && (   file_for_suffix[i].curp - 
> sizeof(apr_uintptr_t)
> -                            >= min_curp[i]);
> +                        && (  (file_for_suffix[i].curp + 1
> +                                 - sizeof(apr_uintptr_t))
> +                            > min_curp[i]);

... here the LHS is 1 higher than before *and* you changed the
operator from >= to >. Are you sure that's correct? It probably
doesn't hurt, but it let's the chunky suffix scanning stop potentially
1 word too early (I think ... if the previous version was correct that
is).

Same below inside the while loop ...

>        while (can_read_word)
>          {
>            apr_uintptr_t chunk;
>
> -          /* For each file curp is positioned at the next byte to read, but 
> we
> -             want to examine the bytes before the current location. */
> +          /* For each file curp is positioned at the current byte, but we
> +             want to examine the current byte and the ones before the current
> +             location as one machine word. */
>
>            chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
>                                               - sizeof(apr_uintptr_t));
> @@ -654,15 +658,18 @@ find_identical_suffix(apr_off_t *suffix_
>              {
>                file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
>                can_read_word = can_read_word
> -                              && (   (file_for_suffix[i].curp
> -                                        - sizeof(apr_uintptr_t))
> -                                  >= min_curp[i]);
> +                              && (  (file_for_suffix[i].curp + 1
> +                                       - sizeof(apr_uintptr_t))
> +                                  > min_curp[i]);

here.

Or was that intentional?

-- 
Johan

>              }
>
> -          /* We skipped 4 bytes, so there are no closing EOLs */
> +          /* We skipped some bytes, so there are no closing EOLs */
>            had_nl = FALSE;
>            had_cr = FALSE;
>          }
> +
> +      /* The > min_curp[i] check leaves at least one final byte for checking
> +         in the non block optimized case below. */
>  #endif
>
>        reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
> @@ -671,7 +678,8 @@ find_identical_suffix(apr_off_t *suffix_
>        if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
>          break;
>
> -      for (i = 1, is_match = TRUE; i < file_len; i++)
> +      is_match = TRUE;
> +      for (i = 1; i < file_len; i++)
>          is_match = is_match
>                     && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
>      }
>
>

Re: svn commit: r1427237 - /subversion/trunk/subversion/libsvn_diff/diff_file.c

Reply via email to