On Fri, Feb 28, 2025 at 02:14:34PM +0100, Branko Čibej wrote:
> This is probably the minimal patch to disable this tweak.

Thanks, your diff looks good to me.

> There's a bunch of
> code that needs to be replaced or removed, though.

These bits of code are easy to spot and remove.
Here is a diff for that:

[[[
Remove optimized unaligned-access code paths.

With newer compilers using vector instructions, these manual optimizations
can lead to build errors and do not provide benefits over compiler
optimizations.

* subversion/libsvn_diff/diff_file.c
  (contains_eol): Remove.
  (find_identical_prefix, find_identical_suffix): Remove code under
   #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_fs/tree.c
  (hash_func): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/dag_cache.c
  (cache_lookup): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_fs_x/string_table.c
  (copy_masks): Remove.
  (table_copy_string): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK. 

* subversion/libsvn_subr/eol.c
  (svn_eol__find_eol_start): Remove code under #ifdef
   SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/hash.c
  (hashfunc_compatible): Remove code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/string.c
  (svn_cstring__match_length, svn_cstring__reverse_match_length): Remove
   code under #ifdef SVN_UNALIGNED_ACCESS_IS_OK.

* subversion/libsvn_subr/utf_validate.c
  (first_non_fsm_start_char): Remove code under #ifdef
   SVN_UNALIGNED_ACCESS_IS_OK.

Reported by: Sam James from Gentoo
]]]

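For readers of the patch below: the contains_eol() function being removed
is a SWAR-style zero-byte test.  XORing the word with a mask of repeated
'\r' (or '\n') bytes turns a matching byte into zero, and the following
add/or step clears bit 7 of exactly those zero bytes, so a cleared bit 7
in either test flags a CR or LF somewhere in the word.  Here is a
standalone 64-bit sketch of the same trick (illustration only, not
Subversion code):

#include <stdint.h>

/* Illustration of the removed contains_eol() trick, fixed to 64-bit
 * words.  Returns nonzero iff one of the 8 bytes in CHUNK is '\r'
 * or '\n'. */
static int
contains_cr_or_lf(uint64_t chunk)
{
  const uint64_t lower7 = UINT64_C(0x7f7f7f7f7f7f7f7f);
  const uint64_t bit7   = UINT64_C(0x8080808080808080);

  /* XOR maps a byte equal to the mask byte to 0. */
  uint64_t r = chunk ^ UINT64_C(0x0d0d0d0d0d0d0d0d);  /* '\r' bytes -> 0 */
  uint64_t n = chunk ^ UINT64_C(0x0a0a0a0a0a0a0a0a);  /* '\n' bytes -> 0 */

  /* After this step, bit 7 of a byte is clear iff that byte was 0. */
  r |= (r & lower7) + lower7;
  n |= (n & lower7) + lower7;

  /* The AND is clear at a byte position iff bit 7 was clear in either
   * test, i.e. iff that byte of CHUNK was '\r' or '\n'. */
  return (r & n & bit7) != bit7;
}
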
Index: subversion/libsvn_diff/diff_file.c
===================================================================
--- subversion/libsvn_diff/diff_file.c  (revision 1924098)
+++ subversion/libsvn_diff/diff_file.c  (working copy)
@@ -355,23 +355,6 @@ is_one_at_eof(struct file_info file[], apr_size_t
   return FALSE;
 }
 
-/* Quickly determine whether there is a eol char in CHUNK.
- * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
- */
-
-#if SVN_UNALIGNED_ACCESS_IS_OK
-static svn_boolean_t contains_eol(apr_uintptr_t chunk)
-{
-  apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
-  apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
-
-  r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-  n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-
-  return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
-}
-#endif
-
 /* Find the prefix which is identical between all elements of the FILE array.
  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
  * set to TRUE if one of the FILEs reached its end while scanning prefix,
@@ -396,10 +379,6 @@ find_identical_prefix(svn_boolean_t *reached_one_e
     is_match = is_match && *file[0].curp == *file[i].curp;
   while (is_match)
     {
-#if SVN_UNALIGNED_ACCESS_IS_OK
-      apr_ssize_t max_delta, delta;
-#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
-
       /* ### TODO: see if we can take advantage of
          diff options like ignore_eol_style or ignore_space. */
       /* check for eol, and count */
@@ -419,53 +398,6 @@ find_identical_prefix(svn_boolean_t *reached_one_e
 
       INCREMENT_POINTERS(file, file_len, pool);
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-      /* Try to advance as far as possible with machine-word granularity.
-       * Determine how far we may advance with chunky ops without reaching
-       * endp for any of the files.
-       * Signedness is important here if curp gets close to endp.
-       */
-      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
-      for (i = 1; i < file_len; i++)
-        {
-          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
-          if (delta < max_delta)
-            max_delta = delta;
-        }
-
-      is_match = TRUE;
-      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
-        {
-          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
-          if (contains_eol(chunk))
-            break;
-
-          for (i = 1; i < file_len; i++)
-            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
-              {
-                is_match = FALSE;
-                break;
-              }
-
-          if (! is_match)
-            break;
-        }
-
-      if (delta /* > 0*/)
-        {
-          /* We either found a mismatch or an EOL at or shortly behind curp+delta
-           * or we cannot proceed with chunky ops without exceeding endp.
-           * In any way, everything up to curp + delta is equal and not an EOL.
-           */
-          for (i = 0; i < file_len; i++)
-            file[i].curp += delta;
-
-          /* Skipped data without EOL markers, so last char was not a CR. */
-          had_cr = FALSE;
-        }
-#endif
-
       *reached_one_eof = is_one_at_eof(file, file_len);
       if (*reached_one_eof)
         break;
@@ -611,11 +543,6 @@ find_identical_suffix(apr_off_t *suffix_lines, str
   while (is_match)
     {
       svn_boolean_t reached_prefix;
-#if SVN_UNALIGNED_ACCESS_IS_OK
-      /* Initialize the minimum pointer positions. */
-      const char *min_curp[4];
-      svn_boolean_t can_read_word;
-#endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 
       /* ### TODO: see if we can take advantage of
          diff options like ignore_eol_style or ignore_space. */
@@ -636,60 +563,6 @@ find_identical_suffix(apr_off_t *suffix_lines, str
 
       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-      for (i = 0; i < file_len; i++)
-        min_curp[i] = file_for_suffix[i].buffer;
-
-      /* If we are in the same chunk that contains the last part of the common
-         prefix, use the min_curp[0] pointer to make sure we don't get a
-         suffix that overlaps the already determined common prefix. */
-      if (file_for_suffix[0].chunk == suffix_min_chunk0)
-        min_curp[0] += suffix_min_offset0;
-
-      /* Scan quickly by reading with machine-word granularity. */
-      for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
-        can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
-                         > min_curp[i]);
-
-      while (can_read_word)
-        {
-          apr_uintptr_t chunk;
-
-          /* For each file curp is positioned at the current byte, but we
-             want to examine the current byte and the ones before the current
-             location as one machine word. */
-
-          chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
-                                             - sizeof(apr_uintptr_t));
-          if (contains_eol(chunk))
-            break;
-
-          for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
-            is_match = (chunk
-                           == *(const apr_uintptr_t *)
-                                    (file_for_suffix[i].curp + 1
-                                       - sizeof(apr_uintptr_t)));
-
-          if (! is_match)
-            break;
-
-          for (i = 0; i < file_len; i++)
-            {
-              file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
-              can_read_word = can_read_word
-                              && (  (file_for_suffix[i].curp + 1
-                                       - sizeof(apr_uintptr_t))
-                                  > min_curp[i]);
-            }
-
-          /* We skipped some bytes, so there are no closing EOLs */
-          had_nl = FALSE;
-        }
-
-      /* The > min_curp[i] check leaves at least one final byte for checking
-         in the non block optimized case below. */
-#endif
-
       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
                           == suffix_min_offset0;
Index: subversion/libsvn_fs_fs/tree.c
===================================================================
--- subversion/libsvn_fs_fs/tree.c      (revision 1924098)
+++ subversion/libsvn_fs_fs/tree.c      (working copy)
@@ -220,11 +220,6 @@ hash_func(svn_revnum_t revision,
   apr_size_t i;
   apr_uint32_t hash_value = (apr_uint32_t)revision;
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-  /* "randomizing" / distributing factor used in our hash function */
-  const apr_uint32_t factor = 0xd1f3da69;
-#endif
-
   /* Calculate the hash value
      (HASH_VALUE has been initialized to REVISION).
 
@@ -233,35 +228,8 @@ hash_func(svn_revnum_t revision,
      make as much of *PATH influence the result as possible to get an "even"
      spread across the hash buckets (maximizes our cache retention rate and
      thus the hit rates).
-
-     When chunked access is possible (independent of the PATH pointer's
-     value!), we read 4 bytes at once and multiply the hash value with a
-     FACTOR that mirror / pattern / shift all 4 input bytes to various bits
-     of the result.  The final result will be taken from the MSBs.
-
-     When chunked access is not possible (not supported by CPU or odd bytes
-     at the end of *PATH), we use the simple traditional "* 33" hash
-     function that works very well with texts / paths and that e.g. APR uses.
-
-     Please note that the bytewise and the chunked calculation are *NOT*
-     interchangeable as they will yield different results for the same input.
-     For any given machine and *PATH, we must use a fixed combination of the
-     two functions.
    */
-  i = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
-  /* We relax the dependency chain between iterations by processing
-     two chunks from the input per hash_value self-multiplication.
-     The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
-     per 2 chunks instead of 1 chunk.
-   */
-  for (; i + 8 <= path_len; i += 8)
-    hash_value = hash_value * factor * factor
-               + (  *(const apr_uint32_t*)(path + i) * factor
-                  + *(const apr_uint32_t*)(path + i + 4));
-#endif
-
-  for (; i < path_len; ++i)
+  for (i = 0; i < path_len; ++i)
     /* Help GCC to minimize the HASH_VALUE update latency by splitting the
        MUL 33 of the naive implementation: h = h * 33 + path[i].  This
        shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
Index: subversion/libsvn_fs_x/dag_cache.c
===================================================================
--- subversion/libsvn_fs_x/dag_cache.c  (revision 1924098)
+++ subversion/libsvn_fs_x/dag_cache.c  (working copy)
@@ -312,11 +312,6 @@ cache_lookup(svn_fs_x__dag_cache_t *cache,
   apr_size_t path_len = path->len;
   apr_uint32_t hash_value = (apr_uint32_t)(apr_uint64_t)change_set;
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-  /* "randomizing" / distributing factor used in our hash function */
-  const apr_uint32_t factor = 0xd1f3da69;
-#endif
-
   /* optimistic lookup: hit the same bucket again? */
   cache_entry_t *result = &cache->buckets[cache->last_hit];
   if (   (result->change_set == change_set)
@@ -332,20 +327,7 @@ cache_lookup(svn_fs_x__dag_cache_t *cache,
 
   /* need to do a full lookup.  Calculate the hash value
      (HASH_VALUE has been initialized to REVISION). */
-  i = 0;
-#if SVN_UNALIGNED_ACCESS_IS_OK
-  /* We relax the dependency chain between iterations by processing
-     two chunks from the input per hash_value self-multiplication.
-     The HASH_VALUE update latency is now 1 MUL latency + 1 ADD latency
-     per 2 chunks instead of 1 chunk.
-   */
-  for (; i + 8 <= path_len; i += 8)
-    hash_value = hash_value * factor * factor
-               + (  *(const apr_uint32_t*)(path->data + i) * factor
-                  + *(const apr_uint32_t*)(path->data + i + 4));
-#endif
-
-  for (; i < path_len; ++i)
+  for (i = 0; i < path_len; ++i)
     /* Help GCC to minimize the HASH_VALUE update latency by splitting the
        MUL 33 of the naive implementation: h = h * 33 + path[i].  This
        shortens the dependency chain from 1 shift + 2 ADDs to 1 shift + 1 ADD.
Index: subversion/libsvn_fs_x/string_table.c
===================================================================
--- subversion/libsvn_fs_x/string_table.c       (revision 1924098)
+++ subversion/libsvn_fs_x/string_table.c       (working copy)
@@ -473,21 +473,6 @@ svn_fs_x__string_table_create(const string_table_b
   return result;
 }
 
-/* Masks used by table_copy_string.  copy_mask[I] is used if the target
-   content to be preserved starts at byte I within the current chunk.
-   This is used to work around alignment issues.
- */
-#if SVN_UNALIGNED_ACCESS_IS_OK
-static const char *copy_masks[8] = { "\xff\xff\xff\xff\xff\xff\xff\xff",
-                                     "\x00\xff\xff\xff\xff\xff\xff\xff",
-                                     "\x00\x00\xff\xff\xff\xff\xff\xff",
-                                     "\x00\x00\x00\xff\xff\xff\xff\xff",
-                                     "\x00\x00\x00\x00\xff\xff\xff\xff",
-                                     "\x00\x00\x00\x00\x00\xff\xff\xff",
-                                     "\x00\x00\x00\x00\x00\x00\xff\xff",
-                                     "\x00\x00\x00\x00\x00\x00\x00\xff" };
-#endif
-
 static void
 table_copy_string(char *buffer,
                   apr_size_t len,
@@ -499,40 +484,10 @@ table_copy_string(char *buffer,
     {
       assert(header->head_length <= len);
         {
-#if SVN_UNALIGNED_ACCESS_IS_OK
-          /* the sections that we copy tend to be short but we can copy
-             *all* of it chunky because we made sure that source and target
-             buffer have some extra padding to prevent segfaults. */
-          apr_uint64_t mask;
-          apr_size_t to_copy = len - header->head_length;
-          apr_size_t copied = 0;
-
-          const char *source = table->data + header->tail_start;
-          char *target = buffer + header->head_length;
-          len = header->head_length;
-
-          /* copy whole chunks */
-          while (to_copy >= copied + sizeof(apr_uint64_t))
-            {
-              *(apr_uint64_t *)(target + copied)
-                = *(const apr_uint64_t *)(source + copied);
-              copied += sizeof(apr_uint64_t);
-            }
-
-          /* copy the remainder assuming that we have up to 8 extra bytes
-             of addressable buffer on the source and target sides.
-             Now, we simply copy 8 bytes and use a mask to filter & merge
-             old with new data. */
-          mask = *(const apr_uint64_t *)copy_masks[to_copy - copied];
-          *(apr_uint64_t *)(target + copied)
-            = (*(apr_uint64_t *)(target + copied) & mask)
-            | (*(const apr_uint64_t *)(source + copied) & ~mask);
-#else
           memcpy(buffer + header->head_length,
                  table->data + header->tail_start,
                  len - header->head_length);
           len = header->head_length;
-#endif
         }
 
       header = &table->short_strings[header->head_string];
Index: subversion/libsvn_subr/eol.c
===================================================================
--- subversion/libsvn_subr/eol.c        (revision 1924098)
+++ subversion/libsvn_subr/eol.c        (working copy)
@@ -33,34 +33,6 @@
 char *
 svn_eol__find_eol_start(char *buf, apr_size_t len)
 {
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Scan the input one machine word at a time. */
-  for (; len > sizeof(apr_uintptr_t)
-       ; buf += sizeof(apr_uintptr_t), len -= sizeof(apr_uintptr_t))
-    {
-      /* This is a variant of the well-known strlen test: */
-      apr_uintptr_t chunk = *(const apr_uintptr_t *)buf;
-
-      /* A byte in SVN__R_TEST is \0, iff it was \r in *BUF.
-       * Similarly, SVN__N_TEST is an indicator for \n. */
-      apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
-      apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
-
-      /* A byte in SVN__R_TEST can only be < 0x80, iff it has been \0 before
-       * (i.e. \r in *BUF). Ditto for SVN__N_TEST. */
-      r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-      n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-
-      /* Check whether at least one of the words contains a byte <0x80
-       * (if one is detected, there was a \r or \n in CHUNK). */
-      if ((r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET)
-        break;
-    }
-
-#endif
-
-  /* The remaining odd bytes will be examined the naive way: */
   for (; len > 0; ++buf, --len)
     {
       if (*buf == '\n' || *buf == '\r')
Index: subversion/libsvn_subr/hash.c
===================================================================
--- subversion/libsvn_subr/hash.c       (revision 1924098)
+++ subversion/libsvn_subr/hash.c       (working copy)
@@ -636,18 +636,8 @@ hashfunc_compatible(const char *char_key, apr_ssiz
     if (*klen == APR_HASH_KEY_STRING)
       *klen = strlen(char_key);
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
     for (p = key, i = *klen; i >= 4; i-=4, p+=4)
       {
-        apr_uint32_t chunk = *(const apr_uint32_t *)p;
-
-        /* the ">> 17" part gives upper bits in the chunk a chance to make
-           some impact as well */
-        hash = hash * 33 * 33 * 33 * 33 + chunk + (chunk >> 17);
-      }
-#else
-    for (p = key, i = *klen; i >= 4; i-=4, p+=4)
-      {
         hash = hash * 33 * 33 * 33 * 33
               + p[0] * 33 * 33 * 33
               + p[1] * 33 * 33
@@ -654,7 +644,7 @@ hashfunc_compatible(const char *char_key, apr_ssiz
               + p[2] * 33
               + p[3];
       }
-#endif
+
     for (; i; i--, p++)
         hash = hash * 33 + *p;
 
Index: subversion/libsvn_subr/string.c
===================================================================
--- subversion/libsvn_subr/string.c     (revision 1924098)
+++ subversion/libsvn_subr/string.c     (working copy)
@@ -1508,20 +1508,6 @@ svn_cstring__match_length(const char *a,
 {
   apr_size_t pos = 0;
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Chunky processing is so much faster ...
-   *
-   * We can't make this work on architectures that require aligned access
-   * because A and B will probably have different alignment. So, skipping
-   * the first few chars until alignment is reached is not an option.
-   */
-  for (; max_len - pos >= sizeof(apr_size_t); pos += sizeof(apr_size_t))
-    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
-      break;
-
-#endif
-
   for (; pos < max_len; ++pos)
     if (a[pos] != b[pos])
       break;
@@ -1536,22 +1522,6 @@ svn_cstring__reverse_match_length(const char *a,
 {
   apr_size_t pos = 0;
 
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Chunky processing is so much faster ...
-   *
-   * We can't make this work on architectures that require aligned access
-   * because A and B will probably have different alignment. So, skipping
-   * the first few chars until alignment is reached is not an option.
-   */
-  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
-    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
-      break;
-
-  pos -= sizeof(apr_size_t);
-
-#endif
-
   /* If we find a mismatch at -pos, pos-1 characters matched.
    */
   while (++pos <= max_len)
Index: subversion/libsvn_subr/utf_validate.c
===================================================================
--- subversion/libsvn_subr/utf_validate.c       (revision 1924098)
+++ subversion/libsvn_subr/utf_validate.c       (working copy)
@@ -258,17 +258,6 @@ static const char machine [9][14] = {
 static const char *
 first_non_fsm_start_char(const char *data, apr_size_t max_len)
 {
-#if SVN_UNALIGNED_ACCESS_IS_OK
-
-  /* Scan the input one machine word at a time. */
-  for (; max_len > sizeof(apr_uintptr_t)
-       ; data += sizeof(apr_uintptr_t), max_len -= sizeof(apr_uintptr_t))
-    if (*(const apr_uintptr_t *)data & SVN__BIT_7_SET)
-      break;
-
-#endif
-
-  /* The remaining odd bytes will be examined the naive way: */
   for (; max_len > 0; ++data, --max_len)
     if ((unsigned char)*data >= 0x80)
       break;

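P.S.: should chunky access ever be reintroduced, the portable way to
spell an unaligned word load in C is a memcpy into a local variable.
Optimizing compilers turn that into a single word load where the target
allows it, and it avoids the alignment and strict-aliasing problems of
the type-punned casts removed above, which the C standard does not
guarantee to work.  A minimal sketch of the idiom (my suggestion only,
not part of the patch):

#include <string.h>
#include <stdint.h>

/* Hypothetical helper, not part of this patch: read one machine word
 * from a possibly unaligned buffer.  Unlike *(const apr_uintptr_t *)p,
 * this is well-defined C, and compilers emit a plain load for it on
 * targets where unaligned access is safe. */
static uintptr_t
load_word(const void *p)
{
  uintptr_t w;
  memcpy(&w, p, sizeof(w));
  return w;
}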