Author: stefan2
Date: Mon Jan  4 12:41:58 2016
New Revision: 1722860

URL: http://svn.apache.org/viewvc?rev=1722860&view=rev
Log:
Fix access to uninitialized memory in our UTF8 validity checker.
For static strings like "", this violated C99, 6.2.4.5 and 3.17.2.

The code path for C strings only fully worked for APR allocated memory
and could fail otherwise on systems with fine-grained boundary checks.

This patch simply eliminates the C string specific implementation and
reuses the equally efficient svn_utf__last_valid function.  Calling
strlen is not an added overhead here:  If the string is ASCII-7 only,
strlen is about as efficient as first_non_fsm_start_char_cstring's
implicit EOS check.  If the string contains chars >= 0x80, the much
more expensive state machine code will dominate the runtime.

Reported by: Hanno Böck <hanno{_AT_}hboeck.de>

* notes/knobs
  (SVN_UTF_NO_UNINITIALISED_ACCESS): Remove section about this knob and
                                     update the index. Our code no longer
                                     uses it.

* subversion/libsvn_subr/utf_validate.c
  (first_non_fsm_start_char_cstring): Remove because it is no longer used. 
  (svn_utf__cstring_is_valid): Implement in terms of svn_utf__last_valid.

Modified:
    subversion/trunk/subversion/libsvn_subr/utf_validate.c

Modified: subversion/trunk/subversion/libsvn_subr/utf_validate.c
URL: http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf_validate.c?rev=1722860&r1=1722859&r2=1722860&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf_validate.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf_validate.c Mon Jan  4 12:41:58 2016
@@ -291,52 +291,6 @@ first_non_fsm_start_char(const char *dat
   return data;
 }
 
-/* Scan the C string in *DATA for chars that are not in the octet
- * category 0 (FSM_START).  Return the position of either the such
- * char or of the terminating NUL.
- */
-static const char *
-first_non_fsm_start_char_cstring(const char *data)
-{
-  /* We need to make sure that BUF is properly aligned for chunky data
-   * access because we don't know the string's length. Unaligned chunk
-   * read access beyond the NUL terminator could therefore result in a
-   * segfault.
-   */
-  for (; (apr_uintptr_t)data & (sizeof(apr_uintptr_t)-1); ++data)
-    if (*data == 0 || (unsigned char)*data >= 0x80)
-      return data;
-
-  /* Scan the input one machine word at a time. */
-#ifndef SVN_UTF_NO_UNINITIALISED_ACCESS
-  /* This may read allocated but uninitialised bytes beyond the
-     terminating null.  Any such bytes are always readable and this
-     code operates correctly whatever the uninitialised values happen
-     to be.  However memory checking tools such as valgrind and GCC
-     4.8's address santitizer will object so this bit of code can be
-     disabled at compile time. */
-  for (; ; data += sizeof(apr_uintptr_t))
-    {
-      /* Check for non-ASCII chars: */
-      apr_uintptr_t chunk = *(const apr_uintptr_t *)data;
-      if (chunk & SVN__BIT_7_SET)
-        break;
-
-      /* This is the well-known strlen test: */
-      chunk |= (chunk & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
-      if ((chunk & SVN__BIT_7_SET) != SVN__BIT_7_SET)
-        break;
-    }
-#endif
-
-  /* The remaining odd bytes will be examined the naive way: */
-  for (; ; ++data)
-    if (*data == 0 || (unsigned char)*data >= 0x80)
-      break;
-
-  return data;
-}
-
 const char *
 svn_utf__last_valid(const char *data, apr_size_t len)
 {
@@ -359,20 +313,10 @@ svn_utf__last_valid(const char *data, ap
 svn_boolean_t
 svn_utf__cstring_is_valid(const char *data)
 {
-  int state = FSM_START;
-
   if (!data)
     return FALSE;
 
-  data = first_non_fsm_start_char_cstring(data);
-
-  while (*data)
-    {
-      unsigned char octet = *data++;
-      int category = octet_category[octet];
-      state = machine[state][category];
-    }
-  return state == FSM_START;
+  return svn_utf__is_valid(data, strlen(data));
 }
 
 svn_boolean_t


Reply via email to