Author: stefan2
Date: Mon Jan 4 12:41:58 2016
New Revision: 1722860
URL: http://svn.apache.org/viewvc?rev=1722860&view=rev
Log:
Fix access to uninitialized memory in our UTF8 validity checker.
For static strings like "", this violated C99, 6.2.4.5 and 3.17.2.
The code path for C strings only fully worked for APR allocated memory
and could fail otherwise on systems with fine-grained boundary checks.
This patch simply eliminates the C-string-specific implementation and
reuses the equally efficient svn_utf__is_valid function. Calling
strlen is not an added overhead here: If the string is ASCII-7 only,
strlen is about as efficient as first_non_fsm_start_char_cstring's
implicit EOS check. If the string contains chars >= 0x80, the much
more expensive state machine code will dominate the runtime.
Reported by: Hanno Böck <hanno{_AT_}hboeck.de>
* notes/knobs
(SVN_UTF_NO_UNINITIALISED_ACCESS): Remove section about this knob and
update the index. Our code no longer
uses it.
* subversion/libsvn_subr/utf_validate.c
(first_non_fsm_start_char_cstring): Remove because it is no longer used.
(svn_utf__cstring_is_valid): Implement in terms of svn_utf__is_valid.
Modified:
subversion/trunk/subversion/libsvn_subr/utf_validate.c
Modified: subversion/trunk/subversion/libsvn_subr/utf_validate.c
URL:
http://svn.apache.org/viewvc/subversion/trunk/subversion/libsvn_subr/utf_validate.c?rev=1722860&r1=1722859&r2=1722860&view=diff
==============================================================================
--- subversion/trunk/subversion/libsvn_subr/utf_validate.c (original)
+++ subversion/trunk/subversion/libsvn_subr/utf_validate.c Mon Jan 4 12:41:58 2016
@@ -291,52 +291,6 @@ first_non_fsm_start_char(const char *dat
return data;
}
-/* Scan the C string in *DATA for chars that are not in the octet
- * category 0 (FSM_START). Return the position of either the such
- * char or of the terminating NUL.
- */
-static const char *
-first_non_fsm_start_char_cstring(const char *data)
-{
- /* We need to make sure that BUF is properly aligned for chunky data
- * access because we don't know the string's length. Unaligned chunk
- * read access beyond the NUL terminator could therefore result in a
- * segfault.
- */
- for (; (apr_uintptr_t)data & (sizeof(apr_uintptr_t)-1); ++data)
- if (*data == 0 || (unsigned char)*data >= 0x80)
- return data;
-
- /* Scan the input one machine word at a time. */
-#ifndef SVN_UTF_NO_UNINITIALISED_ACCESS
- /* This may read allocated but uninitialised bytes beyond the
- terminating null. Any such bytes are always readable and this
- code operates correctly whatever the uninitialised values happen
- to be. However memory checking tools such as valgrind and GCC
- 4.8's address santitizer will object so this bit of code can be
- disabled at compile time. */
- for (; ; data += sizeof(apr_uintptr_t))
- {
- /* Check for non-ASCII chars: */
- apr_uintptr_t chunk = *(const apr_uintptr_t *)data;
- if (chunk & SVN__BIT_7_SET)
- break;
-
- /* This is the well-known strlen test: */
- chunk |= (chunk & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
- if ((chunk & SVN__BIT_7_SET) != SVN__BIT_7_SET)
- break;
- }
-#endif
-
- /* The remaining odd bytes will be examined the naive way: */
- for (; ; ++data)
- if (*data == 0 || (unsigned char)*data >= 0x80)
- break;
-
- return data;
-}
-
const char *
svn_utf__last_valid(const char *data, apr_size_t len)
{
@@ -359,20 +313,10 @@ svn_utf__last_valid(const char *data, ap
svn_boolean_t
svn_utf__cstring_is_valid(const char *data)
{
- int state = FSM_START;
-
if (!data)
return FALSE;
- data = first_non_fsm_start_char_cstring(data);
-
- while (*data)
- {
- unsigned char octet = *data++;
- int category = octet_category[octet];
- state = machine[state][category];
- }
- return state == FSM_START;
+ return svn_utf__is_valid(data, strlen(data));
}
svn_boolean_t