On 15/04/2014 7:35 a.m., Alex Rousskov wrote:
> On 04/14/2014 11:07 AM, Amos Jeffries wrote:
>> On 15/04/2014 2:56 a.m., Alex Rousskov wrote:
>>> On 04/14/2014 04:49 AM, Amos Jeffries wrote:
>>>> I've added a unit test to catch the rare \0 mid-string case I spoke of:
>>>> * SBuf("foo\0test").compare("foo", 9);
>>>>
>>>> It works fine up to the point N(4) > strlen("foo"). After that point our
>>>> function returns 1 to indicate that the SBuf is a longer string, whereas
>>>> strncmp/strncasecmp return 0 up to infinity.
>>>
>>> Yes, this is related to the large-n handling bug I keep talking about.
>>> IMO, this must be fixed as previously discussed: C-string API should not
>>> look past the first null character.
>>>
>>>
>>>> The code as presented earlier copes with the weirdness fine.
>>>
>>>
>>> AFAICT, the latest posted patch accesses non-existent c-string bytes
>>> under certain conditions (large n, large SBuf with trailing null
>>> characters, short s matching the c-string in SBuf). Do you agree? If you
>>> do agree, please post a fixed patch. If not, I would have to spend time
>>> writing a test case to prove my point against the last patch posted.
>>
>> I agree.
>
> Great.
>
>
>> Attached patch implements what we agreed on in IRC.
>
> Please note that we discussed only one aspect of the code; there was no
> blueprint on how to get everything working, and there are many ways to
> do that.
>
>
>> It produces wrong return value in two case states. Marked with "BUG 1"
>> and "BUG 2" in the patch.
>
>
>> + // BUG 1: when length() < n - buffer overruns on buf().
>
> byteCompareLen cannot exceed length() so there should not be buf()
> overruns. However, there are other bugs as discussed below.
>
>
>> + // BUG 2: when length() == strlen(s) < n, no terminator to match
>> against in buf()
>
>
> Yes, the lack of buf() terminator still needs to be fixed for all cases
> where the terminator would have been used by a c-string-based
> implementation. Please see the IRC log for some specific suggestions.
The attached patch passes all the tests including \0 embeded in the strings.
It also avoids the s[] access by using strlen(s) != byteCompareLen.
Amos
=== modified file 'src/SBuf.cc'
--- src/SBuf.cc 2014-04-06 07:08:04 +0000
+++ src/SBuf.cc 2014-04-15 14:15:50 +0000
@@ -360,64 +360,100 @@
store_->mem[off_+pos] = toset;
++stats.setChar;
}
static int
memcasecmp(const char *b1, const char *b2, SBuf::size_type len)
{
int rv=0;
while (len > 0) {
rv = tolower(*b1)-tolower(*b2);
if (rv != 0)
return rv;
++b1;
++b2;
--len;
}
return rv;
}
int
-SBuf::compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, size_type n)
const
+SBuf::compare(const SBuf &S, const SBufCaseSensitive isCaseSensitive, const
size_type n) const
{
if (n != npos)
return substr(0,n).compare(S.substr(0,n),isCaseSensitive);
- size_type byteCompareLen = min(S.length(), length());
+ const size_type byteCompareLen = min(S.length(), length());
++stats.compareSlow;
int rv = 0;
if (isCaseSensitive == caseSensitive) {
rv = memcmp(buf(), S.buf(), byteCompareLen);
} else {
rv = memcasecmp(buf(), S.buf(), byteCompareLen);
}
if (rv != 0)
return rv;
if (length() == S.length())
return 0;
if (length() > S.length())
return 1;
return -1;
}
+int
+SBuf::compare(const char *s, const SBufCaseSensitive isCaseSensitive, const
size_type n) const
+{
+ // 0-length comparison is always true regardless of buffer states
+ if (!n) {
+ ++stats.compareFast;
+ return 0;
+ }
+
+ // N-length compare MUST provide a non-NULL C-string pointer
+ assert(s);
+
+ // recurse after finding length if unknown (including terminator byte)
+ if (n == npos)
+ return compare(s, isCaseSensitive, strlen(s)+1);
+
+ // if this SBuf is bigger than N truncate it.
+ // guaranteeing length() <= n for the following comparison
+ if (length() > n)
+ return substr(0,n).compare(s, isCaseSensitive, n);
+
+ const size_type byteCompareLen = min(n, length());
+ ++stats.compareSlow;
+ int rv = 0;
+ if (isCaseSensitive == caseSensitive) {
+ rv = strncmp(buf(), s, byteCompareLen);
+ } else {
+ rv = strncasecmp(buf(), s, byteCompareLen);
+ }
+ if (rv != 0)
+ return rv;
+ if (byteCompareLen < n && strlen(s) != byteCompareLen)
+ return -1;
+ return 0;
+}
+
bool
-SBuf::startsWith(const SBuf &S, SBufCaseSensitive isCaseSensitive) const
+SBuf::startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive) const
{
debugs(24, 8, id << " startsWith " << S.id << ", caseSensitive: " <<
isCaseSensitive);
if (length() < S.length()) {
debugs(24, 8, "no, too short");
++stats.compareFast;
return false;
}
return (compare(S, isCaseSensitive, S.length()) == 0);
}
bool
SBuf::operator ==(const SBuf & S) const
{
debugs(24, 8, id << " == " << S.id);
if (length() != S.length()) {
debugs(24, 8, "no, different lengths");
++stats.compareFast;
return false; //shortcut: must be equal length
}
=== modified file 'src/SBuf.h'
--- src/SBuf.h 2014-04-06 07:08:04 +0000
+++ src/SBuf.h 2014-04-13 04:05:25 +0000
@@ -238,58 +238,71 @@
char at(size_type pos) const {checkAccessBounds(pos); return
operator[](pos);}
/** direct-access set a byte at a specified operation.
*
* \param pos the position to be overwritten
* \param toset the value to be written
* \throw OutOfBoundsException when pos is of bounds
* \note bounds is 0 <= pos < length(); caller must pay attention to
signedness
* \note performs a copy-on-write if needed.
*/
void setAt(size_type pos, char toset);
/** compare to other SBuf, str(case)cmp-style
*
* \param isCaseSensitive one of caseSensitive or caseInsensitive
* \param n compare up to this many bytes. if npos (default), compare
whole SBufs
* \retval >0 argument of the call is greater than called SBuf
* \retval <0 argument of the call is smaller than called SBuf
* \retval 0 argument of the call has the same contents of called SBuf
*/
- int compare(const SBuf &S, SBufCaseSensitive isCaseSensitive, size_type n
= npos) const;
+ int compare(const SBuf &S, const SBufCaseSensitive isCaseSensitive, const
size_type n = npos) const;
- /// shorthand version for compare
- inline int cmp(const SBuf &S, size_type n = npos) const {
+ /// shorthand version for compare()
+ inline int cmp(const SBuf &S, const size_type n = npos) const {
return compare(S,caseSensitive,n);
}
- /// shorthand version for case-insensitive comparison
- inline int caseCmp(const SBuf &S, size_type n = npos) const {
+ /// shorthand version for case-insensitive compare()
+ inline int caseCmp(const SBuf &S, const size_type n = npos) const {
+ return compare(S,caseInsensitive,n);
+ }
+
+ /// comparison with a C-string
+ int compare(const char *s, const SBufCaseSensitive isCaseSensitive, const
size_type n) const;
+
+ /// shorthand version for C-string compare()
+ inline int cmp(const char *S, const size_type n = npos) const {
+ return compare(S,caseSensitive,n);
+ }
+
+ /// shorthand version for case-insensitive C-string compare()
+ inline int caseCmp(const char *S, const size_type n = npos) const {
return compare(S,caseInsensitive,n);
}
/** check whether the entire supplied argument is a prefix of the SBuf.
* \param S the prefix to match against
* \param isCaseSensitive one of caseSensitive or caseInsensitive
* \retval true argument is a prefix of the SBuf
*/
- bool startsWith(const SBuf &S, SBufCaseSensitive isCaseSensitive =
caseSensitive) const;
+ bool startsWith(const SBuf &S, const SBufCaseSensitive isCaseSensitive =
caseSensitive) const;
bool operator ==(const SBuf & S) const;
bool operator !=(const SBuf & S) const;
bool operator <(const SBuf &S) const {return (cmp(S) < 0);}
bool operator >(const SBuf &S) const {return (cmp(S) > 0);}
bool operator <=(const SBuf &S) const {return (cmp(S) <= 0);}
bool operator >=(const SBuf &S) const {return (cmp(S) >= 0);}
/** Consume bytes at the head of the SBuf
*
* Consume N chars at SBuf head, or to SBuf's end,
* whichever is shorter. If more bytes are consumed than available,
* the SBuf is emptied
* \param n how many bytes to remove; could be zero.
* npos (or no argument) means 'to the end of SBuf'
* \return a new SBuf containing the consumed bytes.
*/
SBuf consume(size_type n = npos);
/// gets global statistic informations
=== modified file 'src/tests/testHttpRequestMethod.cc'
--- src/tests/testHttpRequestMethod.cc 2014-02-21 10:46:19 +0000
+++ src/tests/testHttpRequestMethod.cc 2014-04-05 09:27:56 +0000
@@ -67,49 +67,49 @@
/*
* we should be able to construct a HttpRequestMethod from a Http::MethodType
*/
void
testHttpRequestMethod::testConstructmethod_t()
{
CPPUNIT_ASSERT_EQUAL(HttpRequestMethod(Http::METHOD_NONE),
HttpRequestMethod(Http::METHOD_NONE));
CPPUNIT_ASSERT_EQUAL(HttpRequestMethod(Http::METHOD_POST),
HttpRequestMethod(Http::METHOD_POST));
CPPUNIT_ASSERT(HttpRequestMethod(Http::METHOD_NONE) !=
HttpRequestMethod(Http::METHOD_POST));
}
/*
* we should be able to get a char const * version of the method.
*/
void
testHttpRequestMethod::testImage()
{
// relaxed RFC-compliance parse HTTP methods are upgraded to correct case
Config.onoff.relaxed_header_parser = 1;
- CPPUNIT_ASSERT_EQUAL(String("POST"),
String(HttpRequestMethod("POST",NULL).image()));
- CPPUNIT_ASSERT_EQUAL(String("POST"),
String(HttpRequestMethod("pOsT",NULL).image()));
- CPPUNIT_ASSERT_EQUAL(String("POST"),
String(HttpRequestMethod("post",NULL).image()));
+ CPPUNIT_ASSERT_EQUAL(SBuf("POST"), HttpRequestMethod("POST",NULL).image());
+ CPPUNIT_ASSERT_EQUAL(SBuf("POST"), HttpRequestMethod("pOsT",NULL).image());
+ CPPUNIT_ASSERT_EQUAL(SBuf("POST"), HttpRequestMethod("post",NULL).image());
// strict RFC-compliance parse HTTP methods are case sensitive
Config.onoff.relaxed_header_parser = 0;
- CPPUNIT_ASSERT_EQUAL(String("POST"),
String(HttpRequestMethod("POST",NULL).image()));
- CPPUNIT_ASSERT_EQUAL(String("pOsT"),
String(HttpRequestMethod("pOsT",NULL).image()));
- CPPUNIT_ASSERT_EQUAL(String("post"),
String(HttpRequestMethod("post",NULL).image()));
+ CPPUNIT_ASSERT_EQUAL(SBuf("POST"), HttpRequestMethod("POST",NULL).image());
+ CPPUNIT_ASSERT_EQUAL(SBuf("pOsT"), HttpRequestMethod("pOsT",NULL).image());
+ CPPUNIT_ASSERT_EQUAL(SBuf("post"), HttpRequestMethod("post",NULL).image());
}
/*
* an HttpRequestMethod should be comparable to a Http::MethodType without
false
* matches
*/
void
testHttpRequestMethod::testEqualmethod_t()
{
CPPUNIT_ASSERT(HttpRequestMethod(Http::METHOD_NONE) == Http::METHOD_NONE);
CPPUNIT_ASSERT(not (HttpRequestMethod(Http::METHOD_POST) ==
Http::METHOD_GET));
CPPUNIT_ASSERT(HttpRequestMethod(Http::METHOD_GET) == Http::METHOD_GET);
CPPUNIT_ASSERT(not (HttpRequestMethod(Http::METHOD_TRACE) ==
Http::METHOD_SEARCH));
}
/*
* an HttpRequestMethod should testable for inequality without fail maatches
*/
void
testHttpRequestMethod::testNotEqualmethod_t()
=== modified file 'src/tests/testSBuf.cc'
--- src/tests/testSBuf.cc 2014-02-08 13:36:42 +0000
+++ src/tests/testSBuf.cc 2014-04-15 13:48:44 +0000
@@ -227,74 +227,164 @@
// note: can't use cppunit's CPPUNIT_TEST_EXCEPTION because TextException
asserts, and
// so the test can't be properly completed.
void
testSBuf::testSubscriptOpFail()
{
char c;
c=literal.at(literal.length()); //out of bounds by 1
//notreached
std::cout << c << std::endl;
}
static int sign(int v)
{
if (v < 0)
return -1;
if (v>0)
return 1;
return 0;
}
+static void
+testComparisonStdFull(const char *left, const char *right)
+{
+ if (sign(strcmp(left, right)) != sign(SBuf(left).cmp(SBuf(right))))
+ std::cerr << std::endl << " cmp(SBuf) npos " << left << " ?= " <<
right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strcmp(left, right)),
sign(SBuf(left).cmp(SBuf(right))));
+
+ if (sign(strcmp(left, right)) != sign(SBuf(left).cmp(right)))
+ std::cerr << std::endl << " cmp(char*) npos " << left << " ?= " <<
right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strcmp(left, right)),
sign(SBuf(left).cmp(right)));
+
+ if (sign(strcasecmp(left, right)) != sign(SBuf(left).caseCmp(SBuf(right))))
+ std::cerr << std::endl << " caseCmp(SBuf) npos " << left << " ?= " <<
right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strcasecmp(left, right)),
sign(SBuf(left).caseCmp(SBuf(right))));
+
+ if (sign(strcasecmp(left, right)) != sign(SBuf(left).caseCmp(right)))
+ std::cerr << std::endl << " caseCmp(char*) npos " << left << " ?= " <<
right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strcasecmp(left, right)),
sign(SBuf(left).caseCmp(right)));
+}
+
+static void
+testComparisonStdN(const char *left, const char *right, const size_t n)
+{
+ if (sign(strncmp(left, right, n)) != sign(SBuf(left).cmp(SBuf(right), n)))
+ std::cerr << std::endl << " cmp(SBuf) " << n << ' ' << left << " ?= "
<< right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strncmp(left, right, n)),
sign(SBuf(left).cmp(SBuf(right), n)));
+
+ if (sign(strncmp(left, right, n)) != sign(SBuf(left).cmp(right, n)))
+ std::cerr << std::endl << " cmp(char*) " << n << ' ' << SBuf(left) <<
" ?= " << right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strncmp(left, right, n)),
sign(SBuf(left).cmp(right, n)));
+
+ if (sign(strncasecmp(left, right, n)) !=
sign(SBuf(left).caseCmp(SBuf(right), n)))
+ std::cerr << std::endl << " caseCmp(SBuf) " << n << ' ' << left << "
?= " << right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strncasecmp(left, right, n)),
sign(SBuf(left).caseCmp(SBuf(right), n)));
+
+ if (sign(strncasecmp(left, right, n)) != sign(SBuf(left).caseCmp(right,
n)))
+ std::cerr << std::endl << " caseCmp(char*) " << n << ' ' << SBuf(left)
<< " ?= " << right << std::endl;
+ CPPUNIT_ASSERT_EQUAL(sign(strncasecmp(left, right, n)),
sign(SBuf(left).caseCmp(right, n)));
+}
+
+static void
+testComparisonStdOneWay(const char *left, const char *right)
+{
+ testComparisonStdFull(left, right);
+ const size_t maxN = 2 + min(strlen(left), strlen(right));
+ for (size_t n = 0; n <= maxN; ++n) {
+ testComparisonStdN(left, right, n);
+ }
+}
+
+static void
+testComparisonStd(const char *s1, const char *s2)
+{
+ testComparisonStdOneWay(s1, s2);
+ testComparisonStdOneWay(s2, s1);
+}
+
void
testSBuf::testComparisons()
{
//same length
SBuf s1("foo"),s2("foe");
CPPUNIT_ASSERT(s1.cmp(s2)>0);
CPPUNIT_ASSERT(s1.caseCmp(s2)>0);
CPPUNIT_ASSERT(s2.cmp(s1)<0);
CPPUNIT_ASSERT_EQUAL(0,s1.cmp(s2,2));
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
CPPUNIT_ASSERT(s1 > s2);
CPPUNIT_ASSERT(s2 < s1);
CPPUNIT_ASSERT_EQUAL(sign(s1.cmp(s2)),sign(strcmp(s1.c_str(),s2.c_str())));
//different lengths
s1.assign("foo");
s2.assign("foof");
CPPUNIT_ASSERT(s1.cmp(s2)<0);
CPPUNIT_ASSERT_EQUAL(sign(s1.cmp(s2)),sign(strcmp(s1.c_str(),s2.c_str())));
CPPUNIT_ASSERT(s1 < s2);
// specifying the max-length and overhanging size
CPPUNIT_ASSERT_EQUAL(1,SBuf("foolong").caseCmp(SBuf("foo"), 5));
// case-insensive comaprison
s1 = "foo";
s2 = "fOo";
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2));
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
// \0-clenliness test
s1.assign("f\0oo",4);
s2.assign("f\0Oo",4);
CPPUNIT_ASSERT(s1.cmp(s2) > 0);
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2));
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,3));
CPPUNIT_ASSERT_EQUAL(0,s1.caseCmp(s2,2));
CPPUNIT_ASSERT_EQUAL(0,s1.cmp(s2,2));
+
+ testComparisonStd("foo", "fooz");
+ testComparisonStd("foo", "foo");
+ testComparisonStd("foo", "f");
+ testComparisonStd("foo", "bar");
+
+ testComparisonStd("foo", "FOOZ");
+ testComparisonStd("foo", "FOO");
+ testComparisonStd("foo", "F");
+
+ // rare case C-string input matching SBuf with N>strlen(s)
+ {
+ char *right = xstrdup("foo34567890123456789012345678");
+ SBuf left("fooZXYWVUTSRQPONMLKJIHGFEDCBA");
+ // is 3 bytes in length. NEVER more.
+ right[3] = '\0';
+ left.setAt(3, '\0');
+
+ // pick another spot to truncate at if something goes horribly wrong.
+ right[14] = '\0';
+ left.setAt(14, '\0');
+
+ const size_t maxN = 20 + min(left.length(), strlen(right));
+ for (size_t n = 0; n <= maxN; ++n) {
+ if (sign(strncmp(left.rawContent(), right, n)) !=
sign(left.cmp(right, n)) )
+ std::cerr << std::endl << " cmp(char*) " << n << ' ' << left
<< " ?= " << right;
+ CPPUNIT_ASSERT_EQUAL(sign(strncmp(left.rawContent(), right, n)),
sign(left.cmp(right, n)));
+ if (sign(strncasecmp(left.rawContent(), right, n)) !=
sign(left.caseCmp(right, n)))
+ std::cerr << std::endl << " caseCmp(char*) " << n << ' ' <<
left << " ?= " << right;
+ CPPUNIT_ASSERT_EQUAL(sign(strncasecmp(left.rawContent(), right,
n)), sign(left.caseCmp(right, n)));
+ }
+ xfree(right);
+ }
}
void
testSBuf::testConsume()
{
SBuf s1(literal),s2,s3;
s2=s1.consume(4);
s3.assign("The ");
CPPUNIT_ASSERT_EQUAL(s2,s3);
s3.assign("quick brown fox jumped over the lazy dog");
CPPUNIT_ASSERT_EQUAL(s1,s3);
s1.consume(40);
CPPUNIT_ASSERT_EQUAL(s1,SBuf());
}
void
testSBuf::testRawContent()
{
SBuf s1(literal);
SBuf s2(s1);