IMPALA-4436: StringValue::StringCompare() should match strncmp()

According to the C standard, strncmp() interprets characters as
unsigned, whereas StringCompare() uses char (which happens to be
signed).  This means that for values greater than 127, they don't give
the same result (which is especially bad considering StringCompare()
falls back to strncmp(), and so the answer depends on the mismatched
position).

Fix StringCompare() to interpret as unsigned char.

Change-Id: Ic0750f98d8c5ef7d0c0ea279cd1f80b4acbad1be
Reviewed-on: http://gerrit.cloudera.org:8080/5083
Reviewed-by: Dan Hecht <[email protected]>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6937fa9a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6937fa9a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6937fa9a

Branch: refs/heads/master
Commit: 6937fa9a4caa9039a95d661ddf209777caa805f5
Parents: 4b77488
Author: Dan Hecht <[email protected]>
Authored: Mon Nov 14 16:47:39 2016 -0800
Committer: Internal Jenkins <[email protected]>
Committed: Tue Nov 15 20:47:18 2016 +0000

----------------------------------------------------------------------
 be/src/runtime/CMakeLists.txt         |  1 +
 be/src/runtime/string-compare-test.cc | 67 ++++++++++++++++++++++++++++++
 be/src/runtime/string-value.inline.h  |  4 +-
 3 files changed, 71 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt
index a0b0a94..19db090 100644
--- a/be/src/runtime/CMakeLists.txt
+++ b/be/src/runtime/CMakeLists.txt
@@ -78,6 +78,7 @@ ADD_BE_TEST(disk-io-mgr-test)
 ADD_BE_TEST(buffered-block-mgr-test)
 ADD_BE_TEST(parallel-executor-test)
 ADD_BE_TEST(raw-value-test)
+ADD_BE_TEST(string-compare-test)
 ADD_BE_TEST(string-search-test)
 ADD_BE_TEST(string-value-test)
 ADD_BE_TEST(thread-resource-mgr-test)

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/string-compare-test.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-compare-test.cc 
b/be/src/runtime/string-compare-test.cc
new file mode 100644
index 0000000..ef81ac6
--- /dev/null
+++ b/be/src/runtime/string-compare-test.cc
@@ -0,0 +1,67 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstring>
+
+#include "runtime/string-value.inline.h"
+#include "testutil/gtest-util.h"
+
+#include "common/names.h"
+
+// Test to verify that impala::StringCompare() and clib strncmp() give 
equivalent results.
+namespace impala {
+
+// Expect that strncmp() and StringCompare() return the same result.
+static void RunTestCase(const string& l, const string& r) {
+  int cmp_len = ::min(l.size(), r.size());
+  EXPECT_EQ(strncmp(l.c_str(), r.c_str(), cmp_len),
+      StringCompare(l.c_str(), l.size(), r.c_str(), r.size(), cmp_len));
+}
+
+TEST(StringCompareTest, Basic) {
+  const struct Cases {
+    string lhs, rhs;
+  } cases[] = {{"abc", "abc"},
+               {"abc", "abd"},
+               {"", ""},
+               {"\x01", "\x01"},
+               {"\x01", "\x02"},
+               {"\x7f", "\x01"},
+               {"\x80", "\x80"},
+               {"\x80", "\x01"},
+               {"\x7F", "\x80"},
+               {"\xFF", "\x01"},
+               {"\xFF", "\x7F"},
+               {"\xE9", "\x32"},
+               {"\x32", "\xE5"},
+               {"\xE5", "\xE9"}};
+
+  for (const Cases& c : cases) {
+    RunTestCase(c.lhs, c.rhs);
+    RunTestCase(c.rhs, c.lhs);
+  }
+
+  // Induce the SSE 4.2 code path by using longer strings.
+  string suffix = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+  for (const Cases& c : cases) {
+    RunTestCase(c.lhs + suffix, c.rhs + suffix);
+    RunTestCase(c.rhs + suffix, c.lhs + suffix);
+  }
+}
+}
+
+IMPALA_TEST_MAIN();

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/string-value.inline.h
----------------------------------------------------------------------
diff --git a/be/src/runtime/string-value.inline.h 
b/be/src/runtime/string-value.inline.h
index f52005f..073d01f 100644
--- a/be/src/runtime/string-value.inline.h
+++ b/be/src/runtime/string-value.inline.h
@@ -47,7 +47,9 @@ static inline int StringCompare(const char* s1, int n1, const 
char* s2, int n2,
           SSEUtil::CHARS_PER_128_BIT_REGISTER, xmm1,
           SSEUtil::CHARS_PER_128_BIT_REGISTER);
       if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) {
-        return s1[chars_match] - s2[chars_match];
+        // Match strncmp() behavior, which interprets characters as unsigned 
char.
+        return static_cast<unsigned char>(s1[chars_match]) -
+            static_cast<unsigned char>(s2[chars_match]);
       }
       len -= SSEUtil::CHARS_PER_128_BIT_REGISTER;
       s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;

Reply via email to