IMPALA-4436: StringValue::StringCompare() should match strncmp() According to the C standard, strncmp() interprets characters as unsigned, whereas StringCompare() uses char (which happens to be signed). This means that for values greater than 127, they don't give the same result (which is especially bad considering StringCompare() falls back to strncmp(), and so the answer depends on the mismatched position).
Fix StringCompare() to interpret as unsigned char. Change-Id: Ic0750f98d8c5ef7d0c0ea279cd1f80b4acbad1be Reviewed-on: http://gerrit.cloudera.org:8080/5083 Reviewed-by: Dan Hecht <[email protected]> Tested-by: Internal Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/6937fa9a Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/6937fa9a Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/6937fa9a Branch: refs/heads/master Commit: 6937fa9a4caa9039a95d661ddf209777caa805f5 Parents: 4b77488 Author: Dan Hecht <[email protected]> Authored: Mon Nov 14 16:47:39 2016 -0800 Committer: Internal Jenkins <[email protected]> Committed: Tue Nov 15 20:47:18 2016 +0000 ---------------------------------------------------------------------- be/src/runtime/CMakeLists.txt | 1 + be/src/runtime/string-compare-test.cc | 67 ++++++++++++++++++++++++++++++ be/src/runtime/string-value.inline.h | 4 +- 3 files changed, 71 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index a0b0a94..19db090 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -78,6 +78,7 @@ ADD_BE_TEST(disk-io-mgr-test) ADD_BE_TEST(buffered-block-mgr-test) ADD_BE_TEST(parallel-executor-test) ADD_BE_TEST(raw-value-test) +ADD_BE_TEST(string-compare-test) ADD_BE_TEST(string-search-test) ADD_BE_TEST(string-value-test) ADD_BE_TEST(thread-resource-mgr-test) http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/string-compare-test.cc ---------------------------------------------------------------------- diff --git a/be/src/runtime/string-compare-test.cc b/be/src/runtime/string-compare-test.cc new file mode 100644 index 0000000..ef81ac6 --- /dev/null +++ b/be/src/runtime/string-compare-test.cc @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include <cstring> + +#include "runtime/string-value.inline.h" +#include "testutil/gtest-util.h" + +#include "common/names.h" + +// Test to verify that impala::StringCompare() and clib strncmp() give equivalent results. +namespace impala { + +// Expect that strncmp() and StringCompare() return the same result. +static void RunTestCase(const string& l, const string& r) { + int cmp_len = ::min(l.size(), r.size()); + EXPECT_EQ(strncmp(l.c_str(), r.c_str(), cmp_len), + StringCompare(l.c_str(), l.size(), r.c_str(), r.size(), cmp_len)); +} + +TEST(StringCompareTest, Basic) { + const struct Cases { + string lhs, rhs; + } cases[] = {{"abc", "abc"}, + {"abc", "abd"}, + {"", ""}, + {"\x01", "\x01"}, + {"\x01", "\x02"}, + {"\x7f", "\x01"}, + {"\x80", "\x80"}, + {"\x80", "\x01"}, + {"\x7F", "\x80"}, + {"\xFF", "\x01"}, + {"\xFF", "\x7F"}, + {"\xE9", "\x32"}, + {"\x32", "\xE5"}, + {"\xE5", "\xE9"}}; + + for (const Cases& c : cases) { + RunTestCase(c.lhs, c.rhs); + RunTestCase(c.rhs, c.lhs); + } + + // Induce the SSE 4.2 code path by using longer strings. + string suffix = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + for (const Cases& c : cases) { + RunTestCase(c.lhs + suffix, c.rhs + suffix); + RunTestCase(c.rhs + suffix, c.lhs + suffix); + } +} +} + +IMPALA_TEST_MAIN(); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/6937fa9a/be/src/runtime/string-value.inline.h ---------------------------------------------------------------------- diff --git a/be/src/runtime/string-value.inline.h b/be/src/runtime/string-value.inline.h index f52005f..073d01f 100644 --- a/be/src/runtime/string-value.inline.h +++ b/be/src/runtime/string-value.inline.h @@ -47,7 +47,9 @@ static inline int StringCompare(const char* s1, int n1, const char* s2, int n2, SSEUtil::CHARS_PER_128_BIT_REGISTER, xmm1, SSEUtil::CHARS_PER_128_BIT_REGISTER); if (chars_match != SSEUtil::CHARS_PER_128_BIT_REGISTER) { - return s1[chars_match] - s2[chars_match]; + // Match strncmp() behavior, which interprets characters as unsigned char. + return static_cast<unsigned char>(s1[chars_match]) - + static_cast<unsigned char>(s2[chars_match]); } len -= SSEUtil::CHARS_PER_128_BIT_REGISTER; s1 += SSEUtil::CHARS_PER_128_BIT_REGISTER;
