This is an automated email from the ASF dual-hosted git repository. abukor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit 9067963c94a5263e5c1cb0c87d839ac7b6f0f449 Author: Attila Bukor <[email protected]> AuthorDate: Tue Sep 24 15:36:08 2019 +0200 KUDU-1938 Add non-copy VARCHAR setters pt 3 Apache Impala uses KuduPartialRow API to determine which partition a row will be inserted to distribute the data between executors optimally. For this purpose the copy is unnecessary and it should be fast. This commit adds NoCopyUnsafe variants for this purpose which expect the data to already be truncated (which it is in Impala's case) and only check that the value's length is lower than the highest possible upper bound: val.size() < max_length*4 bytes (the maximum size of an UTF8 character) to avoid having to count each character manually. Change-Id: I1f2aba098d649eb94e0314f6606cc33600e8d766 Reviewed-on: http://gerrit.cloudera.org:8080/13928 Reviewed-by: Adar Dembo <[email protected]> Reviewed-by: Grant Henke <[email protected]> Tested-by: Kudu Jenkins --- src/kudu/common/partial_row-test.cc | 20 ++++++++++++++ src/kudu/common/partial_row.cc | 16 ++++++++++++ src/kudu/common/partial_row.h | 52 +++++++++++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+) diff --git a/src/kudu/common/partial_row-test.cc b/src/kudu/common/partial_row-test.cc index ecf600e..a62f309 100644 --- a/src/kudu/common/partial_row-test.cc +++ b/src/kudu/common/partial_row-test.cc @@ -278,6 +278,26 @@ TEST_F(PartialRowTest, UnitTest) { s = row.SetVarchar("varchar_val", "123456789\xF0\x9F\xA6\x8C ABCDEF"); EXPECT_EQ("varchar varchar_val=\"123456789\xF0\x9F\xA6\x8C\"", row.ToString()); + + s = row.SetVarcharNoCopyUnsafe("varchar_val", "varchar"); + EXPECT_EQ("varchar varchar_val=\"varchar\"", row.ToString()); + + std::string utf8_char_4byte = "\xf3\xa0\x87\xa1"; + std::string test_string = utf8_char_4byte; + for (auto i = 0; i < 9; ++i) { + test_string += utf8_char_4byte; + } + + std::string expected_string = "varchar varchar_val=\""; + expected_string += test_string + "\""; + + s = 
row.SetVarcharNoCopyUnsafe("varchar_val", test_string); + EXPECT_EQ(expected_string, row.ToString()); + + test_string += utf8_char_4byte; + + s = row.SetVarcharNoCopyUnsafe("varchar_val", test_string); + EXPECT_TRUE(s.IsInvalidArgument()); } TEST_F(PartialRowTest, TestCopy) { diff --git a/src/kudu/common/partial_row.cc b/src/kudu/common/partial_row.cc index f99dd58..2467423 100644 --- a/src/kudu/common/partial_row.cc +++ b/src/kudu/common/partial_row.cc @@ -386,6 +386,22 @@ Status KuduPartialRow::SetStringNoCopy(int col_idx, const Slice& val) { return Set<TypeTraits<STRING> >(col_idx, val, false); } +Status KuduPartialRow::SetVarcharNoCopyUnsafe(const Slice& col_name, const Slice& val) { + int col_idx; + RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx)); + return SetVarcharNoCopyUnsafe(col_idx, val); +} + +Status KuduPartialRow::SetVarcharNoCopyUnsafe(int col_idx, const Slice& val) { + auto col = schema_->column(col_idx); + if (val.size() > col.type_attributes().length * 4) { + return Status::InvalidArgument( + Substitute("Value too long, limit is $0 characters", + col.type_attributes().length)); + } + return Set<TypeTraits<VARCHAR> >(col_idx, val); +} + template<typename T> Status KuduPartialRow::SetSliceCopy(const Slice& col_name, const Slice& val) { int col_idx; diff --git a/src/kudu/common/partial_row.h b/src/kudu/common/partial_row.h index ec33879..ac2816b 100644 --- a/src/kudu/common/partial_row.h +++ b/src/kudu/common/partial_row.h @@ -281,6 +281,30 @@ class KUDU_EXPORT KuduPartialRow { Status SetStringNoCopy(const Slice& col_name, const Slice& val) WARN_UNUSED_RESULT; ///@} + /// @name [Advanced][Unstable] Setter for varchar columns by name (non-copying). + /// + /// Set the varchar value for a column by name, not copying the + /// specified data. 
+ /// + /// This method expects the values to be truncated already and only does a + /// basic validation that the data is not larger than the maximum column + /// length (as indicated by the schema) multiplied by 4, as that's the upper + /// limit if only 4-byte UTF8 characters are used. This is subject to change in + /// the future. + /// + /// @note The specified data must remain valid until the corresponding + /// RPC calls are completed to be able to access error buffers, + /// if any errors happened (the errors can be fetched using the + /// KuduSession::GetPendingErrors() method). + /// + /// @param [in] col_name + /// Name of the target column. + /// @param [in] val + /// The value to set. + /// @return Operation result status. + /// + Status SetVarcharNoCopyUnsafe(const Slice& col_name, const Slice& val) WARN_UNUSED_RESULT; + /// @name Setters for binary/string columns by index (non-copying). /// /// Set the binary/string value for a column by index, not copying the @@ -307,6 +331,34 @@ class KUDU_EXPORT KuduPartialRow { Status SetStringNoCopy(int col_idx, const Slice& val) WARN_UNUSED_RESULT; ///@} + /// @name [Advanced][Unstable] Setter for varchar columns by index (non-copying). + /// + /// Set the varchar value for a column by index, not copying the specified data. + /// + /// This method expects the values to be truncated already and only does a + /// basic validation that the data is not larger than the maximum column + /// length (as indicated by the schema) multiplied by 4, as that's the upper + /// limit if only 4-byte UTF8 characters are used. This is subject to change in + /// the future. + /// + /// This setter is the same as the corresponding column-name-based setter, + /// but with numeric column indexes. This is faster since it avoids + /// hashmap lookups, so should be preferred in performance-sensitive code + /// (e.g. bulk loaders). 
+ /// + /// @note The specified data must remain valid until the corresponding + /// RPC calls are completed to be able to access error buffers, + /// if any errors happened (the errors can be fetched using the + /// KuduSession::GetPendingErrors() method). + /// + /// @param [in] col_idx + /// The index of the target column. + /// @param [in] val + /// The value to set. + /// @return Operation result status. + /// + Status SetVarcharNoCopyUnsafe(int col_idx, const Slice& val) WARN_UNUSED_RESULT; + /// Set column value to @c NULL; the column is identified by its name. /// /// This will only succeed on nullable columns. Use Unset() to restore
