This is an automated email from the ASF dual-hosted git repository. abukor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit caa8d6d2fc9988dbd8c6331b2ab14f1a5cf63e56 Author: Attila Bukor <[email protected]> AuthorDate: Tue Sep 24 14:00:55 2019 +0200 KUDU-1938 Add support for VARCHAR pt 1 Introduces the VARCHAR data type to the server. Follow-up commits will add integration to the clients. The VARCHAR type is parameterized with a length column type attribute similar to DECIMAL's scale and precision. Internally it's stored as BINARY. The maximum length for VARCHAR is 65,535 characters. If a value longer than "n" characters is submitted for a VARCHAR(n) column the value is truncated to "n" characters before persisting the data on the server side. The maximum length was chosen for compatibility reasons. Apache Impala has a maximum length of 65,535 *bytes* for VARCHAR and the major RDBMSs I checked also have similar limits, either in characters or bytes, mostly configurable. Change-Id: I998982dba93831db91c43a97ce30d3e68c2a4a54 Reviewed-on: http://gerrit.cloudera.org:8080/13760 Reviewed-by: Alexey Serbin <[email protected]> Tested-by: Kudu Jenkins Reviewed-by: Grant Henke <[email protected]> Reviewed-by: Adar Dembo <[email protected]> --- src/kudu/common/column_predicate-test.cc | 10 +++++ src/kudu/common/common.proto | 3 ++ src/kudu/common/partial_row-test.cc | 25 +++++++++++- src/kudu/common/partial_row.cc | 70 ++++++++++++++++++++++++-------- src/kudu/common/partial_row.h | 45 +++++++++++++++++--- src/kudu/common/schema.cc | 4 ++ src/kudu/common/schema.h | 18 +++++++- src/kudu/common/types.cc | 1 + src/kudu/common/types.h | 19 ++++++++- src/kudu/common/wire_protocol.cc | 5 +++ src/kudu/util/CMakeLists.txt | 1 + src/kudu/util/char_util.cc | 44 ++++++++++++++++++++ src/kudu/util/char_util.h | 39 ++++++++++++++++++ 13 files changed, 256 insertions(+), 28 deletions(-) diff --git a/src/kudu/common/column_predicate-test.cc b/src/kudu/common/column_predicate-test.cc index 335bf78..15c0ed3 100644 --- a/src/kudu/common/column_predicate-test.cc +++ b/src/kudu/common/column_predicate-test.cc @@ -1121,6 
+1121,7 @@ TEST_F(TestColumnPredicate, TestLess) { ColumnSchema d128("d128", DECIMAL128); ColumnSchema string("string", STRING); ColumnSchema binary("binary", BINARY); + ColumnSchema varchar("varchar", VARCHAR); ASSERT_EQ(PredicateType::None, ColumnPredicate::Range(i8, nullptr, TypeTraits<INT8>::min_value()) @@ -1158,6 +1159,9 @@ TEST_F(TestColumnPredicate, TestLess) { ASSERT_EQ(PredicateType::None, ColumnPredicate::Range(binary, nullptr, TypeTraits<BINARY>::min_value()) .predicate_type()); + ASSERT_EQ(PredicateType::None, + ColumnPredicate::Range(varchar, nullptr, TypeTraits<VARCHAR>::min_value()) + .predicate_type()); } TEST_F(TestColumnPredicate, TestGreaterThanEquals) { @@ -1173,6 +1177,7 @@ TEST_F(TestColumnPredicate, TestGreaterThanEquals) { ColumnSchema d128("d128", DECIMAL128); ColumnSchema string("string", STRING); ColumnSchema binary("binary", BINARY); + ColumnSchema varchar("varchar", VARCHAR); ASSERT_EQ(PredicateType::IsNotNull, ColumnPredicate::Range(i8, TypeTraits<INT8>::min_value(), nullptr) @@ -1210,6 +1215,9 @@ TEST_F(TestColumnPredicate, TestGreaterThanEquals) { ASSERT_EQ(PredicateType::IsNotNull, ColumnPredicate::Range(binary, TypeTraits<BINARY>::min_value(), nullptr) .predicate_type()); + ASSERT_EQ(PredicateType::IsNotNull, + ColumnPredicate::Range(varchar, TypeTraits<VARCHAR>::min_value(), nullptr) + .predicate_type()); ASSERT_EQ(PredicateType::Equality, ColumnPredicate::Range(i8, TypeTraits<INT8>::max_value(), nullptr) @@ -1247,6 +1255,8 @@ TEST_F(TestColumnPredicate, TestGreaterThanEquals) { ColumnPredicate::Range(string, &s, nullptr).predicate_type()); ASSERT_EQ(PredicateType::Range, ColumnPredicate::Range(binary, &s, nullptr).predicate_type()); + ASSERT_EQ(PredicateType::Range, + ColumnPredicate::Range(varchar, &s, nullptr).predicate_type()); } // Test the InList constructor. 
diff --git a/src/kudu/common/common.proto b/src/kudu/common/common.proto index 1982315..8fb6a4a 100644 --- a/src/kudu/common/common.proto +++ b/src/kudu/common/common.proto @@ -56,6 +56,7 @@ enum DataType { DECIMAL64 = 16; DECIMAL128 = 17; IS_DELETED = 18; // virtual column; not a real data type + VARCHAR = 19; } enum EncodingType { @@ -94,6 +95,8 @@ message ColumnTypeAttributesPB { // For decimal columns optional int32 precision = 1; optional int32 scale = 2; + // For varchar columns + optional int32 length = 3; } // TODO: Differentiate between the schema attributes diff --git a/src/kudu/common/partial_row-test.cc b/src/kudu/common/partial_row-test.cc index 383a090..ecf600e 100644 --- a/src/kudu/common/partial_row-test.cc +++ b/src/kudu/common/partial_row-test.cc @@ -43,7 +43,9 @@ class PartialRowTest : public KuduTest { ColumnSchema("string_val", STRING, true), ColumnSchema("binary_val", BINARY, true), ColumnSchema("decimal_val", DECIMAL32, true, nullptr, nullptr, - ColumnStorageAttributes(), ColumnTypeAttributes(6, 2)) }, + ColumnStorageAttributes(), ColumnTypeAttributes(6, 2)), + ColumnSchema("varchar_val", VARCHAR, true, nullptr, nullptr, + ColumnStorageAttributes(), ColumnTypeAttributes(10)) }, 1) { SeedRandom(); } @@ -129,6 +131,9 @@ TEST_F(PartialRowTest, UnitTest) { EXPECT_FALSE(row.IsColumnSet(0)); EXPECT_FALSE(row.IsColumnSet(1)); EXPECT_FALSE(row.IsColumnSet(2)); + EXPECT_FALSE(row.IsColumnSet(3)); + EXPECT_FALSE(row.IsColumnSet(4)); + EXPECT_FALSE(row.IsColumnSet(5)); EXPECT_FALSE(row.IsKeySet()); EXPECT_EQ("", row.ToString()); @@ -255,6 +260,24 @@ TEST_F(PartialRowTest, UnitTest) { // able to set string columns with SetBinary and vice versa. 
EXPECT_FALSE(row.SetBinaryCopy("string_val", "oops").ok()); EXPECT_FALSE(row.SetStringCopy("binary_val", "oops").ok()); + + EXPECT_OK(row.Unset(4)); + + s = row.SetVarchar("varchar_val", "shortval"); + EXPECT_TRUE(row.IsColumnSet(5)); + EXPECT_EQ("varchar varchar_val=\"shortval\"", row.ToString()); + + s = row.SetVarchar("varchar_val", "shortval value "); + EXPECT_EQ("varchar varchar_val=\"shortval \"", row.ToString()); + + s = row.SetVarchar("varchar_val", "this value is too long"); + EXPECT_EQ("varchar varchar_val=\"this value\"", row.ToString()); + + s = row.SetVarchar("varchar_val", "Árvíztűrő tükörfúrógép"); + EXPECT_EQ("varchar varchar_val=\"Árvíztűrő \"", row.ToString()); + + s = row.SetVarchar("varchar_val", "123456789\xF0\x9F\xA6\x8C ABCDEF"); + EXPECT_EQ("varchar varchar_val=\"123456789\xF0\x9F\xA6\x8C\"", row.ToString()); } TEST_F(PartialRowTest, TestCopy) { diff --git a/src/kudu/common/partial_row.cc b/src/kudu/common/partial_row.cc index 6e924de..f99dd58 100644 --- a/src/kudu/common/partial_row.cc +++ b/src/kudu/common/partial_row.cc @@ -18,6 +18,7 @@ #include "kudu/common/partial_row.h" #include <cstring> +#include <ostream> #include <string> #include <utility> @@ -31,6 +32,7 @@ #include "kudu/gutil/port.h" #include "kudu/gutil/strings/substitute.h" #include "kudu/util/bitmap.h" +#include "kudu/util/char_util.h" #include "kudu/util/decimal_util.h" #include "kudu/util/int128.h" #include "kudu/util/logging.h" @@ -188,6 +190,10 @@ Status KuduPartialRow::Set(int32_t column_idx, const uint8_t* val) { RETURN_NOT_OK(SetBinaryCopy(column_idx, *reinterpret_cast<const Slice*>(val))); break; } + case VARCHAR: { + RETURN_NOT_OK(SetVarchar(column_idx, *reinterpret_cast<const Slice*>(val))); + break; + } case UNIXTIME_MICROS: { RETURN_NOT_OK(SetUnixTimeMicros(column_idx, *reinterpret_cast<const int64_t*>(val))); break; @@ -218,11 +224,19 @@ void KuduPartialRow::DeallocateStringIfSet(int col_idx, const ColumnSchema& col) if (BitmapTest(owned_strings_bitmap_, 
col_idx)) { ContiguousRow row(schema_, row_data_); const Slice* dst; - if (col.type_info()->type() == BINARY) { - dst = schema_->ExtractColumnFromRow<BINARY>(row, col_idx); - } else { - CHECK(col.type_info()->type() == STRING); - dst = schema_->ExtractColumnFromRow<STRING>(row, col_idx); + switch (col.type_info()->type()) { + case BINARY: + dst = schema_->ExtractColumnFromRow<BINARY>(row, col_idx); + break; + case VARCHAR: + dst = schema_->ExtractColumnFromRow<VARCHAR>(row, col_idx); + break; + case STRING: + dst = schema_->ExtractColumnFromRow<STRING>(row, col_idx); + break; + default: + LOG(FATAL) << "Unexpected type " << col.type_info()->type(); + break; } delete [] dst->data(); BitmapClear(owned_strings_bitmap_, col_idx); @@ -330,12 +344,21 @@ Status KuduPartialRow::SetBinary(const Slice& col_name, const Slice& val) { Status KuduPartialRow::SetString(const Slice& col_name, const Slice& val) { return SetStringCopy(col_name, val); } +Status KuduPartialRow::SetVarchar(const Slice& col_name, const Slice& val) { + int col_idx; + RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx)); + return SetVarchar(col_idx, val); +} + Status KuduPartialRow::SetBinary(int col_idx, const Slice& val) { return SetBinaryCopy(col_idx, val); } Status KuduPartialRow::SetString(int col_idx, const Slice& val) { return SetStringCopy(col_idx, val); } +Status KuduPartialRow::SetVarchar(int col_idx, const Slice& val) { + return SetSliceCopy<TypeTraits<VARCHAR> >(col_idx, val); +} Status KuduPartialRow::SetBinaryCopy(const Slice& col_name, const Slice& val) { return SetSliceCopy<TypeTraits<BINARY> >(col_name, val); @@ -365,24 +388,29 @@ Status KuduPartialRow::SetStringNoCopy(int col_idx, const Slice& val) { template<typename T> Status KuduPartialRow::SetSliceCopy(const Slice& col_name, const Slice& val) { - auto relocated = new uint8_t[val.size()]; - memcpy(relocated, val.data(), val.size()); - Slice relocated_val(relocated, val.size()); - Status s = Set<T>(col_name, relocated_val, true); - 
if (!s.ok()) { - delete [] relocated; - } - return s; + int col_idx; + RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx)); + return SetSliceCopy<T>(col_idx, val); } template<typename T> Status KuduPartialRow::SetSliceCopy(int col_idx, const Slice& val) { - auto relocated = new uint8_t[val.size()]; - memcpy(relocated, val.data(), val.size()); - Slice relocated_val(relocated, val.size()); + auto col = schema_->column(col_idx); + Slice relocated_val; + switch (T::type) { + case VARCHAR: + relocated_val = UTF8Truncate(val, col.type_attributes().length); + break; + case STRING: + case BINARY: + auto relocated = new uint8_t[val.size()]; + memcpy(relocated, val.data(), val.size()); + relocated_val = Slice(relocated, val.size()); + break; + } Status s = Set<T>(col_idx, relocated_val, true); if (!s.ok()) { - delete [] relocated; + delete [] relocated_val.data(); } return s; } @@ -656,6 +684,11 @@ Status KuduPartialRow::GetString(const Slice& col_name, Slice* val) const { Status KuduPartialRow::GetBinary(const Slice& col_name, Slice* val) const { return Get<TypeTraits<BINARY> >(col_name, val); } +Status KuduPartialRow::GetVarchar(const Slice& col_name, Slice* val) const { + int col_idx; + RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx)); + return GetVarchar(col_idx, val); +} Status KuduPartialRow::GetBool(int col_idx, bool* val) const { return Get<TypeTraits<BOOL> >(col_idx, val); @@ -716,6 +749,9 @@ Status KuduPartialRow::GetString(int col_idx, Slice* val) const { Status KuduPartialRow::GetBinary(int col_idx, Slice* val) const { return Get<TypeTraits<BINARY> >(col_idx, val); } +Status KuduPartialRow::GetVarchar(int col_idx, Slice* val) const { + return Get<TypeTraits<VARCHAR> >(col_idx, val); +} template<typename T> Status KuduPartialRow::Get(const Slice& col_name, diff --git a/src/kudu/common/partial_row.h b/src/kudu/common/partial_row.h index 74375c3..ec33879 100644 --- a/src/kudu/common/partial_row.h +++ b/src/kudu/common/partial_row.h @@ -166,10 +166,23 @@ 
class KUDU_EXPORT KuduPartialRow { Status SetString(const Slice& col_name, const Slice& val) WARN_UNUSED_RESULT; ///@} + /// @name Setters for varchar columns by name (copying). + /// + /// Set the varchar value for a column by name, copying the + /// specified data immediately. + /// + /// @param [in] col_name + /// Name of the target column. + /// @param [in] val + /// The value to set. + /// @return Operation result status. + /// + Status SetVarchar(const Slice& col_name, const Slice& val) WARN_UNUSED_RESULT; + /// @name Setters for binary/string columns by index (copying). /// - /// Set the binary/string value for a column by index, copying the specified - /// data immediately. + /// Set the binary/string value for a column by index, copying + /// the specified data immediately. /// /// These setters are the same as the corresponding column-name-based setters, /// but with numeric column indexes. These are faster since they avoid @@ -192,6 +205,24 @@ class KUDU_EXPORT KuduPartialRow { Status SetString(int col_idx, const Slice& val) WARN_UNUSED_RESULT; ///@} + /// @name Setter for varchar columns by index (copying). + /// + /// Set the varchar value for a column by index, copying + /// the specified data immediately. + /// + /// These setters are the same as the corresponding column-name-based setters, + /// but with numeric column indexes. These are faster since they avoid + /// hashmap lookups, so should be preferred in performance-sensitive code + /// (e.g. bulk loaders). + /// + /// @param [in] col_idx + /// The index of the target column. + /// @param [in] val + /// The value to set. + /// @return Operation result status. + /// + Status SetVarchar(int col_idx, const Slice& val) WARN_UNUSED_RESULT; + /// @name Setters for binary/string columns by name (copying). 
/// /// Set the binary/string value for a column by name, copying the specified @@ -406,9 +437,9 @@ class KUDU_EXPORT KuduPartialRow { #endif ///@} - /// @name Getters for string/binary column by column name. + /// @name Getters for string/binary/varchar column by column name. /// - /// Get the string/binary value for a column by its name. + /// Get the string/binary/varchar value for a column by its name. /// /// @param [in] col_name /// Name of the column. @@ -425,11 +456,12 @@ class KUDU_EXPORT KuduPartialRow { ///@{ Status GetString(const Slice& col_name, Slice* val) const WARN_UNUSED_RESULT; Status GetBinary(const Slice& col_name, Slice* val) const WARN_UNUSED_RESULT; + Status GetVarchar(const Slice& col_name, Slice* val) const WARN_UNUSED_RESULT; ///@} - /// @name Getters for string/binary column by column index. + /// @name Getters for string/binary/varchar column by column index. /// - /// Get the string/binary value for a column by its index. + /// Get the string/binary/varchar value for a column by its index. /// /// These methods are faster than their name-based counterparts /// since they use indices to avoid hashmap lookups, so index-based getters @@ -450,6 +482,7 @@ class KUDU_EXPORT KuduPartialRow { ///@{ Status GetString(int col_idx, Slice* val) const WARN_UNUSED_RESULT; Status GetBinary(int col_idx, Slice* val) const WARN_UNUSED_RESULT; + Status GetVarchar(int col_idx, Slice* val) const WARN_UNUSED_RESULT; ///@} //------------------------------------------------------------ diff --git a/src/kudu/common/schema.cc b/src/kudu/common/schema.cc index 74671bd..11a9966 100644 --- a/src/kudu/common/schema.cc +++ b/src/kudu/common/schema.cc @@ -87,6 +87,8 @@ bool ColumnTypeAttributes::EqualsForType(ColumnTypeAttributes other, case DECIMAL64: case DECIMAL128: return precision == other.precision && scale == other.scale; + case VARCHAR: + return length == other.length; default: return true; // true because unhandled types don't use ColumnTypeAttributes. 
} @@ -98,6 +100,8 @@ string ColumnTypeAttributes::ToStringForType(DataType type) const { case DECIMAL64: case DECIMAL128: return Substitute("($0, $1)", precision, scale); + case VARCHAR: + return Substitute("($0)", length); default: return ""; } diff --git a/src/kudu/common/schema.h b/src/kudu/common/schema.h index 1916868..489bb00 100644 --- a/src/kudu/common/schema.h +++ b/src/kudu/common/schema.h @@ -92,12 +92,20 @@ struct ColumnTypeAttributes { public: ColumnTypeAttributes() : precision(0), - scale(0) { + scale(0), + length(0) { } ColumnTypeAttributes(int8_t precision, int8_t scale) : precision(precision), - scale(scale) { + scale(scale), + length(0) { + } + + explicit ColumnTypeAttributes(uint16_t length) + : precision(0), + scale(0), + length(length) { } // Does `other` represent equivalent attributes for `type`? @@ -112,6 +120,12 @@ struct ColumnTypeAttributes { int8_t precision; int8_t scale; + + // Maximum value of the length is 65,535 for compatibility reasons as it's + // used by VARCHAR type which can be set to a maximum of 65,535 in case of + // MySQL and less for other major RDBMS implementations. The length refers to + // the number of characters/symbols (not bytes). 
+ uint16_t length; }; // Class for storing column attributes such as compression and diff --git a/src/kudu/common/types.cc b/src/kudu/common/types.cc index a70c425..7572f69 100644 --- a/src/kudu/common/types.cc +++ b/src/kudu/common/types.cc @@ -89,6 +89,7 @@ class TypeInfoResolver { AddMapping<DECIMAL64>(); AddMapping<DECIMAL128>(); AddMapping<IS_DELETED>(); + AddMapping<VARCHAR>(); } template<DataType type> void AddMapping() { diff --git a/src/kudu/common/types.h b/src/kudu/common/types.h index 36f9b6f..e77671e 100644 --- a/src/kudu/common/types.h +++ b/src/kudu/common/types.h @@ -20,8 +20,8 @@ #include <cmath> -#include <cstdio> #include <cstdint> +#include <cstdio> #include <cstdlib> #include <cstring> #include <ctime> @@ -37,7 +37,7 @@ #include "kudu/gutil/strings/escaping.h" #include "kudu/gutil/strings/numbers.h" #include "kudu/util/int128.h" -#include "kudu/util/int128_util.h" +#include "kudu/util/int128_util.h" // IWYU pragma: keep #include "kudu/util/slice.h" // IWYU pragma: no_include "kudu/util/status.h" @@ -630,6 +630,19 @@ struct DataTypeTraits<IS_DELETED> : public DerivedTypeTraits<BOOL>{ } }; +template<> +struct DataTypeTraits<VARCHAR> : public DerivedTypeTraits<BINARY>{ + static const char* name() { + return "varchar"; + } + static void AppendDebugStringForValue(const void *val, std::string *str) { + const Slice *s = reinterpret_cast<const Slice *>(val); + str->push_back('"'); + str->append(strings::Utf8SafeCEscape(s->ToString())); + str->push_back('"'); + } +}; + // Instantiate this template to get static access to the type traits. template<DataType datatype> struct TypeTraits : public DataTypeTraits<datatype> { @@ -713,6 +726,7 @@ class Variant { numeric_.double_val = *static_cast<const double *>(value); break; case STRING: // Fallthrough intended. 
+ case VARCHAR: case BINARY: { const Slice *str = static_cast<const Slice *>(value); @@ -779,6 +793,7 @@ class Variant { case FLOAT: return (&numeric_.float_val); case DOUBLE: return (&numeric_.double_val); case STRING: + case VARCHAR: case BINARY: return &vstr_; default: LOG(FATAL) << "Unknown data type: " << type_; } diff --git a/src/kudu/common/wire_protocol.cc b/src/kudu/common/wire_protocol.cc index 82e67cd..d5f3826 100644 --- a/src/kudu/common/wire_protocol.cc +++ b/src/kudu/common/wire_protocol.cc @@ -228,6 +228,8 @@ void ColumnSchemaToPB(const ColumnSchema& col_schema, ColumnSchemaPB *pb, int fl type == DataType::DECIMAL128) { pb->mutable_type_attributes()->set_precision(col_schema.type_attributes().precision); pb->mutable_type_attributes()->set_scale(col_schema.type_attributes().scale); + } else if (type == DataType::VARCHAR) { + pb->mutable_type_attributes()->set_length(col_schema.type_attributes().length); } if (!(flags & SCHEMA_PB_WITHOUT_STORAGE_ATTRIBUTES)) { pb->set_encoding(col_schema.attributes().encoding); @@ -299,6 +301,9 @@ Status ColumnSchemaFromPB(const ColumnSchemaPB& pb, boost::optional<ColumnSchema if (typeAttributesPB.has_scale()) { type_attributes.scale = typeAttributesPB.scale(); } + if (typeAttributesPB.has_length()) { + type_attributes.length = typeAttributesPB.length(); + } } ColumnStorageAttributes attributes; diff --git a/src/kudu/util/CMakeLists.txt b/src/kudu/util/CMakeLists.txt index be764c5..cd184ec 100644 --- a/src/kudu/util/CMakeLists.txt +++ b/src/kudu/util/CMakeLists.txt @@ -150,6 +150,7 @@ set(UTIL_SRCS block_cache_metrics.cc bloom_filter.cc cache.cc + char_util.cc coding.cc condition_variable.cc cow_object.cc diff --git a/src/kudu/util/char_util.cc b/src/kudu/util/char_util.cc new file mode 100644 index 0000000..606d421 --- /dev/null +++ b/src/kudu/util/char_util.cc @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "kudu/util/char_util.h" + +#include <string.h> + +namespace kudu { + +Slice UTF8Truncate(Slice val, size_t max_utf8_length) { + size_t num_utf8_chars = 0; + size_t num_bytes = 0; + auto str = val.data(); + for (auto i = 0; i < val.size(); ++i) { + num_utf8_chars += (*str++ & 0xc0) != 0x80; + num_bytes++; + if (num_utf8_chars > max_utf8_length) { + num_bytes--; + num_utf8_chars--; + break; + } + } + // as num_bytes <= val.size() we can use that to allocate the new slice data + // and copy the first num_bytes from val.data() to it. + auto relocated = new uint8_t[num_bytes]; + memcpy(relocated, val.data(), num_bytes); + return Slice(relocated, num_bytes); +} + +} // namespace kudu diff --git a/src/kudu/util/char_util.h b/src/kudu/util/char_util.h new file mode 100644 index 0000000..9fa0338 --- /dev/null +++ b/src/kudu/util/char_util.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <limits> + +#include "kudu/util/slice.h" + +namespace kudu { + + // Minimum and maximum length for VARCHAR [1,65535] + constexpr uint16_t kMinVarcharLength = 1; + constexpr uint16_t kMaxVarcharLength = std::numeric_limits<uint16_t>::max(); + + // Copy and truncate a slice. The Slice returned owns its memory. + // + // max_utf8_length is the number of UTF-8 characters/symbols (not bytes) to + // truncate to. + // + // The method doesn't validate the string is well-formed UTF-8. + Slice UTF8Truncate(Slice val, size_t max_utf8_length); +} // namespace kudu
