This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new a949b1f8ac6 branch-4.0: [fix](varbinary) Rename StringView to
StringContainer #57381 (#57446)
a949b1f8ac6 is described below
commit a949b1f8ac6e5eed06ad2a64f9958d74f5e1225c
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Oct 30 09:53:00 2025 +0800
branch-4.0: [fix](varbinary) Rename StringView to StringContainer #57381
(#57446)
Cherry-picked from #57381
Co-authored-by: admiring_xm <[email protected]>
---
be/src/runtime/primitive_type.h | 14 ++--
be/src/vec/columns/column_varbinary.cpp | 6 +-
be/src/vec/columns/column_varbinary.h | 18 ++---
.../{string_view.cpp => string_container.cpp} | 6 +-
.../common/{string_view.h => string_container.h} | 40 +++++-----
be/src/vec/core/field.h | 17 +++--
be/src/vec/data_types/data_type_varbinary.cpp | 6 +-
be/src/vec/data_types/data_type_varbinary.h | 7 +-
be/src/vec/functions/function_varbinary.cpp | 8 +-
be/src/vec/utils/varbinaryop_subbinary.h | 8 +-
be/test/vec/columns/column_varbinary_test.cpp | 66 ++++++++---------
...ing_view_test.cpp => string_container_test.cpp} | 86 +++++++++++-----------
.../vec/data_types/data_type_varbinary_test.cpp | 8 +-
be/test/vec/function/function_test_util.h | 6 +-
14 files changed, 149 insertions(+), 147 deletions(-)
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 6dd0fed82be..d89fa1a0f16 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -28,7 +28,7 @@
#include "olap/decimal12.h"
#include "olap/uint24.h"
#include "runtime/define_primitive_type.h"
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/runtime/vdatetime_value.h"
@@ -634,15 +634,15 @@ struct PrimitiveTypeTraits<TYPE_STRING> {
};
template <>
struct PrimitiveTypeTraits<TYPE_VARBINARY> {
- using CppType = doris::StringView;
+ using CppType = doris::StringContainer;
using StorageFieldType = CppType;
- using CppNativeType = doris::StringView;
- using ColumnItemType = doris::StringView;
+ using CppNativeType = doris::StringContainer;
+ using ColumnItemType = doris::StringContainer;
using DataType = vectorized::DataTypeVarbinary;
using ColumnType = vectorized::ColumnVarbinary;
- using NearestFieldType = doris::StringView;
- using AvgNearestFieldType = doris::StringView;
- using AvgNearestFieldType256 = doris::StringView;
+ using NearestFieldType = doris::StringContainer;
+ using AvgNearestFieldType = doris::StringContainer;
+ using AvgNearestFieldType256 = doris::StringContainer;
static constexpr PrimitiveType NearestPrimitiveType = TYPE_VARBINARY;
static constexpr PrimitiveType AvgNearestPrimitiveType = TYPE_VARBINARY;
static constexpr PrimitiveType AvgNearestPrimitiveType256 = TYPE_VARBINARY;
diff --git a/be/src/vec/columns/column_varbinary.cpp
b/be/src/vec/columns/column_varbinary.cpp
index 00698921e60..51b517d483a 100644
--- a/be/src/vec/columns/column_varbinary.cpp
+++ b/be/src/vec/columns/column_varbinary.cpp
@@ -115,7 +115,7 @@ size_t ColumnVarbinary::filter(const IColumn::Filter&
filter) {
} else {
auto val = src_vec.get_data()[i];
const auto* dst = _arena.insert(val.data(), val.size());
- _data[pos] = doris::StringView(dst, val.size());
+ _data[pos] = doris::StringContainer(dst, val.size());
}
pos++;
}
@@ -144,7 +144,7 @@ MutableColumnPtr ColumnVarbinary::permute(const
IColumn::Permutation& perm, size
continue;
}
const auto* dst = const_cast<Arena&>(_arena).insert(val.data(),
val.size());
- res_data[i] = doris::StringView(dst, val.size());
+ res_data[i] = doris::StringContainer(dst, val.size());
}
return res;
@@ -159,7 +159,7 @@ void ColumnVarbinary::replace_column_data(const IColumn&
rhs, size_t row, size_t
return;
}
const auto* dst = _arena.insert(val.data(), val.size());
- _data[self_row] = doris::StringView(dst, val.size());
+ _data[self_row] = doris::StringContainer(dst, val.size());
}
#include "common/compile_check_end.h"
diff --git a/be/src/vec/columns/column_varbinary.h
b/be/src/vec/columns/column_varbinary.h
index 6411eb26ec0..4aec2ba17dc 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -26,7 +26,7 @@
#include "vec/columns/column.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
@@ -36,7 +36,7 @@ private:
friend class COWHelper<IColumn, ColumnVarbinary>;
public:
- using Container = PaddedPODArray<doris::StringView>;
+ using Container = PaddedPODArray<doris::StringContainer>;
ColumnVarbinary() = default;
ColumnVarbinary(const size_t n) : _data(n) {}
@@ -73,7 +73,7 @@ public:
char* alloc(size_t length) { return _arena.alloc(length); }
void insert(const Field& x) override {
- auto value = vectorized::get<const doris::StringView&>(x);
+ auto value = vectorized::get<const doris::StringContainer&>(x);
insert_data(value.data(), value.size());
}
@@ -84,7 +84,7 @@ public:
}
void insert_data(const char* pos, size_t length) override {
- if (length <= doris::StringView::kInlineSize) {
+ if (length <= doris::StringContainer::kInlineSize) {
insert_inline_data(pos, length);
} else {
insert_to_buffer(pos, length);
@@ -92,16 +92,16 @@ public:
}
void insert_inline_data(const char* pos, size_t length) {
- DCHECK(length <= doris::StringView::kInlineSize);
- _data.push_back(doris::StringView(pos, cast_set<uint32_t>(length)));
+ DCHECK(length <= doris::StringContainer::kInlineSize);
+ _data.push_back(doris::StringContainer(pos,
cast_set<uint32_t>(length)));
}
void insert_to_buffer(const char* pos, size_t length) {
const char* dst = _arena.insert(pos, length);
- _data.push_back(doris::StringView(dst, cast_set<uint32_t>(length)));
+ _data.push_back(doris::StringContainer(dst,
cast_set<uint32_t>(length)));
}
- void insert_default() override { _data.push_back(doris::StringView()); }
+ void insert_default() override {
_data.push_back(doris::StringContainer()); }
int compare_at(size_t n, size_t m, const IColumn& rhs_,
int /*nan_direction_hint*/) const override {
@@ -131,7 +131,7 @@ public:
size_t allocated_bytes() const override { return _data.allocated_bytes() +
_arena.size(); }
size_t byte_size() const override {
- size_t bytes = _data.size() * sizeof(doris::StringView);
+ size_t bytes = _data.size() * sizeof(doris::StringContainer);
return bytes + _arena.used_size();
}
diff --git a/be/src/vec/common/string_view.cpp
b/be/src/vec/common/string_container.cpp
similarity index 91%
rename from be/src/vec/common/string_view.cpp
rename to be/src/vec/common/string_container.cpp
index 657e4585f22..804359f945d 100644
--- a/be/src/vec/common/string_view.cpp
+++ b/be/src/vec/common/string_container.cpp
@@ -15,11 +15,11 @@
// specific language governing permissions and limitations
// under the License.
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
namespace doris {
-bool StringView::operator==(const StringView& other) const {
+bool StringContainer::operator==(const StringContainer& other) const {
// Compare lengths and first 4 characters.
if (size_and_prefix_as_int64() != other.size_and_prefix_as_int64()) {
return false;
@@ -34,7 +34,7 @@ bool StringView::operator==(const StringView& other) const {
return memcmp(value_.data + kPrefixSize, other.value_.data + kPrefixSize,
size_ - kPrefixSize) == 0;
}
-int32_t StringView::compare(const StringView& other) const {
+int32_t StringContainer::compare(const StringContainer& other) const {
if (prefix_as_int() != other.prefix_as_int()) {
// The result is decided on prefix. The shorter will be less because
the
// prefix is padded with zeros.
diff --git a/be/src/vec/common/string_view.h
b/be/src/vec/common/string_container.h
similarity index 80%
rename from be/src/vec/common/string_view.h
rename to be/src/vec/common/string_container.h
index 5cd560aad4a..72f52366c64 100644
--- a/be/src/vec/common/string_view.h
+++ b/be/src/vec/common/string_container.h
@@ -35,22 +35,22 @@ namespace doris {
// exposes a subset of the interface. If the string is 12 characters
// or less, it is inlined and no reference is held. If it is longer, a
// reference to the string is held and the 4 first characters are
-// cached in the StringView. This allows failing comparisons early and
+// cached in the StringContainer. This allows failing comparisons early and
// reduces the CPU cache working set when dealing with short strings.
-class StringView {
+class StringContainer {
#include "common/compile_check_begin.h"
public:
using value_type = char;
static constexpr size_t kPrefixSize = 4 * sizeof(char);
static constexpr size_t kInlineSize = 12;
- StringView() {
- static_assert(sizeof(StringView) == 16);
- memset(this, 0, sizeof(StringView));
+ StringContainer() {
+ static_assert(sizeof(StringContainer) == 16);
+ memset(this, 0, sizeof(StringContainer));
}
- StringView(const char* data, uint32_t len) : size_(len) {
+ StringContainer(const char* data, uint32_t len) : size_(len) {
DCHECK_GE(len, 0);
DCHECK(data || len == 0);
if (isInline()) {
@@ -72,20 +72,20 @@ public:
}
}
- StringView(unsigned char* data, uint32_t len)
- : StringView(reinterpret_cast<const char*>(data), len) {}
+ StringContainer(unsigned char* data, uint32_t len)
+ : StringContainer(reinterpret_cast<const char*>(data), len) {}
bool isInline() const { return isInline(size_); }
ALWAYS_INLINE static constexpr bool isInline(uint32_t size) { return size
<= kInlineSize; }
- explicit StringView(std::string&& value) = delete;
- explicit StringView(const std::string& value)
- : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
- explicit StringView(std::string_view value)
- : StringView(value.data(), cast_set<uint32_t>(value.size())) {}
- /* implicit */ StringView(const char* data)
- : StringView(data, cast_set<uint32_t>(strlen(data))) {}
+ explicit StringContainer(std::string&& value) = delete;
+ explicit StringContainer(const std::string& value)
+ : StringContainer(value.data(), cast_set<uint32_t>(value.size()))
{}
+ explicit StringContainer(std::string_view value)
+ : StringContainer(value.data(), cast_set<uint32_t>(value.size()))
{}
+ /* implicit */ StringContainer(const char* data)
+ : StringContainer(data, cast_set<uint32_t>(strlen(data))) {}
doris::StringRef to_string_ref() const { return {data(), size()}; }
operator std::string_view() && = delete;
@@ -101,12 +101,12 @@ public:
void set_size(uint32_t size) { size_ = size; }
- bool operator==(const StringView& other) const;
- friend std::ostream& operator<<(std::ostream& os, const StringView&
stringView) {
- os.write(stringView.data(), stringView.size());
+ bool operator==(const StringContainer& other) const;
+ friend std::ostream& operator<<(std::ostream& os, const StringContainer&
StringContainer) {
+ os.write(StringContainer.data(), StringContainer.size());
return os;
}
- auto operator<=>(const StringView& other) const {
+ auto operator<=>(const StringContainer& other) const {
const auto cmp = compare(other);
return cmp < 0 ? std::strong_ordering::less
: cmp > 0 ? std::strong_ordering::greater
@@ -116,7 +116,7 @@ public:
// Returns 0, if this == other
// < 0, if this < other
// > 0, if this > other
- int32_t compare(const StringView& other) const;
+ int32_t compare(const StringContainer& other) const;
const char* begin() && = delete;
const char* begin() const& { return data(); }
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 1131ecfc4f7..de6e7f9bdde 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -38,7 +38,7 @@
#include "olap/hll.h"
#include "util/bitmap_value.h"
#include "util/quantile_state.h"
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
#include "vec/common/uint128.h"
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"
@@ -388,7 +388,7 @@ public:
case PrimitiveType::TYPE_VARCHAR:
return get<String>() <=> rhs.get<String>();
case PrimitiveType::TYPE_VARBINARY:
- return get<doris::StringView>() <=> rhs.get<doris::StringView>();
+ return get<doris::StringContainer>() <=>
rhs.get<doris::StringContainer>();
case PrimitiveType::TYPE_DECIMAL32:
return get<Decimal32>() <=> rhs.get<Decimal32>();
case PrimitiveType::TYPE_DECIMAL64:
@@ -436,7 +436,7 @@ public:
f(field.template get<String>());
return;
case PrimitiveType::TYPE_VARBINARY:
- f(field.template get<doris::StringView>());
+ f(field.template get<doris::StringContainer>());
return;
case PrimitiveType::TYPE_JSONB:
f(field.template get<JsonbField>());
@@ -486,11 +486,12 @@ public:
std::string_view as_string_view() const;
private:
- std::aligned_union_t<
- DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64,
UInt128, Int64, Int128, IPv6,
- Float64, String, JsonbField, Array, Tuple, Map, VariantMap,
DecimalField<Decimal32>,
- DecimalField<Decimal64>, DecimalField<Decimal128V2>,
DecimalField<Decimal128V3>,
- DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState,
doris::StringView>
+ std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null,
UInt64, UInt128, Int64,
+ Int128, IPv6, Float64, String, JsonbField, Array,
Tuple, Map, VariantMap,
+ DecimalField<Decimal32>, DecimalField<Decimal64>,
+ DecimalField<Decimal128V2>,
DecimalField<Decimal128V3>,
+ DecimalField<Decimal256>, BitmapValue, HyperLogLog,
QuantileState,
+ doris::StringContainer>
storage;
PrimitiveType type;
diff --git a/be/src/vec/data_types/data_type_varbinary.cpp
b/be/src/vec/data_types/data_type_varbinary.cpp
index 440b79f112e..8394645eb7c 100644
--- a/be/src/vec/data_types/data_type_varbinary.cpp
+++ b/be/src/vec/data_types/data_type_varbinary.cpp
@@ -34,8 +34,8 @@
#include "vec/columns/column_varbinary.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_buffer.hpp"
+#include "vec/common/string_container.h"
#include "vec/common/string_ref.h"
-#include "vec/common/string_view.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
@@ -43,7 +43,7 @@ namespace doris::vectorized {
#include "common/compile_check_begin.h"
Field DataTypeVarbinary::get_default() const {
- return Field::create_field<TYPE_VARBINARY>(StringView());
+ return Field::create_field<TYPE_VARBINARY>(StringContainer());
}
MutableColumnPtr DataTypeVarbinary::create_column() const {
@@ -132,7 +132,7 @@ FieldWithDataType
DataTypeVarbinary::get_field_with_data_type(const IColumn& col
const auto& column_data =
assert_cast<const ColumnVarbinary&,
TypeCheckOnRelease::DISABLE>(column);
return FieldWithDataType {.field = Field::create_field<TYPE_VARBINARY>(
-
doris::StringView(column_data.get_data_at(row_num))),
+
doris::StringContainer(column_data.get_data_at(row_num))),
.base_scalar_type_id = get_primitive_type()};
}
diff --git a/be/src/vec/data_types/data_type_varbinary.h
b/be/src/vec/data_types/data_type_varbinary.h
index fa13d19287d..4ec908e8dc7 100644
--- a/be/src/vec/data_types/data_type_varbinary.h
+++ b/be/src/vec/data_types/data_type_varbinary.h
@@ -27,7 +27,7 @@
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "serde/data_type_string_serde.h"
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/serde/data_type_serde.h"
@@ -40,7 +40,7 @@ class IColumn;
class DataTypeVarbinary : public IDataType {
public:
using ColumnType = ColumnVarbinary;
- using FieldType = doris::StringView;
+ using FieldType = doris::StringContainer;
static constexpr PrimitiveType PType = TYPE_VARBINARY;
@@ -72,7 +72,8 @@ public:
Field get_field(const TExprNode& node) const override {
DCHECK_EQ(node.node_type, TExprNodeType::VARBINARY_LITERAL);
DCHECK(node.__isset.varbinary_literal);
- return
Field::create_field<TYPE_VARBINARY>(doris::StringView(node.varbinary_literal.value));
+ return Field::create_field<TYPE_VARBINARY>(
+ doris::StringContainer(node.varbinary_literal.value));
}
FieldWithDataType get_field_with_data_type(const IColumn& column,
diff --git a/be/src/vec/functions/function_varbinary.cpp
b/be/src/vec/functions/function_varbinary.cpp
index ec592c236bc..fadde6dfb39 100644
--- a/be/src/vec/functions/function_varbinary.cpp
+++ b/be/src/vec/functions/function_varbinary.cpp
@@ -64,7 +64,7 @@ public:
auto col_res = ColumnVarbinary::create();
const auto& data = col->get_chars();
const auto& offsets = col->get_offsets();
- col_res->get_data().assign(input_rows_count, StringView());
+ col_res->get_data().assign(input_rows_count, StringContainer());
for (int i = 0; i < input_rows_count; ++i) {
const auto* source = reinterpret_cast<const
char*>(&data[offsets[i - 1]]);
@@ -155,7 +155,7 @@ struct VarbinaryLengthImpl {
return {std::make_shared<DataTypeVarbinary>()};
}
- static Status vector(const PaddedPODArray<doris::StringView>& data,
+ static Status vector(const PaddedPODArray<doris::StringContainer>& data,
PaddedPODArray<Int32>& res) {
size_t rows_count = data.size();
res.resize(rows_count);
@@ -174,7 +174,7 @@ struct ToBase64BinaryImpl {
using ColumnType = ColumnString;
static constexpr auto PrimitiveTypeImpl = PrimitiveType::TYPE_VARBINARY;
- static Status vector(const PaddedPODArray<doris::StringView>& data,
+ static Status vector(const PaddedPODArray<doris::StringContainer>& data,
ColumnString::Chars& dst_data, ColumnString::Offsets&
dst_offsets) {
auto rows_count = data.size();
dst_offsets.resize(rows_count);
@@ -221,7 +221,7 @@ struct FromBase64BinaryImpl {
static Status vector(const ColumnString::Chars& data, const
ColumnString::Offsets& offsets,
ColumnVarbinary* res, NullMap& null_map) {
auto rows_count = offsets.size();
- res->get_data().assign(rows_count, StringView());
+ res->get_data().assign(rows_count, StringContainer());
for (size_t i = 0; i < rows_count; i++) {
const auto* source = reinterpret_cast<const char*>(&data[offsets[i
- 1]]);
diff --git a/be/src/vec/utils/varbinaryop_subbinary.h
b/be/src/vec/utils/varbinaryop_subbinary.h
index ef4d0b1d20a..2ffc394f168 100644
--- a/be/src/vec/utils/varbinaryop_subbinary.h
+++ b/be/src/vec/utils/varbinaryop_subbinary.h
@@ -30,17 +30,17 @@ namespace doris::vectorized {
constexpr auto SIZE_OF_UINT = sizeof(uint32_t);
struct VarBinaryOP {
- static void check_and_insert_data(doris::StringView& sView, const char*
data, uint32_t len,
+ static void check_and_insert_data(doris::StringContainer& sView, const
char* data, uint32_t len,
bool before_is_inline) {
if (before_is_inline) {
sView.set_size(len);
} else {
- sView = doris::StringView(data, len);
+ sView = doris::StringContainer(data, len);
}
}
static std::pair<bool, char*> alloc(ColumnVarbinary* res_col, size_t
index, uint32_t len) {
- bool is_inline = StringView::isInline(len);
+ bool is_inline = StringContainer::isInline(len);
char* dst = nullptr;
if (is_inline) {
dst = reinterpret_cast<char*>(&(res_col->get_data()[index])) +
SIZE_OF_UINT;
@@ -90,7 +90,7 @@ private:
res->get_data().reserve(size);
for (size_t i = 0; i < size; ++i) {
- doris::StringView binary =
binarys->get_data()[index_check_const<binary_const>(i)];
+ doris::StringContainer binary =
binarys->get_data()[index_check_const<binary_const>(i)];
int binary_size = static_cast<int>(binary.size());
int start_value =
start->get_data()[index_check_const<start_const>(i)];
diff --git a/be/test/vec/columns/column_varbinary_test.cpp
b/be/test/vec/columns/column_varbinary_test.cpp
index f593c1d2001..0e1d2b9d917 100644
--- a/be/test/vec/columns/column_varbinary_test.cpp
+++ b/be/test/vec/columns/column_varbinary_test.cpp
@@ -32,8 +32,8 @@
#include "vec/columns/column.h"
#include "vec/columns/column_string.h"
#include "vec/common/assert_cast.h"
+#include "vec/common/string_container.h"
#include "vec/common/string_ref.h"
-#include "vec/common/string_view.h"
#include "vec/core/types.h"
namespace doris::vectorized {
@@ -64,9 +64,9 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) {
EXPECT_EQ(col->get_name(), std::string("ColumnVarbinary"));
EXPECT_EQ(col->size(), 0U);
- const size_t inline_len = std::min<size_t>(doris::StringView::kInlineSize,
8);
+ const size_t inline_len =
std::min<size_t>(doris::StringContainer::kInlineSize, 8);
const std::string small = make_bytes(inline_len, 0x11);
- const std::string big = make_bytes(doris::StringView::kInlineSize + 32,
0x22);
+ const std::string big = make_bytes(doris::StringContainer::kInlineSize +
32, 0x22);
size_t before_bytes = col->byte_size();
@@ -78,7 +78,7 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) {
ASSERT_EQ(memcmp(r0.data, small.data(), small.size()), 0);
size_t after_small_bytes = col->byte_size();
- ASSERT_EQ(after_small_bytes - before_bytes, sizeof(doris::StringView));
+ ASSERT_EQ(after_small_bytes - before_bytes,
sizeof(doris::StringContainer));
ASSERT_EQ(after_small_bytes - before_bytes, 16);
col->insert_default();
@@ -94,11 +94,11 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) {
ASSERT_EQ(memcmp(r2.data, big.data(), big.size()), 0);
size_t after_big_bytes = col->byte_size();
- // big insert adds one StringView slot + big payload in arena (Arena may
add alignment/overhead)
+ // big insert adds one StringContainer slot + big payload in arena (Arena
may add alignment/overhead)
size_t diff = after_big_bytes - after_small_bytes;
std::cout << "after_big_bytes: " << after_big_bytes
<< " after_small_bytes: " << after_small_bytes << " diff: " <<
diff << std::endl;
- ASSERT_GE(diff, sizeof(doris::StringView) + big.size());
+ ASSERT_GE(diff, sizeof(doris::StringContainer) + big.size());
// pop_back
col->pop_back(1);
@@ -115,7 +115,7 @@ TEST_F(ColumnVarbinaryTest, BasicInsertGetPopClear) {
TEST_F(ColumnVarbinaryTest, InsertFromAndRanges) {
auto src = ColumnVarbinary::create();
std::vector<std::string> vals = {make_bytes(1, 0x01), make_bytes(2, 0x02),
- make_bytes(doris::StringView::kInlineSize
+ 5, 0x03),
+
make_bytes(doris::StringContainer::kInlineSize + 5, 0x03),
make_bytes(0, 0x00), make_bytes(7, 0x05)};
for (auto& v : vals) {
src->insert_data(v.data(), v.size());
@@ -160,12 +160,12 @@ TEST_F(ColumnVarbinaryTest, FilterBothModes) {
auto col = ColumnVarbinary::create();
// Mix inline (small) and non-inline (large > kInlineSize) values
std::vector<std::string> vals = {
- make_bytes(1, 0x10), // inline
- make_bytes(doris::StringView::kInlineSize + 5, 0x91), //
non-inline (dropped)
- make_bytes(3, 0x12), // inline
- make_bytes(doris::StringView::kInlineSize + 7, 0x92), // non-inline
- make_bytes(0, 0x00), // empty
(dropped)
- make_bytes(doris::StringView::kInlineSize + 9, 0x93) // non-inline
+ make_bytes(1, 0x10), //
inline
+ make_bytes(doris::StringContainer::kInlineSize + 5, 0x91), //
non-inline (dropped)
+ make_bytes(3, 0x12), //
inline
+ make_bytes(doris::StringContainer::kInlineSize + 7, 0x92), //
non-inline
+ make_bytes(0, 0x00), //
empty (dropped)
+ make_bytes(doris::StringContainer::kInlineSize + 9, 0x93) //
non-inline
};
for (auto& v : vals) {
col->insert_data(v.data(), v.size());
@@ -205,10 +205,10 @@ TEST_F(ColumnVarbinaryTest, Permute) {
auto col = ColumnVarbinary::create();
// Include large (non-inline) entries to exercise arena path
std::vector<std::string> vals = {
- make_bytes(1, 0x20), // inline
- make_bytes(doris::StringView::kInlineSize + 3, 0xA0), // non-inline
- make_bytes(3, 0x22), // inline
- make_bytes(doris::StringView::kInlineSize + 8, 0xA1) // non-inline
+ make_bytes(1, 0x20), //
inline
+ make_bytes(doris::StringContainer::kInlineSize + 3, 0xA0), //
non-inline
+ make_bytes(3, 0x22), //
inline
+ make_bytes(doris::StringContainer::kInlineSize + 8, 0xA1) //
non-inline
};
for (auto& v : vals) {
col->insert_data(v.data(), v.size());
@@ -242,7 +242,7 @@ TEST_F(ColumnVarbinaryTest, Permute) {
TEST_F(ColumnVarbinaryTest, CloneResized) {
auto col = ColumnVarbinary::create();
std::vector<std::string> vals = {make_bytes(1, 0x30), make_bytes(0, 0x00),
- make_bytes(doris::StringView::kInlineSize
+ 1, 0x31)};
+
make_bytes(doris::StringContainer::kInlineSize + 1, 0x31)};
for (auto& v : vals) {
col->insert_data(v.data(), v.size());
}
@@ -276,9 +276,9 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) {
auto col = ColumnVarbinary::create();
// mix inline and non-inline
std::vector<std::string> vals = {
- make_bytes(2, 0x40), // inline
- make_bytes(doris::StringView::kInlineSize + 4, 0xB0), // non-inline
- make_bytes(4, 0x42) // inline
+ make_bytes(2, 0x40), //
inline
+ make_bytes(doris::StringContainer::kInlineSize + 4, 0xB0), //
non-inline
+ make_bytes(4, 0x42) //
inline
};
for (auto& v : vals) {
col->insert_data(v.data(), v.size());
@@ -286,8 +286,8 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) {
auto rhs = ColumnVarbinary::create();
std::vector<std::string> rhs_vals = {
- make_bytes(doris::StringView::kInlineSize + 7, 0xC0), // non-inline
- make_bytes(1, 0x51) // inline
+ make_bytes(doris::StringContainer::kInlineSize + 7, 0xC0), //
non-inline
+ make_bytes(1, 0x51) //
inline
};
for (auto& v : rhs_vals) {
rhs->insert_data(v.data(), v.size());
@@ -308,7 +308,7 @@ TEST_F(ColumnVarbinaryTest, ReplaceColumnData) {
TEST_F(ColumnVarbinaryTest, SerializeDeserializeRoundtripManual) {
auto col = ColumnVarbinary::create();
- std::string v = make_bytes(doris::StringView::kInlineSize + 17, 0x60);
+ std::string v = make_bytes(doris::StringContainer::kInlineSize + 17, 0x60);
std::vector<char> buf;
auto len = static_cast<uint32_t>(v.size());
@@ -339,7 +339,7 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) {
auto col = ColumnVarbinary::create();
std::vector<std::string> vals = {
make_bytes(1, 0x11), make_bytes(0, 0x00),
- make_bytes(doris::StringView::kInlineSize + 6, 0x12)}; // include
non-inline
+ make_bytes(doris::StringContainer::kInlineSize + 6, 0x12)}; //
include non-inline
for (auto& v : vals) {
col->insert_data(v.data(), v.size());
}
@@ -347,13 +347,13 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) {
for (size_t i = 0; i < vals.size(); ++i) {
// operator[]
Field f = (*col)[i];
- auto sv = vectorized::get<const doris::StringView&>(f);
+ auto sv = vectorized::get<const doris::StringContainer&>(f);
ASSERT_EQ(sv.size(), vals[i].size());
ASSERT_EQ(memcmp(sv.data(), vals[i].data(), sv.size()), 0);
// get(size_t, Field&)
Field f2;
col->get(i, f2);
- auto sv2 = vectorized::get<const doris::StringView&>(f2);
+ auto sv2 = vectorized::get<const doris::StringContainer&>(f2);
ASSERT_EQ(sv2.size(), vals[i].size());
ASSERT_EQ(memcmp(sv2.data(), vals[i].data(), sv2.size()), 0);
}
@@ -363,12 +363,12 @@ TEST_F(ColumnVarbinaryTest, InsertField) {
auto col = ColumnVarbinary::create();
// prepare inline and non-inline fields
std::string inline_v = make_bytes(2, 0x21);
- std::string big_v = make_bytes(doris::StringView::kInlineSize + 10, 0x22);
+ std::string big_v = make_bytes(doris::StringContainer::kInlineSize + 10,
0x22);
Field f_inline = Field::create_field<TYPE_VARBINARY>(
- doris::StringView(inline_v.data(), inline_v.size()));
+ doris::StringContainer(inline_v.data(), inline_v.size()));
Field f_big =
-
Field::create_field<TYPE_VARBINARY>(doris::StringView(big_v.data(),
big_v.size()));
+
Field::create_field<TYPE_VARBINARY>(doris::StringContainer(big_v.data(),
big_v.size()));
col->insert(f_inline);
col->insert(f_big);
@@ -384,8 +384,8 @@ TEST_F(ColumnVarbinaryTest, InsertField) {
TEST_F(ColumnVarbinaryTest, SerializeValueIntoArenaAndImpl) {
auto col = ColumnVarbinary::create();
- std::string small = make_bytes(3, 0x31);
// inline
- std::string big = make_bytes(doris::StringView::kInlineSize + 12, 0x32);
// non-inline
+ std::string small = make_bytes(3, 0x31);
// inline
+ std::string big = make_bytes(doris::StringContainer::kInlineSize + 12,
0x32); // non-inline
col->insert_data(small.data(), small.size());
col->insert_data(big.data(), big.size());
@@ -425,7 +425,7 @@ TEST_F(ColumnVarbinaryTest,
AllocatedBytesAndHasEnoughCapacity) {
}
// Force some non-inline values to ensure arena usage
for (int i = 0; i < 3; ++i) {
- auto big = make_bytes(doris::StringView::kInlineSize + 20 + i, 0x90 +
i);
+ auto big = make_bytes(doris::StringContainer::kInlineSize + 20 + i,
0x90 + i);
dest->insert_data(big.data(), big.size());
}
// Capture capacity & size
diff --git a/be/test/vec/common/string_view_test.cpp
b/be/test/vec/common/string_container_test.cpp
similarity index 78%
rename from be/test/vec/common/string_view_test.cpp
rename to be/test/vec/common/string_container_test.cpp
index 4bfd8c25ea6..27d1455d455 100644
--- a/be/test/vec/common/string_view_test.cpp
+++ b/be/test/vec/common/string_container_test.cpp
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-#include "vec/common/string_view.h"
+#include "vec/common/string_container.h"
#include <gtest/gtest.h>
@@ -29,7 +29,7 @@
namespace doris {
-class StringViewTest : public ::testing::Test {};
+class StringContainerTest : public ::testing::Test {};
static std::string make_bytes(size_t n, uint8_t seed = 0x30) {
std::string s;
@@ -44,68 +44,68 @@ static std::string make_bytes(size_t n, uint8_t seed =
0x30) {
return s;
}
-TEST_F(StringViewTest, EmptyAndBasics) {
- StringView sv;
+TEST_F(StringContainerTest, EmptyAndBasics) {
+ StringContainer sv;
EXPECT_TRUE(sv.empty());
EXPECT_EQ(sv.size(), 0U);
EXPECT_TRUE(sv.isInline());
- StringView a("abc");
+ StringContainer a("abc");
EXPECT_FALSE(a.empty());
EXPECT_EQ(a.size(), 3U);
EXPECT_TRUE(a.isInline());
EXPECT_EQ(std::string(a), std::string("abc"));
std::string s12(12, 'x');
- StringView b(s12);
+ StringContainer b(s12);
EXPECT_TRUE(b.isInline());
EXPECT_EQ(b.size(), 12U);
std::string s13(13, 'y');
- StringView c(s13);
+ StringContainer c(s13);
EXPECT_FALSE(c.isInline());
EXPECT_EQ(c.size(), 13U);
}
-TEST_F(StringViewTest, DataPointerInlineVsOutline) {
+TEST_F(StringContainerTest, DataPointerInlineVsOutline) {
std::string small = "hello";
- StringView si(small);
+ StringContainer si(small);
EXPECT_TRUE(si.isInline());
EXPECT_NE(si.data(), small.data()); // inline stores its own bytes
std::string big = make_bytes(16);
- StringView so(big);
+ StringContainer so(big);
EXPECT_FALSE(so.isInline());
EXPECT_EQ(so.data(), big.data()); // outline holds external pointer
}
-TEST_F(StringViewTest, EqualityAndCompare) {
- StringView a("abcd");
- StringView b("abcd");
+TEST_F(StringContainerTest, EqualityAndCompare) {
+ StringContainer a("abcd");
+ StringContainer b("abcd");
EXPECT_TRUE(a == b);
EXPECT_EQ(a.compare(b), 0);
- StringView c("abce");
+ StringContainer c("abce");
EXPECT_FALSE(a == c);
EXPECT_LT(a.compare(c), 0); // 'd' < 'e'
// different length, same prefix
- StringView d("ab");
- StringView e("abc");
+ StringContainer d("ab");
+ StringContainer e("abc");
EXPECT_LT(d.compare(e), 0);
EXPECT_GT(e.compare(d), 0);
// same first 4 bytes, differ later (exercise non-prefix compare path)
std::string s1 = std::string("abcd") + std::string("XXXX");
std::string s2 = std::string("abcd") + std::string("YYYY");
- StringView x(s1);
- StringView y(s2);
+ StringContainer x(s1);
+ StringContainer y(s2);
EXPECT_NE(x.compare(y), 0);
}
-TEST_F(StringViewTest, EmbeddedNulls) {
+TEST_F(StringContainerTest, EmbeddedNulls) {
std::string raw = std::string("ab\0cd\0ef", 8);
- StringView sv(raw);
+ StringContainer sv(raw);
EXPECT_EQ(sv.size(), 8U);
// string conversion preserves bytes
std::string s = static_cast<std::string>(sv);
@@ -113,14 +113,14 @@ TEST_F(StringViewTest, EmbeddedNulls) {
EXPECT_EQ(::memcmp(s.data(), raw.data(), 8), 0);
// equality with same content containing nulls
- StringView sv2(raw);
+ StringContainer sv2(raw);
EXPECT_TRUE(sv == sv2);
EXPECT_EQ(sv.compare(sv2), 0);
}
-TEST_F(StringViewTest, ConversionsAndIteration) {
+TEST_F(StringContainerTest, ConversionsAndIteration) {
std::string src = make_bytes(10);
- const StringView sv(src);
+ const StringContainer sv(src);
// to_string_ref
auto ref = sv.to_string_ref();
@@ -138,9 +138,9 @@ TEST_F(StringViewTest, ConversionsAndIteration) {
EXPECT_EQ(::memcmp(via_iter.data(), sv.data(), sv.size()), 0);
}
-TEST_F(StringViewTest, OstreamWrite) {
+TEST_F(StringContainerTest, OstreamWrite) {
std::string raw = std::string("12\0\0", 4);
- StringView sv(raw);
+ StringContainer sv(raw);
std::ostringstream oss;
oss << sv; // write() respects size; embedded nulls are preserved
std::string out = oss.str();
@@ -148,12 +148,12 @@ TEST_F(StringViewTest, OstreamWrite) {
EXPECT_EQ(::memcmp(out.data(), raw.data(), raw.size()), 0);
}
-TEST_F(StringViewTest, NonInlineEqualityAndCompare) {
+TEST_F(StringContainerTest, NonInlineEqualityAndCompare) {
// Create two large (> kInlineSize) equal strings
std::string base_a = make_bytes(24, 0x41); // length 24
std::string base_b = base_a; // identical
- StringView sva(base_a);
- StringView svb(base_b);
+ StringContainer sva(base_a);
+ StringContainer svb(base_b);
EXPECT_FALSE(sva.isInline());
EXPECT_FALSE(svb.isInline());
EXPECT_TRUE(sva == svb);
@@ -164,8 +164,8 @@ TEST_F(StringViewTest, NonInlineEqualityAndCompare) {
std::string diff1 = base_a;
std::string diff2 = base_a;
diff2[15] ^= 0x01; // change one byte after prefix region
- StringView svd1(diff1);
- StringView svd2(diff2);
+ StringContainer svd1(diff1);
+ StringContainer svd2(diff2);
EXPECT_NE(svd1.compare(svd2), 0);
EXPECT_NE(svd1 == svd2, true);
@@ -173,15 +173,15 @@ TEST_F(StringViewTest, NonInlineEqualityAndCompare) {
std::string p1 = base_a;
std::string p2 = base_a;
p2[0] = static_cast<char>(p2[0] + 1);
- StringView svp1(p1), svp2(p2);
+ StringContainer svp1(p1), svp2(p2);
int cmp = svp1.compare(svp2);
EXPECT_LT(cmp, 0);
EXPECT_TRUE((svp1 <=> svp2) == std::strong_ordering::less);
}
-TEST_F(StringViewTest, StrConversionInlineAndNonInline) {
+TEST_F(StringContainerTest, StrConversionInlineAndNonInline) {
std::string inl = "abcd"; // inline
- StringView svi(inl);
+ StringContainer svi(inl);
std::string out_inl = svi.str();
EXPECT_EQ(out_inl.size(), inl.size());
EXPECT_EQ(out_inl, inl);
@@ -190,19 +190,19 @@ TEST_F(StringViewTest, StrConversionInlineAndNonInline) {
std::string big = make_bytes(20, 0x50); // ensure > 12
big[5] = '\0';
big[14] = '\0';
- StringView svb(big);
+ StringContainer svb(big);
EXPECT_FALSE(svb.isInline());
std::string out_big = svb.str();
EXPECT_EQ(out_big.size(), big.size());
EXPECT_EQ(::memcmp(out_big.data(), big.data(), big.size()), 0);
}
-TEST_F(StringViewTest, ThreeWayComparisonOrdering) {
- StringView a("abcd"); // inline
- StringView b("abce"); // inline > a
+TEST_F(StringContainerTest, ThreeWayComparisonOrdering) {
+ StringContainer a("abcd"); // inline
+ StringContainer b("abce"); // inline > a
auto tmp_long = make_bytes(30); // create std::string first (avoid rvalue deleted ctor)
- StringView c(tmp_long); // non-inline
- StringView d(c); // identical non-inline
+ StringContainer c(tmp_long); // non-inline
+ StringContainer d(c); // identical non-inline
// a vs b
EXPECT_TRUE((a <=> b) == std::strong_ordering::less);
EXPECT_TRUE((b <=> a) == std::strong_ordering::greater);
@@ -220,20 +220,20 @@ TEST_F(StringViewTest, ThreeWayComparisonOrdering) {
}
}
-TEST_F(StringViewTest, DumpHex) {
+TEST_F(StringContainerTest, DumpHex) {
// Empty
- StringView empty;
+ StringContainer empty;
EXPECT_EQ(empty.dump_hex(), "X''");
// Inline with known bytes
const unsigned char bytes_inline[] = {0x00, 0x01, 0x0A, 0x1F, 0x7F};
- StringView svi(reinterpret_cast<const char*>(bytes_inline), sizeof(bytes_inline));
+ StringContainer svi(reinterpret_cast<const char*>(bytes_inline), sizeof(bytes_inline));
EXPECT_TRUE(svi.isInline());
EXPECT_EQ(svi.dump_hex(), "X'00010A1F7F'");
// Non-inline, length > 12
std::string big = make_bytes(16, 0x20); // bytes 0x20,0x21,...
- StringView svb(big);
+ StringContainer svb(big);
EXPECT_FALSE(svb.isInline());
// Build expected
std::ostringstream oss;
diff --git a/be/test/vec/data_types/data_type_varbinary_test.cpp b/be/test/vec/data_types/data_type_varbinary_test.cpp
index 1049a60b991..d74c0424e10 100644
--- a/be/test/vec/data_types/data_type_varbinary_test.cpp
+++ b/be/test/vec/data_types/data_type_varbinary_test.cpp
@@ -33,8 +33,8 @@
#include "vec/columns/column_varbinary.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_buffer.hpp"
+#include "vec/common/string_container.h"
#include "vec/common/string_ref.h"
-#include "vec/common/string_view.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/common_data_type_serder_test.h"
@@ -87,7 +87,7 @@ TEST_F(DataTypeVarbinaryTest, CreateColumnAndCheckColumn) {
TEST_F(DataTypeVarbinaryTest, GetDefaultField) {
DataTypeVarbinary dt;
Field def = dt.get_default();
- const auto& sv = get<const doris::StringView&>(def);
+ const auto& sv = get<const doris::StringContainer&>(def);
EXPECT_EQ(sv.size(), 0U);
}
@@ -175,7 +175,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldWithDataType) {
auto fwd = dt.get_field_with_data_type(*col, 0);
EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY);
- const auto& sv = get<const doris::StringView&>(fwd.field);
+ const auto& sv = get<const doris::StringContainer&>(fwd.field);
ASSERT_EQ(sv.size(), v.size());
ASSERT_EQ(memcmp(sv.data(), v.data(), sv.size()), 0);
}
@@ -188,7 +188,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldFromTExprNode) {
node.__isset.varbinary_literal = true;
Field f = dt.get_field(node);
- const auto& sv = get<const doris::StringView&>(f);
+ const auto& sv = get<const doris::StringContainer&>(f);
ASSERT_EQ(sv.size(), 5U);
ASSERT_EQ(memcmp(sv.data(), "hello", 5), 0);
}
diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h
index 7e2799c9dd5..9fdf88eb50e 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -104,7 +104,7 @@ using VARCHAR = std::string;
using CHAR = std::string;
using STRING = std::string;
-using VARBINARY = doris::StringView;
+using VARBINARY = doris::StringContainer;
using DOUBLE = double;
using FLOAT = float;
@@ -133,8 +133,8 @@ struct ut_input_type<DataTypeString> {
};
template <>
struct ut_input_type<DataTypeVarbinary> {
- using type = doris::StringView;
- inline static type default_value = doris::StringView("test_default");
+ using type = doris::StringContainer;
+ inline static type default_value = doris::StringContainer("test_default");
};
template <>
struct ut_input_type<DataTypeDate> {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]