This is an automated email from the ASF dual-hosted git repository.
zhangstar333 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 104aa2dbd51 [improve](varbinary) support varbinary type with topn
runtime filter (#58721)
104aa2dbd51 is described below
commit 104aa2dbd518b8c3f79f7ce2e9fcada271da7691
Author: zhangstar333 <[email protected]>
AuthorDate: Mon Dec 15 17:38:53 2025 +0800
[improve](varbinary) support varbinary type with topn runtime filter
(#58721)
### What problem does this PR solve?
Problem Summary:
Support the VARBINARY type with the TopN runtime filter, e.g.
`ORDER BY binary_col LIMIT n`.
Also temporarily forbid the VARBINARY type in GROUP BY keys, join keys, and
comparison predicates on the FE side.
---
be/src/runtime/primitive_type.h | 3 +-
be/src/runtime/runtime_predicate.cpp | 11 +-
be/src/vec/columns/column_varbinary.cpp | 10 +-
be/src/vec/columns/column_varbinary.h | 5 +-
be/src/vec/common/string_view.h | 7 +-
be/src/vec/core/field.cpp | 8 ++
be/src/vec/core/field.h | 96 +++++++++++++++--
be/src/vec/core/sort_block.h | 6 ++
be/src/vec/data_types/convert_field_to_type.cpp | 5 +
be/src/vec/data_types/data_type_varbinary.h | 2 +-
.../data_types/serde/data_type_varbinary_serde.cpp | 13 ++-
be/src/vec/exprs/vexpr.cpp | 6 ++
be/src/vec/exprs/vexpr.h | 7 ++
be/test/vec/columns/column_varbinary_test.cpp | 4 +-
be/test/vec/common/string_view_test.cpp | 9 +-
.../vec/data_types/data_type_varbinary_test.cpp | 10 +-
.../exec/format/parquet/parquet_reader_test.cpp | 24 ++---
.../doris/iceberg/IcebergSysTableColumnValue.java | 7 ++
.../org/apache/doris/analysis/OutFileClause.java | 2 +
.../nereids/rules/analysis/CheckAfterRewrite.java | 8 +-
.../apache/doris/nereids/types/VarBinaryType.java | 4 +
.../doris/nereids/util/TypeCoercionUtils.java | 6 ++
.../export/test_hive_export_varbinary.out | 29 +++++
.../data/external_table_p0/hive/test_hive_orc.out | 96 +++++++++++++++++
.../iceberg/test_iceberg_sys_table.out | 35 ++++++
.../export/test_hive_export_varbinary.groovy | 120 +++++++++++++++++++++
.../external_table_p0/hive/test_hive_orc.groovy | 55 ++++++++++
.../iceberg/test_iceberg_sys_table.groovy | 19 ++++
28 files changed, 562 insertions(+), 45 deletions(-)
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 4673160bc52..4686bdbab7d 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -603,7 +603,8 @@ struct PrimitiveTypeTraits<TYPE_VARBINARY> {
using ColumnItemType = doris::StringView;
using DataType = vectorized::DataTypeVarbinary;
using ColumnType = vectorized::ColumnVarbinary;
- using NearestFieldType = doris::StringView;
+ // StringView is non-owning, but StringViewField wraps it with String for
ownership
+ using NearestFieldType = vectorized::StringViewField;
static constexpr PrimitiveType NearestPrimitiveType = TYPE_VARBINARY;
static constexpr PrimitiveType AvgNearestPrimitiveType = TYPE_VARBINARY;
};
diff --git a/be/src/runtime/runtime_predicate.cpp
b/be/src/runtime/runtime_predicate.cpp
index b0ce2b7f7d4..bb14faf3e2b 100644
--- a/be/src/runtime/runtime_predicate.cpp
+++ b/be/src/runtime/runtime_predicate.cpp
@@ -157,6 +157,15 @@ StringRef RuntimePredicate::_get_string_ref(const Field&
field, const PrimitiveT
const auto& v = field.get<typename
PrimitiveTypeTraits<TYPE_IPV6>::CppType>();
return StringRef((char*)&v, sizeof(v));
}
+ case doris::PrimitiveType::TYPE_VARBINARY: {
+ // For VARBINARY type, use StringViewField to store binary data
+ const auto& v = field.get<StringViewField>();
+ auto length = v.size();
+ char* buffer = _predicate_arena.alloc(length);
+ memset(buffer, 0, length);
+ memcpy(buffer, v.data(), length);
+ return {buffer, length};
+ }
default:
break;
}
@@ -167,7 +176,7 @@ StringRef RuntimePredicate::_get_string_ref(const Field&
field, const PrimitiveT
bool RuntimePredicate::_init(PrimitiveType type) {
return is_int_or_bool(type) || is_decimal(type) || is_string_type(type) ||
is_date_type(type) ||
- is_time_type(type) || is_ip(type);
+ is_time_type(type) || is_ip(type) || is_varbinary(type);
}
Status RuntimePredicate::update(const Field& value) {
diff --git a/be/src/vec/columns/column_varbinary.cpp
b/be/src/vec/columns/column_varbinary.cpp
index 639324f5578..e352513804c 100644
--- a/be/src/vec/columns/column_varbinary.cpp
+++ b/be/src/vec/columns/column_varbinary.cpp
@@ -28,6 +28,7 @@
#include "vec/columns/columns_common.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
+#include "vec/core/sort_block.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
@@ -144,7 +145,7 @@ MutableColumnPtr ColumnVarbinary::permute(const
IColumn::Permutation& perm, size
res_data[i] = val;
continue;
}
- const auto* dst = const_cast<Arena&>(_arena).insert(val.data(),
val.size());
+ const auto* dst = res->_arena.insert(val.data(), val.size());
res_data[i] = doris::StringView(dst, val.size());
}
@@ -222,5 +223,12 @@ void ColumnVarbinary::insert_many_strings_overflow(const
StringRef* strings, siz
insert_many_strings(strings, num);
}
+void ColumnVarbinary::sort_column(const ColumnSorter* sorter, EqualFlags&
flags,
+ IColumn::Permutation& perms, EqualRange&
range,
+ bool last_column) const {
+ sorter->sort_column(assert_cast<const ColumnVarbinary&>(*this), flags,
perms, range,
+ last_column);
+}
+
#include "common/compile_check_end.h"
} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_varbinary.h
b/be/src/vec/columns/column_varbinary.h
index 41b2604eeef..913f52ac84f 100644
--- a/be/src/vec/columns/column_varbinary.h
+++ b/be/src/vec/columns/column_varbinary.h
@@ -77,7 +77,7 @@ public:
char* alloc(size_t length) { return _arena.alloc(length); }
void insert(const Field& x) override {
- auto value = vectorized::get<const doris::StringView&>(x);
+ const auto& value = vectorized::get<const StringViewField&>(x);
insert_data(value.data(), value.size());
}
@@ -185,6 +185,9 @@ public:
void insert_many_strings_overflow(const StringRef* strings, size_t num,
size_t max_length) override;
+ void sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms,
+ EqualRange& range, bool last_column) const override;
+
private:
Container _data;
Arena _arena;
diff --git a/be/src/vec/common/string_view.h b/be/src/vec/common/string_view.h
index 5cd560aad4a..218104ff750 100644
--- a/be/src/vec/common/string_view.h
+++ b/be/src/vec/common/string_view.h
@@ -126,16 +126,15 @@ public:
std::string dump_hex() const {
static const char* kHex = "0123456789ABCDEF";
std::string out;
- out.reserve(size_ * 2 + 3);
- out.push_back('X');
- out.push_back('\'');
+ out.reserve(size_ * 2 + 2);
+ out.push_back('0');
+ out.push_back('x');
const char* ptr = data();
for (uint32_t i = 0; i < size_; ++i) {
auto c = static_cast<unsigned char>(ptr[i]);
out.push_back(kHex[c >> 4]);
out.push_back(kHex[c & 0x0F]);
}
- out.push_back('\'');
return out;
}
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
index a0cfa4ead7d..4567fa20905 100644
--- a/be/src/vec/core/field.cpp
+++ b/be/src/vec/core/field.cpp
@@ -740,6 +740,10 @@ std::string_view Field::as_string_view() const {
const auto& s = get<String>();
return {s.data(), s.size()};
}
+ if (type == PrimitiveType::TYPE_VARBINARY) {
+ const auto& svf = get<StringViewField>();
+ return {svf.data(), svf.size()};
+ }
// MATCH_PRIMITIVE_TYPE(INVALID_TYPE);
// MATCH_PRIMITIVE_TYPE(TYPE_NULL);
MATCH_PRIMITIVE_TYPE(TYPE_BOOLEAN);
@@ -806,6 +810,10 @@ std::string Field::to_string() const {
const auto& s = get<String>();
return {s.data(), s.size()};
}
+ if (type == PrimitiveType::TYPE_VARBINARY) {
+ const auto& svf = get<StringViewField>();
+ return {svf.data(), svf.size()};
+ }
MATCH_DECIMAL_TYPE(TYPE_DECIMAL32);
MATCH_DECIMAL_TYPE(TYPE_DECIMAL64);
MATCH_DECIMAL_TYPE(TYPE_DECIMALV2);
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 0f5de96c36f..d0045cd73d8 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -246,6 +246,83 @@ private:
UInt32 scale;
};
+// StringViewField wraps a StringView and provides deep copy semantics.
+// Since StringView is a non-owning view (only contains pointer and length),
+// we need to store the actual data in a String to ensure the Field owns the
data.
+// This prevents dangling pointer issues when Field objects are copied or
moved.
+class StringViewField {
+public:
+ StringViewField() = default;
+ ~StringViewField() = default;
+
+ // Construct from raw data - performs deep copy
+ StringViewField(const char* data, size_t len) : _storage(data, len) {}
+
+ // Construct from StringView - performs deep copy
+ StringViewField(const StringView& sv) : _storage(sv.data(), sv.size()) {}
+
+ // Copy constructor - deep copy
+ StringViewField(const StringViewField& x) = default;
+
+ // Move constructor
+ StringViewField(StringViewField&& x) noexcept = default;
+
+ // Copy assignment - deep copy
+ StringViewField& operator=(const StringViewField& x) = default;
+
+ // Move assignment
+ StringViewField& operator=(StringViewField&& x) noexcept = default;
+
+ // Access methods
+ const char* data() const { return _storage.data(); }
+ size_t size() const { return _storage.size(); }
+ const String& get_string() const { return _storage; }
+
+ // Convert to StringView for compatibility
+ StringView to_string_view() const { return {data(),
static_cast<uint32_t>(size())}; }
+
+ // Comparison operators - using binary comparison (memcmp) for VARBINARY
semantics
+ bool operator<(const StringViewField& r) const {
+ int cmp = memcmp(_storage.data(), r._storage.data(),
+ std::min(_storage.size(), r._storage.size()));
+ return cmp < 0 || (cmp == 0 && _storage.size() < r._storage.size());
+ }
+ bool operator<=(const StringViewField& r) const { return !(r < *this); }
+ bool operator==(const StringViewField& r) const {
+ return _storage.size() == r._storage.size() &&
+ memcmp(_storage.data(), r._storage.data(), _storage.size()) ==
0;
+ }
+ bool operator>(const StringViewField& r) const { return r < *this; }
+ bool operator>=(const StringViewField& r) const { return !(*this < r); }
+ bool operator!=(const StringViewField& r) const { return !(*this == r); }
+
+ std::strong_ordering operator<=>(const StringViewField& r) const {
+ size_t min_size = std::min(_storage.size(), r._storage.size());
+ int cmp = memcmp(_storage.data(), r._storage.data(), min_size);
+ if (cmp < 0) {
+ return std::strong_ordering::less;
+ }
+ if (cmp > 0) {
+ return std::strong_ordering::greater;
+ }
+ // Prefixes are equal, compare lengths
+ return _storage.size() <=> r._storage.size();
+ }
+
+ // Arithmetic operators (not commonly used but required by Field)
+ const StringViewField& operator+=(const StringViewField& r) {
+ _storage += r._storage;
+ return *this;
+ }
+
+ const StringViewField& operator-=(const StringViewField& r) {
+ throw Exception(Status::FatalError("Not support minus operation on
StringViewField"));
+ }
+
+private:
+ String _storage; // Use String for deep copy and ownership
+};
+
/** 32 is enough. Round number is used for alignment and for better arithmetic
inside std::vector.
* NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is
40 bytes.
*/
@@ -390,7 +467,7 @@ public:
case PrimitiveType::TYPE_VARCHAR:
return get<String>() <=> rhs.get<String>();
case PrimitiveType::TYPE_VARBINARY:
- return get<doris::StringView>() <=> rhs.get<doris::StringView>();
+ return get<StringViewField>() <=> rhs.get<StringViewField>();
case PrimitiveType::TYPE_DECIMAL32:
return get<Decimal32>() <=> rhs.get<Decimal32>();
case PrimitiveType::TYPE_DECIMAL64:
@@ -439,7 +516,7 @@ public:
f(field.template get<String>());
return;
case PrimitiveType::TYPE_VARBINARY:
- f(field.template get<doris::StringView>());
+ f(field.template get<StringViewField>());
return;
case PrimitiveType::TYPE_JSONB:
f(field.template get<JsonbField>());
@@ -490,11 +567,11 @@ public:
std::string to_string() const;
private:
- std::aligned_union_t<
- DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null, UInt64,
UInt128, Int64, Int128, IPv6,
- Float64, String, JsonbField, Array, Tuple, Map, VariantMap,
DecimalField<Decimal32>,
- DecimalField<Decimal64>, DecimalField<Decimal128V2>,
DecimalField<Decimal128V3>,
- DecimalField<Decimal256>, BitmapValue, HyperLogLog, QuantileState,
doris::StringView>
+ std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(PrimitiveType), Null,
UInt64, UInt128, Int64,
+ Int128, IPv6, Float64, String, JsonbField,
StringViewField, Array, Tuple,
+ Map, VariantMap, DecimalField<Decimal32>,
DecimalField<Decimal64>,
+ DecimalField<Decimal128V2>,
DecimalField<Decimal128V3>,
+ DecimalField<Decimal256>, BitmapValue, HyperLogLog,
QuantileState>
storage;
PrimitiveType type;
@@ -648,6 +725,11 @@ struct NearestFieldTypeImpl<PackedInt128> {
using Type = Int128;
};
+template <>
+struct NearestFieldTypeImpl<doris::StringView> {
+ using Type = StringViewField;
+};
+
template <typename T>
decltype(auto) cast_to_nearest_field_type(T&& x) {
using U = NearestFieldType<std::decay_t<T>>;
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
index bc25129b4a2..b65f5b715df 100644
--- a/be/src/vec/core/sort_block.h
+++ b/be/src/vec/core/sort_block.h
@@ -38,6 +38,7 @@
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_struct.h"
+#include "vec/columns/column_varbinary.h"
#include "vec/common/memcmp_small.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
@@ -249,6 +250,10 @@ public:
EqualRange& range, bool last_column) const {
_sort_by_default(column, flags, perms, range, last_column);
}
+ void sort_column(const ColumnVarbinary& column, EqualFlags& flags,
IColumn::Permutation& perms,
+ EqualRange& range, bool last_column) const {
+ _sort_by_default(column, flags, perms, range, last_column);
+ }
void sort_column(const ColumnString64& column, EqualFlags& flags,
IColumn::Permutation& perms,
EqualRange& range, bool last_column) const {
@@ -378,6 +383,7 @@ private:
if constexpr (!std::is_same_v<ColumnType, ColumnString> &&
!std::is_same_v<ColumnType, ColumnString64> &&
!std::is_same_v<ColumnType, ColumnArray> &&
+ !std::is_same_v<ColumnType, ColumnVarbinary> &&
!std::is_same_v<ColumnType, ColumnMap> &&
!std::is_same_v<ColumnType, ColumnStruct>) {
auto value_a = column.get_data()[a];
diff --git a/be/src/vec/data_types/convert_field_to_type.cpp
b/be/src/vec/data_types/convert_field_to_type.cpp
index bdd3a7922ba..28947232d5b 100644
--- a/be/src/vec/data_types/convert_field_to_type.cpp
+++ b/be/src/vec/data_types/convert_field_to_type.cpp
@@ -93,6 +93,11 @@ public:
writer->writeString(x);
writer->writeEndString();
}
+ void operator()(const StringViewField& x, JsonbWriter* writer) const {
+ writer->writeStartString();
+ writer->writeString(x.data(), x.size());
+ writer->writeEndString();
+ }
void operator()(const JsonbField& x, JsonbWriter* writer) const {
const JsonbDocument* doc;
THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(x.get_value(),
x.get_size(), &doc));
diff --git a/be/src/vec/data_types/data_type_varbinary.h
b/be/src/vec/data_types/data_type_varbinary.h
index fa13d19287d..f84884d8e1b 100644
--- a/be/src/vec/data_types/data_type_varbinary.h
+++ b/be/src/vec/data_types/data_type_varbinary.h
@@ -40,7 +40,7 @@ class IColumn;
class DataTypeVarbinary : public IDataType {
public:
using ColumnType = ColumnVarbinary;
- using FieldType = doris::StringView;
+ using FieldType = StringViewField;
static constexpr PrimitiveType PType = TYPE_VARBINARY;
diff --git a/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
b/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
index 12e8a7c1924..b60ab825332 100644
--- a/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_varbinary_serde.cpp
@@ -41,7 +41,7 @@ Status
DataTypeVarbinarySerDe::write_column_to_mysql_binary(const IColumn& colum
int64_t row_idx,
bool col_const,
const
FormatOptions& options) const {
auto col_index = index_check_const(row_idx, col_const);
- auto data = assert_cast<const
ColumnVarbinary&>(column).get_data()[col_index];
+ const auto& data = assert_cast<const
ColumnVarbinary&>(column).get_data()[col_index];
if (0 != result.push_string(data.data(), data.size())) {
return Status::InternalError("pack mysql buffer failed.");
@@ -62,7 +62,7 @@ Status DataTypeVarbinarySerDe::write_column_to_arrow(const
IColumn& column, cons
builder.type()->name()));
continue;
}
- auto string_view = varbinary_column_data[i];
+ const auto& string_view = varbinary_column_data[i];
RETURN_IF_ERROR(checkArrowStatus(builder.Append(string_view.data(),
string_view.size()),
column.get_name(),
builder.type()->name()));
}
@@ -118,8 +118,13 @@ Status
DataTypeVarbinarySerDe::deserialize_one_cell_from_json(IColumn& column, S
void DataTypeVarbinarySerDe::to_string(const IColumn& column, size_t row_num,
BufferWritable& bw,
const FormatOptions& options) const {
- const auto value = assert_cast<const
ColumnVarbinary&>(column).get_data_at(row_num);
- bw.write(value.data, value.size);
+ const auto& value = assert_cast<const
ColumnVarbinary&>(column).get_data()[row_num];
+ if (_nesting_level >= 2) { // in complex type, need to dump as hex string
by hand
+ const auto& hex_str = value.dump_hex();
+ bw.write(hex_str.data(), hex_str.size());
+ } else { // mysql protocol will be handle as hex binary data directly
+ bw.write(value.data(), value.size());
+ }
}
} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
index a9c771b0c7e..e6b0ce0ea59 100644
--- a/be/src/vec/exprs/vexpr.cpp
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -349,6 +349,12 @@ TExprNode create_texpr_node_from(const vectorized::Field&
field, const Primitive
THROW_IF_ERROR(create_texpr_literal_node<TYPE_TIMEV2>(&storage,
&node));
break;
}
+ case TYPE_VARBINARY: {
+ const auto& svf = field.get<vectorized::StringViewField>();
+ const std::string& storage = svf.get_string();
+ THROW_IF_ERROR(create_texpr_literal_node<TYPE_VARBINARY>(&storage,
&node));
+ break;
+ }
default:
throw Exception(ErrorCode::INTERNAL_ERROR, "runtime filter meet
invalid type {}",
int(type));
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
index 0cc944c85f5..10e230984b8 100644
--- a/be/src/vec/exprs/vexpr.h
+++ b/be/src/vec/exprs/vexpr.h
@@ -597,6 +597,13 @@ Status create_texpr_literal_node(const void* data,
TExprNode* node, int precisio
(*node).__set_timev2_literal(timev2_literal);
(*node).__set_node_type(TExprNodeType::TIMEV2_LITERAL);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIMEV2,
precision, scale));
+ } else if constexpr (T == TYPE_VARBINARY) {
+ const auto* origin_value = reinterpret_cast<const std::string*>(data);
+ (*node).__set_node_type(TExprNodeType::VARBINARY_LITERAL);
+ TVarBinaryLiteral varbinary_literal;
+ varbinary_literal.__set_value(*origin_value);
+ (*node).__set_varbinary_literal(varbinary_literal);
+ (*node).__set_type(create_type_desc(PrimitiveType::TYPE_VARBINARY));
} else {
return Status::InvalidArgument("Invalid argument type!");
}
diff --git a/be/test/vec/columns/column_varbinary_test.cpp
b/be/test/vec/columns/column_varbinary_test.cpp
index 55b01b43f32..d903d3abeff 100644
--- a/be/test/vec/columns/column_varbinary_test.cpp
+++ b/be/test/vec/columns/column_varbinary_test.cpp
@@ -348,13 +348,13 @@ TEST_F(ColumnVarbinaryTest, FieldAccessOperatorAndGet) {
for (size_t i = 0; i < vals.size(); ++i) {
// operator[]
Field f = (*col)[i];
- auto sv = vectorized::get<const doris::StringView&>(f);
+ const auto& sv = vectorized::get<const StringViewField&>(f);
ASSERT_EQ(sv.size(), vals[i].size());
ASSERT_EQ(memcmp(sv.data(), vals[i].data(), sv.size()), 0);
// get(size_t, Field&)
Field f2;
col->get(i, f2);
- auto sv2 = vectorized::get<const doris::StringView&>(f2);
+ const auto& sv2 = vectorized::get<const StringViewField&>(f2);
ASSERT_EQ(sv2.size(), vals[i].size());
ASSERT_EQ(memcmp(sv2.data(), vals[i].data(), sv2.size()), 0);
}
diff --git a/be/test/vec/common/string_view_test.cpp
b/be/test/vec/common/string_view_test.cpp
index 551c166ffb6..63bf3edb845 100644
--- a/be/test/vec/common/string_view_test.cpp
+++ b/be/test/vec/common/string_view_test.cpp
@@ -223,13 +223,13 @@ TEST_F(StringViewTest, ThreeWayComparisonOrdering) {
TEST_F(StringViewTest, DumpHex) {
// Empty
StringView empty;
- EXPECT_EQ(empty.dump_hex(), "X''");
+ EXPECT_EQ(empty.dump_hex(), "0x");
// Inline with known bytes
const unsigned char bytes_inline[] = {0x00, 0x01, 0x0A, 0x1F, 0x7F};
StringView svi(reinterpret_cast<const char*>(bytes_inline),
sizeof(bytes_inline));
EXPECT_TRUE(svi.isInline());
- EXPECT_EQ(svi.dump_hex(), "X'00010A1F7F'");
+ EXPECT_EQ(svi.dump_hex(), "0x00010A1F7F");
// Non-inline, length > 12
std::string big = make_bytes(16, 0x20); // bytes 0x20,0x21,...
@@ -237,12 +237,11 @@ TEST_F(StringViewTest, DumpHex) {
EXPECT_FALSE(svb.isInline());
// Build expected
std::ostringstream oss;
- oss << "X'";
+ oss << "0x";
for (unsigned char c : big) {
static const char* kHex = "0123456789ABCDEF";
oss << kHex[c >> 4] << kHex[c & 0x0F];
}
- oss << "'";
EXPECT_EQ(svb.dump_hex(), oss.str());
}
@@ -276,7 +275,7 @@ TEST_F(StringViewTest, StringViewAndUnsignedCtorAndHighHex)
{
StringView v_unsigned(reinterpret_cast<unsigned char*>(bytes.data()),
static_cast<uint32_t>(bytes.size()));
EXPECT_TRUE(v_unsigned.isInline());
- EXPECT_EQ(v_unsigned.dump_hex(), "X'80FF007F'");
+ EXPECT_EQ(v_unsigned.dump_hex(), "0x80FF007F");
}
// Construct from nullptr with zero length should be a valid empty inline view
diff --git a/be/test/vec/data_types/data_type_varbinary_test.cpp
b/be/test/vec/data_types/data_type_varbinary_test.cpp
index 4783bc2f7e9..02f4c51848c 100644
--- a/be/test/vec/data_types/data_type_varbinary_test.cpp
+++ b/be/test/vec/data_types/data_type_varbinary_test.cpp
@@ -89,7 +89,7 @@ TEST_F(DataTypeVarbinaryTest, CreateColumnAndCheckColumn) {
TEST_F(DataTypeVarbinaryTest, GetDefaultField) {
DataTypeVarbinary dt;
Field def = dt.get_default();
- const auto& sv = get<const doris::StringView&>(def);
+ const auto& sv = get<const StringViewField&>(def);
EXPECT_EQ(sv.size(), 0U);
}
@@ -178,7 +178,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldWithDataType) {
auto fwd = dt.get_field_with_data_type(*col, 0);
EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY);
- const auto& sv = get<const doris::StringView&>(fwd.field);
+ const auto& sv = get<const StringViewField&>(fwd.field);
ASSERT_EQ(sv.size(), v.size());
ASSERT_EQ(memcmp(sv.data(), v.data(), sv.size()), 0);
}
@@ -191,7 +191,7 @@ TEST_F(DataTypeVarbinaryTest, GetFieldFromTExprNode) {
node.__isset.varbinary_literal = true;
Field f = dt.get_field(node);
- const auto& sv = get<const doris::StringView&>(f);
+ const auto& sv = get<const StringViewField&>(f);
ASSERT_EQ(sv.size(), 5U);
ASSERT_EQ(memcmp(sv.data(), "hello", 5), 0);
}
@@ -278,7 +278,7 @@ TEST_F(DataTypeVarbinaryTest,
GetFieldFromTExprNodeWithEmbeddedNull) {
node.__isset.varbinary_literal = true;
Field f = dt.get_field(node);
- const auto& sv = get<const doris::StringView&>(f);
+ const auto& sv = get<const StringViewField&>(f);
ASSERT_EQ(sv.size(), raw.size());
ASSERT_EQ(memcmp(sv.data(), raw.data(), sv.size()), 0);
}
@@ -301,7 +301,7 @@ TEST_F(DataTypeVarbinaryTest,
GetFieldWithDataTypeNonInline) {
auto fwd = dt.get_field_with_data_type(*col, 0);
EXPECT_EQ(fwd.base_scalar_type_id, PrimitiveType::TYPE_VARBINARY);
- const auto& sv = get<const doris::StringView&>(fwd.field);
+ const auto& sv = get<const StringViewField&>(fwd.field);
ASSERT_EQ(sv.size(), big.size());
ASSERT_EQ(memcmp(sv.data(), big.data(), sv.size()), 0);
}
diff --git a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
index 9a1a0f676ea..09d056431f5 100644
--- a/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
+++ b/be/test/vec/exec/format/parquet/parquet_reader_test.cpp
@@ -243,9 +243,9 @@ TEST_F(ParquetReaderTest, uuid_varbinary) {
auto varbinary_column =
assert_cast<const
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
auto& data = varbinary_column->get_data();
- EXPECT_EQ(data[0].dump_hex(), "X'550E8400E29B41D4A716446655440000'");
- EXPECT_EQ(data[1].dump_hex(), "X'123E4567E89B12D3A456426614174000'");
- EXPECT_EQ(data[2].dump_hex(), "X'00000000000000000000000000000000'");
+ EXPECT_EQ(data[0].dump_hex(), "0x550E8400E29B41D4A716446655440000");
+ EXPECT_EQ(data[1].dump_hex(), "0x123E4567E89B12D3A456426614174000");
+ EXPECT_EQ(data[2].dump_hex(), "0x00000000000000000000000000000000");
}
TEST_F(ParquetReaderTest, varbinary_varbinary) {
@@ -316,9 +316,9 @@ TEST_F(ParquetReaderTest, varbinary_varbinary) {
auto varbinary_column =
assert_cast<const
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
auto& data = varbinary_column->get_data();
- EXPECT_EQ(data[0].dump_hex(), "X'0123456789ABCDEF'");
- EXPECT_EQ(data[1].dump_hex(), "X'FEDCBA9876543210'");
- EXPECT_EQ(data[2].dump_hex(), "X'00'");
+ EXPECT_EQ(data[0].dump_hex(), "0x0123456789ABCDEF");
+ EXPECT_EQ(data[1].dump_hex(), "0xFEDCBA9876543210");
+ EXPECT_EQ(data[2].dump_hex(), "0x00");
}
TEST_F(ParquetReaderTest, varbinary_string) {
@@ -391,9 +391,9 @@ TEST_F(ParquetReaderTest, varbinary_string) {
auto varbinary_column =
assert_cast<const
ColumnVarbinary*>(nullable_column->get_nested_column_ptr().get());
auto& data = varbinary_column->get_data();
- EXPECT_EQ(data[0].dump_hex(), "X'0123456789ABCDEF'");
- EXPECT_EQ(data[1].dump_hex(), "X'FEDCBA9876543210'");
- EXPECT_EQ(data[2].dump_hex(), "X'00'");
+ EXPECT_EQ(data[0].dump_hex(), "0x0123456789ABCDEF");
+ EXPECT_EQ(data[1].dump_hex(), "0xFEDCBA9876543210");
+ EXPECT_EQ(data[2].dump_hex(), "0x00");
}
TEST_F(ParquetReaderTest, varbinary_string2) {
@@ -465,9 +465,9 @@ TEST_F(ParquetReaderTest, varbinary_string2) {
auto nullable_column = assert_cast<const ColumnNullable*>(col.get());
auto string_column =
assert_cast<const
ColumnString*>(nullable_column->get_nested_column_ptr().get());
- EXPECT_EQ(StringView(string_column->get_data_at(0)).dump_hex(),
"X'0123456789ABCDEF'");
- EXPECT_EQ(StringView(string_column->get_data_at(1)).dump_hex(),
"X'FEDCBA9876543210'");
- EXPECT_EQ(StringView(string_column->get_data_at(2)).dump_hex(), "X'00'");
+ EXPECT_EQ(StringView(string_column->get_data_at(0)).dump_hex(),
"0x0123456789ABCDEF");
+ EXPECT_EQ(StringView(string_column->get_data_at(1)).dump_hex(),
"0xFEDCBA9876543210");
+ EXPECT_EQ(StringView(string_column->get_data_at(2)).dump_hex(), "0x00");
}
} // namespace vectorized
diff --git
a/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
b/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
index b1caff8ae8f..70814f27f75 100644
---
a/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
+++
b/fe/be-java-extensions/iceberg-metadata-scanner/src/main/java/org/apache/doris/iceberg/IcebergSysTableColumnValue.java
@@ -142,6 +142,13 @@ public class IcebergSysTableColumnValue implements
ColumnValue {
@Override
public byte[] getBytes() {
+ //
https://github.com/apache/iceberg/blob/8626ef5137024c1a69daaff97a832af6b0ae37ea/api/src/main/java/org/apache/iceberg/types/Type.java#L45C5-L45C30
+ if (fieldData instanceof ByteBuffer) {
+ ByteBuffer buffer = (ByteBuffer) fieldData;
+ byte[] bytes = new byte[buffer.remaining()];
+ buffer.get(bytes);
+ return bytes;
+ }
return (byte[]) fieldData;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index 78bc458b76f..2d3cd457130 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -255,6 +255,7 @@ public class OutFileClause {
case HLL:
case BITMAP:
case QUANTILE_STATE:
+ case VARBINARY:
orcType = "binary";
break;
case DATEV2:
@@ -412,6 +413,7 @@ public class OutFileClause {
case HLL:
case BITMAP:
case QUANTILE_STATE:
+ case VARBINARY:
checkOrcType(schema.second, "binary", true,
resultType.getPrimitiveType().toString());
break;
case STRUCT:
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
index 3bfcedea99b..0db0762af68 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java
@@ -155,7 +155,7 @@ public class CheckAfterRewrite extends
OneAnalysisRuleFactory {
if (plan instanceof LogicalAggregate) {
LogicalAggregate<?> agg = (LogicalAggregate<?>) plan;
for (Expression groupBy : agg.getGroupByExpressions()) {
- if (groupBy.getDataType().isObjectOrVariantType()) {
+ if (groupBy.getDataType().isObjectOrVariantType() ||
groupBy.getDataType().isVarBinaryType()) {
throw new AnalysisException(Type.OnlyMetricTypeErrorMsg);
}
}
@@ -193,11 +193,17 @@ public class CheckAfterRewrite extends
OneAnalysisRuleFactory {
for (Expression conjunct : join.getHashJoinConjuncts()) {
if (conjunct.anyMatch(e -> ((Expression)
e).getDataType().isVariantType())) {
throw new AnalysisException("variant type could not in
join equal conditions: " + conjunct.toSql());
+ } else if (conjunct.anyMatch(e -> ((Expression)
e).getDataType().isVarBinaryType())) {
+ throw new AnalysisException(
+ "varbinary type could not in join equal
conditions: " + conjunct.toSql());
}
}
for (Expression conjunct : join.getMarkJoinConjuncts()) {
if (conjunct.anyMatch(e -> ((Expression)
e).getDataType().isVariantType())) {
throw new AnalysisException("variant type could not in
join equal conditions: " + conjunct.toSql());
+ } else if (conjunct.anyMatch(e -> ((Expression)
e).getDataType().isVarBinaryType())) {
+ throw new AnalysisException(
+ "varbinary type could not in join equal
conditions: " + conjunct.toSql());
}
}
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
index de5dfcbdfe2..a06b7c20053 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/VarBinaryType.java
@@ -21,6 +21,8 @@ import org.apache.doris.catalog.ScalarType;
import org.apache.doris.catalog.Type;
import org.apache.doris.nereids.types.coercion.PrimitiveType;
+import com.google.common.base.Preconditions;
+
import java.util.Objects;
/**
@@ -40,6 +42,8 @@ public class VarBinaryType extends PrimitiveType {
}
public VarBinaryType(int len) {
+ Preconditions.checkArgument(0 <= len && len <= MAX_VARBINARY_LENGTH,
+ "VarBinary length must be between 0 and " +
MAX_VARBINARY_LENGTH + ", but got: " + len);
this.len = len;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index e02a2925674..0e1689caf72 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -1321,6 +1321,12 @@ public class TypeCoercionUtils {
Expression left = comparisonPredicate.left();
Expression right = comparisonPredicate.right();
+ // TODO: remove this restriction after supporting varbinary comparison
in BE
+ if (left.getDataType().isVarBinaryType() ||
right.getDataType().isVarBinaryType()) {
+ throw new AnalysisException("data type varbinary "
+ + " could not used in ComparisonPredicate now " +
comparisonPredicate.toSql());
+ }
+
// same type
if (left.getDataType().equals(right.getDataType())) {
if (!supportCompare(left.getDataType(), false)) {
diff --git
a/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out
b/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out
new file mode 100644
index 00000000000..f50f72bd68a
--- /dev/null
+++
b/regression-test/data/external_table_p0/export/test_hive_export_varbinary.out
@@ -0,0 +1,29 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !select_tvf0 --
+1 0x550E8400E29B41D4A716446655440000 0x0123456789ABCDEF
+2 0x123E4567E89B12D3A456426614174000 0xFEDCBA9876543210
+3 0x00000000000000000000000000000000 0x00
+4 \N \N
+5 0xABCDEF1234567890 0xFFFF
+
+-- !select_tvf1 --
+1 0x550E8400E29B41D4A716446655440000 0x0123456789ABCDEF
+2 0x123E4567E89B12D3A456426614174000 0xFEDCBA9876543210
+3 0x00000000000000000000000000000000 0x00
+4 \N \N
+5 0xABCDEF1234567890 0xFFFF
+
+-- !select_tvf2 --
+1 0x550E8400E29B41D4A716446655440000 0x0123456789ABCDEF
+2 0x123E4567E89B12D3A456426614174000 0xFEDCBA9876543210
+3 0x00000000000000000000000000000000 0x00
+4 \N \N
+5 0xABCDEF1234567890 0xFFFF
+
+-- !select_tvf3 --
+1 0x550E8400E29B41D4A716446655440000 0x0123456789ABCDEF
+2 0x123E4567E89B12D3A456426614174000 0xFEDCBA9876543210
+3 0x00000000000000000000000000000000 0x00
+4 \N \N
+5 0xABCDEF1234567890 0xFFFF
+
diff --git a/regression-test/data/external_table_p0/hive/test_hive_orc.out
b/regression-test/data/external_table_p0/hive/test_hive_orc.out
index 42f49602d9a..aa105acc1cd 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_orc.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_orc.out
@@ -1005,6 +1005,54 @@ tablets tinyint_col 179 182 182 187
183 181 177 183 177 187 183 202 202 186
528
4 71 986333570 88457348565826264 true 3.851428E7
5.177242499015833e+17 Football stumble result taste pleased midst. Mirror
loyal divide. Ultimately injury chip lawyer. Leadership teacher belong. \N
2022-08-26T19:19:31 140717.2626 phones smallint_col 2022-08-22
[] ["PWCCGPfT"] phones smallint_col
3 21 986131998 683875991736989008 \N 6.0774349E8
\N Weird period none. Assertion coincide college. Subscriber fridge craft.
Poisonous donation ordinary. Explode village debt. split terrify
2022-08-27T01:05:20 390407.1015 tablets tinyint_col 2015-11-04
[6.630172173849485e+17] [null, "lSQFYzUG", "vMVMwfZzpl", "QRFiYUUefBc",
"VdtTHy", "YrPtPPzynqXCCzm", "LfIgQvGimBBzlgn"] tablets tinyint_col
+-- !sql_topn_binary_col1 --
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+
+-- !sql_topn_binary_col2 --
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+
+-- !sql_topn_binary_col3 --
+0x79656C6C6F772070756E6368 yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
yesterday settlement unconscious biology
+0x796574207265696E666F726365 yet reinforce
+0x7965742072656C6178 yet relax
+0x796F756E67206372656174697665207072657365727665 young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379 yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C yours session council
+0x796F75746820626563617573652065787065646974696F6E youth because expedition
+0x7A65726F207765617220736561736F6E zero wear season
+
+-- !sql_topn_binary_col4 --
+0x79656C6C6F772070756E6368 yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
yesterday settlement unconscious biology
+0x796574207265696E666F726365 yet reinforce
+0x7965742072656C6178 yet relax
+0x796F756E67206372656174697665207072657365727665 young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379 yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C yours session council
+0x796F75746820626563617573652065787065646974696F6E youth because expedition
+0x7A65726F207765617220736561736F6E zero wear season
+
-- !select_top50 --
4 55 999742610 400899305488827731 false 6.5976813E8
7.87233046169374e+17 \N base tennis pit vertical friday
2022-08-19T07:29:58 \N tablets smallint_col 2019-02-07
[7.53124931825377e+17] ["NbSSBtwzpxNSkkwga"] tablets smallint_col
2 49 999613702 105493714032727452 \N 6.3322381E8
9.864232441024018e+17 Unveil bright recruit participate. Suspect impression
camera mathematical revelation. Fault live2 elbow debt west hydrogen current.
how literary 2022-09-03T17:20:21 481707.1065 tablets boolean_col
2020-01-12 [] ["HoMrAnn", "wteEFvIwoZsVpVQdscMb", null, "zcGFmv",
"kGEBBckbMtX", "hrEtCGFdPWZK"] tablets boolean_col
@@ -2011,3 +2059,51 @@ tablets tinyint_col 179 182 182 187
183 181 177 183 177 187 183 202 202 186
528
4 71 986333570 88457348565826264 true 3.851428E7
5.177242499015833e+17 Football stumble result taste pleased midst. Mirror
loyal divide. Ultimately injury chip lawyer. Leadership teacher belong. \N
2022-08-26T19:19:31 140717.2626 phones smallint_col 2022-08-22
[] ["PWCCGPfT"] phones smallint_col
3 21 986131998 683875991736989008 \N 6.0774349E8
\N Weird period none. Assertion coincide college. Subscriber fridge craft.
Poisonous donation ordinary. Explode village debt. split terrify
2022-08-27T01:05:20 390407.1015 tablets tinyint_col 2015-11-04
[6.630172173849485e+17] [null, "lSQFYzUG", "vMVMwfZzpl", "QRFiYUUefBc",
"VdtTHy", "YrPtPPzynqXCCzm", "LfIgQvGimBBzlgn"] tablets tinyint_col
+-- !sql_topn_binary_col1 --
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+
+-- !sql_topn_binary_col2 --
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+\N \N
+
+-- !sql_topn_binary_col3 --
+0x79656C6C6F772070756E6368 yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
yesterday settlement unconscious biology
+0x796574207265696E666F726365 yet reinforce
+0x7965742072656C6178 yet relax
+0x796F756E67206372656174697665207072657365727665 young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379 yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C yours session council
+0x796F75746820626563617573652065787065646974696F6E youth because expedition
+0x7A65726F207765617220736561736F6E zero wear season
+
+-- !sql_topn_binary_col4 --
+0x79656C6C6F772070756E6368 yellow punch
+0x79657374657264617920736574746C656D656E7420756E636F6E7363696F75732062696F6C6F6779
yesterday settlement unconscious biology
+0x796574207265696E666F726365 yet reinforce
+0x7965742072656C6178 yet relax
+0x796F756E67206372656174697665207072657365727665 young creative preserve
+0x796F757220617070726F7072696174656C7920736574746C657220646F6D696E616E6365
your appropriately settler dominance
+0x796F75727320696E766164652070726976616379 yours invade privacy
+0x796F7572732073657373696F6E20636F756E63696C yours session council
+0x796F75746820626563617573652065787065646974696F6E youth because expedition
+0x7A65726F207765617220736561736F6E zero wear season
+
diff --git
a/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
b/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
index f6b7072cc54..030a379d20b 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_sys_table.out
@@ -975,3 +975,38 @@ total_data_file_size_in_bytes bigint Yes true
\N NONE
-- !select_partitions_count --
9
+-- !varbinary_sys_table_desc --
+column_sizes map<int,bigint> Yes true \N NONE
+content int Yes true \N NONE
+content_offset bigint Yes true \N NONE
+content_size_in_bytes bigint Yes true \N NONE
+equality_ids array<int> Yes true \N NONE
+file_format text Yes true \N NONE
+file_path text Yes true \N NONE
+file_size_in_bytes bigint Yes true \N NONE
+first_row_id bigint Yes true \N NONE
+key_metadata varbinary(2147483647) Yes true \N NONE
+lower_bounds map<int,varbinary(2147483647)> Yes true \N NONE
+nan_value_counts map<int,bigint> Yes true \N NONE
+null_value_counts map<int,bigint> Yes true \N NONE
+readable_metrics
struct<id:struct<column_size:bigint,value_count:bigint,null_value_count:bigint,nan_value_count:bigint,lower_bound:int,upper_bound:int>,name:struct<column_size:bigint,value_count:bigint,null_value_count:bigint,nan_value_count:bigint,lower_bound:text,upper_bound:text>>
Yes true \N NONE
+record_count bigint Yes true \N NONE
+referenced_data_file text Yes true \N NONE
+sort_order_id int Yes true \N NONE
+spec_id int Yes true \N NONE
+split_offsets array<bigint> Yes true \N NONE
+upper_bounds map<int,varbinary(2147483647)> Yes true \N NONE
+value_counts map<int,bigint> Yes true \N NONE
+
+-- !varbinary_sys_table_select --
+0 PARQUET 1 {1:0x01000000, 2:0x416C696365} {1:0x01000000,
2:0x416C696365}
+0 PARQUET 1 {1:0x02000000, 2:0x426F622055706461746564}
{1:0x02000000, 2:0x426F622055706461746564}
+0 PARQUET 1 {1:0x02000000, 2:0x426F62} {1:0x02000000,
2:0x426F62}
+0 PARQUET 1 {1:0x04000000, 2:0x44617665} {1:0x04000000,
2:0x44617665}
+0 PARQUET 1 {1:0x05000000, 2:0x457665} {1:0x05000000,
2:0x457665}
+0 PARQUET 1 {1:0x06000000, 2:0x4672616E6B} {1:0x06000000,
2:0x4672616E6B}
+0 PARQUET 1 {1:0x07000000, 2:0x4772616365} {1:0x07000000,
2:0x4772616365}
+0 PARQUET 1 {1:0x08000000, 2:0x4865696469} {1:0x08000000,
2:0x4865696469}
+0 PARQUET 1 {1:0x09000000, 2:0x4976616E} {1:0x09000000,
2:0x4976616E}
+0 PARQUET 1 {1:0x0A000000, 2:0x4A756479} {1:0x0A000000,
2:0x4A756479}
+
diff --git
a/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
b/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
new file mode 100644
index 00000000000..dff673c6b61
--- /dev/null
+++
b/regression-test/suites/external_table_p0/export/test_hive_export_varbinary.groovy
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.Files
+import java.nio.file.Paths
+
+suite("test_hive_export_varbinary", "external,hive,external_docker") {
+ // Gate: run only when otherConfigs "enableHiveTest" is set to "true" (case-insensitive).
+ String enabled = context.config.otherConfigs.get("enableHiveTest")
+ if (enabled == null || !enabled.equalsIgnoreCase("true")) {
+ logger.info("diable Hive test.")
+ return;
+ }
+ // Only the "hive2" prefix is exercised; ports are looked up as <prefix>HmsPort / <prefix>HdfsPort.
+ for (String hivePrefix : ["hive2"]) {
+ setHivePrefix(hivePrefix)
+ String hms_port = context.config.otherConfigs.get(hivePrefix +
"HmsPort")
+ String hdfs_port = context.config.otherConfigs.get(hivePrefix +
"HdfsPort")
+ String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+ // Any non-empty `hdfsUser` value works here; it just must not be empty.
+ def hdfsUserName = "doris"
+ def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+ def outfile_path = "/user/doris/tmp_data"
+ def uri = "${defaultFS}" + "${outfile_path}/exp_"
+ // Helper: exports a table with SELECT ... INTO OUTFILE and returns res[0][3] (logged as the outfile path).
+ def outfile_to_HDFS = {format,export_table_name ->
+ // Use a fresh UUID directory per export; note this reassigns the outer outfile_path/uri.
+ def uuid = UUID.randomUUID().toString()
+ outfile_path = "/user/doris/tmp_data/${uuid}"
+ uri = "${defaultFS}" + "${outfile_path}/exp_"
+ // Rows are exported ordered by id, in the requested format.
+ def res = sql """
+ SELECT * FROM ${export_table_name} t ORDER BY id
+ INTO OUTFILE "${uri}"
+ FORMAT AS ${format}
+ PROPERTIES (
+ "fs.defaultFS"="${defaultFS}",
+ "hadoop.username" = "${hdfsUserName}"
+ );
+ """
+ logger.info("outfile success path: " + res[0][3]);
+ return res[0][3]
+ }
+ // The try block (re)creates an HMS catalog with "enable.mapping.varbinary"="true" and uses its test_varbinary database.
+ try {
+ String catalog_name_with_export =
"${hivePrefix}_test_varbinary_with_export"
+ sql """drop catalog if exists ${catalog_name_with_export}"""
+ sql """create catalog if not exists ${catalog_name_with_export}
properties (
+ "type"="hms",
+ 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
+ "enable.mapping.varbinary"="true"
+ );"""
+
+ sql """ switch ${catalog_name_with_export}"""
+ sql """ use `test_varbinary` """
+
+ // Four export/read-back cases follow: {parquet, orc} outfile format x {parquet, orc} source table.
+ def format = "parquet"
+ def export_table_name = "test_hive_binary_parquet"
+ // Case 0: parquet table -> parquet outfile, read back through the HDFS table function.
+ def outfile_url0 = outfile_to_HDFS(format, export_table_name)
+ order_qt_select_tvf0 """ select * from HDFS(
+ "uri" = "${outfile_url0}.${format}",
+ "hadoop.username" = "${hdfsUserName}",
+ "enable_mapping_varbinary"="true",
+ "format" = "${format}");
+ """
+ // Case 1: orc table -> parquet outfile.
+ format = "parquet"
+ export_table_name = "test_hive_binary_orc"
+ def outfile_url1 = outfile_to_HDFS(format, export_table_name)
+ order_qt_select_tvf1 """ select * from HDFS(
+ "uri" = "${outfile_url1}.${format}",
+ "hadoop.username" = "${hdfsUserName}",
+ "enable_mapping_varbinary"="true",
+ "format" = "${format}");
+ """
+ // Case 2: parquet table -> orc outfile.
+ format = "orc"
+ export_table_name = "test_hive_binary_parquet"
+ def outfile_url2 = outfile_to_HDFS(format, export_table_name)
+ order_qt_select_tvf2 """ select * from HDFS(
+ "uri" = "${outfile_url2}.${format}",
+ "hadoop.username" = "${hdfsUserName}",
+ "enable_mapping_varbinary"="true",
+ "format" = "${format}");
+ """
+ // Case 3: orc table -> orc outfile.
+ format = "orc"
+ export_table_name = "test_hive_binary_orc"
+ def outfile_url3 = outfile_to_HDFS(format, export_table_name)
+ order_qt_select_tvf3 """ select * from HDFS(
+ "uri" = "${outfile_url3}.${format}",
+ "hadoop.username" = "${hdfsUserName}",
+ "enable_mapping_varbinary"="true",
+ "format" = "${format}");
+ """
+ // NOTE(review): the finally block is empty, so the catalog created above is not dropped here.
+ } finally {
+ }
+ }
+}
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
index 380169fa667..8e19fe581d5 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_orc.groovy
@@ -204,6 +204,61 @@ suite("test_hive_orc",
"all_types,p0,external,hive,external_docker,external_dock
sql """use `${catalog_name}`.`default`"""
select_top50()
sql """drop catalog if exists ${catalog_name}"""
+
+ sql """drop catalog if exists test_hive_orc_mapping_varbinary"""
+ sql """create catalog if not exists
test_hive_orc_mapping_varbinary properties (
+ "type"="hms",
+ 'hive.metastore.uris' =
'thrift://${externalEnvIp}:${hms_port}',
+ 'enable.mapping.varbinary' = 'true'
+ );"""
+ sql """use `test_hive_orc_mapping_varbinary`.`default`"""
+
+ explain {
+ sql("select binary_col from orc_all_types order by
binary_col,string_col asc limit 10;")
+ contains("TOPN OPT:1")
+ }
+ explain {
+ sql("select binary_col from orc_all_types order by
binary_col asc limit 10;")
+ contains("TOPN OPT:1")
+ }
+ order_qt_sql_topn_binary_col1 """ select
binary_col,cast(binary_col as string) from orc_all_types order by binary_col
asc limit 10; """
+ order_qt_sql_topn_binary_col2 """ select
binary_col,cast(binary_col as string) from orc_all_types order by binary_col
asc ,string_col asc limit 10; """
+ order_qt_sql_topn_binary_col3 """ select
binary_col,cast(binary_col as string) from orc_all_types order by binary_col
desc limit 10; """
+ order_qt_sql_topn_binary_col4 """ select
binary_col,cast(binary_col as string) from orc_all_types order by binary_col
desc,string_col desc limit 10; """
+
+ sql """ switch internal; """
+ sql """ drop database if exists test_view_varbinary_db"""
+ sql """ create database if not exists test_view_varbinary_db"""
+ sql """use test_view_varbinary_db"""
+ test {
+ sql " create view test_view_varbinary as select binary_col
from `test_hive_orc_mapping_varbinary`.`default`.`orc_all_types`; "
+ exception " View does not support VARBINARY type: binary_col"
+ }
+
+ test {
+ sql """ CREATE MATERIALIZED VIEW test_mv_varbinary
+ BUILD DEFERRED REFRESH AUTO ON MANUAL
+ DISTRIBUTED BY RANDOM BUCKETS 2
+ PROPERTIES ('replication_num' = '1')
+ AS select binary_col from
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types`; """
+ exception " MTMV do not support varbinary type : binary_col"
+ }
+
+ test {
+ sql " select count() from
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` group by
binary_col; "
+ exception " errCode = 2"
+ }
+
+ test {
+ sql " select * from
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` as a join
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` as b on
a.binary_col = b.binary_col; "
+ exception " errCode = 2,"
+ }
+
+ test {
+ sql " select * from
`test_hive_orc_mapping_varbinary`.`default`.`orc_all_types` where binary_col =
X'AB'; "
+ exception " could not used in ComparisonPredicate now"
+ }
+
} finally {
}
}
diff --git
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
index 0b2925a0d6e..0efa0e0233a 100644
---
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
+++
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_sys_table.groovy
@@ -390,4 +390,23 @@ suite("test_iceberg_sys_table",
"p0,external,doris,external_docker,external_dock
sql """select committed_at, snapshot_id, parent_id, operation from
${catalog_name}.${db_name}.test_iceberg_systable_tbl1\$snapshots"""
}
try_sql("DROP USER ${user}")
+
+ sql """drop catalog if exists test_iceberg_varbinary_sys"""
+ sql """
+ CREATE CATALOG test_iceberg_varbinary_sys PROPERTIES (
+ 'type'='iceberg',
+ 'iceberg.catalog.type'='rest',
+ 'uri' = 'http://${externalEnvIp}:${rest_port}',
+ "s3.access_key" = "admin",
+ "s3.secret_key" = "password",
+ "s3.endpoint" = "http://${externalEnvIp}:${minio_port}",
+ "s3.region" = "us-east-1",
+ 'enable.mapping.varbinary' = 'true'
+ );"""
+
+ sql """switch test_iceberg_varbinary_sys """
+ sql """use ${db_name}"""
+
+ order_qt_varbinary_sys_table_desc """desc
test_iceberg_systable_unpartitioned\$files"""
+ order_qt_varbinary_sys_table_select """select content, file_format,
record_count, lower_bounds, upper_bounds from
test_iceberg_systable_unpartitioned\$files;"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]