This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9e2956601bb [fix](ip) fix datatype serde for ipv6 with rowstore
(#43065)
9e2956601bb is described below
commit 9e2956601bb48e2fd64649f1c258f015b199a0b7
Author: amory <[email protected]>
AuthorDate: Tue Nov 5 14:28:45 2024 +0800
[fix](ip) fix datatype serde for ipv6 with rowstore (#43065)
Before this PR:
if a table that has store_row_column enabled contains an ipv6 column,
and we insert some data into it
and then run an UPDATE statement, the BE crashes with a core dump:
```
*** Aborted at 1730367188 (unix time) try "date -d @1730367188" if you are
using GNU date ***
*** Current BE git commitID: face753ded ***
*** SIGSEGV invalid permissions for mapped object (@0x60c000a339ae)
received by PID 4176451 (TID 4187168 OR 0x7ff774f5a700) from PID 10697134;
stack trace: ***
0# doris::signal::(anonymous namespace)::FailureSignalHandler(int,
siginfo_t*, void*) at
/mnt/disk1/wangqiannan/amory/doris/be/src/common/signal_handler.h:421
1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in
/mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
2# JVM_handle_linux_signal in
/mnt/disk1/wangqiannan/tool/jdk-17.0.10/lib/server/libjvm.so
3# 0x00007FFFF63AEB50 in /lib64/libc.so.6
4# doris::IPv6Value::from_string(unsigned __int128&, char const*, unsigned
long) at /mnt/disk1/wangqiannan/amory/doris/be/src/vec/runtime/ipv6_value.h:55
5# bool doris::vectorized::read_ipv6_text_impl<unsigned __int128>(unsigned
__int128&, doris::vectorized::ReadBuffer&) at
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/io/io_helper.h:316
6#
doris::vectorized::DataTypeIPv6SerDe::read_one_cell_from_jsonb(doris::vectorized::IColumn&,
doris::JsonbValue const*) const at
/mnt/disk1/wangqiannan/amory/doris/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp:76
7#
doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector<std::shared_ptr<doris::vectorized::DataTypeSerDe>,
std::allocator<std::shared_ptr<doris::vectorized::DataTypeSerDe> > > const&,
char const*, unsigned long, std::unordered_map<unsigned int, unsigned int,
std::hash<unsigned int>, std::equal_to<unsigned int>,
std::allocator<std::pair<unsigned int const, unsigned int> > > const&,
doris::vectorized::Block&, std::vector<std::__cxx11::basic_string<char,
std::char_traits<ch [...]
8#
doris::vectorized::JsonbSerializeUtil::jsonb_to_block(std::vector<std::shared_ptr<doris::vectorized::DataTypeSerDe>,
std::allocator<std::shared_ptr<doris::vectorized::DataTypeSerDe> > > const&,
doris::vectorized::ColumnStr<unsigned int> const&, std::unordered_map<unsigned
int, unsigned int, std::hash<unsigned int>, std::equal_to<unsigned int>,
std::allocator<std::pair<unsigned int const, unsigned int> > > const&,
doris::vectorized::Block&, std::vector<std::__cxx11::basic_string<ch [...]
9#
doris::BaseTablet::fetch_value_through_row_column(std::shared_ptr<doris::Rowset>,
doris::TabletSchema const&, unsigned int, std::vector<unsigned int,
std::allocator<unsigned int> > const&, std::vector<unsigned int,
std::allocator<unsigned int> > const&, doris::vectorized::Block&) at
/mnt/disk1/wangqiannan/amory/doris/be/src/olap/base_tablet.cpp:885
10# doris::FixedReadPlan::read_columns_by_plan(doris::TabletSchema const&,
std::vector<unsigned int, std::allocator<unsigned int> >,
std::map<doris::RowsetId, std::shared_ptr<doris::Rowset>,
std::less<doris::RowsetId>, std::allocator<std::pair<doris::RowsetId const,
std::shared_ptr<doris::Rowset> > > > const&, doris::vectorized::Block&,
std::map<unsigned int, unsigned int, std::less<unsigned int>,
std::allocator<std::pair<unsigned int const, unsigned int> > >*, signed char
const*) con [...]
11# doris::FixedReadPlan::fill_missing_columns(doris::RowsetWriterContext*,
std::map<doris::RowsetId, std::shared_ptr<doris::Rowset>,
std::less<doris::RowsetId>, std::allocator<std::pair<doris::RowsetId const,
std::shared_ptr<doris::Rowset> > > > const&, doris::TabletSchema const&,
doris::vectorized::Block&, std::vector<bool, std::allocator<bool> > const&,
bool, unsigned long const&, doris::vectorized::Block const*) const at
/mnt/disk1/wangqiannan/amory/doris/be/src/olap/partial_updat [...]
12#
doris::segment_v2::VerticalSegmentWriter::_append_block_with_partial_content(doris::segment_v2::RowsInBlock&,
doris::vectorized::Block&) at
/mnt/disk1/wangqiannan/amory/doris/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp:502
```
---
.../vec/data_types/serde/data_type_ipv6_serde.cpp | 22 ++++----
.../vec/data_types/serde/data_type_serde_test.cpp | 62 ++++++++++++++++++++++
.../data/datatype_p0/ip/test_ip_basic.out | 12 +++++
.../suites/datatype_p0/ip/test_ip_basic.groovy | 14 +++++
4 files changed, 97 insertions(+), 13 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
index f09b6feb4a2..612c9ce4222 100644
--- a/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_ipv6_serde.cpp
@@ -69,25 +69,21 @@ Status DataTypeIPv6SerDe::write_column_to_mysql(const
IColumn& column,
}
void DataTypeIPv6SerDe::read_one_cell_from_jsonb(IColumn& column, const
JsonbValue* arg) const {
- IPv6 val = 0;
- const auto* str_value = static_cast<const JsonbStringVal*>(arg);
- ReadBuffer rb(reinterpret_cast<const unsigned char*>(str_value->getBlob()),
- str_value->getBlobLen());
- if (!read_ipv6_text_impl(val, rb)) {
- throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "parse ipv6 fail,
string: '{}'",
- rb.to_string());
- }
- assert_cast<ColumnIPv6&>(column).insert_value(val);
+ const auto* str_value = static_cast<const JsonbBinaryVal*>(arg);
+ column.deserialize_and_insert_from_arena(str_value->getBlob());
}
void DataTypeIPv6SerDe::write_one_cell_to_jsonb(const IColumn& column,
JsonbWriterT<JsonbOutStream>&
result,
Arena* mem_pool, int col_id,
int row_num) const {
- // we make ipv6 as string in jsonb
+ // we make ipv6 as BinaryValue in jsonb
result.writeKey(col_id);
- IPv6 data = assert_cast<const ColumnIPv6&>(column).get_element(row_num);
- IPv6Value ipv6_value(data);
- result.writeString(ipv6_value.to_string());
+ const char* begin = nullptr;
+ // maybe serialize_value_into_arena should move to here later.
+ StringRef value = column.serialize_value_into_arena(row_num, *mem_pool,
begin);
+ result.writeStartBinary();
+ result.writeBinary(value.data, value.size);
+ result.writeEndBinary();
}
Status DataTypeIPv6SerDe::serialize_one_cell_to_json(const IColumn& column,
int row_num,
diff --git a/be/test/vec/data_types/serde/data_type_serde_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_test.cpp
index ef235254db5..82674b0aa44 100644
--- a/be/test/vec/data_types/serde/data_type_serde_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_test.cpp
@@ -220,4 +220,66 @@ TEST(DataTypeSerDeTest, DataTypeScalaSerDeTest) {
serialize_and_deserialize_pb_test();
}
+TEST(DataTypeSerDeTest, DataTypeRowStoreSerDeTest) {
+ // ipv6
+ {
+ std::string ip = "5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b";
+ auto vec = vectorized::ColumnVector<IPv6>::create();
+ IPv6Value ipv6;
+ EXPECT_TRUE(ipv6.from_string(ip));
+ vec->insert(ipv6.value());
+
+ vectorized::DataTypePtr
data_type(std::make_shared<vectorized::DataTypeIPv6>());
+ auto serde = data_type->get_serde(0);
+ JsonbWriterT<JsonbOutStream> jsonb_writer;
+ Arena pool;
+ jsonb_writer.writeStartObject();
+ serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0);
+ jsonb_writer.writeEndObject();
+ auto jsonb_column = ColumnString::create();
+ jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
+ jsonb_writer.getOutput()->getSize());
+ StringRef jsonb_data = jsonb_column->get_data_at(0);
+ auto pdoc = JsonbDocument::createDocument(jsonb_data.data,
jsonb_data.size);
+ JsonbDocument& doc = *pdoc;
+ for (auto it = doc->begin(); it != doc->end(); ++it) {
+ serde->read_one_cell_from_jsonb(*vec, it->value());
+ }
+ EXPECT_TRUE(vec->size() == 2);
+ IPv6 data = vec->get_element(1);
+ IPv6Value ipv6_value(data);
+ EXPECT_EQ(ipv6_value.to_string(), ip);
+ }
+
+ // ipv4
+ {
+ std::string ip = "192.0.0.1";
+ auto vec = vectorized::ColumnVector<IPv4>::create();
+ IPv4Value ipv4;
+ EXPECT_TRUE(ipv4.from_string(ip));
+ vec->insert(ipv4.value());
+
+ vectorized::DataTypePtr
data_type(std::make_shared<vectorized::DataTypeIPv4>());
+ auto serde = data_type->get_serde(0);
+ JsonbWriterT<JsonbOutStream> jsonb_writer;
+ Arena pool;
+ jsonb_writer.writeStartObject();
+ serde->write_one_cell_to_jsonb(*vec, jsonb_writer, &pool, 0, 0);
+ jsonb_writer.writeEndObject();
+ auto jsonb_column = ColumnString::create();
+ jsonb_column->insert_data(jsonb_writer.getOutput()->getBuffer(),
+ jsonb_writer.getOutput()->getSize());
+ StringRef jsonb_data = jsonb_column->get_data_at(0);
+ auto pdoc = JsonbDocument::createDocument(jsonb_data.data,
jsonb_data.size);
+ JsonbDocument& doc = *pdoc;
+ for (auto it = doc->begin(); it != doc->end(); ++it) {
+ serde->read_one_cell_from_jsonb(*vec, it->value());
+ }
+ EXPECT_TRUE(vec->size() == 2);
+ IPv4 data = vec->get_element(1);
+ IPv4Value ipv4_value(data);
+ EXPECT_EQ(ipv4_value.to_string(), ip);
+ }
+}
+
} // namespace doris::vectorized
diff --git a/regression-test/data/datatype_p0/ip/test_ip_basic.out
b/regression-test/data/datatype_p0/ip/test_ip_basic.out
index 14ecbd47a46..b69f9708a1b 100644
--- a/regression-test/data/datatype_p0/ip/test_ip_basic.out
+++ b/regression-test/data/datatype_p0/ip/test_ip_basic.out
@@ -361,3 +361,15 @@ ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff 4
121.25.82.29 2620:44:a000::1
121.25.160.80 2001:418:0:5000::c2d
+-- !sql --
+1 true 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b
+
+-- !sql --
+1 false 255.255.255.255 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b
+
+-- !sql --
+1 false 127.0.0.1 5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b
+
+-- !sql --
+1 false 127.0.0.1 ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff
+
diff --git a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
index 650dc86ad4e..468b6f6f146 100644
--- a/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
+++ b/regression-test/suites/datatype_p0/ip/test_ip_basic.groovy
@@ -146,4 +146,18 @@ suite("test_ip_basic") {
sql "DROP TABLE t0"
sql "DROP TABLE t1"
sql "DROP TABLE t2"
+
+ // test ip with rowstore
+ sql """ SET enable_nereids_planner=true """
+ sql """ SET enable_fallback_to_original_planner=false """
+ sql """ DROP TABLE IF EXISTS table_ip """
+ sql """ CREATE TABLE IF NOT EXISTS `table_ip` (`col0` bigint NOT
NULL,`col1` boolean NOT NULL, `col24` ipv4 NOT NULL, `col25` ipv6 NOT
NULL,INDEX col1 (`col1`) USING INVERTED, INDEX col25 (`col25`) USING INVERTED )
ENGINE=OLAP UNIQUE KEY(`col0`) DISTRIBUTED BY HASH(`col0`) BUCKETS 4 PROPERTIES
("replication_allocation" = "tag.location.default: 1", "store_row_column" =
"true") """
+ sql """ insert into table_ip values (1, true, '255.255.255.255',
"5be8:dde9:7f0b:d5a7:bd01:b3be:9c69:573b") """
+ qt_sql """ select * from table_ip """
+ sql """ Update table_ip set col1 = false where col0 = 1 """
+ qt_sql """ select * from table_ip """
+ sql """ Update table_ip set col24 = '127.0.0.1' where col0 = 1 """
+ qt_sql """ select * from table_ip where col0 = 1"""
+ sql """ Update table_ip set col25 =
'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' where col0 = 1 """
+ qt_sql """ select * from table_ip where col0 = 1"""
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]