This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new ba6102b6454 [fix](hive) support null_format and escape_char for hive
text (#40291)
ba6102b6454 is described below
commit ba6102b6454ed06b8da2b95458044e6f14e08b86
Author: Socrates <[email protected]>
AuthorDate: Fri Sep 27 11:46:46 2024 +0800
[fix](hive) support null_format and escape_char for hive text (#40291)
## Proposed changes
Related PR: https://github.com/apache/doris/pull/39700
Support the `escape.delim` and `serialization.null.format` SerDe properties when reading and writing Hive text tables.
---
.../vec/data_types/serde/data_type_array_serde.cpp | 3 ++
.../vec/data_types/serde/data_type_map_serde.cpp | 10 ++++--
.../data_types/serde/data_type_nullable_serde.cpp | 4 +--
be/src/vec/data_types/serde/data_type_serde.h | 17 +++++----
.../vec/data_types/serde/data_type_string_serde.h | 42 ++++++++++++++++++++++
.../data_types/serde/data_type_struct_serde.cpp | 3 ++
be/src/vec/exec/format/csv/csv_reader.cpp | 34 ++++++++++++++++--
be/src/vec/exec/format/csv/csv_reader.h | 16 +++++++++
be/src/vec/runtime/vcsv_transformer.cpp | 15 ++++++--
.../scripts/create_preinstalled_scripts/run42.hql | 1 -
.../regression/serde_prop/some_serde_table.hql | 20 +++--------
.../doris/datasource/hive/source/HiveScanNode.java | 25 +++++++------
.../org/apache/doris/planner/HiveTableSink.java | 13 +++++--
gensrc/thrift/PlanNodes.thrift | 1 +
.../hive/test_hive_basic_type.out | 8 ++---
.../hive/test_hive_serde_prop.out | 24 +++++++++++++
.../hive/test_hive_serde_prop.groovy | 22 +++---------
17 files changed, 191 insertions(+), 67 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp
b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 9ebc9ce9747..0d5dea7fc3d 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -169,6 +169,9 @@ Status
DataTypeArraySerDe::deserialize_one_cell_from_hive_text(
for (int idx = 0, start = 0; idx <= slice.size; idx++) {
char c = (idx == slice.size) ? collection_delimiter : slice[idx];
if (c == collection_delimiter) {
+ if (options.escape_char != 0 && idx > 0 && slice[idx - 1] ==
options.escape_char) {
+ continue;
+ }
slices.emplace_back(slice.data + start, idx - start);
start = idx + 1;
}
diff --git a/be/src/vec/data_types/serde/data_type_map_serde.cpp
b/be/src/vec/data_types/serde/data_type_map_serde.cpp
index a1a65e7ea5b..2140885942d 100644
--- a/be/src/vec/data_types/serde/data_type_map_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_map_serde.cpp
@@ -97,13 +97,17 @@ Status
DataTypeMapSerDe::deserialize_one_cell_from_hive_text(
*
* So i use 'kv <= from' in order to get _map_kv_delimiter that
appears first.
* */
- if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from) {
+ if (i < slice.size && slice[i] == map_kv_delimiter && kv <= from &&
+ (options.escape_char == 0 || i == 0 || slice[i - 1] !=
options.escape_char)) {
kv = i;
continue;
}
if ((i == slice.size || slice[i] == collection_delimiter) && i >= kv +
1) {
- key_slices.push_back({slice.data + from, kv - from});
- value_slices.push_back({slice.data + kv + 1, i - 1 - kv});
+ if (options.escape_char != 0 && i > 0 && slice[i - 1] ==
options.escape_char) {
+ continue;
+ }
+ key_slices.emplace_back(slice.data + from, kv - from);
+ value_slices.emplace_back(slice.data + kv + 1, i - 1 - kv);
from = i + 1;
kv = from;
}
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 1b26d775d2d..1af85bd040d 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -88,7 +88,7 @@ Status DataTypeNullableSerDe::serialize_one_cell_to_hive_text(
const auto& col_null = assert_cast<const ColumnNullable&>(*ptr);
if (col_null.is_null_at(row_num)) {
- bw.write(NULL_IN_CSV_FOR_ORDINARY_TYPE.c_str(), 2);
+ bw.write(options.null_format, options.null_len);
} else {
RETURN_IF_ERROR(nested_serde->serialize_one_cell_to_hive_text(
col_null.get_nested_column(), row_num, bw, options,
@@ -101,7 +101,7 @@ Status
DataTypeNullableSerDe::deserialize_one_cell_from_hive_text(
IColumn& column, Slice& slice, const FormatOptions& options,
int hive_text_complex_type_delimiter_level) const {
auto& null_column = assert_cast<ColumnNullable&>(column);
- if (slice.size == 2 && slice[0] == '\\' && slice[1] == 'N') {
+ if (slice.compare(Slice(options.null_format, options.null_len)) == 0) {
null_column.insert_data(nullptr, 0);
return Status::OK();
}
diff --git a/be/src/vec/data_types/serde/data_type_serde.h
b/be/src/vec/data_types/serde/data_type_serde.h
index cddc704cdc2..262f9cae6a8 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -137,6 +137,10 @@ public:
bool converted_from_string = false;
char escape_char = 0;
+ /**
+ * flags for each byte to indicate if escape is needed.
+ */
+ bool need_escape[256] = {false};
/**
* only used for export data
@@ -148,8 +152,8 @@ public:
* NULL
* null
*/
- const char* null_format;
- int null_len;
+ const char* null_format = "\\N";
+ int null_len = 2;
/**
* The wrapper char for string type in nested type.
@@ -166,7 +170,7 @@ public:
CHECK(0 <= hive_text_complex_type_delimiter_level &&
hive_text_complex_type_delimiter_level <= 153);
- char ans = '\002';
+ char ans;
//https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySerDeParameters.java#L250
//use only control chars that are very unlikely to be part of the
string
// the following might/likely to be used in text files for strings
@@ -175,8 +179,9 @@ public:
// 12 (form feed, FF, \f, ^L),
// 13 (carriage return, CR, \r, ^M),
// 27 (escape, ESC, \e [GCC only], ^[).
-
- if (hive_text_complex_type_delimiter_level == 1) {
+ if (hive_text_complex_type_delimiter_level == 0) {
+ ans = field_delim[0];
+ } else if (hive_text_complex_type_delimiter_level == 1) {
ans = collection_delim;
} else if (hive_text_complex_type_delimiter_level == 2) {
ans = map_key_delim;
@@ -192,7 +197,7 @@ public:
} else if (hive_text_complex_type_delimiter_level <= 25) {
// [22, 25] -> [28, 31]
ans = hive_text_complex_type_delimiter_level + 6;
- } else if (hive_text_complex_type_delimiter_level <= 153) {
+ } else {
// [26, 153] -> [-128, -1]
ans = hive_text_complex_type_delimiter_level + (-26 - 128);
}
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h
b/be/src/vec/data_types/serde/data_type_string_serde.h
index d3161c88706..fe09ff615f4 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -96,6 +96,27 @@ public:
return Status::OK();
}
+ Status serialize_one_cell_to_hive_text(
+ const IColumn& column, int row_num, BufferWritable& bw,
FormatOptions& options,
+ int hive_text_complex_type_delimiter_level = 1) const override {
+ auto result = check_column_const_set_readability(column, row_num);
+ ColumnPtr ptr = result.first;
+ row_num = result.second;
+ const auto& value = assert_cast<const
ColumnType&>(*ptr).get_data_at(row_num);
+ if constexpr (std::is_same_v<ColumnType, ColumnString>) {
+ if (options.escape_char != 0) {
+ StringRef str_ref = value;
+ write_with_escaped_char_to_hive_text(str_ref, bw,
options.escape_char,
+ options.need_escape);
+ } else {
+ bw.write(value.data, value.size);
+ }
+ } else {
+ bw.write(value.data, value.size);
+ }
+ return Status::OK();
+ }
+
inline void write_with_escaped_char_to_json(StringRef value,
BufferWritable& bw) const {
for (char it : value) {
switch (it) {
@@ -126,6 +147,17 @@ public:
}
}
+ inline void write_with_escaped_char_to_hive_text(StringRef value,
BufferWritable& bw,
+ char escape_char,
+ const bool need_escape[])
const {
+ for (char it : value) {
+ if (need_escape[it & 0xff]) {
+ bw.write(escape_char);
+ }
+ bw.write(it);
+ }
+ }
+
Status serialize_column_to_json(const IColumn& column, int start_idx, int
end_idx,
BufferWritable& bw, FormatOptions&
options) const override {
SERIALIZE_COLUMN_TO_JSON();
@@ -154,6 +186,16 @@ public:
return Status::OK();
}
+ Status deserialize_one_cell_from_hive_text(
+ IColumn& column, Slice& slice, const FormatOptions& options,
+ int hive_text_complex_type_delimiter_level = 1) const override {
+ if (options.escape_char != 0) {
+ escape_string(slice.data, slice.size, options.escape_char);
+ }
+ assert_cast<ColumnType&>(column).insert_data(slice.data, slice.size);
+ return Status::OK();
+ }
+
Status deserialize_column_from_json_vector(IColumn& column,
std::vector<Slice>& slices,
int* num_deserialized,
const FormatOptions& options)
const override {
diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
index afbd15441b6..c88b0088e5a 100644
--- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp
@@ -250,6 +250,9 @@ Status
DataTypeStructSerDe::deserialize_one_cell_from_hive_text(
char* data = slice.data;
for (size_t i = 0, from = 0; i <= slice.size; i++) {
if (i == slice.size || data[i] == struct_delimiter) {
+ if (options.escape_char != 0 && i > 0 && data[i - 1] ==
options.escape_char) {
+ continue;
+ }
slices.push_back({data + from, i - from});
from = i + 1;
}
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp
b/be/src/vec/exec/format/csv/csv_reader.cpp
index 82781cddf24..0583b74d735 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -174,6 +174,23 @@ void PlainCsvTextFieldSplitter::do_split(const Slice&
line, std::vector<Slice>*
}
}
+void HiveCsvTextFieldSplitter::do_split(const Slice& line, std::vector<Slice>*
splitted_values) {
+ const char* data = line.data;
+ const size_t size = line.size;
+ size_t value_start = 0;
+ for (size_t i = 0; i < size; ++i) {
+ if (data[i] == _value_sep[0]) {
+ // hive will escape the field separator in string
+ if (_escape_char != 0 && i > 0 && data[i - 1] == _escape_char) {
+ continue;
+ }
+ process_value_func(data, value_start, i - value_start,
_trimming_char, splitted_values);
+ value_start = i + _value_sep_len;
+ }
+ }
+ process_value_func(data, value_start, size - value_start, _trimming_char,
splitted_values);
+}
+
CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile,
ScannerCounter* counter,
const TFileScanRangeParams& params, const TFileRangeDesc&
range,
const std::vector<SlotDescriptor*>& file_slot_descs,
io::IOContext* io_ctx)
@@ -352,6 +369,12 @@ Status CsvReader::init_reader(bool is_load) {
} else {
_options.map_key_delim =
_params.file_attributes.text_params.mapkv_delimiter[0];
}
+
+ if (_params.file_attributes.text_params.__isset.null_format) {
+ _options.null_format =
_params.file_attributes.text_params.null_format.data();
+ _options.null_len =
_params.file_attributes.text_params.null_format.length();
+ }
+
_use_nullable_string_opt.resize(_file_slot_descs.size());
for (int i = 0; i < _file_slot_descs.size(); ++i) {
auto data_type_ptr = _file_slot_descs[i]->get_data_type_ptr();
@@ -376,9 +399,14 @@ Status CsvReader::init_reader(bool is_load) {
if (_enclose == 0) {
text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
_line_delimiter, _line_delimiter_length, _keep_cr);
-
- _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>(
- _trim_tailing_spaces, false, _value_separator,
_value_separator_length, -1);
+ if (_text_serde_type == TTextSerdeType::HIVE_TEXT_SERDE) {
+ _fields_splitter = std::make_unique<HiveCsvTextFieldSplitter>(
+ _trim_tailing_spaces, false, _value_separator,
_value_separator_length, -1,
+ _escape);
+ } else {
+ _fields_splitter = std::make_unique<PlainCsvTextFieldSplitter>(
+ _trim_tailing_spaces, false, _value_separator,
_value_separator_length, -1);
+ }
} else {
text_line_reader_ctx = std::make_shared<EncloseCsvLineReaderContext>(
_line_delimiter, _line_delimiter_length, _value_separator,
_value_separator_length,
diff --git a/be/src/vec/exec/format/csv/csv_reader.h
b/be/src/vec/exec/format/csv/csv_reader.h
index 5e2865e45cb..8ffd704c6e4 100644
--- a/be/src/vec/exec/format/csv/csv_reader.h
+++ b/be/src/vec/exec/format/csv/csv_reader.h
@@ -170,6 +170,22 @@ private:
std::string _value_sep;
};
+class HiveCsvTextFieldSplitter : public
BaseCsvTextFieldSplitter<HiveCsvTextFieldSplitter> {
+public:
+ explicit HiveCsvTextFieldSplitter(bool trim_tailing_space, bool trim_ends,
+ const string& value_sep, size_t
value_sep_len = 1,
+ char trimming_char = 0, char escape_char
= 0)
+ : BaseCsvTextFieldSplitter(trim_tailing_space, trim_ends,
value_sep_len, trimming_char),
+ _value_sep(value_sep),
+ _escape_char(escape_char) {}
+
+ void do_split(const Slice& line, std::vector<Slice>* splitted_values);
+
+private:
+ std::string _value_sep;
+ char _escape_char;
+};
+
class CsvReader : public GenericReader {
ENABLE_FACTORY_CREATOR(CsvReader);
diff --git a/be/src/vec/runtime/vcsv_transformer.cpp
b/be/src/vec/runtime/vcsv_transformer.cpp
index 4bfd342fe76..51ac2eb2729 100644
--- a/be/src/vec/runtime/vcsv_transformer.cpp
+++ b/be/src/vec/runtime/vcsv_transformer.cpp
@@ -63,10 +63,21 @@ VCSVTransformer::VCSVTransformer(RuntimeState* state,
doris::io::FileWriter* fil
}
if (_is_text_format) {
+ _options.field_delim = hive_serde_properties->field_delim;
_options.collection_delim = hive_serde_properties->collection_delim[0];
_options.map_key_delim = hive_serde_properties->mapkv_delim[0];
- _options.escape_char = hive_serde_properties->escape_char[0];
- _options.null_format = hive_serde_properties->null_format.c_str();
+ if (hive_serde_properties->__isset.escape_char) {
+ _options.escape_char = hive_serde_properties->escape_char[0];
+ }
+ _options.null_format = hive_serde_properties->null_format.data();
+ _options.null_len = hive_serde_properties->null_format.length();
+ // The list of separators + escapeChar are the bytes required to be
escaped.
+ if (_options.escape_char != 0) {
+ _options.need_escape[_options.escape_char & 0xff] = true;
+ }
+ for (int i = 0; i <= 153; i++) {
+ _options.need_escape[_options.get_collection_delimiter(i) & 0xff]
= true;
+ }
}
}
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
index dc469fad77b..36b4776dc8f 100755
---
a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run42.hql
@@ -32,7 +32,6 @@ CREATE TABLE IF NOT EXISTS `text_all_types`(
`t_decimal_precision_38` decimal(38,16),
`t_binary` binary
)
-ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION
'/user/doris/preinstalled_data/text/text_all_types';
diff --git
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
index b5d963a1c2b..4de85bc19f0 100644
---
a/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
+++
b/docker/thirdparties/docker-compose/hive/scripts/data/regression/serde_prop/some_serde_table.hql
@@ -88,19 +88,8 @@ CREATE TABLE `serde_test7`(
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
- 'escape.delim' = '|'
-)
-STORED AS INPUTFORMAT
- 'org.apache.hadoop.mapred.TextInputFormat'
-OUTPUTFORMAT
- 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
-
-CREATE TABLE `serde_test8`(
- `id` int,
- `name` string)
-ROW FORMAT SERDE
- 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
-WITH SERDEPROPERTIES (
+ 'field.delim' = 'a',
+ 'escape.delim' = '|',
'serialization.null.format' = 'null'
)
STORED AS INPUTFORMAT
@@ -108,11 +97,12 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';
+CREATE TABLE `serde_test8` like `serde_test7`;
+
insert into serde_test1 values(1, "abc"),(2, "def");
insert into serde_test2 values(1, "abc"),(2, "def");
insert into serde_test3 values(1, "abc"),(2, "def");
insert into serde_test4 values(1, "abc"),(2, "def");
insert into serde_test5 values(1, "abc"),(2, "def");
insert into serde_test6 values(1, "abc"),(2, "def");
-insert into serde_test7 values(1, "abc"),(2, "def");
-insert into serde_test8 values(1, "abc"),(2, "def");
+insert into serde_test7 values(1, null),(2, "|||"),(3, "aaa"),(4, "\"null\"");
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
index 634c596c69f..0dcf4724a7b 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java
@@ -464,21 +464,24 @@ public class HiveScanNode extends FileQueryScanNode {
if (serdeParams.containsKey(PROP_QUOTE_CHAR)) {
textParams.setEnclose(serdeParams.get(PROP_QUOTE_CHAR).getBytes()[0]);
}
-
- // TODO: support escape char and null format in csv_reader
- Optional<String> escapeChar =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
+ // 6. set escape delimiter
+ Optional<String> escapeDelim =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_ESCAPE_DELIMITER);
- if (escapeChar.isPresent() &&
!escapeChar.get().equals(DEFAULT_ESCAPE_DELIMIER)) {
- throw new UserException(
- "not support serde prop " + PROP_ESCAPE_DELIMITER + " in
hive text reading");
+ if (escapeDelim.isPresent()) {
+ String escape = HiveMetaStoreClientHelper.getByte(
+ escapeDelim.get());
+ if (escape != null) {
+ textParams
+ .setEscape(escape.getBytes()[0]);
+ } else {
+ textParams.setEscape(DEFAULT_ESCAPE_DELIMIER.getBytes()[0]);
+ }
}
-
+ // 7. set null format
Optional<String> nullFormat =
HiveMetaStoreClientHelper.getSerdeProperty(hmsTable.getRemoteTable(),
PROP_NULL_FORMAT);
- if (nullFormat.isPresent() &&
!nullFormat.get().equals(DEFAULT_NULL_FORMAT)) {
- throw new UserException(
- "not support serde prop " + PROP_NULL_FORMAT + " in hive
text reading");
- }
+
textParams.setNullFormat(HiveMetaStoreClientHelper.firstPresentOrDefault(
+ DEFAULT_NULL_FORMAT, nullFormat));
TFileAttributes fileAttributes = new TFileAttributes();
fileAttributes.setTextParams(textParams);
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
index 0d952330d7c..676241d06d8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java
@@ -244,9 +244,16 @@ public class HiveTableSink extends
BaseExternalTableDataSink {
// 5. set escape delimiter
Optional<String> escapeDelim =
HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(),
PROP_ESCAPE_DELIMITER);
- serDeProperties
-
.setEscapeChar(HiveMetaStoreClientHelper.getByte(HiveMetaStoreClientHelper.firstPresentOrDefault(
- DEFAULT_ESCAPE_DELIMIER, escapeDelim)));
+ if (escapeDelim.isPresent()) {
+ String escape = HiveMetaStoreClientHelper.getByte(
+ escapeDelim.get());
+ if (escape != null) {
+ serDeProperties
+ .setEscapeChar(escape);
+ } else {
+ serDeProperties.setEscapeChar(DEFAULT_ESCAPE_DELIMIER);
+ }
+ }
// 6. set null format
Optional<String> nullFormat =
HiveMetaStoreClientHelper.getSerdeProperty(targetTable.getRemoteTable(),
PROP_NULL_FORMAT);
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index 426c3217918..b7598302fe1 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -258,6 +258,7 @@ struct TFileTextScanRangeParams {
4: optional string mapkv_delimiter;
5: optional i8 enclose;
6: optional i8 escape;
+ 7: optional string null_format;
}
struct TFileScanSlotInfo {
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 388b95944e9..cc48c4a5601 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -24,7 +24,7 @@ true 8 8 8 80 8.8 80.8 7298
12/31/10 8 2010-12-31T12:08:13.780 2010 12
"" "test"
-- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
-- !11 --
\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
@@ -290,7 +290,7 @@ true 8 8 8 80 8.8 80.8
7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12
"" "test"
-- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
-- !11 --
\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
@@ -556,7 +556,7 @@ true 8 8 8 80 8.8 80.8
7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12
"" "test"
-- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
-- !11 --
\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
@@ -822,7 +822,7 @@ true 8 8 8 80 8.8 80.8
7298 12/31/10 8 2010-12-31T12:08:13.780 2010 12
"" "test"
-- !10 --
-\\N\\N\\N\\N\\N\\N\\N\\N\\Ntesttestaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
-- !11 --
\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
diff --git
a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
index 38918c3fc6f..a527c7b687d 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_serde_prop.out
@@ -27,6 +27,18 @@ b 2.2
1 abc
2 def
+-- !8 --
+1 null
+2 |||
+3 aaa
+4 "null"
+
+-- !9 --
+1 null
+2 |||
+3 aaa
+4 "null"
+
-- !1 --
a 1.1
b 2.2
@@ -55,3 +67,15 @@ b 2.2
1 abc
2 def
+-- !8 --
+1 null
+2 |||
+3 aaa
+4 "null"
+
+-- !9 --
+1 null
+2 |||
+3 aaa
+4 "null"
+
diff --git
a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
index 8aa97e63123..d0c191f7c67 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_serde_prop.groovy
@@ -23,6 +23,7 @@ suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,exte
}
for (String hivePrefix : ["hive2", "hive3"]) {
+ setHivePrefix(hivePrefix)
String catalog_name = "test_${hivePrefix}_serde_prop"
String ex_db_name = "`stats_test`"
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
@@ -45,24 +46,11 @@ suite("test_hive_serde_prop",
"external_docker,hive,external_docker_hive,p0,exte
qt_5 """select * from ${catalog_name}.regression.serde_test4 order by
id;"""
qt_6 """select * from ${catalog_name}.regression.serde_test5 order by
id;"""
qt_7 """select * from ${catalog_name}.regression.serde_test6 order by
id;"""
+ qt_8 """select * from ${catalog_name}.regression.serde_test7 order by
id;"""
- def success = true;
- try {
- sql """select * from ${catalog_name}.regression.serde_test7 order
by id;"""
- } catch(Exception e) {
- assertTrue(e.getMessage().contains("not support serde prop"),
e.getMessage())
- success = false;
- }
- assertEquals(success, false)
-
- success = true;
- try {
- sql """select * from ${catalog_name}.regression.serde_test8 order
by id;"""
- } catch(Exception e) {
- assertTrue(e.getMessage().contains("not support serde prop"),
e.getMessage())
- success = false;
- }
- assertEquals(success, false)
+ hive_docker """truncate table regression.serde_test8;"""
+ sql """insert into ${catalog_name}.regression.serde_test8 select *
from ${catalog_name}.regression.serde_test7;"""
+ qt_9 """select * from ${catalog_name}.regression.serde_test8 order by
id;"""
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]