This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 91cdb79d89 [Bugfix](Outfile) fix exporting data to the parquet and orc file formats (#19436)
91cdb79d89 is described below
commit 91cdb79d897d41993c08c2501eb258e3c7d90e54
Author: Tiewei Fang <[email protected]>
AuthorDate: Sat May 13 22:39:24 2023 +0800
[Bugfix](Outfile) fix exporting data to the parquet and orc file formats (#19436)

1. Support exporting the `LARGEINT` data type to the parquet/orc file formats.
2. Export the Doris `DATE`/`DATETIME` types as the `Date`/`Timestamp` logical types of the parquet file format.
3. Fix incorrect data when `DATE` values are exported to ORC.
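
For reference, a minimal standalone sketch of the value mapping in item 2 (using
C++20 std::chrono rather than Doris's VecDateTimeValue; the helper names are
illustrative, not part of this patch). Parquet's Date logical type stores days
since 1970-01-01 as int32, and Timestamp(MILLIS) stores epoch milliseconds as int64:

    #include <chrono>
    #include <cstdint>

    // Days since the Unix epoch, as parquet's Date logical type expects.
    int32_t to_parquet_date(std::chrono::year_month_day ymd) {
        return std::chrono::sys_days(ymd).time_since_epoch().count();
    }

    // Milliseconds since the Unix epoch, as parquet's Timestamp(MILLIS) expects.
    int64_t to_parquet_timestamp_millis(std::chrono::sys_seconds ts) {
        return std::chrono::duration_cast<std::chrono::milliseconds>(ts.time_since_epoch()).count();
    }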
---
be/src/vec/runtime/vfile_result_writer.cpp | 2 +-
be/src/vec/runtime/vorc_writer.cpp | 207 +++++++++++------
be/src/vec/runtime/vparquet_writer.cpp | 230 ++++++++++++++++---
be/src/vec/runtime/vparquet_writer.h | 14 +-
.../org/apache/doris/analysis/OutFileClause.java | 29 ++-
.../org/apache/doris/analysis/SelectStmtTest.java | 4 +-
gensrc/thrift/DataSinks.thrift | 20 ++
.../data/export_p0/test_export_data_types.out | 31 +--
.../suites/export_p0/test_export_basic.groovy | 2 +
.../suites/export_p0/test_export_csv.groovy | 2 +
.../suites/export_p0/test_export_data_types.groovy | 247 +++++----------------
.../suites/export_p0/test_export_orc.groovy | 2 +
.../suites/export_p0/test_export_parquet.groovy | 2 +
13 files changed, 462 insertions(+), 330 deletions(-)
diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp
index 2684576f5d..9bfbd962b0 100644
--- a/be/src/vec/runtime/vfile_result_writer.cpp
+++ b/be/src/vec/runtime/vfile_result_writer.cpp
@@ -243,7 +243,7 @@ Status VFileResultWriter::append_block(Block& block) {
     RETURN_IF_ERROR(VExprContext::get_output_block_after_execute_exprs(_output_vexpr_ctxs, block,
                                                                        &output_block));
     if (_vfile_writer) {
-        _write_file(output_block);
+        RETURN_IF_ERROR(_write_file(output_block));
     } else {
         RETURN_IF_ERROR(_write_csv_file(output_block));
     }
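
The one-line change above stops discarding the Status returned by _write_file.
As a simplified sketch of what a RETURN_IF_ERROR-style macro expands to (the
real macro lives in be/src/common/status.h; this stand-in is illustrative only):

    #define RETURN_IF_ERROR_SKETCH(stmt)  \
        do {                              \
            Status _status_ = (stmt);     \
            if (!_status_.ok()) {         \
                return _status_;          \
            }                             \
        } while (false)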
diff --git a/be/src/vec/runtime/vorc_writer.cpp b/be/src/vec/runtime/vorc_writer.cpp
index 665ec27429..f3804a6b62 100644
--- a/be/src/vec/runtime/vorc_writer.cpp
+++ b/be/src/vec/runtime/vorc_writer.cpp
@@ -165,9 +165,69 @@ void VOrcWriterWrapper::close() {
         RETURN_WRONG_TYPE \
     }
 
+#define WRITE_LARGEINT_STRING_INTO_BATCH(VECTOR_BATCH, COLUMN) \
+    VECTOR_BATCH* cur_batch = dynamic_cast<VECTOR_BATCH*>(root->fields[i]); \
+    size_t begin_off = offset; \
+    if (null_map != nullptr) { \
+        cur_batch->hasNulls = true; \
+        auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data(); \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            if (null_data[row_id] != 0) { \
+                cur_batch->notNull[row_id] = 0; \
+            } else { \
+                cur_batch->notNull[row_id] = 1; \
+                auto value = assert_cast<const COLUMN&>(*col).get_data()[row_id]; \
+                std::string value_str = fmt::format("{}", value); \
+                size_t len = value_str.size(); \
+                while (buffer.size < offset + len) { \
+                    char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
+                    memcpy(new_ptr, buffer.data, buffer.size); \
+                    free(const_cast<char*>(buffer.data)); \
+                    buffer.data = new_ptr; \
+                    buffer.size = buffer.size + BUFFER_UNIT_SIZE; \
+                } \
+                strcpy(const_cast<char*>(buffer.data) + offset, value_str.c_str()); \
+                offset += len; \
+                cur_batch->length[row_id] = len; \
+            } \
+        } \
+        size_t data_off = 0; \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            if (null_data[row_id] != 0) { \
+                cur_batch->notNull[row_id] = 0; \
+            } else { \
+                cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+                data_off += cur_batch->length[row_id]; \
+            } \
+        } \
+    } else if (const auto& not_null_column = check_and_get_column<const COLUMN>(col)) { \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            auto value = not_null_column->get_data()[row_id]; \
+            std::string value_str = fmt::format("{}", value); \
+            size_t len = value_str.size(); \
+            while (buffer.size < offset + len) { \
+                char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
+                memcpy(new_ptr, buffer.data, buffer.size); \
+                free(const_cast<char*>(buffer.data)); \
+                buffer.data = new_ptr; \
+                buffer.size = buffer.size + BUFFER_UNIT_SIZE; \
+            } \
+            strcpy(const_cast<char*>(buffer.data) + offset, value_str.c_str()); \
+            offset += len; \
+            cur_batch->length[row_id] = len; \
+        } \
+        size_t data_off = 0; \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+            data_off += cur_batch->length[row_id]; \
+        } \
+    } else { \
+        RETURN_WRONG_TYPE \
+    }
+
 #define WRITE_DATE_STRING_INTO_BATCH(FROM, TO) \
     orc::StringVectorBatch* cur_batch = dynamic_cast<orc::StringVectorBatch*>(root->fields[i]); \
-    size_t offset = 0; \
+    size_t begin_off = offset; \
     if (null_map != nullptr) { \
         cur_batch->hasNulls = true; \
         auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data(); \
@@ -178,7 +238,7 @@ void VOrcWriterWrapper::close() {
                 cur_batch->notNull[row_id] = 1; \
                 int len = binary_cast<FROM, TO>( \
                                   assert_cast<const ColumnVector<FROM>&>(*col).get_data()[row_id]) \
-                                  .to_buffer(const_cast<char*>(buffer.data)); \
+                                  .to_buffer(const_cast<char*>(buffer.data) + offset); \
                 while (buffer.size < offset + len) { \
                     char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
                     memcpy(new_ptr, buffer.data, buffer.size); \
@@ -190,20 +250,20 @@ void VOrcWriterWrapper::close() {
                 offset += len; \
             } \
         } \
-        offset = 0; \
+        size_t data_off = 0; \
         for (size_t row_id = 0; row_id < sz; row_id++) { \
             if (null_data[row_id] != 0) { \
                 cur_batch->notNull[row_id] = 0; \
             } else { \
-                cur_batch->data[row_id] = const_cast<char*>(buffer.data) + offset; \
-                offset += cur_batch->length[row_id]; \
+                cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+                data_off += cur_batch->length[row_id]; \
             } \
         } \
     } else if (const auto& not_null_column = \
                        check_and_get_column<const ColumnVector<FROM>>(col)) { \
         for (size_t row_id = 0; row_id < sz; row_id++) { \
             int len = binary_cast<FROM, TO>(not_null_column->get_data()[row_id]) \
-                              .to_buffer(const_cast<char*>(buffer.data)); \
+                              .to_buffer(const_cast<char*>(buffer.data) + offset); \
             while (buffer.size < offset + len) { \
                 char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
                 memcpy(new_ptr, buffer.data, buffer.size); \
@@ -214,10 +274,71 @@ void VOrcWriterWrapper::close() {
             cur_batch->length[row_id] = len; \
             offset += len; \
         } \
-        offset = 0; \
+        size_t data_off = 0; \
         for (size_t row_id = 0; row_id < sz; row_id++) { \
-            cur_batch->data[row_id] = const_cast<char*>(buffer.data) + offset; \
-            offset += cur_batch->length[row_id]; \
+            cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+            data_off += cur_batch->length[row_id]; \
+        } \
+    } else { \
+        RETURN_WRONG_TYPE \
+    }
+
+#define WRITE_DATETIMEV2_STRING_INTO_BATCH(FROM, TO) \
+    orc::StringVectorBatch* cur_batch = dynamic_cast<orc::StringVectorBatch*>(root->fields[i]); \
+    size_t begin_off = offset; \
+    if (null_map != nullptr) { \
+        cur_batch->hasNulls = true; \
+        auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data(); \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            if (null_data[row_id] != 0) { \
+                cur_batch->notNull[row_id] = 0; \
+            } else { \
+                cur_batch->notNull[row_id] = 1; \
+                int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; \
+                int len = \
+                        binary_cast<FROM, TO>( \
+                                assert_cast<const ColumnVector<FROM>&>(*col).get_data()[row_id]) \
+                                .to_buffer(const_cast<char*>(buffer.data) + offset, output_scale); \
+                while (buffer.size < offset + len) { \
+                    char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
+                    memcpy(new_ptr, buffer.data, buffer.size); \
+                    free(const_cast<char*>(buffer.data)); \
+                    buffer.data = new_ptr; \
+                    buffer.size = buffer.size + BUFFER_UNIT_SIZE; \
+                } \
+                cur_batch->length[row_id] = len; \
+                offset += len; \
+            } \
+        } \
+        size_t data_off = 0; \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            if (null_data[row_id] != 0) { \
+                cur_batch->notNull[row_id] = 0; \
+            } else { \
+                cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+                data_off += cur_batch->length[row_id]; \
+            } \
+        } \
+    } else if (const auto& not_null_column = \
+                       check_and_get_column<const ColumnVector<FROM>>(col)) { \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            int output_scale = _output_vexpr_ctxs[i]->root()->type().scale; \
+            int len = binary_cast<FROM, TO>(not_null_column->get_data()[row_id]) \
+                              .to_buffer(const_cast<char*>(buffer.data) + offset, output_scale); \
+            while (buffer.size < offset + len) { \
+                char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE); \
+                memcpy(new_ptr, buffer.data, buffer.size); \
+                free(const_cast<char*>(buffer.data)); \
+                buffer.data = new_ptr; \
+                buffer.size = buffer.size + BUFFER_UNIT_SIZE; \
+            } \
+            cur_batch->length[row_id] = len; \
+            offset += len; \
+        } \
+        size_t data_off = 0; \
+        for (size_t row_id = 0; row_id < sz; row_id++) { \
+            cur_batch->data[row_id] = const_cast<char*>(buffer.data) + begin_off + data_off; \
+            data_off += cur_batch->length[row_id]; \
         } \
     } else { \
         RETURN_WRONG_TYPE \
@@ -279,6 +400,7 @@ Status VOrcWriterWrapper::write(const Block& block) {
     // Buffer used by date type
     char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
     StringRef buffer(ptr, BUFFER_UNIT_SIZE);
+    size_t offset = 0;
     size_t sz = block.rows();
     auto row_batch = _create_row_batch(sz);
@@ -327,7 +449,9 @@ Status VOrcWriterWrapper::write(const Block& block) {
             break;
         }
         case TYPE_LARGEINT: {
-            return Status::InvalidArgument("do not support large int type.");
+            WRITE_LARGEINT_STRING_INTO_BATCH(orc::StringVectorBatch, ColumnVector<Int128>)
+            SET_NUM_ELEMENTS;
+            break;
         }
         case TYPE_FLOAT: {
             WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::DoubleVectorBatch, ColumnVector<Float32>)
@@ -352,68 +476,7 @@ Status VOrcWriterWrapper::write(const Block& block) {
             break;
         }
         case TYPE_DATETIMEV2: {
-            orc::StringVectorBatch* cur_batch =
-                    dynamic_cast<orc::StringVectorBatch*>(root->fields[i]);
-            size_t offset = 0;
-            if (null_map != nullptr) {
-                cur_batch->hasNulls = true;
-                auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
-                for (size_t row_id = 0; row_id < sz; row_id++) {
-                    if (null_data[row_id] != 0) {
-                        cur_batch->notNull[row_id] = 0;
-                    } else {
-                        cur_batch->notNull[row_id] = 1;
-                        int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
-                        int len = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(
-                                          assert_cast<const ColumnVector<UInt64>&>(*col)
-                                                  .get_data()[row_id])
-                                          .to_buffer(const_cast<char*>(buffer.data),
-                                                     output_scale);
-                        while (buffer.size < offset + len) {
-                            char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE);
-                            memcpy(new_ptr, buffer.data, buffer.size);
-                            free(const_cast<char*>(buffer.data));
-                            buffer.data = new_ptr;
-                            buffer.size = buffer.size + BUFFER_UNIT_SIZE;
-                        }
-                        cur_batch->length[row_id] = len;
-                        offset += len;
-                    }
-                }
-                offset = 0;
-                for (size_t row_id = 0; row_id < sz; row_id++) {
-                    if (null_data[row_id] != 0) {
-                        cur_batch->notNull[row_id] = 0;
-                    } else {
-                        cur_batch->data[row_id] = const_cast<char*>(buffer.data) + offset;
-                        offset += cur_batch->length[row_id];
-                    }
-                }
-            } else if (const auto& not_null_column =
-                               check_and_get_column<const ColumnVector<UInt64>>(col)) {
-                for (size_t row_id = 0; row_id < sz; row_id++) {
-                    int output_scale = _output_vexpr_ctxs[i]->root()->type().scale;
-                    int len = binary_cast<UInt64, DateV2Value<DateTimeV2ValueType>>(
-                                      not_null_column->get_data()[row_id])
-                                      .to_buffer(const_cast<char*>(buffer.data), output_scale);
-                    while (buffer.size < offset + len) {
-                        char* new_ptr = (char*)malloc(buffer.size + BUFFER_UNIT_SIZE);
-                        memcpy(new_ptr, buffer.data, buffer.size);
-                        free(const_cast<char*>(buffer.data));
-                        buffer.data = new_ptr;
-                        buffer.size = buffer.size + BUFFER_UNIT_SIZE;
-                    }
-                    cur_batch->length[row_id] = len;
-                    offset += len;
-                }
-                offset = 0;
-                for (size_t row_id = 0; row_id < sz; row_id++) {
-                    cur_batch->data[row_id] = const_cast<char*>(buffer.data) + offset;
-                    offset += cur_batch->length[row_id];
-                }
-            } else {
-                RETURN_WRONG_TYPE
-            }
+            WRITE_DATETIMEV2_STRING_INTO_BATCH(UInt64, DateV2Value<DateTimeV2ValueType>)
             SET_NUM_ELEMENTS
             break;
         }
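
The heart of the ORC fix: `offset` is now declared once per block and shared by
all columns, while each column remembers its own `begin_off` and rebuilds row
pointers from a local `data_off`, so a second string-typed column can no longer
overwrite the bytes of the first. A minimal sketch of that pattern outside the
macros (hypothetical helper, simplified to std::string storage):

    #include <string>
    #include <vector>

    // Append one column's values into a buffer shared by the whole row batch,
    // recording (start, len) per row relative to the column's own base offset.
    void append_column(std::string& shared_buf, const std::vector<std::string>& vals,
                       std::vector<size_t>& starts, std::vector<size_t>& lens) {
        const size_t begin_off = shared_buf.size(); // this column's base
        size_t data_off = 0;
        for (const auto& v : vals) {
            shared_buf.append(v);
            starts.push_back(begin_off + data_off);
            lens.push_back(v.size());
            data_off += v.size();
        }
    }

Pointers derived from those offsets stay valid only while the shared buffer is
not reallocated, which is why the macros recompute cur_batch->data in a second
pass after all of the column's bytes have been copied.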
diff --git a/be/src/vec/runtime/vparquet_writer.cpp b/be/src/vec/runtime/vparquet_writer.cpp
index ea25a1bc5c..c22f110519 100644
--- a/be/src/vec/runtime/vparquet_writer.cpp
+++ b/be/src/vec/runtime/vparquet_writer.cpp
@@ -24,14 +24,18 @@
 #include <parquet/platform.h>
 #include <parquet/schema.h>
 #include <parquet/type_fwd.h>
+#include <parquet/types.h>
 #include <time.h>
 
 #include <algorithm>
+#include <cstdint>
 #include <exception>
 #include <ostream>
 #include <string>
 
+#include "common/status.h"
 #include "io/fs/file_writer.h"
+#include "olap/olap_common.h"
 #include "runtime/decimalv2_value.h"
 #include "runtime/define_primitive_type.h"
 #include "runtime/types.h"
@@ -58,6 +62,10 @@
 
 namespace doris::vectorized {
 
+static const std::string epoch_date_str = "1970-01-01";
+static const int64_t timestamp_threshold = -2177481943;
+static const int64_t timestamp_diff = 343;
+
 ParquetOutputStream::ParquetOutputStream(doris::io::FileWriter* file_writer)
         : _file_writer(file_writer), _cur_pos(0), _written_len(0) {
     set_mode(arrow::io::FileMode::WRITE);
@@ -170,6 +178,25 @@ void ParquetBuildHelper::build_schema_data_type(parquet::Type::type& parquet_dat
     }
 }
 
+void ParquetBuildHelper::build_schema_data_logical_type(
+        std::shared_ptr<const parquet::LogicalType>& parquet_data_logical_type_ptr,
+        const TParquetDataLogicalType::type& column_data_logical_type) {
+    switch (column_data_logical_type) {
+    case TParquetDataLogicalType::DATE: {
+        parquet_data_logical_type_ptr = parquet::LogicalType::Date();
+        break;
+    }
+    case TParquetDataLogicalType::TIMESTAMP: {
+        parquet_data_logical_type_ptr =
+                parquet::LogicalType::Timestamp(true, parquet::LogicalType::TimeUnit::MILLIS, true);
+        break;
+    }
+    default: {
+        parquet_data_logical_type_ptr = parquet::LogicalType::None();
+    }
+    }
+}
+
 void ParquetBuildHelper::build_compression_type(
         parquet::WriterProperties::Builder& builder,
         const TParquetCompressionType::type& compression_type) {
@@ -234,41 +261,56 @@ VParquetWriterWrapper::VParquetWriterWrapper(doris::io::FileWriter* file_writer,
                                              const bool& parquet_disable_dictionary,
                                              const TParquetVersion::type& parquet_version,
                                              bool output_object_data)
-        : VFileWriterWrapper(output_vexpr_ctxs, output_object_data), _rg_writer(nullptr) {
+        : VFileWriterWrapper(output_vexpr_ctxs, output_object_data),
+          _rg_writer(nullptr),
+          _parquet_schemas(parquet_schemas),
+          _compression_type(compression_type),
+          _parquet_disable_dictionary(parquet_disable_dictionary),
+          _parquet_version(parquet_version) {
     _outstream = std::shared_ptr<ParquetOutputStream>(new ParquetOutputStream(file_writer));
-    parse_properties(compression_type, parquet_disable_dictionary, parquet_version);
-    parse_schema(parquet_schemas);
 }
 
-void VParquetWriterWrapper::parse_properties(const TParquetCompressionType::type& compression_type,
-                                             const bool& parquet_disable_dictionary,
-                                             const TParquetVersion::type& parquet_version) {
-    parquet::WriterProperties::Builder builder;
-    ParquetBuildHelper::build_compression_type(builder, compression_type);
-    ParquetBuildHelper::build_version(builder, parquet_version);
-    if (parquet_disable_dictionary) {
-        builder.disable_dictionary();
-    } else {
-        builder.enable_dictionary();
-    }
-    _properties = builder.build();
+Status VParquetWriterWrapper::parse_properties() {
+    try {
+        parquet::WriterProperties::Builder builder;
+        ParquetBuildHelper::build_compression_type(builder, _compression_type);
+        ParquetBuildHelper::build_version(builder, _parquet_version);
+        if (_parquet_disable_dictionary) {
+            builder.disable_dictionary();
+        } else {
+            builder.enable_dictionary();
+        }
+        _properties = builder.build();
+    } catch (const parquet::ParquetException& e) {
+        return Status::InternalError("parquet writer parse properties error: {}", e.what());
+    }
+    return Status::OK();
 }
 
-void VParquetWriterWrapper::parse_schema(const std::vector<TParquetSchema>& parquet_schemas) {
+Status VParquetWriterWrapper::parse_schema() {
     parquet::schema::NodeVector fields;
     parquet::Repetition::type parquet_repetition_type;
     parquet::Type::type parquet_data_type;
-    for (int idx = 0; idx < parquet_schemas.size(); ++idx) {
+    std::shared_ptr<const parquet::LogicalType> parquet_data_logical_type;
+    for (int idx = 0; idx < _parquet_schemas.size(); ++idx) {
         ParquetBuildHelper::build_schema_repetition_type(
-                parquet_repetition_type, parquet_schemas[idx].schema_repetition_type);
+                parquet_repetition_type, _parquet_schemas[idx].schema_repetition_type);
         ParquetBuildHelper::build_schema_data_type(parquet_data_type,
-                                                   parquet_schemas[idx].schema_data_type);
-        fields.push_back(parquet::schema::PrimitiveNode::Make(
-                parquet_schemas[idx].schema_column_name, parquet_repetition_type,
-                parquet::LogicalType::None(), parquet_data_type));
+                                                   _parquet_schemas[idx].schema_data_type);
+        ParquetBuildHelper::build_schema_data_logical_type(
+                parquet_data_logical_type, _parquet_schemas[idx].schema_data_logical_type);
+        try {
+            fields.push_back(parquet::schema::PrimitiveNode::Make(
+                    _parquet_schemas[idx].schema_column_name, parquet_repetition_type,
+                    parquet_data_logical_type, parquet_data_type));
+        } catch (const parquet::ParquetException& e) {
+            LOG(WARNING) << "parquet writer parse schema error: " << e.what();
+            return Status::InternalError("parquet writer parse schema error: {}", e.what());
+        }
         _schema = std::static_pointer_cast<parquet::schema::GroupNode>(
                 parquet::schema::GroupNode::Make("schema", parquet::Repetition::REQUIRED, fields));
     }
+    return Status::OK();
 }
 
 #define RETURN_WRONG_TYPE \
@@ -393,7 +435,40 @@ Status VParquetWriterWrapper::write(const Block& block) {
             break;
         }
         case TYPE_LARGEINT: {
-            return Status::InvalidArgument("do not support large int type.");
+            parquet::RowGroupWriter* rgWriter = get_rg_writer();
+            parquet::ByteArrayWriter* col_writer =
+                    static_cast<parquet::ByteArrayWriter*>(rgWriter->column(i));
+            parquet::ByteArray value;
+            if (null_map != nullptr) {
+                auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
+                for (size_t row_id = 0; row_id < sz; row_id++) {
+                    if (null_data[row_id] != 0) {
+                        single_def_level = 0;
+                        col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
+                        single_def_level = 1;
+                    } else {
+                        const int128_t tmp = assert_cast<const ColumnVector<Int128>&>(*col)
+                                                     .get_data()[row_id];
+                        std::string value_str = fmt::format("{}", tmp);
+                        value.ptr = reinterpret_cast<const uint8_t*>(value_str.data());
+                        value.len = value_str.length();
+                        col_writer->WriteBatch(1, &single_def_level, nullptr, &value);
+                    }
+                }
+            } else if (const auto* not_nullable_column =
+                               check_and_get_column<const ColumnVector<Int128>>(col)) {
+                for (size_t row_id = 0; row_id < sz; row_id++) {
+                    const int128_t tmp = not_nullable_column->get_data()[row_id];
+                    std::string value_str = fmt::format("{}", tmp);
+                    value.ptr = reinterpret_cast<const uint8_t*>(value_str.data());
+                    value.len = value_str.length();
+                    col_writer->WriteBatch(1, nullable ? &single_def_level : nullptr, nullptr,
+                                           &value);
+                }
+            } else {
+                RETURN_WRONG_TYPE
+            }
+            break;
         }
         case TYPE_FLOAT: {
             DISPATCH_PARQUET_NUMERIC_WRITER(FloatWriter, ColumnVector<Float32>, float_t)
@@ -458,8 +533,7 @@ Status VParquetWriterWrapper::write(const Block& block) {
             DISPATCH_PARQUET_NUMERIC_WRITER(Int32Writer, ColumnVector<Int32>, Int32)
             break;
         }
-        case TYPE_DATETIME:
-        case TYPE_DATE: {
+        case TYPE_DATETIME: {
             parquet::RowGroupWriter* rgWriter = get_rg_writer();
             parquet::Int64Writer* col_writer =
                     static_cast<parquet::Int64Writer*>(rgWriter->column(i));
@@ -469,26 +543,101 @@ Status VParquetWriterWrapper::write(const Block& block) {
                 for (size_t row_id = 0; row_id < sz; row_id++) {
                     def_level[row_id] = null_data[row_id] == 0;
                 }
-                uint64_t tmp_data[sz];
+                int64_t tmp_data[sz];
                 for (size_t row_id = 0; row_id < sz; row_id++) {
                     if (null_data[row_id] != 0) {
                         tmp_data[row_id] = default_int64;
                     } else {
-                        tmp_data[row_id] = binary_cast<Int64, VecDateTimeValue>(
-                                                   assert_cast<const ColumnVector<Int64>&>(*col)
-                                                           .get_data()[row_id])
-                                                   .to_olap_datetime();
+                        VecDateTimeValue datetime_value = binary_cast<Int64, VecDateTimeValue>(
+                                assert_cast<const ColumnVector<Int64>&>(*col)
+                                        .get_data()[row_id]);
+                        if (!datetime_value.unix_timestamp(&tmp_data[row_id],
+                                                           TimezoneUtils::default_time_zone)) {
+                            return Status::InternalError("get unix timestamp error.");
+                        }
+                        // -2177481943 represent '1900-12-31 23:54:17'
+                        // but -2177481944 represent '1900-12-31 23:59:59'
+                        // so for timestamp <= -2177481944, we subtract 343 (5min 43s)
+                        if (tmp_data[row_id] < timestamp_threshold) {
+                            tmp_data[row_id] -= timestamp_diff;
+                        }
+                        // convert seconds to MILLIS seconds
+                        tmp_data[row_id] *= 1000;
                     }
                 }
                 col_writer->WriteBatch(sz, def_level.data(), nullptr,
                                        reinterpret_cast<const int64_t*>(tmp_data));
             } else if (const auto* not_nullable_column =
                                check_and_get_column<const ColumnVector<Int64>>(col)) {
-                std::vector<uint64_t> res(sz);
+                std::vector<int64_t> res(sz);
+                for (size_t row_id = 0; row_id < sz; row_id++) {
+                    VecDateTimeValue datetime_value = binary_cast<Int64, VecDateTimeValue>(
+                            not_nullable_column->get_data()[row_id]);
+
+                    if (!datetime_value.unix_timestamp(&res[row_id],
+                                                       TimezoneUtils::default_time_zone)) {
+                        return Status::InternalError("get unix timestamp error.");
+                    };
+                    // -2177481943 represent '1900-12-31 23:54:17'
+                    // but -2177481944 represent '1900-12-31 23:59:59'
+                    // so for timestamp <= -2177481944, we subtract 343 (5min 43s)
+                    if (res[row_id] < timestamp_threshold) {
+                        res[row_id] -= timestamp_diff;
+                    }
+                    // convert seconds to MILLIS seconds
+                    res[row_id] *= 1000;
+                }
+                col_writer->WriteBatch(sz, nullable ? def_level.data() : nullptr, nullptr,
+                                       reinterpret_cast<const int64_t*>(res.data()));
+            } else {
+                RETURN_WRONG_TYPE
+            }
+            break;
+        }
+        case TYPE_DATE: {
+            parquet::RowGroupWriter* rgWriter = get_rg_writer();
+            parquet::Int64Writer* col_writer =
+                    static_cast<parquet::Int64Writer*>(rgWriter->column(i));
+            uint64_t default_int64 = 0;
+            if (null_map != nullptr) {
+                auto& null_data = assert_cast<const ColumnUInt8&>(*null_map).get_data();
                 for (size_t row_id = 0; row_id < sz; row_id++) {
-                    res[row_id] = binary_cast<Int64, VecDateTimeValue>(
-                                          not_nullable_column->get_data()[row_id])
-                                          .to_olap_datetime();
+                    def_level[row_id] = null_data[row_id] == 0;
+                }
+                VecDateTimeValue epoch_date;
+                if (!epoch_date.from_date_str(epoch_date_str.c_str(),
+                                              epoch_date_str.length())) {
+                    return Status::InternalError("create epoch date from string error");
+                }
+                int32_t days_from_epoch = epoch_date.daynr();
+                int32_t tmp_data[sz];
+                for (size_t row_id = 0; row_id < sz; row_id++) {
+                    if (null_data[row_id] != 0) {
+                        tmp_data[row_id] = default_int64;
+                    } else {
+                        int32_t days = binary_cast<Int64, VecDateTimeValue>(
+                                               assert_cast<const ColumnVector<Int64>&>(*col)
+                                                       .get_data()[row_id])
+                                               .daynr();
+                        tmp_data[row_id] = days - days_from_epoch;
+                    }
+                }
+                col_writer->WriteBatch(sz, def_level.data(), nullptr,
+                                       reinterpret_cast<const int64_t*>(tmp_data));
+            } else if (check_and_get_column<const ColumnVector<Int64>>(col)) {
+                VecDateTimeValue epoch_date;
+                if (!epoch_date.from_date_str(epoch_date_str.c_str(),
+                                              epoch_date_str.length())) {
+                    return Status::InternalError("create epoch date from string error");
+                }
+                int32_t days_from_epoch = epoch_date.daynr();
+                std::vector<int32_t> res(sz);
+                for (size_t row_id = 0; row_id < sz; row_id++) {
+                    int32_t days = binary_cast<Int64, VecDateTimeValue>(
+                                           assert_cast<const ColumnVector<Int64>&>(*col)
+                                                   .get_data()[row_id])
+                                           .daynr();
+                    res[row_id] = days - days_from_epoch;
                 }
                 col_writer->WriteBatch(sz, nullable ? def_level.data() : nullptr, nullptr,
                                        reinterpret_cast<const int64_t*>(res.data()));
@@ -669,7 +818,14 @@ Status VParquetWriterWrapper::write(const Block& block) {
 }
 
 Status VParquetWriterWrapper::prepare() {
-    _writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties);
+    RETURN_IF_ERROR(parse_properties());
+    RETURN_IF_ERROR(parse_schema());
+    try {
+        _writer = parquet::ParquetFileWriter::Open(_outstream, _schema, _properties);
+    } catch (const parquet::ParquetStatusException& e) {
+        LOG(WARNING) << "parquet file writer open error: " << e.what();
+        return Status::InternalError("parquet file writer open error: {}", e.what());
+    }
     if (_writer == nullptr) {
         return Status::InternalError("Failed to create file writer");
     }
@@ -698,7 +854,9 @@ void VParquetWriterWrapper::close() {
         _rg_writer->Close();
         _rg_writer = nullptr;
     }
-    _writer->Close();
+    if (_writer != nullptr) {
+        _writer->Close();
+    }
     arrow::Status st = _outstream->Close();
     if (!st.ok()) {
         LOG(WARNING) << "close parquet file error: " << st.ToString();
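
For LARGEINT, both writers fall back to a decimal-string encoding, since
neither parquet's ByteArray nor ORC's StringVectorBatch has a native 128-bit
integer type. A standalone sketch of that conversion (hand-rolled here, where
the patch uses fmt::format on Int128):

    #include <algorithm>
    #include <string>

    std::string int128_to_string(__int128 v) {
        if (v == 0) return "0";
        const bool neg = v < 0;
        // Work on an unsigned copy so the minimum value cannot overflow on negation.
        unsigned __int128 u = neg ? -static_cast<unsigned __int128>(v) : v;
        std::string out;
        while (u != 0) {
            out.push_back('0' + static_cast<char>(u % 10));
            u /= 10;
        }
        if (neg) out.push_back('-');
        std::reverse(out.begin(), out.end());
        return out;
    }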
diff --git a/be/src/vec/runtime/vparquet_writer.h b/be/src/vec/runtime/vparquet_writer.h
index 419b4948ef..7d28f35cfb 100644
--- a/be/src/vec/runtime/vparquet_writer.h
+++ b/be/src/vec/runtime/vparquet_writer.h
@@ -86,6 +86,9 @@ public:
     static void build_version(parquet::WriterProperties::Builder& builder,
                               const TParquetVersion::type& parquet_version);
 
+    static void build_schema_data_logical_type(
+            std::shared_ptr<const parquet::LogicalType>& parquet_data_logical_type_ptr,
+            const TParquetDataLogicalType::type& column_data_logical_type);
 };
 
 class VFileWriterWrapper {
@@ -134,11 +137,9 @@ public:
 private:
     parquet::RowGroupWriter* get_rg_writer();
 
-    void parse_schema(const std::vector<TParquetSchema>& parquet_schemas);
+    Status parse_schema();
 
-    void parse_properties(const TParquetCompressionType::type& compression_type,
-                          const bool& parquet_disable_dictionary,
-                          const TParquetVersion::type& parquet_version);
+    Status parse_properties();
 
 private:
     std::shared_ptr<ParquetOutputStream> _outstream;
@@ -147,6 +148,11 @@ private:
     std::unique_ptr<parquet::ParquetFileWriter> _writer;
     parquet::RowGroupWriter* _rg_writer;
     const int64_t _max_row_per_group = 10;
+
+    const std::vector<TParquetSchema>& _parquet_schemas;
+    const TParquetCompressionType::type& _compression_type;
+    const bool& _parquet_disable_dictionary;
+    const TParquetVersion::type& _parquet_version;
 };
 
 } // namespace doris::vectorized
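
Note that the writer now holds its options as reference members bound in the
constructor, so the TParquetSchema vector and the other sink options must
outlive the VParquetWriterWrapper. A minimal sketch of the lifetime contract
this design relies on (illustrative types, not Doris code):

    #include <vector>

    struct Options {
        std::vector<int> schemas;
    };

    class Writer {
    public:
        explicit Writer(const Options& opts) : _opts(opts) {} // binds a reference
    private:
        const Options& _opts; // caller must keep opts alive for Writer's lifetime
    };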
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
index 6a17626931..c3a98b7b7f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java
@@ -35,6 +35,7 @@ import org.apache.doris.datasource.property.constants.S3Properties;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.thrift.TFileFormatType;
 import org.apache.doris.thrift.TParquetCompressionType;
+import org.apache.doris.thrift.TParquetDataLogicalType;
 import org.apache.doris.thrift.TParquetDataType;
 import org.apache.doris.thrift.TParquetRepetitionType;
 import org.apache.doris.thrift.TParquetSchema;
@@ -65,6 +66,7 @@ public class OutFileClause {
     public static final List<Type> RESULT_COL_TYPES = Lists.newArrayList();
     public static final Map<String, TParquetRepetitionType> PARQUET_REPETITION_TYPE_MAP = Maps.newHashMap();
     public static final Map<String, TParquetDataType> PARQUET_DATA_TYPE_MAP = Maps.newHashMap();
+    public static final Map<String, TParquetDataLogicalType> PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP = Maps.newHashMap();
     public static final Map<String, TParquetCompressionType> PARQUET_COMPRESSION_TYPE_MAP = Maps.newHashMap();
     public static final Map<String, TParquetVersion> PARQUET_VERSION_MAP = Maps.newHashMap();
     public static final Set<String> ORC_DATA_TYPE = Sets.newHashSet();
@@ -97,6 +99,11 @@ public class OutFileClause {
         PARQUET_DATA_TYPE_MAP.put("double", TParquetDataType.DOUBLE);
         PARQUET_DATA_TYPE_MAP.put("fixed_len_byte_array", TParquetDataType.FIXED_LEN_BYTE_ARRAY);
 
+        PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("date", TParquetDataLogicalType.DATE);
+        PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("datetime", TParquetDataLogicalType.TIMESTAMP);
+        // TODO(ftw): add other logical type
+        PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.put("none", TParquetDataLogicalType.NONE);
+
         PARQUET_COMPRESSION_TYPE_MAP.put("snappy", TParquetCompressionType.SNAPPY);
         PARQUET_COMPRESSION_TYPE_MAP.put("gzip", TParquetCompressionType.GZIP);
         PARQUET_COMPRESSION_TYPE_MAP.put("brotli", TParquetCompressionType.BROTLI);
@@ -283,6 +290,7 @@ public class OutFileClause {
                 }
                 type = "string";
                 break;
+            case LARGEINT:
             case DATE:
             case DATETIME:
             case DATETIMEV2:
@@ -354,6 +362,7 @@ public class OutFileClause {
                             + " but the type of column " + i + " is " + schema.second);
                 }
                 break;
+            case LARGEINT:
             case DATE:
             case DATETIME:
             case DATETIMEV2:
@@ -420,13 +429,13 @@ public class OutFileClause {
             case TINYINT:
             case SMALLINT:
             case INT:
+            case DATE:
                 if (!PARQUET_DATA_TYPE_MAP.get("int32").equals(type)) {
                     throw new AnalysisException("project field type is TINYINT/SMALLINT/INT,"
                             + "should use int32, " + "but the definition type of column " + i + " is " + type);
                 }
                 break;
             case BIGINT:
-            case DATE:
             case DATETIME:
                 if (!PARQUET_DATA_TYPE_MAP.get("int64").equals(type)) {
                     throw new AnalysisException("project field type is BIGINT/DATE/DATETIME,"
@@ -454,9 +463,10 @@ public class OutFileClause {
             case DECIMALV2:
             case DATETIMEV2:
             case DATEV2:
+            case LARGEINT:
                 if (!PARQUET_DATA_TYPE_MAP.get("byte_array").equals(type)) {
                     throw new AnalysisException("project field type is CHAR/VARCHAR/STRING/DECIMAL/DATEV2"
-                            + "/DATETIMEV2, should use byte_array, but the definition type of column "
+                            + "/DATETIMEV2/LARGEINT, should use byte_array, but the definition type of column "
                             + i + " is " + type);
                 }
                 break;
@@ -497,10 +507,10 @@ public class OutFileClause {
             case TINYINT:
             case SMALLINT:
             case INT:
+            case DATE:
                 parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int32");
                 break;
             case BIGINT:
-            case DATE:
             case DATETIME:
                 parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("int64");
                 break;
@@ -519,6 +529,7 @@ public class OutFileClause {
             case DECIMAL128:
             case DATETIMEV2:
             case DATEV2:
+            case LARGEINT:
                 parquetSchema.schema_data_type = PARQUET_DATA_TYPE_MAP.get("byte_array");
                 break;
             case HLL:
@@ -532,6 +543,18 @@ public class OutFileClause {
                     throw new AnalysisException("currently parquet do not support column type: "
                             + expr.getType().getPrimitiveType());
             }
+
+            switch (expr.getType().getPrimitiveType()) {
+                case DATE:
+                    parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("date");
+                    break;
+                case DATETIME:
+                    parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("datetime");
+                    break;
+                default:
+                    parquetSchema.schema_data_logical_type = PARQUET_DATA_LOGICAL_TYPE_TYPE_MAP.get("none");
+            }
+
             parquetSchema.schema_column_name = colLabels.get(i);
             parquetSchemas.add(parquetSchema);
         }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
index b8e7bae590..692eea7657 100755
--- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java
@@ -727,7 +727,7 @@ public class SelectStmtTest {
             SelectStmt stmt = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql, ctx); // CHECKSTYLE IGNORE THIS LINE
         } catch (Exception e) {
             e.printStackTrace();
-            Assert.assertTrue(e.getMessage().contains("Parquet format does not support column type: LARGEINT"));
+            Assert.assertTrue(e.getMessage().contains("should use byte_array"));
         }
 
         // do not support large int type, contains function
@@ -740,7 +740,7 @@ public class SelectStmtTest {
                     + "\"schema\"=\"required,int32,siteid;\");";
             SelectStmt stmt = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql, ctx); // CHECKSTYLE IGNORE THIS LINE
         } catch (Exception e) {
-            Assert.assertTrue(e.getMessage().contains("Parquet format does not support column type: LARGEINT"));
+            Assert.assertTrue(e.getMessage().contains("should use byte_array"));
         }
 
         // support cast
diff --git a/gensrc/thrift/DataSinks.thrift b/gensrc/thrift/DataSinks.thrift
index 9659b5227c..f2a191a777 100644
--- a/gensrc/thrift/DataSinks.thrift
+++ b/gensrc/thrift/DataSinks.thrift
@@ -70,6 +70,25 @@ enum TParquetDataType {
     FIXED_LEN_BYTE_ARRAY,
 }
 
+enum TParquetDataLogicalType {
+    UNDEFINED = 0, // Not a real logical type
+    STRING = 1,
+    MAP,
+    LIST,
+    ENUM,
+    DECIMAL,
+    DATE,
+    TIME,
+    TIMESTAMP,
+    INTERVAL,
+    INT,
+    NIL, // Thrift NullType: annotates data that is always null
+    JSON,
+    BSON,
+    UUID,
+    NONE // Not a real logical type; should always be last element
+}
+
 enum TParquetRepetitionType {
     REQUIRED,
     REPEATED,
@@ -80,6 +99,7 @@ struct TParquetSchema {
     1: optional TParquetRepetitionType schema_repetition_type
     2: optional TParquetDataType schema_data_type
     3: optional string schema_column_name
+    4: optional TParquetDataLogicalType schema_data_logical_type
 }
 
 struct TResultFileSinkOptions {
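
A hedged sketch of how the new field is consumed on the BE side (assuming the
generated C++ thrift bindings; the gen_cpp include path and __set_* setters
follow the thrift compiler's conventions):

    #include <string>

    #include <gen_cpp/DataSinks_types.h>

    // FE builds one TParquetSchema per output column; a DATE column now carries
    // the logical type alongside its int32 physical type.
    doris::TParquetSchema make_date_column_schema(const std::string& name) {
        doris::TParquetSchema schema;
        schema.__set_schema_repetition_type(doris::TParquetRepetitionType::OPTIONAL);
        schema.__set_schema_data_type(doris::TParquetDataType::INT32);
        schema.__set_schema_data_logical_type(doris::TParquetDataLogicalType::DATE);
        schema.__set_schema_column_name(name);
        return schema;
    }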
diff --git a/regression-test/data/export_p0/test_export_data_types.out b/regression-test/data/export_p0/test_export_data_types.out
index e66d811fbc..37c038818a 100644
--- a/regression-test/data/export_p0/test_export_data_types.out
+++ b/regression-test/data/export_p0/test_export_data_types.out
@@ -1,36 +1,37 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
 
 -- !select_export1 --
 1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	2	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	3	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 4	0000-01-01	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
 
 -- !select_load1 --
 1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	2	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	3	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 4	0000-01-01	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
 
 -- !select_load2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+4	0000-01-02	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
 
 -- !select_load3 --
-1	2023-04-20	Beijing	Haidian	1	1	true	1	1	1.1	1.1	char1	1
-2	9999-12-31		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	1.4E-45	4.9E-324	char2	100000000
-3	2023-04-21	Beijing		32767	127	true	2147483647	9223372036854775807	3.4028235e+38	1.7976931348623157E308	char3	999999999
-4	0000-01-01	Beijing	Haidian	4	4	true	4	4	4.4	4.4	char4	4
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+4	0000-01-01	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
 
 -- !select_load4 --
 1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	2	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	3	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 4	0000-01-01	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
 
 -- !select_load5 --
 1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	2	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	3	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 4	0000-01-01	0000-01-01	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	4	4	true	4	4	4	4.4	4.4	char4	4	4	4	0.4	4.00000000	4.0000000000	4	4.0000000000000000000000000000000000000	0.40000000000000000000000000000000000000
diff --git a/regression-test/suites/export_p0/test_export_basic.groovy b/regression-test/suites/export_p0/test_export_basic.groovy
index b55bea8a95..07c3168c5d 100644
--- a/regression-test/suites/export_p0/test_export_basic.groovy
+++ b/regression-test/suites/export_p0/test_export_basic.groovy
@@ -84,6 +84,8 @@ suite("test_export_basic", "p0") {
     sql """ INSERT INTO ${table_export_name} VALUES
         ${sb.toString()}
         """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
 
     qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """
diff --git a/regression-test/suites/export_p0/test_export_csv.groovy b/regression-test/suites/export_p0/test_export_csv.groovy
index 7286d1efe6..a8f75f21a4 100644
--- a/regression-test/suites/export_p0/test_export_csv.groovy
+++ b/regression-test/suites/export_p0/test_export_csv.groovy
@@ -88,6 +88,8 @@ suite("test_export_csv", "p0") {
     sql """ INSERT INTO ${table_export_name} VALUES
         ${sb.toString()}
         """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
 
     qt_select_export1 """ SELECT * FROM ${table_export_name} t ORDER BY user_id; """
diff --git a/regression-test/suites/export_p0/test_export_data_types.groovy b/regression-test/suites/export_p0/test_export_data_types.groovy
index 21cb93e733..f23d16c3ec 100644
--- a/regression-test/suites/export_p0/test_export_data_types.groovy
+++ b/regression-test/suites/export_p0/test_export_data_types.groovy
@@ -54,40 +54,43 @@ suite("test_export_data_types", "p0") {
     def table_load_name = "test_load_data_types"
     def outfile_path_prefix = """/tmp/test_export"""
 
-    // create table
-    sql """ DROP TABLE IF EXISTS ${table_export_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_export_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
-        `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `largeint_col` largeint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT "",
-        `decimalv3_col` decimalv3 COMMENT "",
-        `decimalv3_col2` decimalv3(1,0) COMMENT "",
-        `decimalv3_col3` decimalv3(1,1) COMMENT "",
-        `decimalv3_col4` decimalv3(9,8) COMMENT "",
-        `decimalv3_col5` decimalv3(20,10) COMMENT "",
-        `decimalv3_col6` decimalv3(38,0) COMMENT "",
-        `decimalv3_col7` decimalv3(38,37) COMMENT "",
-        `decimalv3_col8` decimalv3(38,38) COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    def create_table = {table_name ->
+        sql """ DROP TABLE IF EXISTS ${table_name} """
+        sql """
+        CREATE TABLE IF NOT EXISTS ${table_name} (
+            `user_id` INT NOT NULL COMMENT "用户id",
+            `date` DATE NOT NULL COMMENT "数据灌入日期时间",
+            `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
+            `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
+            `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
+            `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
+            `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
+            `city` VARCHAR(20) COMMENT "用户所在城市",
+            `street` STRING COMMENT "用户所在街道",
+            `age` SMALLINT COMMENT "用户年龄",
+            `sex` TINYINT COMMENT "用户性别",
+            `bool_col` boolean COMMENT "",
+            `int_col` int COMMENT "",
+            `bigint_col` bigint COMMENT "",
+            `largeint_col` largeint COMMENT "",
+            `float_col` float COMMENT "",
+            `double_col` double COMMENT "",
+            `char_col` CHAR(10) COMMENT "",
+            `decimal_col` decimal COMMENT "",
+            `decimalv3_col` decimalv3 COMMENT "",
+            `decimalv3_col2` decimalv3(1,0) COMMENT "",
+            `decimalv3_col3` decimalv3(1,1) COMMENT "",
+            `decimalv3_col4` decimalv3(9,8) COMMENT "",
+            `decimalv3_col5` decimalv3(20,10) COMMENT "",
+            `decimalv3_col6` decimalv3(38,0) COMMENT "",
+            `decimalv3_col7` decimalv3(38,37) COMMENT "",
+            `decimalv3_col8` decimalv3(38,38) COMMENT ""
+        )
+        DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
+        """
+    }
+
+    create_table(table_export_name);
 
     StringBuilder sb = new StringBuilder()
     int i = 1
@@ -101,14 +104,14 @@ suite("test_export_data_types", "p0") {
     sb.append("""
         (${++i}, '9999-12-31', '9999-12-31', '9999-12-31 23:59:59', '9999-12-31 23:59:59', '2023-04-20 00:00:00.12', '2023-04-20 00:00:00.3344', '', 'Haidian',
-        ${Short.MIN_VALUE}, ${Byte.MIN_VALUE}, true, ${Integer.MIN_VALUE}, ${Long.MIN_VALUE}, ${i}, ${Float.MIN_VALUE}, ${Double.MIN_VALUE}, 'char${i}',
+        ${Short.MIN_VALUE}, ${Byte.MIN_VALUE}, true, ${Integer.MIN_VALUE}, ${Long.MIN_VALUE}, -170141183460469231731687303715884105728, ${Float.MIN_VALUE}, ${Double.MIN_VALUE}, 'char${i}',
         100000000, 100000000, 4, 0.1, 0.99999999, 9999999999.9999999999, 99999999999999999999999999999999999999,
         9.9999999999999999999999999999999999999, 0.99999999999999999999999999999999999999),
     """)
     sb.append("""
         (${++i}, '2023-04-21', '2023-04-21', '2023-04-20 12:34:56', '2023-04-20 00:00:00', '2023-04-20 00:00:00.123', '2023-04-20 00:00:00.123456', 'Beijing', '',
-        ${Short.MAX_VALUE}, ${Byte.MAX_VALUE}, true, ${Integer.MAX_VALUE}, ${Long.MAX_VALUE}, ${i}, ${Float.MAX_VALUE}, ${Double.MAX_VALUE}, 'char${i}',
+        ${Short.MAX_VALUE}, ${Byte.MAX_VALUE}, true, ${Integer.MAX_VALUE}, ${Long.MAX_VALUE}, 170141183460469231731687303715884105727, ${Float.MAX_VALUE}, ${Double.MAX_VALUE}, 'char${i}',
         999999999, 999999999, 9, 0.9, 9.99999999, 1234567890.0123456789, 12345678901234567890123456789012345678,
         1.2345678901234567890123456789012345678, 0.12345678901234567890123456789012345678),
     """)
@@ -124,6 +127,8 @@ suite("test_export_data_types", "p0") {
         ${sb.toString()}
         """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
 
     qt_select_export1 """ SELECT * FROM ${table_export_name} t ORDER BY user_id; """
 
     def check_path_exists = { dir_path ->
@@ -187,40 +192,7 @@ suite("test_export_data_types", "p0") {
     // check file amounts
     check_file_amounts.call("${outFilePath}", 1)
 
-    // check data correctness
-    sql """ DROP TABLE IF EXISTS ${table_load_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_load_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
-        `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `largeint_col` largeint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT "",
-        `decimalv3_col` decimalv3 COMMENT "",
-        `decimalv3_col2` decimalv3(1,0) COMMENT "",
-        `decimalv3_col3` decimalv3(1,1) COMMENT "",
-        `decimalv3_col4` decimalv3(9,8) COMMENT "",
-        `decimalv3_col5` decimalv3(20,10) COMMENT "",
-        `decimalv3_col6` decimalv3(38,0) COMMENT "",
-        `decimalv3_col7` decimalv3(38,37) COMMENT "",
-        `decimalv3_col8` decimalv3(38,38) COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    create_table(table_load_name);
 
     File[] files = new File("${outFilePath}").listFiles()
     String file_path = files[0].getAbsolutePath()
@@ -264,11 +236,10 @@ suite("test_export_data_types", "p0") {
     // exec export
     sql """
-        EXPORT TABLE ${table_export_name} where user_id<4 TO "file://${outFilePath}/"
+        EXPORT TABLE ${table_export_name} TO "file://${outFilePath}/"
         PROPERTIES(
             "label" = "${label}",
-            "format" = "parquet",
-            "columns" = "user_id, date, datev2, datetime, datetimev2_1, datetimev2_2, datetimev2_3, city, street, age, sex, bool_col, int_col, bigint_col, float_col, double_col, char_col, decimal_col, decimalv3_col, decimalv3_col2, decimalv3_col3, decimalv3_col4, decimalv3_col5, decimalv3_col6, decimalv3_col7, decimalv3_col8"
+            "format" = "parquet"
         );
     """
     waiting_export.call(label)
@@ -276,39 +247,7 @@ suite("test_export_data_types", "p0") {
     // check file amounts
     check_file_amounts.call("${outFilePath}", 1)
 
-    // check data correctness
-    sql """ DROP TABLE IF EXISTS ${table_load_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_load_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
-        `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT "",
-        `decimalv3_col` decimalv3 COMMENT "",
-        `decimalv3_col2` decimalv3(1,0) COMMENT "",
-        `decimalv3_col3` decimalv3(1,1) COMMENT "",
-        `decimalv3_col4` decimalv3(9,8) COMMENT "",
-        `decimalv3_col5` decimalv3(20,10) COMMENT "",
-        `decimalv3_col6` decimalv3(38,0) COMMENT "",
-        `decimalv3_col7` decimalv3(38,37) COMMENT "",
-        `decimalv3_col8` decimalv3(38,38) COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    create_table(table_load_name);
 
     File[] files = new File("${outFilePath}").listFiles()
     String file_path = files[0].getAbsolutePath()
@@ -328,7 +267,7 @@ suite("test_export_data_types", "p0") {
             log.info("Stream load result: ${result}".toString())
             def json = parseJson(result)
             assertEquals("success", json.Status.toLowerCase())
-            assertEquals(3, json.NumberTotalRows)
+            assertEquals(4, json.NumberTotalRows)
             assertEquals(0, json.NumberFilteredRows)
         }
     }
@@ -353,8 +292,7 @@ suite("test_export_data_types", "p0") {
         EXPORT TABLE ${table_export_name} TO "file://${outFilePath}/"
         PROPERTIES(
             "label" = "${label}",
-            "format" = "orc",
-            "columns" = "user_id, date, city, street, age, sex, bool_col, int_col, bigint_col, float_col, double_col, char_col, decimal_col"
+            "format" = "orc"
         );
     """
     waiting_export.call(label)
@@ -362,26 +300,7 @@ suite("test_export_data_types", "p0") {
     // check file amounts
     check_file_amounts.call("${outFilePath}", 1)
 
-    // check data correctness
-    sql """ DROP TABLE IF EXISTS ${table_load_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_load_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    create_table(table_load_name);
 
     File[] files = new File("${outFilePath}").listFiles()
     String file_path = files[0].getAbsolutePath()
@@ -435,40 +354,7 @@ suite("test_export_data_types", "p0") {
     // check file amounts
     check_file_amounts.call("${outFilePath}", 1)
 
-    // check data correctness
-    sql """ DROP TABLE IF EXISTS ${table_load_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_load_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
-        `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `largeint_col` largeint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT "",
-        `decimalv3_col` decimalv3 COMMENT "",
-        `decimalv3_col2` decimalv3(1,0) COMMENT "",
-        `decimalv3_col3` decimalv3(1,1) COMMENT "",
-        `decimalv3_col4` decimalv3(9,8) COMMENT "",
-        `decimalv3_col5` decimalv3(20,10) COMMENT "",
-        `decimalv3_col6` decimalv3(38,0) COMMENT "",
-        `decimalv3_col7` decimalv3(38,37) COMMENT "",
-        `decimalv3_col8` decimalv3(38,38) COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    create_table(table_load_name);
 
     File[] files = new File("${outFilePath}").listFiles()
     String file_path = files[0].getAbsolutePath()
@@ -524,40 +410,7 @@ suite("test_export_data_types", "p0") {
     // check file amounts
     check_file_amounts.call("${outFilePath}", 1)
 
-    // check data correctness
-    sql """ DROP TABLE IF EXISTS ${table_load_name} """
-    sql """
-    CREATE TABLE IF NOT EXISTS ${table_load_name} (
-        `user_id` INT NOT NULL COMMENT "用户id",
-        `date` DATE NOT NULL COMMENT "数据灌入日期时间",
-        `datev2` DATEV2 NOT NULL COMMENT "数据灌入日期时间2",
-        `datetime` DATETIME NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_1` DATETIMEV2 NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_2` DATETIMEV2(3) NOT NULL COMMENT "数据灌入日期时间",
-        `datetimev2_3` DATETIMEV2(6) NOT NULL COMMENT "数据灌入日期时间",
-        `city` VARCHAR(20) COMMENT "用户所在城市",
-        `street` STRING COMMENT "用户所在街道",
-        `age` SMALLINT COMMENT "用户年龄",
-        `sex` TINYINT COMMENT "用户性别",
-        `bool_col` boolean COMMENT "",
-        `int_col` int COMMENT "",
-        `bigint_col` bigint COMMENT "",
-        `largeint_col` largeint COMMENT "",
-        `float_col` float COMMENT "",
-        `double_col` double COMMENT "",
-        `char_col` CHAR(10) COMMENT "",
-        `decimal_col` decimal COMMENT "",
-        `decimalv3_col` decimalv3 COMMENT "",
-        `decimalv3_col2` decimalv3(1,0) COMMENT "",
-        `decimalv3_col3` decimalv3(1,1) COMMENT "",
-        `decimalv3_col4` decimalv3(9,8) COMMENT "",
-        `decimalv3_col5` decimalv3(20,10) COMMENT "",
-        `decimalv3_col6` decimalv3(38,0) COMMENT "",
-        `decimalv3_col7` decimalv3(38,37) COMMENT "",
-        `decimalv3_col8` decimalv3(38,38) COMMENT ""
-    )
-    DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1");
-    """
+    create_table(table_load_name);
 
     File[] files = new File("${outFilePath}").listFiles()
     String file_path = files[0].getAbsolutePath()
diff --git a/regression-test/suites/export_p0/test_export_orc.groovy b/regression-test/suites/export_p0/test_export_orc.groovy
index 1321988c9e..7892e97e62 100644
--- a/regression-test/suites/export_p0/test_export_orc.groovy
+++ b/regression-test/suites/export_p0/test_export_orc.groovy
@@ -88,6 +88,8 @@ suite("test_export_orc", "p0") {
     sql """ INSERT INTO ${table_export_name} VALUES
         ${sb.toString()}
         """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
 
     qt_select_export1 """ SELECT * FROM ${table_export_name} t ORDER BY user_id; """
diff --git a/regression-test/suites/export_p0/test_export_parquet.groovy b/regression-test/suites/export_p0/test_export_parquet.groovy
index 3f98d32530..7c4bc78715 100644
--- a/regression-test/suites/export_p0/test_export_parquet.groovy
+++ b/regression-test/suites/export_p0/test_export_parquet.groovy
@@ -88,6 +88,8 @@ suite("test_export_parquet", "p0") {
     sql """ INSERT INTO ${table_export_name} VALUES
         ${sb.toString()}
         """
+    def insert_res = sql "show last insert;"
+    logger.info("insert result: " + insert_res.toString())
 
     qt_select_export1 """ SELECT * FROM ${table_export_name} t ORDER BY user_id; """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]