This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b399ed36ad8 [fix](OrcReader) fix the issue that orc_reader can not
read DECIMAL(0,0) type of orc file #41795 (#42298)
b399ed36ad8 is described below
commit b399ed36ad8f66a99dadbe1db083f08ad391d3c1
Author: Rayner Chen <[email protected]>
AuthorDate: Wed Oct 23 17:39:31 2024 +0800
[fix](OrcReader) fix the issue that orc_reader can not read DECIMAL(0,0)
type of orc file #41795 (#42298)
cherry pick from #41795
Co-authored-by: Tiewei Fang <[email protected]>
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 9 +++++++++
be/src/vec/exec/format/orc/vorc_reader.h | 1 -
.../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out | 7 +++++++
.../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out | 12 ++++++++++++
.../tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy | 10 +++++-----
.../tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy | 6 ++++++
6 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index bb13aa36ef5..6b6639f2feb 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -96,6 +96,11 @@ namespace doris::vectorized {
// TODO: we need to determine it by test.
static constexpr uint32_t MAX_DICT_CODE_PREDICATE_TO_REWRITE =
std::numeric_limits<uint32_t>::max();
static constexpr char
EMPTY_STRING_FOR_OVERFLOW[ColumnString::MAX_STRINGS_OVERFLOW_SIZE] = "";
+// HIVE 0.11 & 0.12 do not support precision and scale for decimal, so the
+// decimal type in ORC files produced by HIVE 0.11 & 0.12 is DECIMAL(0,0).
+// We should set a default precision and scale for these ORC files.
+static constexpr int decimal_precision_for_hive11 =
BeConsts::MAX_DECIMAL128_PRECISION;
+static constexpr int decimal_scale_for_hive11 = 10;
#define FOR_FLAT_ORC_COLUMNS(M) \
M(TypeIndex::Int8, Int8, orc::LongVectorBatch) \
@@ -1050,6 +1055,10 @@ TypeDescriptor OrcReader::convert_to_doris_type(const
orc::Type* orc_type) {
case orc::TypeKind::TIMESTAMP:
return TypeDescriptor(PrimitiveType::TYPE_DATETIMEV2);
case orc::TypeKind::DECIMAL:
+ if (orc_type->getPrecision() == 0) {
+ return
TypeDescriptor::create_decimalv3_type(decimal_precision_for_hive11,
+
decimal_scale_for_hive11);
+ }
return TypeDescriptor::create_decimalv3_type(orc_type->getPrecision(),
orc_type->getScale());
case orc::TypeKind::DATE:
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index c0b372dfcea..4aad5637ef5 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -587,7 +587,6 @@ private:
std::unique_ptr<orc::Reader> _reader;
std::unique_ptr<orc::RowReader> _row_reader;
std::unique_ptr<ORCFilterImpl> _orc_filter;
- orc::ReaderOptions _reader_options;
orc::RowReaderOptions _row_reader_options;
std::shared_ptr<io::FileSystem> _file_system;
diff --git
a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
index 12864d9c8a4..1e27e511487 100644
---
a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
+++
b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.out
@@ -9,6 +9,13 @@
2014-02-11
8200-02-11
+-- !test_2 --
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+12345678.6547450000
+
-- !test_3 --
2 foo 0.8 1 1969-12-31T16:00
5 eat 0.8 6 1969-12-31T16:00:20
diff --git
a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
index 3046384b928..9b5840ac0cd 100644
---
a/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
+++
b/regression-test/data/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.out
@@ -21,3 +21,15 @@ row 000009
Alyssa \N [3, 9, 15, 20]
Ben red []
+-- !test_4 --
+2 foo 0.8 1.2000000000 1969-12-31T16:00
+5 eat 0.8 5.5000000000 1969-12-31T16:00:20
+13 bar 80.0 2.2000000000 1969-12-31T16:00:05
+29 cat 8.0 3.3000000000 1969-12-31T16:00:10
+70 dog 1.8 4.4000000000 1969-12-31T16:00:15
+100 zebra 8.0 0E-10 1969-12-31T16:04:10
+100 zebra 8.0 0E-10 1969-12-31T16:04:10
+100 zebra 8.0 0E-10 1969-12-31T16:04:10
+100 zebra 8.0 0E-10 1969-12-31T16:04:10
+100 zebra 8.0 0E-10 1969-12-31T16:04:10
+
diff --git
a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
index 44176a47fd4..e42b745bfae 100644
---
a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
+++
b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group1_orc_files.groovy
@@ -41,11 +41,11 @@
suite("test_hdfs_orc_group1_orc_files","external,hive,tvf,external_docker") {
// Doris cannot read this ORC file because of a NOT_IMPLEMENT
error.
- // uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
- // order_qt_test_2 """ select * from HDFS(
- // "uri" = "${uri}",
- // "hadoop.username" = "${hdfsUserName}",
- // "format" = "orc"); """
+ uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_orc/group1/orc-file-11-format.orc"
+ order_qt_test_2 """ select decimal1 from HDFS(
+ "uri" = "${uri}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "orc") limit 5; """
uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_orc/group1/orc_split_elim.orc"
diff --git
a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
index 4495494a3f0..daf1d2a1383 100644
---
a/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
+++
b/regression-test/suites/external_table_p0/tvf/orc_tvf/test_hdfs_orc_group2_orc_files.groovy
@@ -49,6 +49,12 @@
suite("test_hdfs_orc_group2_orc_files","external,hive,tvf,external_docker") {
"uri" = "${uri}",
"hadoop.username" = "${hdfsUserName}",
"format" = "orc"); """
+
+ uri = "${defaultFS}" +
"/user/doris/tvf_data/test_hdfs_orc/group2/orc_split_elim.orc"
+ qt_test_4 """ select * from HDFS(
+ "uri" = "${uri}",
+ "hadoop.username" = "${hdfsUserName}",
+ "format" = "orc") order by userid limit 10; """
} finally {
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]