This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new a7156ee7756 [fix](parquet)Fix the be core issue when reading parquet
unsigned types. (#39926) (#40123)
a7156ee7756 is described below
commit a7156ee7756b408263248e26e6dcc7937b6c74e1
Author: daidai <[email protected]>
AuthorDate: Thu Aug 29 21:52:52 2024 +0800
[fix](parquet)Fix the be core issue when reading parquet unsigned types.
(#39926) (#40123)
bp #39926
---
.../exec/format/parquet/parquet_column_convert.cpp | 18 +-
.../exec/format/parquet/parquet_column_convert.h | 65 +++
be/src/vec/exec/format/parquet/schema_desc.cpp | 58 ++-
be/src/vec/exec/format/parquet/schema_desc.h | 10 +-
.../test_outfile_expr_generate_col_name.out | 6 +-
.../test_local_tvf_parquet_unsigned_integers.out | 440 +++++++++++++++++++++
.../tvf/unsigned_integers_1.parquet | Bin 0 -> 3202 bytes
.../tvf/unsigned_integers_2.parquet | Bin 0 -> 3218 bytes
.../tvf/unsigned_integers_3.parquet | Bin 0 -> 4727 bytes
.../tvf/unsigned_integers_4.parquet | Bin 0 -> 704 bytes
...test_local_tvf_parquet_unsigned_integers.groovy | 102 +++++
11 files changed, 673 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 57f1f54b7b9..2fb0afea82a 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -66,7 +66,9 @@ ColumnPtr
PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s
src_physical_type = tparquet::Type::INT32;
src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT);
}
- if (is_consistent() && _logical_converter->is_consistent()) {
+
+ if (!_convert_params->is_type_compatibility && is_consistent() &&
+ _logical_converter->is_consistent()) {
if (_cached_src_physical_type == nullptr) {
_cached_src_physical_type =
DataTypeFactory::instance().create_data_type(
src_logical_type, dst_logical_type->is_nullable());
@@ -246,7 +248,19 @@ std::unique_ptr<PhysicalToLogicalConverter>
PhysicalToLogicalConverter::get_conv
}
PrimitiveType src_logical_primitive = src_logical_type.type;
- if (is_parquet_native_type(src_logical_primitive)) {
+ if (field_schema->is_type_compatibility) {
+ if (src_logical_type == TYPE_SMALLINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_SMALLINT>());
+ } else if (src_logical_type == TYPE_INT) {
+ physical_converter.reset(new UnsignedIntegerConverter<TYPE_INT>());
+ } else if (src_logical_type == TYPE_BIGINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_BIGINT>());
+ } else if (src_logical_type == TYPE_LARGEINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_LARGEINT>());
+ } else {
+ physical_converter.reset(new
UnsupportedConverter(src_physical_type, src_logical_type));
+ }
+ } else if (is_parquet_native_type(src_logical_primitive)) {
if (is_string_type(src_logical_primitive) &&
src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
// for FixedSizeBinary
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 551bf7e14ed..91b81121aa4 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -40,6 +40,9 @@ struct ConvertParams {
DecimalScaleParams decimal_scale;
FieldSchema* field_schema = nullptr;
+ // True when the parquet column is an unsigned integer that is read into the next wider signed type: UInt8 -> Int16, UInt16 -> Int32, UInt32 -> Int64, UInt64 -> Int128.
+ bool is_type_compatibility = false;
+
/**
* Some frameworks like paimon maybe writes non-standard parquet files.
Timestamp field doesn't have
* logicalType or converted_type to indicates its precision. We have to
reset the time mask.
@@ -108,6 +111,7 @@ struct ConvertParams {
t.from_unixtime(0, *ctz);
offset_days = t.day() == 31 ? -1 : 0;
}
+ is_type_compatibility = field_schema_->is_type_compatibility;
}
template <typename DecimalPrimitiveType>
@@ -273,6 +277,67 @@ class LittleIntPhysicalConverter : public
PhysicalToLogicalConverter {
}
};
+template <PrimitiveType type>
+struct UnsignedTypeTraits;
+
+template <>
+struct UnsignedTypeTraits<TYPE_SMALLINT> {
+ using UnsignedCppType = UInt8;
+
+ // See https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
+ // INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type,
+ // and INT(64, false) must annotate an int64 primitive type.
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_INT> {
+ using UnsignedCppType = UInt16;
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_BIGINT> {
+ using UnsignedCppType = UInt32;
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_LARGEINT> {
+ using UnsignedCppType = UInt64;
+ using StorageCppType = Int64;
+ using StorageColumnType = vectorized::ColumnInt64;
+};
+
+template <PrimitiveType IntPrimitiveType>
+class UnsignedIntegerConverter : public PhysicalToLogicalConverter {
+ Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr&
src_logical_column) override {
+ using UnsignedCppType = typename
UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
+ using StorageCppType = typename
UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
+ using StorageColumnType = typename
UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
+ using DstColumnType = typename
PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
+
+ ColumnPtr from_col = remove_nullable(src_physical_col);
+ MutableColumnPtr to_col =
remove_nullable(src_logical_column)->assume_mutable();
+ auto& src_data = static_cast<const
StorageColumnType*>(from_col.get())->get_data();
+
+ size_t rows = src_data.size();
+ size_t start_idx = to_col->size();
+ to_col->resize(start_idx + rows);
+ auto& data = static_cast<DstColumnType&>(*to_col.get()).get_data();
+
+ for (int i = 0; i < rows; i++) {
+ StorageCppType src_value = src_data[i];
+ auto unsigned_value = static_cast<UnsignedCppType>(src_value);
+ data[start_idx + i] = unsigned_value;
+ }
+
+ return Status::OK();
+ }
+};
+
class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
private:
int _type_length;
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp
b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 08692de8743..9097b65718f 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -191,16 +191,19 @@ void FieldDescriptor::parse_physical_field(const
tparquet::SchemaElement& physic
physical_field->physical_type = physical_schema.type;
_physical_fields.push_back(physical_field);
physical_field->physical_column_index = _physical_fields.size() - 1;
- physical_field->type = get_doris_type(physical_schema);
+ auto type = get_doris_type(physical_schema);
+ physical_field->type = type.first;
+ physical_field->is_type_compatibility = type.second;
}
-TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement&
physical_schema) {
- TypeDescriptor type;
- type.type = INVALID_TYPE;
+std::pair<TypeDescriptor, bool> FieldDescriptor::get_doris_type(
+ const tparquet::SchemaElement& physical_schema) {
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
if (physical_schema.__isset.logicalType) {
- type = convert_to_doris_type(physical_schema.logicalType);
+ ans = convert_to_doris_type(physical_schema.logicalType);
} else if (physical_schema.__isset.converted_type) {
- type = convert_to_doris_type(physical_schema);
+ ans = convert_to_doris_type(physical_schema);
}
// use physical type instead
if (type.type == INVALID_TYPE) {
@@ -233,7 +236,7 @@ TypeDescriptor FieldDescriptor::get_doris_type(const
tparquet::SchemaElement& ph
break;
}
}
- return type;
+ return ans;
}
// Copy from org.apache.iceberg.avro.AvroSchemaUtil#validAvroName
@@ -302,8 +305,11 @@ void FieldDescriptor::iceberg_sanitize(const
std::vector<std::string>& read_colu
}
}
-TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType
logicalType) {
- TypeDescriptor type;
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
+ tparquet::LogicalType logicalType) {
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
+ bool& is_type_compatibility = ans.second;
if (logicalType.__isset.STRING) {
type = TypeDescriptor(TYPE_STRING);
} else if (logicalType.__isset.DECIMAL) {
@@ -313,16 +319,25 @@ TypeDescriptor
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
type = TypeDescriptor(TYPE_DATEV2);
} else if (logicalType.__isset.INTEGER) {
if (logicalType.INTEGER.isSigned) {
- if (logicalType.INTEGER.bitWidth <= 32) {
+ if (logicalType.INTEGER.bitWidth <= 8) {
+ type = TypeDescriptor(TYPE_TINYINT);
+ } else if (logicalType.INTEGER.bitWidth <= 16) {
+ type = TypeDescriptor(TYPE_SMALLINT);
+ } else if (logicalType.INTEGER.bitWidth <= 32) {
type = TypeDescriptor(TYPE_INT);
} else {
type = TypeDescriptor(TYPE_BIGINT);
}
} else {
- if (logicalType.INTEGER.bitWidth <= 16) {
+ is_type_compatibility = true;
+ if (logicalType.INTEGER.bitWidth <= 8) {
+ type = TypeDescriptor(TYPE_SMALLINT);
+ } else if (logicalType.INTEGER.bitWidth <= 16) {
type = TypeDescriptor(TYPE_INT);
- } else {
+ } else if (logicalType.INTEGER.bitWidth <= 32) {
type = TypeDescriptor(TYPE_BIGINT);
+ } else {
+ type = TypeDescriptor(TYPE_LARGEINT);
}
}
} else if (logicalType.__isset.TIME) {
@@ -344,12 +359,14 @@ TypeDescriptor
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
} else {
type = TypeDescriptor(INVALID_TYPE);
}
- return type;
+ return ans;
}
-TypeDescriptor FieldDescriptor::convert_to_doris_type(
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
const tparquet::SchemaElement& physical_schema) {
- TypeDescriptor type;
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
+ bool& is_type_compatibility = ans.second;
switch (physical_schema.converted_type) {
case tparquet::ConvertedType::type::UTF8:
type = TypeDescriptor(TYPE_STRING);
@@ -378,28 +395,33 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(
type = TypeDescriptor(TYPE_TINYINT);
break;
case tparquet::ConvertedType::type::UINT_8:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_16:
type = TypeDescriptor(TYPE_SMALLINT);
break;
case tparquet::ConvertedType::type::UINT_16:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_32:
type = TypeDescriptor(TYPE_INT);
break;
case tparquet::ConvertedType::type::UINT_32:
- [[fallthrough]];
- case tparquet::ConvertedType::type::UINT_64:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_64:
type = TypeDescriptor(TYPE_BIGINT);
break;
+ case tparquet::ConvertedType::type::UINT_64:
+ is_type_compatibility = true;
+ type = TypeDescriptor(TYPE_LARGEINT);
+ break;
default:
LOG(WARNING) << "Not supported parquet ConvertedType: " <<
physical_schema.converted_type;
type = TypeDescriptor(INVALID_TYPE);
break;
}
- return type;
+ return ans;
}
Status FieldDescriptor::parse_group_field(const
std::vector<tparquet::SchemaElement>& t_schemas,
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h
b/be/src/vec/exec/format/parquet/schema_desc.h
index 50e526bd730..ca726ef1b57 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -49,6 +49,9 @@ struct FieldSchema {
int16_t repeated_parent_def_level = 0;
std::vector<FieldSchema> children;
+ // True when the parquet column is an unsigned integer that is read into the next wider signed type: UInt8 -> Int16, UInt16 -> Int32, UInt32 -> Int64, UInt64 -> Int128.
+ bool is_type_compatibility = false;
+
FieldSchema() = default;
~FieldSchema() = default;
FieldSchema(const FieldSchema& fieldSchema) = default;
@@ -84,12 +87,13 @@ private:
Status parse_node_field(const std::vector<tparquet::SchemaElement>&
t_schemas, size_t curr_pos,
FieldSchema* node_field);
- TypeDescriptor convert_to_doris_type(tparquet::LogicalType logicalType);
+ std::pair<TypeDescriptor, bool>
convert_to_doris_type(tparquet::LogicalType logicalType);
- TypeDescriptor convert_to_doris_type(const tparquet::SchemaElement&
physical_schema);
+ std::pair<TypeDescriptor, bool> convert_to_doris_type(
+ const tparquet::SchemaElement& physical_schema);
public:
- TypeDescriptor get_doris_type(const tparquet::SchemaElement&
physical_schema);
+ std::pair<TypeDescriptor, bool> get_doris_type(const
tparquet::SchemaElement& physical_schema);
// org.apache.iceberg.avro.AvroSchemaUtil#sanitize will encode special
characters,
// we have to decode these characters
diff --git
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
index 406bc7660ff..5748cedd228 100644
---
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
+++
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -100,12 +100,12 @@ id int Yes false \N NONE
9 1 string 27 false 5 true 1
-- !desc_s3 --
-__add_5 int Yes false \N NONE
-__bit_or_7 int Yes false \N NONE
+__add_5 smallint Yes false \N NONE
+__bit_or_7 tinyint Yes false \N NONE
__cast_3 bigint Yes false \N NONE
__greater_than_4 boolean Yes false \N NONE
__in_predicate_6 boolean Yes false \N NONE
-__literal_1 int Yes false \N NONE
+__literal_1 tinyint Yes false \N NONE
__literal_2 text Yes false \N NONE
id int Yes false \N NONE
diff --git
a/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
new file mode 100644
index 00000000000..4cda2746a00
--- /dev/null
+++
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
@@ -0,0 +1,440 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test_1 --
+0 254 54979 3876010132 12292188520939801104
+1 194 41087 2468877118 11276751473207154134
+2 204 40618 3112417582 12531644712494408881
+3 252 42885 2959241092 12524633273560617315
+4 168 34898 3576407414 13096188234563161622
+5 252 39188 3127662355 16817880057727309281
+6 146 45695 2457180674 13258241842726882717
+7 223 54096 3818049848 13392705476834798352
+8 247 49103 4235043353 14882039847048390015
+9 171 37711 4121304297 12690575475523254418
+10 208 45522 2849886325 9887016810088790216
+11 140 57808 4249890046 11248669065541052626
+12 154 63353 2494834510 10992021804074820099
+13 169 62034 3442632722 13741258842709060503
+14 201 39884 2544963866 13079528300040741505
+15 208 48887 2865890129 15763071531082484590
+16 200 58596 2907876388 13993629507485604147
+17 210 50277 3777672898 9348401130412952441
+18 212 51965 3618857131 16174811648348878672
+19 161 39818 2368776725 15511547001336881917
+20 160 41750 2980821608 11023149785626200227
+21 141 57340 4284126112 15552326510762976216
+22 139 58391 4117149789 15878585387857525976
+23 212 59593 2459376464 15173162289502083954
+24 165 53957 3427228999 17619046273898992624
+25 229 49948 2600007878 15455673680657024177
+26 134 64599 3738904119 16329279862825806683
+27 205 41073 2442068534 14375087414295500534
+28 204 60681 3422219729 18264453353631472102
+29 136 47361 3588730082 13238158010948418606
+30 252 42761 4175633691 15734976010511914446
+31 150 45199 4078820644 15090555688768508964
+32 210 57428 4009891336 16805687728956091249
+33 235 34379 3963852622 13291314989308428602
+34 226 33850 3643451160 15146799101548124057
+35 133 54253 2734990478 12652028700805136484
+36 166 61697 3958278249 9924263812189602832
+37 137 55610 2782254392 18017977508881163787
+38 166 41727 3012921589 12285785983985756200
+39 194 46987 2351267776 18191890018150428322
+40 201 61160 3877183539 17825711874965469887
+41 209 54646 3739634163 12447543494188025837
+42 157 65086 2250645811 15257822118590463416
+43 223 56812 4101782180 9308776360130037320
+44 135 61061 2806848998 15345311951702932353
+45 147 39862 3521569945 17679765098696012133
+46 229 38351 2299955463 11589155413771055105
+47 141 49268 2740241757 16646921996087125692
+48 144 53152 3733884127 12946127273932789697
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_2 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_3 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_4 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_5 --
+0 200 48438 3775964178 9957179629640951554
+1 197 37330 2548711656 11127988488306710308
+2 200 65117 3419624570 11652168873218179696
+3 223 39174 3669818470 17675778338328486831
+4 196 60825 3064802389 18435946529637287047
+5 148 58165 3414732886 10219427643252484377
+6 217 61318 4075782121 10508828167797335037
+7 185 46040 2798591473 16764958584953558745
+8 148 50700 2541953946 11228894632595899400
+9 235 41262 3398155979 14622381114650660637
+10 189 49552 3754066331 16077913453789464999
+11 138 41102 2488033055 11226600460306403745
+12 132 62435 3745892606 17429590483490742063
+13 164 34858 3380810793 15447251686769296941
+14 246 44518 4213847006 13168159509128150529
+15 128 34475 2584892403 14470296482506793549
+16 180 40732 2667143993 13453719924509014943
+17 206 49824 3079026622 9242535560261271930
+18 250 61860 3723105814 9754643556067415401
+19 248 51724 4229819070 15295668751829551718
+20 237 38710 2951615403 14702842917502593482
+21 133 46707 2464573031 15860122391678970590
+22 249 41930 4210407904 12677893784378713520
+23 218 34250 3657784995 14768434928936445583
+24 134 52064 4274319068 16672757830801563734
+25 179 54232 3587434213 10613547124477746521
+26 217 63838 2348239122 17998346098073086386
+27 190 52362 3442840997 13122160861538572795
+28 131 39957 2321439682 16792774292797573856
+29 143 50902 2914375790 10557924491128546903
+30 228 33365 2438054546 14302876531585763284
+31 251 59126 2308219390 12753986538521770387
+32 197 58579 2647214662 16916351783057256258
+33 246 62028 4202894981 16869494938800942234
+34 229 47800 3963077237 12000510020655611310
+35 195 43221 2980563838 17324881735239531242
+36 210 46389 3302751013 10936691069329977133
+37 190 60582 4294458835 13465474203101539625
+38 234 58068 2575498858 18264769986785189204
+39 244 55282 2242973260 14725356984149511589
+40 224 33882 3169401634 9641824149700693760
+41 210 48824 2805949235 17877575698845246971
+42 250 49871 4015422133 11571502995585068959
+43 253 53173 3315146396 15527276320077174431
+44 205 48824 2153923483 18227485227719579199
+45 202 65101 2286014232 10596463304445669324
+46 144 39272 3664679383 14619550616500589262
+47 207 58788 2459180916 15787272910593406395
+48 236 61302 2814354943 15797302386492157450
+49 222 44025 2267428102 16226182608093468653
+
+-- !test_6 --
+0 254 65535 4294967294 18446744073709551614
+1 254 65534 4294967295 18446744073709551615
+2 254 65535 4294967294 18446744073709551615
+3 255 65535 4294967294 18446744073709551615
+4 255 65534 4294967295 18446744073709551615
+5 254 65534 4294967295 18446744073709551614
+6 254 65534 4294967294 18446744073709551614
+7 255 65534 4294967294 18446744073709551614
+8 255 65534 4294967295 18446744073709551614
+9 255 65535 4294967294 18446744073709551614
+
+-- !test_7 --
+id bigint Yes false \N NONE
+mediumint_unsigned bigint Yes false \N NONE
+int_unsigned bigint Yes false \N NONE
+
+-- !test_8 --
+1 111 111
+2 222 222
+
+-- !test_9 --
+16 200 58596 2907876388 13993629507485604147
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_10 --
+38 166 41727 3012921589 12285785983985756200
+
+-- !test_11 --
+46 229 38351 2299955463 11589155413771055105
+
+-- !test_12 --
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_13 --
+49 222 44025 2267428102 16226182608093468653
+
+-- !test_14 --
+38 234 58068 2575498858 18264769986785189204
+
+-- !test_15 --
+14 246 44518 4213847006 13168159509128150529
+
+-- !test_16 --
+25 179 54232 3587434213 10613547124477746521
+
+-- !test_17 --
+500
+
+-- !test_18 --
+500
+
+-- !test_19 --
+500
+
+-- !test_20 --
+500
+
+-- !test_21 --
+500
+
+-- !test_22 --
+0 254 65535 4294967294 18446744073709551614
+5 254 65534 4294967295 18446744073709551614
+6 254 65534 4294967294 18446744073709551614
+7 255 65534 4294967294 18446744073709551614
+8 255 65534 4294967295 18446744073709551614
+9 255 65535 4294967294 18446744073709551614
+10 255 65535 4294967294 18446744073709551614
+11 254 65535 4294967294 18446744073709551614
+12 255 65535 4294967294 18446744073709551614
+14 254 65535 4294967295 18446744073709551614
+16 255 65535 4294967295 18446744073709551614
+17 255 65535 4294967294 18446744073709551614
+18 255 65534 4294967295 18446744073709551614
+20 255 65535 4294967294 18446744073709551614
+21 255 65534 4294967294 18446744073709551614
+22 255 65534 4294967294 18446744073709551614
+24 255 65534 4294967294 18446744073709551614
+25 254 65535 4294967295 18446744073709551614
+27 255 65534 4294967294 18446744073709551614
+29 255 65534 4294967295 18446744073709551614
+30 255 65534 4294967295 18446744073709551614
+32 254 65534 4294967294 18446744073709551614
+37 254 65534 4294967295 18446744073709551614
+38 254 65535 4294967294 18446744073709551614
+40 255 65535 4294967294 18446744073709551614
+41 255 65534 4294967294 18446744073709551614
+43 254 65534 4294967295 18446744073709551614
+44 254 65534 4294967294 18446744073709551614
+45 255 65534 4294967295 18446744073709551614
+46 254 65534 4294967295 18446744073709551614
+53 254 65534 4294967294 18446744073709551614
+56 254 65535 4294967294 18446744073709551614
+58 255 65534 4294967295 18446744073709551614
+60 254 65535 4294967295 18446744073709551614
+62 255 65534 4294967295 18446744073709551614
+63 255 65535 4294967294 18446744073709551614
+66 255 65534 4294967295 18446744073709551614
+68 254 65535 4294967295 18446744073709551614
+71 254 65535 4294967295 18446744073709551614
+73 254 65535 4294967295 18446744073709551614
+75 254 65534 4294967294 18446744073709551614
+76 255 65534 4294967294 18446744073709551614
+77 254 65535 4294967295 18446744073709551614
+80 254 65534 4294967295 18446744073709551614
+81 255 65534 4294967294 18446744073709551614
+84 255 65534 4294967295 18446744073709551614
+86 255 65535 4294967295 18446744073709551614
+87 254 65534 4294967295 18446744073709551614
+89 255 65535 4294967294 18446744073709551614
+91 254 65534 4294967294 18446744073709551614
+92 255 65534 4294967294 18446744073709551614
+94 254 65535 4294967294 18446744073709551614
+98 255 65534 4294967295 18446744073709551614
+99 254 65535 4294967294 18446744073709551614
+100 255 65534 4294967295 18446744073709551614
+102 255 65534 4294967295 18446744073709551614
+103 255 65534 4294967295 18446744073709551614
+106 254 65534 4294967295 18446744073709551614
+108 254 65535 4294967295 18446744073709551614
+109 255 65535 4294967294 18446744073709551614
+110 254 65534 4294967295 18446744073709551614
+112 255 65535 4294967295 18446744073709551614
+115 255 65534 4294967295 18446744073709551614
+117 255 65535 4294967294 18446744073709551614
+120 254 65535 4294967294 18446744073709551614
+121 254 65535 4294967295 18446744073709551614
+123 254 65535 4294967294 18446744073709551614
+124 254 65535 4294967295 18446744073709551614
+130 255 65534 4294967294 18446744073709551614
+131 254 65535 4294967295 18446744073709551614
+132 254 65535 4294967294 18446744073709551614
+133 254 65534 4294967295 18446744073709551614
+139 254 65535 4294967295 18446744073709551614
+141 255 65535 4294967295 18446744073709551614
+148 255 65535 4294967294 18446744073709551614
+149 254 65534 4294967295 18446744073709551614
+150 254 65534 4294967295 18446744073709551614
+151 254 65534 4294967294 18446744073709551614
+152 254 65535 4294967295 18446744073709551614
+157 255 65534 4294967295 18446744073709551614
+158 255 65535 4294967294 18446744073709551614
+167 255 65534 4294967295 18446744073709551614
+168 254 65535 4294967294 18446744073709551614
+169 254 65534 4294967294 18446744073709551614
+173 255 65534 4294967294 18446744073709551614
+174 254 65535 4294967295 18446744073709551614
+177 254 65535 4294967295 18446744073709551614
+184 255 65535 4294967295 18446744073709551614
+186 254 65535 4294967295 18446744073709551614
+187 255 65535 4294967295 18446744073709551614
+189 254 65534 4294967295 18446744073709551614
+190 254 65535 4294967295 18446744073709551614
+191 254 65535 4294967294 18446744073709551614
+192 254 65535 4294967294 18446744073709551614
+194 255 65534 4294967294 18446744073709551614
+195 255 65534 4294967295 18446744073709551614
+196 254 65535 4294967295 18446744073709551614
+197 255 65535 4294967295 18446744073709551614
+204 255 65535 4294967295 18446744073709551614
+207 255 65535 4294967295 18446744073709551614
+210 255 65535 4294967294 18446744073709551614
+211 255 65535 4294967294 18446744073709551614
+213 254 65534 4294967295 18446744073709551614
+214 255 65534 4294967295 18446744073709551614
+216 254 65535 4294967295 18446744073709551614
+217 255 65535 4294967295 18446744073709551614
+222 255 65534 4294967295 18446744073709551614
+226 255 65535 4294967294 18446744073709551614
+227 254 65534 4294967294 18446744073709551614
+228 254 65535 4294967295 18446744073709551614
+229 255 65535 4294967294 18446744073709551614
+231 254 65534 4294967294 18446744073709551614
+232 254 65534 4294967294 18446744073709551614
+233 255 65535 4294967295 18446744073709551614
+240 255 65534 4294967295 18446744073709551614
+241 254 65535 4294967294 18446744073709551614
+243 254 65534 4294967294 18446744073709551614
+244 255 65534 4294967295 18446744073709551614
+246 255 65534 4294967294 18446744073709551614
+248 255 65535 4294967295 18446744073709551614
+251 254 65535 4294967294 18446744073709551614
+253 255 65534 4294967294 18446744073709551614
+256 255 65534 4294967294 18446744073709551614
+257 255 65535 4294967294 18446744073709551614
+258 254 65534 4294967294 18446744073709551614
+261 254 65535 4294967295 18446744073709551614
+262 254 65535 4294967295 18446744073709551614
+264 255 65534 4294967294 18446744073709551614
+266 254 65535 4294967295 18446744073709551614
+267 255 65534 4294967294 18446744073709551614
+269 254 65535 4294967294 18446744073709551614
+270 255 65534 4294967294 18446744073709551614
+273 254 65535 4294967294 18446744073709551614
+279 255 65535 4294967295 18446744073709551614
+281 254 65534 4294967294 18446744073709551614
+283 255 65534 4294967295 18446744073709551614
+285 254 65535 4294967294 18446744073709551614
+287 255 65535 4294967294 18446744073709551614
+288 255 65534 4294967294 18446744073709551614
+290 254 65534 4294967295 18446744073709551614
+291 255 65535 4294967295 18446744073709551614
+292 255 65534 4294967295 18446744073709551614
+295 254 65535 4294967294 18446744073709551614
+296 255 65534 4294967295 18446744073709551614
+298 255 65535 4294967294 18446744073709551614
+301 255 65534 4294967295 18446744073709551614
+302 254 65534 4294967294 18446744073709551614
+307 254 65535 4294967294 18446744073709551614
+308 254 65535 4294967294 18446744073709551614
+309 254 65535 4294967295 18446744073709551614
+313 254 65534 4294967295 18446744073709551614
+317 254 65534 4294967294 18446744073709551614
+319 255 65535 4294967295 18446744073709551614
+320 254 65535 4294967295 18446744073709551614
+321 254 65535 4294967295 18446744073709551614
+322 254 65535 4294967295 18446744073709551614
+325 255 65535 4294967295 18446744073709551614
+326 254 65534 4294967295 18446744073709551614
+327 255 65535 4294967295 18446744073709551614
+328 255 65534 4294967294 18446744073709551614
+331 254 65535 4294967294 18446744073709551614
+332 255 65535 4294967294 18446744073709551614
+334 255 65534 4294967294 18446744073709551614
+336 254 65535 4294967295 18446744073709551614
+337 254 65535 4294967295 18446744073709551614
+339 255 65535 4294967295 18446744073709551614
+341 254 65535 4294967294 18446744073709551614
+342 255 65534 4294967294 18446744073709551614
+345 254 65534 4294967294 18446744073709551614
+346 255 65535 4294967294 18446744073709551614
+347 255 65534 4294967295 18446744073709551614
+349 255 65534 4294967295 18446744073709551614
+350 254 65535 4294967295 18446744073709551614
+353 255 65534 4294967294 18446744073709551614
+355 255 65535 4294967294 18446744073709551614
+356 254 65535 4294967295 18446744073709551614
+357 254 65535 4294967295 18446744073709551614
+361 254 65535 4294967295 18446744073709551614
+363 254 65535 4294967295 18446744073709551614
+367 255 65535 4294967294 18446744073709551614
+368 254 65534 4294967295 18446744073709551614
+372 254 65535 4294967294 18446744073709551614
+373 255 65535 4294967294 18446744073709551614
+376 255 65535 4294967294 18446744073709551614
+377 254 65535 4294967295 18446744073709551614
+379 255 65535 4294967294 18446744073709551614
+382 254 65534 4294967295 18446744073709551614
+385 254 65535 4294967294 18446744073709551614
+389 254 65534 4294967294 18446744073709551614
+390 255 65535 4294967295 18446744073709551614
+391 254 65535 4294967294 18446744073709551614
+393 255 65534 4294967295 18446744073709551614
+395 254 65535 4294967294 18446744073709551614
+396 254 65534 4294967294 18446744073709551614
+398 254 65535 4294967294 18446744073709551614
+400 254 65534 4294967294 18446744073709551614
+401 254 65534 4294967294 18446744073709551614
+402 255 65534 4294967295 18446744073709551614
+403 254 65534 4294967295 18446744073709551614
+408 255 65535 4294967294 18446744073709551614
+409 254 65534 4294967295 18446744073709551614
+410 254 65534 4294967294 18446744073709551614
+411 255 65535 4294967294 18446744073709551614
+412 254 65535 4294967294 18446744073709551614
+413 255 65534 4294967294 18446744073709551614
+415 254 65534 4294967294 18446744073709551614
+417 254 65534 4294967295 18446744073709551614
+418 254 65535 4294967294 18446744073709551614
+420 255 65535 4294967294 18446744073709551614
+424 255 65535 4294967294 18446744073709551614
+426 254 65535 4294967295 18446744073709551614
+430 255 65535 4294967294 18446744073709551614
+431 255 65534 4294967295 18446744073709551614
+432 255 65535 4294967294 18446744073709551614
+434 254 65535 4294967294 18446744073709551614
+440 255 65534 4294967294 18446744073709551614
+441 254 65535 4294967294 18446744073709551614
+443 254 65535 4294967295 18446744073709551614
+444 255 65535 4294967295 18446744073709551614
+450 254 65534 4294967294 18446744073709551614
+454 255 65534 4294967294 18446744073709551614
+456 255 65535 4294967294 18446744073709551614
+458 255 65535 4294967295 18446744073709551614
+459 254 65534 4294967294 18446744073709551614
+461 255 65535 4294967294 18446744073709551614
+462 255 65535 4294967295 18446744073709551614
+463 254 65534 4294967294 18446744073709551614
+464 255 65534 4294967295 18446744073709551614
+465 254 65535 4294967294 18446744073709551614
+466 255 65534 4294967295 18446744073709551614
+468 254 65534 4294967295 18446744073709551614
+470 254 65535 4294967294 18446744073709551614
+471 255 65534 4294967294 18446744073709551614
+472 254 65535 4294967295 18446744073709551614
+473 255 65534 4294967295 18446744073709551614
+474 255 65535 4294967295 18446744073709551614
+475 255 65535 4294967294 18446744073709551614
+477 254 65534 4294967294 18446744073709551614
+480 255 65535 4294967295 18446744073709551614
+483 254 65535 4294967294 18446744073709551614
+485 255 65535 4294967294 18446744073709551614
+486 254 65534 4294967294 18446744073709551614
+487 254 65534 4294967295 18446744073709551614
+488 255 65535 4294967295 18446744073709551614
+489 254 65535 4294967295 18446744073709551614
+490 255 65535 4294967294 18446744073709551614
+494 254 65534 4294967294 18446744073709551614
+495 255 65534 4294967295 18446744073709551614
+496 254 65534 4294967295 18446744073709551614
+497 255 65535 4294967294 18446744073709551614
+499 255 65534 4294967295 18446744073709551614
+
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
new file mode 100644
index 00000000000..8f87903255f
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
new file mode 100644
index 00000000000..fe48ab71842
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
new file mode 100644
index 00000000000..ff8dae5ecec
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
new file mode 100644
index 00000000000..010d15497ec
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet differ
diff --git
a/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
new file mode 100644
index 00000000000..24cfb5f2ac2
--- /dev/null
+++
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
@@ -0,0 +1,102 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This suite tests the `local` tvf reading parquet files with unsigned integer columns
+suite("test_local_tvf_parquet_unsigned_integers", "p0") {
+ List<List<Object>> backends = sql """ show backends """
+ def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"
+
+ assertTrue(backends.size() > 0)
+
+ def be_id = backends[0][0]
+ // in cluster mode, make sure every BE has a copy of the data files
+ def outFilePath="/"
+ def transFile01="${dataFilePath}/unsigned_integers_1.parquet"
+ def transFile02="${dataFilePath}/unsigned_integers_2.parquet"
+ def transFile03="${dataFilePath}/unsigned_integers_3.parquet"
+ def transFile04="${dataFilePath}/unsigned_integers_4.parquet"
+
+ for (List<Object> backend : backends) {
+ def be_host = backend[1]
+ scpFiles ("root", be_host, transFile01, outFilePath, false);
+ scpFiles ("root", be_host, transFile02, outFilePath, false);
+ scpFiles ("root", be_host, transFile03, outFilePath, false);
+ scpFiles ("root", be_host, transFile04, outFilePath, false);
+ }
+
+ def file1 = outFilePath + "unsigned_integers_1.parquet";
+ def file2 = outFilePath + "unsigned_integers_2.parquet";
+ def file3 = outFilePath + "unsigned_integers_3.parquet";
+ def file4 = outFilePath + "unsigned_integers_4.parquet";
+
+
+
+
+ qt_test_1 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+ qt_test_2 """ desc function local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_3 """ desc function local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_4 """ desc function local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_5 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+ qt_test_6 """ select * from local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet") order by id limit 10;"""
+
+ qt_test_7 """ desc function local( "file_path" = "${file4}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_8 """ select * from local( "file_path" = "${file4}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+
+
+ qt_test_9 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint8_column = 200 order by id ;"""
+
+ qt_test_10 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint16_column = 41727 order by id ;"""
+
+ qt_test_11 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint32_column = 2299955463 order by
id ;"""
+
+ qt_test_12 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 15103440093398422258
order by id ;"""
+
+
+
+ qt_test_13 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint8_column = 222 order by id ;"""
+
+ qt_test_14 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint16_column = 58068 order by id ;"""
+
+ qt_test_15 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint32_column = 4213847006 order by
id ;"""
+
+ qt_test_16 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 10613547124477746521
order by id ;"""
+
+
+ qt_test_17 """ select count(id) from local( "file_path" = "${file3}",
"backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_18 """ select count(uint8_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_19 """ select count(uint16_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_20 """ select count(uint32_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_21 """ select count(uint64_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_22 """ select * from local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 18446744073709551614
order by id ;"""
+
+
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]