This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 80b921344f6 [fix](parquet)Fix the be core issue when reading parquet
unsigned types. (#39926)
80b921344f6 is described below
commit 80b921344f6b96088b2a81c8768829bcb0cf0aae
Author: daidai <[email protected]>
AuthorDate: Thu Aug 29 18:23:40 2024 +0800
[fix](parquet)Fix the be core issue when reading parquet unsigned types.
(#39926)
## Proposed changes
Doris has no unsigned integer types, so each parquet unsigned integer type is
mapped to the next wider signed doris type (e.g. parquet uint32 -> doris
bigint (int64)).
When reading such a parquet file, the byte size of the values stored in parquet
and the byte size of the data type mapped by doris were inconsistent, which
caused the BE (backend) to core dump.
Fix:
When reading, we now read the values according to the byte size stored in
parquet, and then convert them to the data type mapped by doris.
Mapping relationship description:
parquet -> doris
UInt8 -> Int16
UInt16 -> Int32
UInt32 -> Int64
UInt64 -> Int128.
---
.../exec/format/parquet/parquet_column_convert.cpp | 18 +-
.../exec/format/parquet/parquet_column_convert.h | 65 +++
be/src/vec/exec/format/parquet/schema_desc.cpp | 58 ++-
be/src/vec/exec/format/parquet/schema_desc.h | 10 +-
.../test_outfile_expr_generate_col_name.out | 6 +-
.../test_local_tvf_parquet_unsigned_integers.out | 440 +++++++++++++++++++++
.../tvf/unsigned_integers_1.parquet | Bin 0 -> 3202 bytes
.../tvf/unsigned_integers_2.parquet | Bin 0 -> 3218 bytes
.../tvf/unsigned_integers_3.parquet | Bin 0 -> 4727 bytes
.../tvf/unsigned_integers_4.parquet | Bin 0 -> 704 bytes
...test_local_tvf_parquet_unsigned_integers.groovy | 102 +++++
11 files changed, 673 insertions(+), 26 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 57f1f54b7b9..2fb0afea82a 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -66,7 +66,9 @@ ColumnPtr
PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s
src_physical_type = tparquet::Type::INT32;
src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT);
}
- if (is_consistent() && _logical_converter->is_consistent()) {
+
+ if (!_convert_params->is_type_compatibility && is_consistent() &&
+ _logical_converter->is_consistent()) {
if (_cached_src_physical_type == nullptr) {
_cached_src_physical_type =
DataTypeFactory::instance().create_data_type(
src_logical_type, dst_logical_type->is_nullable());
@@ -246,7 +248,19 @@ std::unique_ptr<PhysicalToLogicalConverter>
PhysicalToLogicalConverter::get_conv
}
PrimitiveType src_logical_primitive = src_logical_type.type;
- if (is_parquet_native_type(src_logical_primitive)) {
+ if (field_schema->is_type_compatibility) {
+ if (src_logical_type == TYPE_SMALLINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_SMALLINT>());
+ } else if (src_logical_type == TYPE_INT) {
+ physical_converter.reset(new UnsignedIntegerConverter<TYPE_INT>());
+ } else if (src_logical_type == TYPE_BIGINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_BIGINT>());
+ } else if (src_logical_type == TYPE_LARGEINT) {
+ physical_converter.reset(new
UnsignedIntegerConverter<TYPE_LARGEINT>());
+ } else {
+ physical_converter.reset(new
UnsupportedConverter(src_physical_type, src_logical_type));
+ }
+ } else if (is_parquet_native_type(src_logical_primitive)) {
if (is_string_type(src_logical_primitive) &&
src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
// for FixedSizeBinary
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 551bf7e14ed..91b81121aa4 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -40,6 +40,9 @@ struct ConvertParams {
DecimalScaleParams decimal_scale;
FieldSchema* field_schema = nullptr;
+ //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+ bool is_type_compatibility = false;
+
/**
* Some frameworks like paimon maybe writes non-standard parquet files.
Timestamp field doesn't have
* logicalType or converted_type to indicates its precision. We have to
reset the time mask.
@@ -108,6 +111,7 @@ struct ConvertParams {
t.from_unixtime(0, *ctz);
offset_days = t.day() == 31 ? -1 : 0;
}
+ is_type_compatibility = field_schema_->is_type_compatibility;
}
template <typename DecimalPrimitiveType>
@@ -273,6 +277,67 @@ class LittleIntPhysicalConverter : public
PhysicalToLogicalConverter {
}
};
+template <PrimitiveType type>
+struct UnsignedTypeTraits;
+
+template <>
+struct UnsignedTypeTraits<TYPE_SMALLINT> {
+ using UnsignedCppType = UInt8;
+
//https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
+ //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32
primitive type and INT(64, false)
+ //must annotate an int64 primitive type.
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_INT> {
+ using UnsignedCppType = UInt16;
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_BIGINT> {
+ using UnsignedCppType = UInt32;
+ using StorageCppType = Int32;
+ using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_LARGEINT> {
+ using UnsignedCppType = UInt64;
+ using StorageCppType = Int64;
+ using StorageColumnType = vectorized::ColumnInt64;
+};
+
+template <PrimitiveType IntPrimitiveType>
+class UnsignedIntegerConverter : public PhysicalToLogicalConverter {
+ Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr&
src_logical_column) override {
+ using UnsignedCppType = typename
UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
+ using StorageCppType = typename
UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
+ using StorageColumnType = typename
UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
+ using DstColumnType = typename
PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
+
+ ColumnPtr from_col = remove_nullable(src_physical_col);
+ MutableColumnPtr to_col =
remove_nullable(src_logical_column)->assume_mutable();
+ auto& src_data = static_cast<const
StorageColumnType*>(from_col.get())->get_data();
+
+ size_t rows = src_data.size();
+ size_t start_idx = to_col->size();
+ to_col->resize(start_idx + rows);
+ auto& data = static_cast<DstColumnType&>(*to_col.get()).get_data();
+
+ for (int i = 0; i < rows; i++) {
+ StorageCppType src_value = src_data[i];
+ auto unsigned_value = static_cast<UnsignedCppType>(src_value);
+ data[start_idx + i] = unsigned_value;
+ }
+
+ return Status::OK();
+ }
+};
+
class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
private:
int _type_length;
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp
b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 08692de8743..9097b65718f 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -191,16 +191,19 @@ void FieldDescriptor::parse_physical_field(const
tparquet::SchemaElement& physic
physical_field->physical_type = physical_schema.type;
_physical_fields.push_back(physical_field);
physical_field->physical_column_index = _physical_fields.size() - 1;
- physical_field->type = get_doris_type(physical_schema);
+ auto type = get_doris_type(physical_schema);
+ physical_field->type = type.first;
+ physical_field->is_type_compatibility = type.second;
}
-TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement&
physical_schema) {
- TypeDescriptor type;
- type.type = INVALID_TYPE;
+std::pair<TypeDescriptor, bool> FieldDescriptor::get_doris_type(
+ const tparquet::SchemaElement& physical_schema) {
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
if (physical_schema.__isset.logicalType) {
- type = convert_to_doris_type(physical_schema.logicalType);
+ ans = convert_to_doris_type(physical_schema.logicalType);
} else if (physical_schema.__isset.converted_type) {
- type = convert_to_doris_type(physical_schema);
+ ans = convert_to_doris_type(physical_schema);
}
// use physical type instead
if (type.type == INVALID_TYPE) {
@@ -233,7 +236,7 @@ TypeDescriptor FieldDescriptor::get_doris_type(const
tparquet::SchemaElement& ph
break;
}
}
- return type;
+ return ans;
}
// Copy from org.apache.iceberg.avro.AvroSchemaUtil#validAvroName
@@ -302,8 +305,11 @@ void FieldDescriptor::iceberg_sanitize(const
std::vector<std::string>& read_colu
}
}
-TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType
logicalType) {
- TypeDescriptor type;
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
+ tparquet::LogicalType logicalType) {
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
+ bool& is_type_compatibility = ans.second;
if (logicalType.__isset.STRING) {
type = TypeDescriptor(TYPE_STRING);
} else if (logicalType.__isset.DECIMAL) {
@@ -313,16 +319,25 @@ TypeDescriptor
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
type = TypeDescriptor(TYPE_DATEV2);
} else if (logicalType.__isset.INTEGER) {
if (logicalType.INTEGER.isSigned) {
- if (logicalType.INTEGER.bitWidth <= 32) {
+ if (logicalType.INTEGER.bitWidth <= 8) {
+ type = TypeDescriptor(TYPE_TINYINT);
+ } else if (logicalType.INTEGER.bitWidth <= 16) {
+ type = TypeDescriptor(TYPE_SMALLINT);
+ } else if (logicalType.INTEGER.bitWidth <= 32) {
type = TypeDescriptor(TYPE_INT);
} else {
type = TypeDescriptor(TYPE_BIGINT);
}
} else {
- if (logicalType.INTEGER.bitWidth <= 16) {
+ is_type_compatibility = true;
+ if (logicalType.INTEGER.bitWidth <= 8) {
+ type = TypeDescriptor(TYPE_SMALLINT);
+ } else if (logicalType.INTEGER.bitWidth <= 16) {
type = TypeDescriptor(TYPE_INT);
- } else {
+ } else if (logicalType.INTEGER.bitWidth <= 32) {
type = TypeDescriptor(TYPE_BIGINT);
+ } else {
+ type = TypeDescriptor(TYPE_LARGEINT);
}
}
} else if (logicalType.__isset.TIME) {
@@ -344,12 +359,14 @@ TypeDescriptor
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
} else {
type = TypeDescriptor(INVALID_TYPE);
}
- return type;
+ return ans;
}
-TypeDescriptor FieldDescriptor::convert_to_doris_type(
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
const tparquet::SchemaElement& physical_schema) {
- TypeDescriptor type;
+ std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+ TypeDescriptor& type = ans.first;
+ bool& is_type_compatibility = ans.second;
switch (physical_schema.converted_type) {
case tparquet::ConvertedType::type::UTF8:
type = TypeDescriptor(TYPE_STRING);
@@ -378,28 +395,33 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(
type = TypeDescriptor(TYPE_TINYINT);
break;
case tparquet::ConvertedType::type::UINT_8:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_16:
type = TypeDescriptor(TYPE_SMALLINT);
break;
case tparquet::ConvertedType::type::UINT_16:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_32:
type = TypeDescriptor(TYPE_INT);
break;
case tparquet::ConvertedType::type::UINT_32:
- [[fallthrough]];
- case tparquet::ConvertedType::type::UINT_64:
+ is_type_compatibility = true;
[[fallthrough]];
case tparquet::ConvertedType::type::INT_64:
type = TypeDescriptor(TYPE_BIGINT);
break;
+ case tparquet::ConvertedType::type::UINT_64:
+ is_type_compatibility = true;
+ type = TypeDescriptor(TYPE_LARGEINT);
+ break;
default:
LOG(WARNING) << "Not supported parquet ConvertedType: " <<
physical_schema.converted_type;
type = TypeDescriptor(INVALID_TYPE);
break;
}
- return type;
+ return ans;
}
Status FieldDescriptor::parse_group_field(const
std::vector<tparquet::SchemaElement>& t_schemas,
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h
b/be/src/vec/exec/format/parquet/schema_desc.h
index 50e526bd730..ca726ef1b57 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -49,6 +49,9 @@ struct FieldSchema {
int16_t repeated_parent_def_level = 0;
std::vector<FieldSchema> children;
+ //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+ bool is_type_compatibility = false;
+
FieldSchema() = default;
~FieldSchema() = default;
FieldSchema(const FieldSchema& fieldSchema) = default;
@@ -84,12 +87,13 @@ private:
Status parse_node_field(const std::vector<tparquet::SchemaElement>&
t_schemas, size_t curr_pos,
FieldSchema* node_field);
- TypeDescriptor convert_to_doris_type(tparquet::LogicalType logicalType);
+ std::pair<TypeDescriptor, bool>
convert_to_doris_type(tparquet::LogicalType logicalType);
- TypeDescriptor convert_to_doris_type(const tparquet::SchemaElement&
physical_schema);
+ std::pair<TypeDescriptor, bool> convert_to_doris_type(
+ const tparquet::SchemaElement& physical_schema);
public:
- TypeDescriptor get_doris_type(const tparquet::SchemaElement&
physical_schema);
+ std::pair<TypeDescriptor, bool> get_doris_type(const
tparquet::SchemaElement& physical_schema);
// org.apache.iceberg.avro.AvroSchemaUtil#sanitize will encode special
characters,
// we have to decode these characters
diff --git
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
index 406bc7660ff..5748cedd228 100644
---
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
+++
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -100,12 +100,12 @@ id int Yes false \N NONE
9 1 string 27 false 5 true 1
-- !desc_s3 --
-__add_5 int Yes false \N NONE
-__bit_or_7 int Yes false \N NONE
+__add_5 smallint Yes false \N NONE
+__bit_or_7 tinyint Yes false \N NONE
__cast_3 bigint Yes false \N NONE
__greater_than_4 boolean Yes false \N NONE
__in_predicate_6 boolean Yes false \N NONE
-__literal_1 int Yes false \N NONE
+__literal_1 tinyint Yes false \N NONE
__literal_2 text Yes false \N NONE
id int Yes false \N NONE
diff --git
a/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
new file mode 100644
index 00000000000..4cda2746a00
--- /dev/null
+++
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
@@ -0,0 +1,440 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !test_1 --
+0 254 54979 3876010132 12292188520939801104
+1 194 41087 2468877118 11276751473207154134
+2 204 40618 3112417582 12531644712494408881
+3 252 42885 2959241092 12524633273560617315
+4 168 34898 3576407414 13096188234563161622
+5 252 39188 3127662355 16817880057727309281
+6 146 45695 2457180674 13258241842726882717
+7 223 54096 3818049848 13392705476834798352
+8 247 49103 4235043353 14882039847048390015
+9 171 37711 4121304297 12690575475523254418
+10 208 45522 2849886325 9887016810088790216
+11 140 57808 4249890046 11248669065541052626
+12 154 63353 2494834510 10992021804074820099
+13 169 62034 3442632722 13741258842709060503
+14 201 39884 2544963866 13079528300040741505
+15 208 48887 2865890129 15763071531082484590
+16 200 58596 2907876388 13993629507485604147
+17 210 50277 3777672898 9348401130412952441
+18 212 51965 3618857131 16174811648348878672
+19 161 39818 2368776725 15511547001336881917
+20 160 41750 2980821608 11023149785626200227
+21 141 57340 4284126112 15552326510762976216
+22 139 58391 4117149789 15878585387857525976
+23 212 59593 2459376464 15173162289502083954
+24 165 53957 3427228999 17619046273898992624
+25 229 49948 2600007878 15455673680657024177
+26 134 64599 3738904119 16329279862825806683
+27 205 41073 2442068534 14375087414295500534
+28 204 60681 3422219729 18264453353631472102
+29 136 47361 3588730082 13238158010948418606
+30 252 42761 4175633691 15734976010511914446
+31 150 45199 4078820644 15090555688768508964
+32 210 57428 4009891336 16805687728956091249
+33 235 34379 3963852622 13291314989308428602
+34 226 33850 3643451160 15146799101548124057
+35 133 54253 2734990478 12652028700805136484
+36 166 61697 3958278249 9924263812189602832
+37 137 55610 2782254392 18017977508881163787
+38 166 41727 3012921589 12285785983985756200
+39 194 46987 2351267776 18191890018150428322
+40 201 61160 3877183539 17825711874965469887
+41 209 54646 3739634163 12447543494188025837
+42 157 65086 2250645811 15257822118590463416
+43 223 56812 4101782180 9308776360130037320
+44 135 61061 2806848998 15345311951702932353
+45 147 39862 3521569945 17679765098696012133
+46 229 38351 2299955463 11589155413771055105
+47 141 49268 2740241757 16646921996087125692
+48 144 53152 3733884127 12946127273932789697
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_2 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_3 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_4 --
+id int Yes false \N NONE
+uint8_column smallint Yes false \N NONE
+uint16_column int Yes false \N NONE
+uint32_column bigint Yes false \N NONE
+uint64_column largeint Yes false \N NONE
+
+-- !test_5 --
+0 200 48438 3775964178 9957179629640951554
+1 197 37330 2548711656 11127988488306710308
+2 200 65117 3419624570 11652168873218179696
+3 223 39174 3669818470 17675778338328486831
+4 196 60825 3064802389 18435946529637287047
+5 148 58165 3414732886 10219427643252484377
+6 217 61318 4075782121 10508828167797335037
+7 185 46040 2798591473 16764958584953558745
+8 148 50700 2541953946 11228894632595899400
+9 235 41262 3398155979 14622381114650660637
+10 189 49552 3754066331 16077913453789464999
+11 138 41102 2488033055 11226600460306403745
+12 132 62435 3745892606 17429590483490742063
+13 164 34858 3380810793 15447251686769296941
+14 246 44518 4213847006 13168159509128150529
+15 128 34475 2584892403 14470296482506793549
+16 180 40732 2667143993 13453719924509014943
+17 206 49824 3079026622 9242535560261271930
+18 250 61860 3723105814 9754643556067415401
+19 248 51724 4229819070 15295668751829551718
+20 237 38710 2951615403 14702842917502593482
+21 133 46707 2464573031 15860122391678970590
+22 249 41930 4210407904 12677893784378713520
+23 218 34250 3657784995 14768434928936445583
+24 134 52064 4274319068 16672757830801563734
+25 179 54232 3587434213 10613547124477746521
+26 217 63838 2348239122 17998346098073086386
+27 190 52362 3442840997 13122160861538572795
+28 131 39957 2321439682 16792774292797573856
+29 143 50902 2914375790 10557924491128546903
+30 228 33365 2438054546 14302876531585763284
+31 251 59126 2308219390 12753986538521770387
+32 197 58579 2647214662 16916351783057256258
+33 246 62028 4202894981 16869494938800942234
+34 229 47800 3963077237 12000510020655611310
+35 195 43221 2980563838 17324881735239531242
+36 210 46389 3302751013 10936691069329977133
+37 190 60582 4294458835 13465474203101539625
+38 234 58068 2575498858 18264769986785189204
+39 244 55282 2242973260 14725356984149511589
+40 224 33882 3169401634 9641824149700693760
+41 210 48824 2805949235 17877575698845246971
+42 250 49871 4015422133 11571502995585068959
+43 253 53173 3315146396 15527276320077174431
+44 205 48824 2153923483 18227485227719579199
+45 202 65101 2286014232 10596463304445669324
+46 144 39272 3664679383 14619550616500589262
+47 207 58788 2459180916 15787272910593406395
+48 236 61302 2814354943 15797302386492157450
+49 222 44025 2267428102 16226182608093468653
+
+-- !test_6 --
+0 254 65535 4294967294 18446744073709551614
+1 254 65534 4294967295 18446744073709551615
+2 254 65535 4294967294 18446744073709551615
+3 255 65535 4294967294 18446744073709551615
+4 255 65534 4294967295 18446744073709551615
+5 254 65534 4294967295 18446744073709551614
+6 254 65534 4294967294 18446744073709551614
+7 255 65534 4294967294 18446744073709551614
+8 255 65534 4294967295 18446744073709551614
+9 255 65535 4294967294 18446744073709551614
+
+-- !test_7 --
+id bigint Yes false \N NONE
+mediumint_unsigned bigint Yes false \N NONE
+int_unsigned bigint Yes false \N NONE
+
+-- !test_8 --
+1 111 111
+2 222 222
+
+-- !test_9 --
+16 200 58596 2907876388 13993629507485604147
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_10 --
+38 166 41727 3012921589 12285785983985756200
+
+-- !test_11 --
+46 229 38351 2299955463 11589155413771055105
+
+-- !test_12 --
+49 200 48517 4024062219 15103440093398422258
+
+-- !test_13 --
+49 222 44025 2267428102 16226182608093468653
+
+-- !test_14 --
+38 234 58068 2575498858 18264769986785189204
+
+-- !test_15 --
+14 246 44518 4213847006 13168159509128150529
+
+-- !test_16 --
+25 179 54232 3587434213 10613547124477746521
+
+-- !test_17 --
+500
+
+-- !test_18 --
+500
+
+-- !test_19 --
+500
+
+-- !test_20 --
+500
+
+-- !test_21 --
+500
+
+-- !test_22 --
+0 254 65535 4294967294 18446744073709551614
+5 254 65534 4294967295 18446744073709551614
+6 254 65534 4294967294 18446744073709551614
+7 255 65534 4294967294 18446744073709551614
+8 255 65534 4294967295 18446744073709551614
+9 255 65535 4294967294 18446744073709551614
+10 255 65535 4294967294 18446744073709551614
+11 254 65535 4294967294 18446744073709551614
+12 255 65535 4294967294 18446744073709551614
+14 254 65535 4294967295 18446744073709551614
+16 255 65535 4294967295 18446744073709551614
+17 255 65535 4294967294 18446744073709551614
+18 255 65534 4294967295 18446744073709551614
+20 255 65535 4294967294 18446744073709551614
+21 255 65534 4294967294 18446744073709551614
+22 255 65534 4294967294 18446744073709551614
+24 255 65534 4294967294 18446744073709551614
+25 254 65535 4294967295 18446744073709551614
+27 255 65534 4294967294 18446744073709551614
+29 255 65534 4294967295 18446744073709551614
+30 255 65534 4294967295 18446744073709551614
+32 254 65534 4294967294 18446744073709551614
+37 254 65534 4294967295 18446744073709551614
+38 254 65535 4294967294 18446744073709551614
+40 255 65535 4294967294 18446744073709551614
+41 255 65534 4294967294 18446744073709551614
+43 254 65534 4294967295 18446744073709551614
+44 254 65534 4294967294 18446744073709551614
+45 255 65534 4294967295 18446744073709551614
+46 254 65534 4294967295 18446744073709551614
+53 254 65534 4294967294 18446744073709551614
+56 254 65535 4294967294 18446744073709551614
+58 255 65534 4294967295 18446744073709551614
+60 254 65535 4294967295 18446744073709551614
+62 255 65534 4294967295 18446744073709551614
+63 255 65535 4294967294 18446744073709551614
+66 255 65534 4294967295 18446744073709551614
+68 254 65535 4294967295 18446744073709551614
+71 254 65535 4294967295 18446744073709551614
+73 254 65535 4294967295 18446744073709551614
+75 254 65534 4294967294 18446744073709551614
+76 255 65534 4294967294 18446744073709551614
+77 254 65535 4294967295 18446744073709551614
+80 254 65534 4294967295 18446744073709551614
+81 255 65534 4294967294 18446744073709551614
+84 255 65534 4294967295 18446744073709551614
+86 255 65535 4294967295 18446744073709551614
+87 254 65534 4294967295 18446744073709551614
+89 255 65535 4294967294 18446744073709551614
+91 254 65534 4294967294 18446744073709551614
+92 255 65534 4294967294 18446744073709551614
+94 254 65535 4294967294 18446744073709551614
+98 255 65534 4294967295 18446744073709551614
+99 254 65535 4294967294 18446744073709551614
+100 255 65534 4294967295 18446744073709551614
+102 255 65534 4294967295 18446744073709551614
+103 255 65534 4294967295 18446744073709551614
+106 254 65534 4294967295 18446744073709551614
+108 254 65535 4294967295 18446744073709551614
+109 255 65535 4294967294 18446744073709551614
+110 254 65534 4294967295 18446744073709551614
+112 255 65535 4294967295 18446744073709551614
+115 255 65534 4294967295 18446744073709551614
+117 255 65535 4294967294 18446744073709551614
+120 254 65535 4294967294 18446744073709551614
+121 254 65535 4294967295 18446744073709551614
+123 254 65535 4294967294 18446744073709551614
+124 254 65535 4294967295 18446744073709551614
+130 255 65534 4294967294 18446744073709551614
+131 254 65535 4294967295 18446744073709551614
+132 254 65535 4294967294 18446744073709551614
+133 254 65534 4294967295 18446744073709551614
+139 254 65535 4294967295 18446744073709551614
+141 255 65535 4294967295 18446744073709551614
+148 255 65535 4294967294 18446744073709551614
+149 254 65534 4294967295 18446744073709551614
+150 254 65534 4294967295 18446744073709551614
+151 254 65534 4294967294 18446744073709551614
+152 254 65535 4294967295 18446744073709551614
+157 255 65534 4294967295 18446744073709551614
+158 255 65535 4294967294 18446744073709551614
+167 255 65534 4294967295 18446744073709551614
+168 254 65535 4294967294 18446744073709551614
+169 254 65534 4294967294 18446744073709551614
+173 255 65534 4294967294 18446744073709551614
+174 254 65535 4294967295 18446744073709551614
+177 254 65535 4294967295 18446744073709551614
+184 255 65535 4294967295 18446744073709551614
+186 254 65535 4294967295 18446744073709551614
+187 255 65535 4294967295 18446744073709551614
+189 254 65534 4294967295 18446744073709551614
+190 254 65535 4294967295 18446744073709551614
+191 254 65535 4294967294 18446744073709551614
+192 254 65535 4294967294 18446744073709551614
+194 255 65534 4294967294 18446744073709551614
+195 255 65534 4294967295 18446744073709551614
+196 254 65535 4294967295 18446744073709551614
+197 255 65535 4294967295 18446744073709551614
+204 255 65535 4294967295 18446744073709551614
+207 255 65535 4294967295 18446744073709551614
+210 255 65535 4294967294 18446744073709551614
+211 255 65535 4294967294 18446744073709551614
+213 254 65534 4294967295 18446744073709551614
+214 255 65534 4294967295 18446744073709551614
+216 254 65535 4294967295 18446744073709551614
+217 255 65535 4294967295 18446744073709551614
+222 255 65534 4294967295 18446744073709551614
+226 255 65535 4294967294 18446744073709551614
+227 254 65534 4294967294 18446744073709551614
+228 254 65535 4294967295 18446744073709551614
+229 255 65535 4294967294 18446744073709551614
+231 254 65534 4294967294 18446744073709551614
+232 254 65534 4294967294 18446744073709551614
+233 255 65535 4294967295 18446744073709551614
+240 255 65534 4294967295 18446744073709551614
+241 254 65535 4294967294 18446744073709551614
+243 254 65534 4294967294 18446744073709551614
+244 255 65534 4294967295 18446744073709551614
+246 255 65534 4294967294 18446744073709551614
+248 255 65535 4294967295 18446744073709551614
+251 254 65535 4294967294 18446744073709551614
+253 255 65534 4294967294 18446744073709551614
+256 255 65534 4294967294 18446744073709551614
+257 255 65535 4294967294 18446744073709551614
+258 254 65534 4294967294 18446744073709551614
+261 254 65535 4294967295 18446744073709551614
+262 254 65535 4294967295 18446744073709551614
+264 255 65534 4294967294 18446744073709551614
+266 254 65535 4294967295 18446744073709551614
+267 255 65534 4294967294 18446744073709551614
+269 254 65535 4294967294 18446744073709551614
+270 255 65534 4294967294 18446744073709551614
+273 254 65535 4294967294 18446744073709551614
+279 255 65535 4294967295 18446744073709551614
+281 254 65534 4294967294 18446744073709551614
+283 255 65534 4294967295 18446744073709551614
+285 254 65535 4294967294 18446744073709551614
+287 255 65535 4294967294 18446744073709551614
+288 255 65534 4294967294 18446744073709551614
+290 254 65534 4294967295 18446744073709551614
+291 255 65535 4294967295 18446744073709551614
+292 255 65534 4294967295 18446744073709551614
+295 254 65535 4294967294 18446744073709551614
+296 255 65534 4294967295 18446744073709551614
+298 255 65535 4294967294 18446744073709551614
+301 255 65534 4294967295 18446744073709551614
+302 254 65534 4294967294 18446744073709551614
+307 254 65535 4294967294 18446744073709551614
+308 254 65535 4294967294 18446744073709551614
+309 254 65535 4294967295 18446744073709551614
+313 254 65534 4294967295 18446744073709551614
+317 254 65534 4294967294 18446744073709551614
+319 255 65535 4294967295 18446744073709551614
+320 254 65535 4294967295 18446744073709551614
+321 254 65535 4294967295 18446744073709551614
+322 254 65535 4294967295 18446744073709551614
+325 255 65535 4294967295 18446744073709551614
+326 254 65534 4294967295 18446744073709551614
+327 255 65535 4294967295 18446744073709551614
+328 255 65534 4294967294 18446744073709551614
+331 254 65535 4294967294 18446744073709551614
+332 255 65535 4294967294 18446744073709551614
+334 255 65534 4294967294 18446744073709551614
+336 254 65535 4294967295 18446744073709551614
+337 254 65535 4294967295 18446744073709551614
+339 255 65535 4294967295 18446744073709551614
+341 254 65535 4294967294 18446744073709551614
+342 255 65534 4294967294 18446744073709551614
+345 254 65534 4294967294 18446744073709551614
+346 255 65535 4294967294 18446744073709551614
+347 255 65534 4294967295 18446744073709551614
+349 255 65534 4294967295 18446744073709551614
+350 254 65535 4294967295 18446744073709551614
+353 255 65534 4294967294 18446744073709551614
+355 255 65535 4294967294 18446744073709551614
+356 254 65535 4294967295 18446744073709551614
+357 254 65535 4294967295 18446744073709551614
+361 254 65535 4294967295 18446744073709551614
+363 254 65535 4294967295 18446744073709551614
+367 255 65535 4294967294 18446744073709551614
+368 254 65534 4294967295 18446744073709551614
+372 254 65535 4294967294 18446744073709551614
+373 255 65535 4294967294 18446744073709551614
+376 255 65535 4294967294 18446744073709551614
+377 254 65535 4294967295 18446744073709551614
+379 255 65535 4294967294 18446744073709551614
+382 254 65534 4294967295 18446744073709551614
+385 254 65535 4294967294 18446744073709551614
+389 254 65534 4294967294 18446744073709551614
+390 255 65535 4294967295 18446744073709551614
+391 254 65535 4294967294 18446744073709551614
+393 255 65534 4294967295 18446744073709551614
+395 254 65535 4294967294 18446744073709551614
+396 254 65534 4294967294 18446744073709551614
+398 254 65535 4294967294 18446744073709551614
+400 254 65534 4294967294 18446744073709551614
+401 254 65534 4294967294 18446744073709551614
+402 255 65534 4294967295 18446744073709551614
+403 254 65534 4294967295 18446744073709551614
+408 255 65535 4294967294 18446744073709551614
+409 254 65534 4294967295 18446744073709551614
+410 254 65534 4294967294 18446744073709551614
+411 255 65535 4294967294 18446744073709551614
+412 254 65535 4294967294 18446744073709551614
+413 255 65534 4294967294 18446744073709551614
+415 254 65534 4294967294 18446744073709551614
+417 254 65534 4294967295 18446744073709551614
+418 254 65535 4294967294 18446744073709551614
+420 255 65535 4294967294 18446744073709551614
+424 255 65535 4294967294 18446744073709551614
+426 254 65535 4294967295 18446744073709551614
+430 255 65535 4294967294 18446744073709551614
+431 255 65534 4294967295 18446744073709551614
+432 255 65535 4294967294 18446744073709551614
+434 254 65535 4294967294 18446744073709551614
+440 255 65534 4294967294 18446744073709551614
+441 254 65535 4294967294 18446744073709551614
+443 254 65535 4294967295 18446744073709551614
+444 255 65535 4294967295 18446744073709551614
+450 254 65534 4294967294 18446744073709551614
+454 255 65534 4294967294 18446744073709551614
+456 255 65535 4294967294 18446744073709551614
+458 255 65535 4294967295 18446744073709551614
+459 254 65534 4294967294 18446744073709551614
+461 255 65535 4294967294 18446744073709551614
+462 255 65535 4294967295 18446744073709551614
+463 254 65534 4294967294 18446744073709551614
+464 255 65534 4294967295 18446744073709551614
+465 254 65535 4294967294 18446744073709551614
+466 255 65534 4294967295 18446744073709551614
+468 254 65534 4294967295 18446744073709551614
+470 254 65535 4294967294 18446744073709551614
+471 255 65534 4294967294 18446744073709551614
+472 254 65535 4294967295 18446744073709551614
+473 255 65534 4294967295 18446744073709551614
+474 255 65535 4294967295 18446744073709551614
+475 255 65535 4294967294 18446744073709551614
+477 254 65534 4294967294 18446744073709551614
+480 255 65535 4294967295 18446744073709551614
+483 254 65535 4294967294 18446744073709551614
+485 255 65535 4294967294 18446744073709551614
+486 254 65534 4294967294 18446744073709551614
+487 254 65534 4294967295 18446744073709551614
+488 255 65535 4294967295 18446744073709551614
+489 254 65535 4294967295 18446744073709551614
+490 255 65535 4294967294 18446744073709551614
+494 254 65534 4294967294 18446744073709551614
+495 255 65534 4294967295 18446744073709551614
+496 254 65534 4294967295 18446744073709551614
+497 255 65535 4294967294 18446744073709551614
+499 255 65534 4294967295 18446744073709551614
+
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
new file mode 100644
index 00000000000..8f87903255f
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
new file mode 100644
index 00000000000..fe48ab71842
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
new file mode 100644
index 00000000000..ff8dae5ecec
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet differ
diff --git
a/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
new file mode 100644
index 00000000000..010d15497ec
Binary files /dev/null and
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet differ
diff --git
a/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
new file mode 100644
index 00000000000..24cfb5f2ac2
--- /dev/null
+++
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
@@ -0,0 +1,102 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This suite tests reading parquet unsigned integer columns through the `local` tvf
+suite("test_local_tvf_parquet_unsigned_integers", "p0") {
+ List<List<Object>> backends = sql """ show backends """
+ def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"
+
+ assertTrue(backends.size() > 0)
+ // the first column of the first `show backends` row is passed as "backend_id" to every local() call below
+ def be_id = backends[0][0]
+ // in cluster mode, make sure every BE has a copy of this data
+ def outFilePath="/"
+ def transFile01="${dataFilePath}/unsigned_integers_1.parquet"
+ def transFile02="${dataFilePath}/unsigned_integers_2.parquet"
+ def transFile03="${dataFilePath}/unsigned_integers_3.parquet"
+ def transFile04="${dataFilePath}/unsigned_integers_4.parquet"
+ // send the four parquet files to outFilePath for every backend row; backend[1] is presumably the BE host - verify against `show backends`
+ for (List<Object> backend : backends) {
+ def be_host = backend[1]
+ scpFiles ("root", be_host, transFile01, outFilePath, false);
+ scpFiles ("root", be_host, transFile02, outFilePath, false);
+ scpFiles ("root", be_host, transFile03, outFilePath, false);
+ scpFiles ("root", be_host, transFile04, outFilePath, false);
+ }
+
+ def file1 = outFilePath + "unsigned_integers_1.parquet";
+ def file2 = outFilePath + "unsigned_integers_2.parquet";
+ def file3 = outFilePath + "unsigned_integers_3.parquet";
+ def file4 = outFilePath + "unsigned_integers_4.parquet";
+
+
+ // test_1 .. test_8: select (ordered by id) and `desc function` output for each of the four files
+
+ qt_test_1 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+ qt_test_2 """ desc function local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_3 """ desc function local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_4 """ desc function local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_5 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+ qt_test_6 """ select * from local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet") order by id limit 10;"""
+
+ qt_test_7 """ desc function local( "file_path" = "${file4}", "backend_id"
= "${be_id}", "format" = "parquet");"""
+
+ qt_test_8 """ select * from local( "file_path" = "${file4}", "backend_id"
= "${be_id}", "format" = "parquet") order by id ;"""
+
+
+ // test_9 .. test_12: equality predicate on each unsigned column of file1
+
+ qt_test_9 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint8_column = 200 order by id ;"""
+
+ qt_test_10 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint16_column = 41727 order by id ;"""
+
+ qt_test_11 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint32_column = 2299955463 order by
id ;"""
+
+ qt_test_12 """ select * from local( "file_path" = "${file1}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 15103440093398422258
order by id ;"""
+
+
+ // test_13 .. test_16: equality predicate on each unsigned column of file2
+
+ qt_test_13 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint8_column = 222 order by id ;"""
+
+ qt_test_14 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint16_column = 58068 order by id ;"""
+
+ qt_test_15 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint32_column = 4213847006 order by
id ;"""
+
+ qt_test_16 """ select * from local( "file_path" = "${file2}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 10613547124477746521
order by id ;"""
+
+ // test_17 .. test_22: per-column counts on file3, then a filter on a uint64 value above the signed 64-bit range
+
+ qt_test_17 """ select count(id) from local( "file_path" = "${file3}",
"backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_18 """ select count(uint8_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_19 """ select count(uint16_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_20 """ select count(uint32_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_21 """ select count(uint64_column) from local( "file_path" =
"${file3}", "backend_id" = "${be_id}", "format" = "parquet") ;"""
+
+ qt_test_22 """ select * from local( "file_path" = "${file3}", "backend_id"
= "${be_id}", "format" = "parquet") where uint64_column = 18446744073709551614
order by id ;"""
+
+
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]