This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a7156ee7756 [fix](parquet)Fix the be core issue when reading parquet unsigned types. (#39926) (#40123)
a7156ee7756 is described below

commit a7156ee7756b408263248e26e6dcc7937b6c74e1
Author: daidai <[email protected]>
AuthorDate: Thu Aug 29 21:52:52 2024 +0800

    [fix](parquet)Fix the be core issue when reading parquet unsigned types. (#39926) (#40123)
    
    bp #39926
---
 .../exec/format/parquet/parquet_column_convert.cpp |  18 +-
 .../exec/format/parquet/parquet_column_convert.h   |  65 +++
 be/src/vec/exec/format/parquet/schema_desc.cpp     |  58 ++-
 be/src/vec/exec/format/parquet/schema_desc.h       |  10 +-
 .../test_outfile_expr_generate_col_name.out        |   6 +-
 .../test_local_tvf_parquet_unsigned_integers.out   | 440 +++++++++++++++++++++
 .../tvf/unsigned_integers_1.parquet                | Bin 0 -> 3202 bytes
 .../tvf/unsigned_integers_2.parquet                | Bin 0 -> 3218 bytes
 .../tvf/unsigned_integers_3.parquet                | Bin 0 -> 4727 bytes
 .../tvf/unsigned_integers_4.parquet                | Bin 0 -> 704 bytes
 ...test_local_tvf_parquet_unsigned_integers.groovy | 102 +++++
 11 files changed, 673 insertions(+), 26 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 57f1f54b7b9..2fb0afea82a 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -66,7 +66,9 @@ ColumnPtr PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s
         src_physical_type = tparquet::Type::INT32;
         src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT);
     }
-    if (is_consistent() && _logical_converter->is_consistent()) {
+
+    if (!_convert_params->is_type_compatibility && is_consistent() &&
+        _logical_converter->is_consistent()) {
         if (_cached_src_physical_type == nullptr) {
             _cached_src_physical_type = DataTypeFactory::instance().create_data_type(
                     src_logical_type, dst_logical_type->is_nullable());
@@ -246,7 +248,19 @@ std::unique_ptr<PhysicalToLogicalConverter> PhysicalToLogicalConverter::get_conv
     }
     PrimitiveType src_logical_primitive = src_logical_type.type;
 
-    if (is_parquet_native_type(src_logical_primitive)) {
+    if (field_schema->is_type_compatibility) {
+        if (src_logical_type == TYPE_SMALLINT) {
+            physical_converter.reset(new UnsignedIntegerConverter<TYPE_SMALLINT>());
+        } else if (src_logical_type == TYPE_INT) {
+            physical_converter.reset(new UnsignedIntegerConverter<TYPE_INT>());
+        } else if (src_logical_type == TYPE_BIGINT) {
+            physical_converter.reset(new UnsignedIntegerConverter<TYPE_BIGINT>());
+        } else if (src_logical_type == TYPE_LARGEINT) {
+            physical_converter.reset(new UnsignedIntegerConverter<TYPE_LARGEINT>());
+        } else {
+            physical_converter.reset(new UnsupportedConverter(src_physical_type, src_logical_type));
+        }
+    } else if (is_parquet_native_type(src_logical_primitive)) {
         if (is_string_type(src_logical_primitive) &&
             src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
             // for FixedSizeBinary
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 551bf7e14ed..91b81121aa4 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -40,6 +40,9 @@ struct ConvertParams {
     DecimalScaleParams decimal_scale;
     FieldSchema* field_schema = nullptr;
 
+    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+    bool is_type_compatibility = false;
+
     /**
      * Some frameworks like paimon maybe writes non-standard parquet files. Timestamp field doesn't have
      * logicalType or converted_type to indicates its precision. We have to reset the time mask.
@@ -108,6 +111,7 @@ struct ConvertParams {
             t.from_unixtime(0, *ctz);
             offset_days = t.day() == 31 ? -1 : 0;
         }
+        is_type_compatibility = field_schema_->is_type_compatibility;
     }
 
     template <typename DecimalPrimitiveType>
@@ -273,6 +277,67 @@ class LittleIntPhysicalConverter : public PhysicalToLogicalConverter {
     }
 };
 
+template <PrimitiveType type>
+struct UnsignedTypeTraits;
+
+template <>
+struct UnsignedTypeTraits<TYPE_SMALLINT> {
+    using UnsignedCppType = UInt8;
+    //https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
+    //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 primitive type and INT(64, false)
+    //must annotate an int64 primitive type.
+    using StorageCppType = Int32;
+    using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_INT> {
+    using UnsignedCppType = UInt16;
+    using StorageCppType = Int32;
+    using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_BIGINT> {
+    using UnsignedCppType = UInt32;
+    using StorageCppType = Int32;
+    using StorageColumnType = vectorized::ColumnInt32;
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_LARGEINT> {
+    using UnsignedCppType = UInt64;
+    using StorageCppType = Int64;
+    using StorageColumnType = vectorized::ColumnInt64;
+};
+
+template <PrimitiveType IntPrimitiveType>
+class UnsignedIntegerConverter : public PhysicalToLogicalConverter {
+    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& src_logical_column) override {
+        using UnsignedCppType = typename UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
+        using StorageCppType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
+        using StorageColumnType = typename UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
+        using DstColumnType = typename PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
+
+        ColumnPtr from_col = remove_nullable(src_physical_col);
+        MutableColumnPtr to_col = remove_nullable(src_logical_column)->assume_mutable();
+        auto& src_data = static_cast<const StorageColumnType*>(from_col.get())->get_data();
+
+        size_t rows = src_data.size();
+        size_t start_idx = to_col->size();
+        to_col->resize(start_idx + rows);
+        auto& data = static_cast<DstColumnType&>(*to_col.get()).get_data();
+
+        for (int i = 0; i < rows; i++) {
+            StorageCppType src_value = src_data[i];
+            auto unsigned_value = static_cast<UnsignedCppType>(src_value);
+            data[start_idx + i] = unsigned_value;
+        }
+
+        return Status::OK();
+    }
+};
+
 class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
 private:
     int _type_length;
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 08692de8743..9097b65718f 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -191,16 +191,19 @@ void FieldDescriptor::parse_physical_field(const tparquet::SchemaElement& physic
     physical_field->physical_type = physical_schema.type;
     _physical_fields.push_back(physical_field);
     physical_field->physical_column_index = _physical_fields.size() - 1;
-    physical_field->type = get_doris_type(physical_schema);
+    auto type = get_doris_type(physical_schema);
+    physical_field->type = type.first;
+    physical_field->is_type_compatibility = type.second;
 }
 
-TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& physical_schema) {
-    TypeDescriptor type;
-    type.type = INVALID_TYPE;
+std::pair<TypeDescriptor, bool> FieldDescriptor::get_doris_type(
+        const tparquet::SchemaElement& physical_schema) {
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
     if (physical_schema.__isset.logicalType) {
-        type = convert_to_doris_type(physical_schema.logicalType);
+        ans = convert_to_doris_type(physical_schema.logicalType);
     } else if (physical_schema.__isset.converted_type) {
-        type = convert_to_doris_type(physical_schema);
+        ans = convert_to_doris_type(physical_schema);
     }
     // use physical type instead
     if (type.type == INVALID_TYPE) {
@@ -233,7 +236,7 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph
             break;
         }
     }
-    return type;
+    return ans;
 }
 
 // Copy from org.apache.iceberg.avro.AvroSchemaUtil#validAvroName
@@ -302,8 +305,11 @@ void FieldDescriptor::iceberg_sanitize(const std::vector<std::string>& read_colu
     }
 }
 
-TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) {
-    TypeDescriptor type;
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
+        tparquet::LogicalType logicalType) {
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
+    bool& is_type_compatibility = ans.second;
     if (logicalType.__isset.STRING) {
         type = TypeDescriptor(TYPE_STRING);
     } else if (logicalType.__isset.DECIMAL) {
@@ -313,16 +319,25 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
         type = TypeDescriptor(TYPE_DATEV2);
     } else if (logicalType.__isset.INTEGER) {
         if (logicalType.INTEGER.isSigned) {
-            if (logicalType.INTEGER.bitWidth <= 32) {
+            if (logicalType.INTEGER.bitWidth <= 8) {
+                type = TypeDescriptor(TYPE_TINYINT);
+            } else if (logicalType.INTEGER.bitWidth <= 16) {
+                type = TypeDescriptor(TYPE_SMALLINT);
+            } else if (logicalType.INTEGER.bitWidth <= 32) {
                 type = TypeDescriptor(TYPE_INT);
             } else {
                 type = TypeDescriptor(TYPE_BIGINT);
             }
         } else {
-            if (logicalType.INTEGER.bitWidth <= 16) {
+            is_type_compatibility = true;
+            if (logicalType.INTEGER.bitWidth <= 8) {
+                type = TypeDescriptor(TYPE_SMALLINT);
+            } else if (logicalType.INTEGER.bitWidth <= 16) {
                 type = TypeDescriptor(TYPE_INT);
-            } else {
+            } else if (logicalType.INTEGER.bitWidth <= 32) {
                 type = TypeDescriptor(TYPE_BIGINT);
+            } else {
+                type = TypeDescriptor(TYPE_LARGEINT);
             }
         }
     } else if (logicalType.__isset.TIME) {
@@ -344,12 +359,14 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
     } else {
         type = TypeDescriptor(INVALID_TYPE);
     }
-    return type;
+    return ans;
 }
 
-TypeDescriptor FieldDescriptor::convert_to_doris_type(
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
         const tparquet::SchemaElement& physical_schema) {
-    TypeDescriptor type;
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
+    bool& is_type_compatibility = ans.second;
     switch (physical_schema.converted_type) {
     case tparquet::ConvertedType::type::UTF8:
         type = TypeDescriptor(TYPE_STRING);
@@ -378,28 +395,33 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(
         type = TypeDescriptor(TYPE_TINYINT);
         break;
     case tparquet::ConvertedType::type::UINT_8:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_16:
         type = TypeDescriptor(TYPE_SMALLINT);
         break;
     case tparquet::ConvertedType::type::UINT_16:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_32:
         type = TypeDescriptor(TYPE_INT);
         break;
     case tparquet::ConvertedType::type::UINT_32:
-        [[fallthrough]];
-    case tparquet::ConvertedType::type::UINT_64:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_64:
         type = TypeDescriptor(TYPE_BIGINT);
         break;
+    case tparquet::ConvertedType::type::UINT_64:
+        is_type_compatibility = true;
+        type = TypeDescriptor(TYPE_LARGEINT);
+        break;
     default:
         LOG(WARNING) << "Not supported parquet ConvertedType: " << physical_schema.converted_type;
         type = TypeDescriptor(INVALID_TYPE);
         break;
     }
-    return type;
+    return ans;
 }
 
 Status FieldDescriptor::parse_group_field(const std::vector<tparquet::SchemaElement>& t_schemas,
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h b/be/src/vec/exec/format/parquet/schema_desc.h
index 50e526bd730..ca726ef1b57 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -49,6 +49,9 @@ struct FieldSchema {
     int16_t repeated_parent_def_level = 0;
     std::vector<FieldSchema> children;
 
+    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+    bool is_type_compatibility = false;
+
     FieldSchema() = default;
     ~FieldSchema() = default;
     FieldSchema(const FieldSchema& fieldSchema) = default;
@@ -84,12 +87,13 @@ private:
     Status parse_node_field(const std::vector<tparquet::SchemaElement>& t_schemas, size_t curr_pos,
                             FieldSchema* node_field);
 
-    TypeDescriptor convert_to_doris_type(tparquet::LogicalType logicalType);
+    std::pair<TypeDescriptor, bool> convert_to_doris_type(tparquet::LogicalType logicalType);
 
-    TypeDescriptor convert_to_doris_type(const tparquet::SchemaElement& physical_schema);
+    std::pair<TypeDescriptor, bool> convert_to_doris_type(
+            const tparquet::SchemaElement& physical_schema);
 
 public:
-    TypeDescriptor get_doris_type(const tparquet::SchemaElement& physical_schema);
+    std::pair<TypeDescriptor, bool> get_doris_type(const tparquet::SchemaElement& physical_schema);
 
     // org.apache.iceberg.avro.AvroSchemaUtil#sanitize will encode special characters,
     // we have to decode these characters
diff --git a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
index 406bc7660ff..5748cedd228 100644
--- a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
+++ b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -100,12 +100,12 @@ id        int     Yes     false   \N      NONE
 9      1       string  27      false   5       true    1
 
 -- !desc_s3 --
-__add_5        int     Yes     false   \N      NONE
-__bit_or_7     int     Yes     false   \N      NONE
+__add_5        smallint        Yes     false   \N      NONE
+__bit_or_7     tinyint Yes     false   \N      NONE
 __cast_3       bigint  Yes     false   \N      NONE
 __greater_than_4       boolean Yes     false   \N      NONE
 __in_predicate_6       boolean Yes     false   \N      NONE
-__literal_1    int     Yes     false   \N      NONE
+__literal_1    tinyint Yes     false   \N      NONE
 __literal_2    text    Yes     false   \N      NONE
 id     int     Yes     false   \N      NONE
 
diff --git a/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
new file mode 100644
index 00000000000..4cda2746a00
--- /dev/null
+++ b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
@@ -0,0 +1,440 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !test_1 --
+0      254     54979   3876010132      12292188520939801104
+1      194     41087   2468877118      11276751473207154134
+2      204     40618   3112417582      12531644712494408881
+3      252     42885   2959241092      12524633273560617315
+4      168     34898   3576407414      13096188234563161622
+5      252     39188   3127662355      16817880057727309281
+6      146     45695   2457180674      13258241842726882717
+7      223     54096   3818049848      13392705476834798352
+8      247     49103   4235043353      14882039847048390015
+9      171     37711   4121304297      12690575475523254418
+10     208     45522   2849886325      9887016810088790216
+11     140     57808   4249890046      11248669065541052626
+12     154     63353   2494834510      10992021804074820099
+13     169     62034   3442632722      13741258842709060503
+14     201     39884   2544963866      13079528300040741505
+15     208     48887   2865890129      15763071531082484590
+16     200     58596   2907876388      13993629507485604147
+17     210     50277   3777672898      9348401130412952441
+18     212     51965   3618857131      16174811648348878672
+19     161     39818   2368776725      15511547001336881917
+20     160     41750   2980821608      11023149785626200227
+21     141     57340   4284126112      15552326510762976216
+22     139     58391   4117149789      15878585387857525976
+23     212     59593   2459376464      15173162289502083954
+24     165     53957   3427228999      17619046273898992624
+25     229     49948   2600007878      15455673680657024177
+26     134     64599   3738904119      16329279862825806683
+27     205     41073   2442068534      14375087414295500534
+28     204     60681   3422219729      18264453353631472102
+29     136     47361   3588730082      13238158010948418606
+30     252     42761   4175633691      15734976010511914446
+31     150     45199   4078820644      15090555688768508964
+32     210     57428   4009891336      16805687728956091249
+33     235     34379   3963852622      13291314989308428602
+34     226     33850   3643451160      15146799101548124057
+35     133     54253   2734990478      12652028700805136484
+36     166     61697   3958278249      9924263812189602832
+37     137     55610   2782254392      18017977508881163787
+38     166     41727   3012921589      12285785983985756200
+39     194     46987   2351267776      18191890018150428322
+40     201     61160   3877183539      17825711874965469887
+41     209     54646   3739634163      12447543494188025837
+42     157     65086   2250645811      15257822118590463416
+43     223     56812   4101782180      9308776360130037320
+44     135     61061   2806848998      15345311951702932353
+45     147     39862   3521569945      17679765098696012133
+46     229     38351   2299955463      11589155413771055105
+47     141     49268   2740241757      16646921996087125692
+48     144     53152   3733884127      12946127273932789697
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_2 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_3 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_4 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_5 --
+0      200     48438   3775964178      9957179629640951554
+1      197     37330   2548711656      11127988488306710308
+2      200     65117   3419624570      11652168873218179696
+3      223     39174   3669818470      17675778338328486831
+4      196     60825   3064802389      18435946529637287047
+5      148     58165   3414732886      10219427643252484377
+6      217     61318   4075782121      10508828167797335037
+7      185     46040   2798591473      16764958584953558745
+8      148     50700   2541953946      11228894632595899400
+9      235     41262   3398155979      14622381114650660637
+10     189     49552   3754066331      16077913453789464999
+11     138     41102   2488033055      11226600460306403745
+12     132     62435   3745892606      17429590483490742063
+13     164     34858   3380810793      15447251686769296941
+14     246     44518   4213847006      13168159509128150529
+15     128     34475   2584892403      14470296482506793549
+16     180     40732   2667143993      13453719924509014943
+17     206     49824   3079026622      9242535560261271930
+18     250     61860   3723105814      9754643556067415401
+19     248     51724   4229819070      15295668751829551718
+20     237     38710   2951615403      14702842917502593482
+21     133     46707   2464573031      15860122391678970590
+22     249     41930   4210407904      12677893784378713520
+23     218     34250   3657784995      14768434928936445583
+24     134     52064   4274319068      16672757830801563734
+25     179     54232   3587434213      10613547124477746521
+26     217     63838   2348239122      17998346098073086386
+27     190     52362   3442840997      13122160861538572795
+28     131     39957   2321439682      16792774292797573856
+29     143     50902   2914375790      10557924491128546903
+30     228     33365   2438054546      14302876531585763284
+31     251     59126   2308219390      12753986538521770387
+32     197     58579   2647214662      16916351783057256258
+33     246     62028   4202894981      16869494938800942234
+34     229     47800   3963077237      12000510020655611310
+35     195     43221   2980563838      17324881735239531242
+36     210     46389   3302751013      10936691069329977133
+37     190     60582   4294458835      13465474203101539625
+38     234     58068   2575498858      18264769986785189204
+39     244     55282   2242973260      14725356984149511589
+40     224     33882   3169401634      9641824149700693760
+41     210     48824   2805949235      17877575698845246971
+42     250     49871   4015422133      11571502995585068959
+43     253     53173   3315146396      15527276320077174431
+44     205     48824   2153923483      18227485227719579199
+45     202     65101   2286014232      10596463304445669324
+46     144     39272   3664679383      14619550616500589262
+47     207     58788   2459180916      15787272910593406395
+48     236     61302   2814354943      15797302386492157450
+49     222     44025   2267428102      16226182608093468653
+
+-- !test_6 --
+0      254     65535   4294967294      18446744073709551614
+1      254     65534   4294967295      18446744073709551615
+2      254     65535   4294967294      18446744073709551615
+3      255     65535   4294967294      18446744073709551615
+4      255     65534   4294967295      18446744073709551615
+5      254     65534   4294967295      18446744073709551614
+6      254     65534   4294967294      18446744073709551614
+7      255     65534   4294967294      18446744073709551614
+8      255     65534   4294967295      18446744073709551614
+9      255     65535   4294967294      18446744073709551614
+
+-- !test_7 --
+id     bigint  Yes     false   \N      NONE
+mediumint_unsigned     bigint  Yes     false   \N      NONE
+int_unsigned   bigint  Yes     false   \N      NONE
+
+-- !test_8 --
+1      111     111
+2      222     222
+
+-- !test_9 --
+16     200     58596   2907876388      13993629507485604147
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_10 --
+38     166     41727   3012921589      12285785983985756200
+
+-- !test_11 --
+46     229     38351   2299955463      11589155413771055105
+
+-- !test_12 --
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_13 --
+49     222     44025   2267428102      16226182608093468653
+
+-- !test_14 --
+38     234     58068   2575498858      18264769986785189204
+
+-- !test_15 --
+14     246     44518   4213847006      13168159509128150529
+
+-- !test_16 --
+25     179     54232   3587434213      10613547124477746521
+
+-- !test_17 --
+500
+
+-- !test_18 --
+500
+
+-- !test_19 --
+500
+
+-- !test_20 --
+500
+
+-- !test_21 --
+500
+
+-- !test_22 --
+0      254     65535   4294967294      18446744073709551614
+5      254     65534   4294967295      18446744073709551614
+6      254     65534   4294967294      18446744073709551614
+7      255     65534   4294967294      18446744073709551614
+8      255     65534   4294967295      18446744073709551614
+9      255     65535   4294967294      18446744073709551614
+10     255     65535   4294967294      18446744073709551614
+11     254     65535   4294967294      18446744073709551614
+12     255     65535   4294967294      18446744073709551614
+14     254     65535   4294967295      18446744073709551614
+16     255     65535   4294967295      18446744073709551614
+17     255     65535   4294967294      18446744073709551614
+18     255     65534   4294967295      18446744073709551614
+20     255     65535   4294967294      18446744073709551614
+21     255     65534   4294967294      18446744073709551614
+22     255     65534   4294967294      18446744073709551614
+24     255     65534   4294967294      18446744073709551614
+25     254     65535   4294967295      18446744073709551614
+27     255     65534   4294967294      18446744073709551614
+29     255     65534   4294967295      18446744073709551614
+30     255     65534   4294967295      18446744073709551614
+32     254     65534   4294967294      18446744073709551614
+37     254     65534   4294967295      18446744073709551614
+38     254     65535   4294967294      18446744073709551614
+40     255     65535   4294967294      18446744073709551614
+41     255     65534   4294967294      18446744073709551614
+43     254     65534   4294967295      18446744073709551614
+44     254     65534   4294967294      18446744073709551614
+45     255     65534   4294967295      18446744073709551614
+46     254     65534   4294967295      18446744073709551614
+53     254     65534   4294967294      18446744073709551614
+56     254     65535   4294967294      18446744073709551614
+58     255     65534   4294967295      18446744073709551614
+60     254     65535   4294967295      18446744073709551614
+62     255     65534   4294967295      18446744073709551614
+63     255     65535   4294967294      18446744073709551614
+66     255     65534   4294967295      18446744073709551614
+68     254     65535   4294967295      18446744073709551614
+71     254     65535   4294967295      18446744073709551614
+73     254     65535   4294967295      18446744073709551614
+75     254     65534   4294967294      18446744073709551614
+76     255     65534   4294967294      18446744073709551614
+77     254     65535   4294967295      18446744073709551614
+80     254     65534   4294967295      18446744073709551614
+81     255     65534   4294967294      18446744073709551614
+84     255     65534   4294967295      18446744073709551614
+86     255     65535   4294967295      18446744073709551614
+87     254     65534   4294967295      18446744073709551614
+89     255     65535   4294967294      18446744073709551614
+91     254     65534   4294967294      18446744073709551614
+92     255     65534   4294967294      18446744073709551614
+94     254     65535   4294967294      18446744073709551614
+98     255     65534   4294967295      18446744073709551614
+99     254     65535   4294967294      18446744073709551614
+100    255     65534   4294967295      18446744073709551614
+102    255     65534   4294967295      18446744073709551614
+103    255     65534   4294967295      18446744073709551614
+106    254     65534   4294967295      18446744073709551614
+108    254     65535   4294967295      18446744073709551614
+109    255     65535   4294967294      18446744073709551614
+110    254     65534   4294967295      18446744073709551614
+112    255     65535   4294967295      18446744073709551614
+115    255     65534   4294967295      18446744073709551614
+117    255     65535   4294967294      18446744073709551614
+120    254     65535   4294967294      18446744073709551614
+121    254     65535   4294967295      18446744073709551614
+123    254     65535   4294967294      18446744073709551614
+124    254     65535   4294967295      18446744073709551614
+130    255     65534   4294967294      18446744073709551614
+131    254     65535   4294967295      18446744073709551614
+132    254     65535   4294967294      18446744073709551614
+133    254     65534   4294967295      18446744073709551614
+139    254     65535   4294967295      18446744073709551614
+141    255     65535   4294967295      18446744073709551614
+148    255     65535   4294967294      18446744073709551614
+149    254     65534   4294967295      18446744073709551614
+150    254     65534   4294967295      18446744073709551614
+151    254     65534   4294967294      18446744073709551614
+152    254     65535   4294967295      18446744073709551614
+157    255     65534   4294967295      18446744073709551614
+158    255     65535   4294967294      18446744073709551614
+167    255     65534   4294967295      18446744073709551614
+168    254     65535   4294967294      18446744073709551614
+169    254     65534   4294967294      18446744073709551614
+173    255     65534   4294967294      18446744073709551614
+174    254     65535   4294967295      18446744073709551614
+177    254     65535   4294967295      18446744073709551614
+184    255     65535   4294967295      18446744073709551614
+186    254     65535   4294967295      18446744073709551614
+187    255     65535   4294967295      18446744073709551614
+189    254     65534   4294967295      18446744073709551614
+190    254     65535   4294967295      18446744073709551614
+191    254     65535   4294967294      18446744073709551614
+192    254     65535   4294967294      18446744073709551614
+194    255     65534   4294967294      18446744073709551614
+195    255     65534   4294967295      18446744073709551614
+196    254     65535   4294967295      18446744073709551614
+197    255     65535   4294967295      18446744073709551614
+204    255     65535   4294967295      18446744073709551614
+207    255     65535   4294967295      18446744073709551614
+210    255     65535   4294967294      18446744073709551614
+211    255     65535   4294967294      18446744073709551614
+213    254     65534   4294967295      18446744073709551614
+214    255     65534   4294967295      18446744073709551614
+216    254     65535   4294967295      18446744073709551614
+217    255     65535   4294967295      18446744073709551614
+222    255     65534   4294967295      18446744073709551614
+226    255     65535   4294967294      18446744073709551614
+227    254     65534   4294967294      18446744073709551614
+228    254     65535   4294967295      18446744073709551614
+229    255     65535   4294967294      18446744073709551614
+231    254     65534   4294967294      18446744073709551614
+232    254     65534   4294967294      18446744073709551614
+233    255     65535   4294967295      18446744073709551614
+240    255     65534   4294967295      18446744073709551614
+241    254     65535   4294967294      18446744073709551614
+243    254     65534   4294967294      18446744073709551614
+244    255     65534   4294967295      18446744073709551614
+246    255     65534   4294967294      18446744073709551614
+248    255     65535   4294967295      18446744073709551614
+251    254     65535   4294967294      18446744073709551614
+253    255     65534   4294967294      18446744073709551614
+256    255     65534   4294967294      18446744073709551614
+257    255     65535   4294967294      18446744073709551614
+258    254     65534   4294967294      18446744073709551614
+261    254     65535   4294967295      18446744073709551614
+262    254     65535   4294967295      18446744073709551614
+264    255     65534   4294967294      18446744073709551614
+266    254     65535   4294967295      18446744073709551614
+267    255     65534   4294967294      18446744073709551614
+269    254     65535   4294967294      18446744073709551614
+270    255     65534   4294967294      18446744073709551614
+273    254     65535   4294967294      18446744073709551614
+279    255     65535   4294967295      18446744073709551614
+281    254     65534   4294967294      18446744073709551614
+283    255     65534   4294967295      18446744073709551614
+285    254     65535   4294967294      18446744073709551614
+287    255     65535   4294967294      18446744073709551614
+288    255     65534   4294967294      18446744073709551614
+290    254     65534   4294967295      18446744073709551614
+291    255     65535   4294967295      18446744073709551614
+292    255     65534   4294967295      18446744073709551614
+295    254     65535   4294967294      18446744073709551614
+296    255     65534   4294967295      18446744073709551614
+298    255     65535   4294967294      18446744073709551614
+301    255     65534   4294967295      18446744073709551614
+302    254     65534   4294967294      18446744073709551614
+307    254     65535   4294967294      18446744073709551614
+308    254     65535   4294967294      18446744073709551614
+309    254     65535   4294967295      18446744073709551614
+313    254     65534   4294967295      18446744073709551614
+317    254     65534   4294967294      18446744073709551614
+319    255     65535   4294967295      18446744073709551614
+320    254     65535   4294967295      18446744073709551614
+321    254     65535   4294967295      18446744073709551614
+322    254     65535   4294967295      18446744073709551614
+325    255     65535   4294967295      18446744073709551614
+326    254     65534   4294967295      18446744073709551614
+327    255     65535   4294967295      18446744073709551614
+328    255     65534   4294967294      18446744073709551614
+331    254     65535   4294967294      18446744073709551614
+332    255     65535   4294967294      18446744073709551614
+334    255     65534   4294967294      18446744073709551614
+336    254     65535   4294967295      18446744073709551614
+337    254     65535   4294967295      18446744073709551614
+339    255     65535   4294967295      18446744073709551614
+341    254     65535   4294967294      18446744073709551614
+342    255     65534   4294967294      18446744073709551614
+345    254     65534   4294967294      18446744073709551614
+346    255     65535   4294967294      18446744073709551614
+347    255     65534   4294967295      18446744073709551614
+349    255     65534   4294967295      18446744073709551614
+350    254     65535   4294967295      18446744073709551614
+353    255     65534   4294967294      18446744073709551614
+355    255     65535   4294967294      18446744073709551614
+356    254     65535   4294967295      18446744073709551614
+357    254     65535   4294967295      18446744073709551614
+361    254     65535   4294967295      18446744073709551614
+363    254     65535   4294967295      18446744073709551614
+367    255     65535   4294967294      18446744073709551614
+368    254     65534   4294967295      18446744073709551614
+372    254     65535   4294967294      18446744073709551614
+373    255     65535   4294967294      18446744073709551614
+376    255     65535   4294967294      18446744073709551614
+377    254     65535   4294967295      18446744073709551614
+379    255     65535   4294967294      18446744073709551614
+382    254     65534   4294967295      18446744073709551614
+385    254     65535   4294967294      18446744073709551614
+389    254     65534   4294967294      18446744073709551614
+390    255     65535   4294967295      18446744073709551614
+391    254     65535   4294967294      18446744073709551614
+393    255     65534   4294967295      18446744073709551614
+395    254     65535   4294967294      18446744073709551614
+396    254     65534   4294967294      18446744073709551614
+398    254     65535   4294967294      18446744073709551614
+400    254     65534   4294967294      18446744073709551614
+401    254     65534   4294967294      18446744073709551614
+402    255     65534   4294967295      18446744073709551614
+403    254     65534   4294967295      18446744073709551614
+408    255     65535   4294967294      18446744073709551614
+409    254     65534   4294967295      18446744073709551614
+410    254     65534   4294967294      18446744073709551614
+411    255     65535   4294967294      18446744073709551614
+412    254     65535   4294967294      18446744073709551614
+413    255     65534   4294967294      18446744073709551614
+415    254     65534   4294967294      18446744073709551614
+417    254     65534   4294967295      18446744073709551614
+418    254     65535   4294967294      18446744073709551614
+420    255     65535   4294967294      18446744073709551614
+424    255     65535   4294967294      18446744073709551614
+426    254     65535   4294967295      18446744073709551614
+430    255     65535   4294967294      18446744073709551614
+431    255     65534   4294967295      18446744073709551614
+432    255     65535   4294967294      18446744073709551614
+434    254     65535   4294967294      18446744073709551614
+440    255     65534   4294967294      18446744073709551614
+441    254     65535   4294967294      18446744073709551614
+443    254     65535   4294967295      18446744073709551614
+444    255     65535   4294967295      18446744073709551614
+450    254     65534   4294967294      18446744073709551614
+454    255     65534   4294967294      18446744073709551614
+456    255     65535   4294967294      18446744073709551614
+458    255     65535   4294967295      18446744073709551614
+459    254     65534   4294967294      18446744073709551614
+461    255     65535   4294967294      18446744073709551614
+462    255     65535   4294967295      18446744073709551614
+463    254     65534   4294967294      18446744073709551614
+464    255     65534   4294967295      18446744073709551614
+465    254     65535   4294967294      18446744073709551614
+466    255     65534   4294967295      18446744073709551614
+468    254     65534   4294967295      18446744073709551614
+470    254     65535   4294967294      18446744073709551614
+471    255     65534   4294967294      18446744073709551614
+472    254     65535   4294967295      18446744073709551614
+473    255     65534   4294967295      18446744073709551614
+474    255     65535   4294967295      18446744073709551614
+475    255     65535   4294967294      18446744073709551614
+477    254     65534   4294967294      18446744073709551614
+480    255     65535   4294967295      18446744073709551614
+483    254     65535   4294967294      18446744073709551614
+485    255     65535   4294967294      18446744073709551614
+486    254     65534   4294967294      18446744073709551614
+487    254     65534   4294967295      18446744073709551614
+488    255     65535   4294967295      18446744073709551614
+489    254     65535   4294967295      18446744073709551614
+490    255     65535   4294967294      18446744073709551614
+494    254     65534   4294967294      18446744073709551614
+495    255     65534   4294967295      18446744073709551614
+496    254     65534   4294967295      18446744073709551614
+497    255     65535   4294967294      18446744073709551614
+499    255     65534   4294967295      18446744073709551614
+
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
new file mode 100644
index 00000000000..8f87903255f
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
new file mode 100644
index 00000000000..fe48ab71842
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
new file mode 100644
index 00000000000..ff8dae5ecec
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
new file mode 100644
index 00000000000..010d15497ec
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet differ
diff --git 
a/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
 
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
new file mode 100644
index 00000000000..24cfb5f2ac2
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
@@ -0,0 +1,102 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This suite tests the `local` tvf reading Parquet files with unsigned integer columns
+suite("test_local_tvf_parquet_unsigned_integers", "p0") {
+    List<List<Object>> backends =  sql """ show backends """
+    def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"
+
+    assertTrue(backends.size() > 0)
+
+    def be_id = backends[0][0]
+    // in cluster mode, make sure every BE has these data files
+    def outFilePath="/"
+    def transFile01="${dataFilePath}/unsigned_integers_1.parquet"
+    def transFile02="${dataFilePath}/unsigned_integers_2.parquet"
+    def transFile03="${dataFilePath}/unsigned_integers_3.parquet"
+    def transFile04="${dataFilePath}/unsigned_integers_4.parquet"
+
+    for (List<Object> backend : backends) {
+        def be_host = backend[1]
+        scpFiles ("root", be_host, transFile01, outFilePath, false);
+        scpFiles ("root", be_host, transFile02, outFilePath, false);
+        scpFiles ("root", be_host, transFile03, outFilePath, false);
+        scpFiles ("root", be_host, transFile04, outFilePath, false);
+    }
+
+    def file1 = outFilePath + "unsigned_integers_1.parquet";
+    def file2 = outFilePath + "unsigned_integers_2.parquet";
+    def file3 = outFilePath + "unsigned_integers_3.parquet";
+    def file4 = outFilePath + "unsigned_integers_4.parquet";
+
+
+
+
+    qt_test_1 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_2 """ desc function local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_3 """ desc function local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+    
+    qt_test_4 """ desc function local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_5 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_6 """ select * from local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id limit 10;"""
+
+    qt_test_7 """ desc function local( "file_path" = "${file4}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_8 """ select * from local( "file_path" = "${file4}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+
+
+    qt_test_9 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint8_column = 200 order by id ;"""
+
+    qt_test_10 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint16_column = 41727 order by id ;"""
+
+    qt_test_11 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint32_column = 2299955463 order by 
id ;"""
+
+    qt_test_12 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 15103440093398422258 
order by id ;"""
+
+
+
+    qt_test_13 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint8_column = 222 order by id ;"""
+
+    qt_test_14 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint16_column = 58068 order by id ;"""
+
+    qt_test_15 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint32_column = 4213847006 order by 
id ;"""
+
+    qt_test_16 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 10613547124477746521 
order by id ;"""
+
+
+    qt_test_17 """ select count(id) from local( "file_path" = "${file3}", 
"backend_id" = "${be_id}", "format" = "parquet")  ;"""
+
+    qt_test_18 """ select count(uint8_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+
+    qt_test_19 """ select count(uint16_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+    
+    qt_test_20 """ select count(uint32_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+ 
+    qt_test_21 """ select count(uint64_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+    
+    qt_test_22 """ select * from local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 18446744073709551614 
order by id ;"""
+
+
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to