This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 80b921344f6 [fix](parquet)Fix the be core issue when reading parquet 
unsigned types. (#39926)
80b921344f6 is described below

commit 80b921344f6b96088b2a81c8768829bcb0cf0aae
Author: daidai <[email protected]>
AuthorDate: Thu Aug 29 18:23:40 2024 +0800

    [fix](parquet)Fix the be core issue when reading parquet unsigned types. 
(#39926)
    
    ## Proposed changes
    Since Doris does not have unsigned integer types, a parquet unsigned
    type is mapped to a wider signed Doris type (for example, parquet
    uint32 is mapped to Doris bigint, i.e. int64).
    When reading the parquet file, the byte size of a value as stored in
    parquet and the byte size of the mapped Doris data type are
    inconsistent, which caused the BE to crash (core dump).
    Fix:
    When reading, values are now read using the byte size stored in
    parquet and are then converted to the data type mapped by Doris.
    
    Mapping relationship description:
    parquet -> doris
    UInt8 -> Int16
    UInt16 -> Int32
    UInt32 -> Int64
    UInt64 -> Int128.
---
 .../exec/format/parquet/parquet_column_convert.cpp |  18 +-
 .../exec/format/parquet/parquet_column_convert.h   |  65 +++
 be/src/vec/exec/format/parquet/schema_desc.cpp     |  58 ++-
 be/src/vec/exec/format/parquet/schema_desc.h       |  10 +-
 .../test_outfile_expr_generate_col_name.out        |   6 +-
 .../test_local_tvf_parquet_unsigned_integers.out   | 440 +++++++++++++++++++++
 .../tvf/unsigned_integers_1.parquet                | Bin 0 -> 3202 bytes
 .../tvf/unsigned_integers_2.parquet                | Bin 0 -> 3218 bytes
 .../tvf/unsigned_integers_3.parquet                | Bin 0 -> 4727 bytes
 .../tvf/unsigned_integers_4.parquet                | Bin 0 -> 704 bytes
 ...test_local_tvf_parquet_unsigned_integers.groovy | 102 +++++
 11 files changed, 673 insertions(+), 26 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp 
b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
index 57f1f54b7b9..2fb0afea82a 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp
@@ -66,7 +66,9 @@ ColumnPtr 
PhysicalToLogicalConverter::get_physical_column(tparquet::Type::type s
         src_physical_type = tparquet::Type::INT32;
         src_logical_type = TypeDescriptor(PrimitiveType::TYPE_INT);
     }
-    if (is_consistent() && _logical_converter->is_consistent()) {
+
+    if (!_convert_params->is_type_compatibility && is_consistent() &&
+        _logical_converter->is_consistent()) {
         if (_cached_src_physical_type == nullptr) {
             _cached_src_physical_type = 
DataTypeFactory::instance().create_data_type(
                     src_logical_type, dst_logical_type->is_nullable());
@@ -246,7 +248,19 @@ std::unique_ptr<PhysicalToLogicalConverter> 
PhysicalToLogicalConverter::get_conv
     }
     PrimitiveType src_logical_primitive = src_logical_type.type;
 
-    if (is_parquet_native_type(src_logical_primitive)) {
+    if (field_schema->is_type_compatibility) {
+        if (src_logical_type == TYPE_SMALLINT) {
+            physical_converter.reset(new 
UnsignedIntegerConverter<TYPE_SMALLINT>());
+        } else if (src_logical_type == TYPE_INT) {
+            physical_converter.reset(new UnsignedIntegerConverter<TYPE_INT>());
+        } else if (src_logical_type == TYPE_BIGINT) {
+            physical_converter.reset(new 
UnsignedIntegerConverter<TYPE_BIGINT>());
+        } else if (src_logical_type == TYPE_LARGEINT) {
+            physical_converter.reset(new 
UnsignedIntegerConverter<TYPE_LARGEINT>());
+        } else {
+            physical_converter.reset(new 
UnsupportedConverter(src_physical_type, src_logical_type));
+        }
+    } else if (is_parquet_native_type(src_logical_primitive)) {
         if (is_string_type(src_logical_primitive) &&
             src_physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
             // for FixedSizeBinary
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h 
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 551bf7e14ed..91b81121aa4 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -40,6 +40,9 @@ struct ConvertParams {
     DecimalScaleParams decimal_scale;
     FieldSchema* field_schema = nullptr;
 
+    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+    bool is_type_compatibility = false;
+
     /**
      * Some frameworks like paimon maybe writes non-standard parquet files. 
Timestamp field doesn't have
      * logicalType or converted_type to indicates its precision. We have to 
reset the time mask.
@@ -108,6 +111,7 @@ struct ConvertParams {
             t.from_unixtime(0, *ctz);
             offset_days = t.day() == 31 ? -1 : 0;
         }
+        is_type_compatibility = field_schema_->is_type_compatibility;
     }
 
     template <typename DecimalPrimitiveType>
@@ -273,6 +277,67 @@ class LittleIntPhysicalConverter : public 
PhysicalToLogicalConverter {
     }
 };
 
+template <PrimitiveType type>  // Primary template: declaration only; the usable definitions are the explicit specializations.
+struct UnsignedTypeTraits;  // Maps a Doris destination PrimitiveType to the unsigned/storage types used by UnsignedIntegerConverter.
+
+template <>
+struct UnsignedTypeTraits<TYPE_SMALLINT> {  // Traits for a SMALLINT destination (parquet UInt8 -> Doris Int16).
+    using UnsignedCppType = UInt8;  // Unsigned type each stored value is cast to before being written out.
+    
//https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#unsigned-integers
+    //INT(8, false), INT(16, false), and INT(32, false) must annotate an int32 
primitive type and INT(64, false)
+    //must annotate an int64 primitive type.
+    using StorageCppType = Int32;  // Element type read from the physical column.
+    using StorageColumnType = vectorized::ColumnInt32;  // Column class the physical column is cast to when reading.
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_INT> {  // Traits for an INT destination (parquet UInt16 -> Doris Int32).
+    using UnsignedCppType = UInt16;  // Unsigned type each stored value is cast to before being written out.
+    using StorageCppType = Int32;  // Element type read from the physical column.
+    using StorageColumnType = vectorized::ColumnInt32;  // Column class the physical column is cast to when reading.
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_BIGINT> {  // Traits for a BIGINT destination (parquet UInt32 -> Doris Int64).
+    using UnsignedCppType = UInt32;  // Unsigned type each stored value is cast to before being written out.
+    using StorageCppType = Int32;  // Element type read from the physical column (narrower than the destination).
+    using StorageColumnType = vectorized::ColumnInt32;  // Column class the physical column is cast to when reading.
+};
+
+template <>
+struct UnsignedTypeTraits<TYPE_LARGEINT> {  // Traits for a LARGEINT destination (parquet UInt64 -> Doris Int128).
+    using UnsignedCppType = UInt64;  // Unsigned type each stored value is cast to before being written out.
+    using StorageCppType = Int64;  // Element type read from the physical column.
+    using StorageColumnType = vectorized::ColumnInt64;  // Column class the physical column is cast to when reading.
+};
+
+template <PrimitiveType IntPrimitiveType>  // IntPrimitiveType: Doris destination type; it selects the UnsignedTypeTraits specialization used below.
+class UnsignedIntegerConverter : public PhysicalToLogicalConverter {  // Copies stored integers into the destination column, interpreting each one as unsigned. NOTE(review): no access specifier, so the override below is private in this class.
+    Status physical_convert(ColumnPtr& src_physical_col, ColumnPtr& 
src_logical_column) override {  // Appends every row of src_physical_col to src_logical_column; always returns Status::OK().
+        using UnsignedCppType = typename 
UnsignedTypeTraits<IntPrimitiveType>::UnsignedCppType;
+        using StorageCppType = typename 
UnsignedTypeTraits<IntPrimitiveType>::StorageCppType;
+        using StorageColumnType = typename 
UnsignedTypeTraits<IntPrimitiveType>::StorageColumnType;
+        using DstColumnType = typename 
PrimitiveTypeTraits<IntPrimitiveType>::ColumnType;
+
+        ColumnPtr from_col = remove_nullable(src_physical_col);  // presumably unwraps a nullable column to its nested data column -- confirm against remove_nullable
+        MutableColumnPtr to_col = 
remove_nullable(src_logical_column)->assume_mutable();
+        auto& src_data = static_cast<const 
StorageColumnType*>(from_col.get())->get_data();  // Unchecked static_cast: the source is taken to be a StorageColumnType.
+
+        size_t rows = src_data.size();
+        size_t start_idx = to_col->size();  // Rows already in the destination; new values are written after them.
+        to_col->resize(start_idx + rows);  // Grow the destination by the number of source rows.
+        auto& data = static_cast<DstColumnType&>(*to_col.get()).get_data();
+
+        for (int i = 0; i < rows; i++) {  // NOTE(review): i is int while rows is size_t (signed/unsigned comparison).
+            StorageCppType src_value = src_data[i];
+            auto unsigned_value = static_cast<UnsignedCppType>(src_value);  // Convert to the unsigned type first so the value is stored as unsigned, not as the signed storage value.
+            data[start_idx + i] = unsigned_value;
+        }
+
+        return Status::OK();
+    }
+};
+
 class FixedSizeBinaryConverter : public PhysicalToLogicalConverter {
 private:
     int _type_length;
diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp 
b/be/src/vec/exec/format/parquet/schema_desc.cpp
index 08692de8743..9097b65718f 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.cpp
+++ b/be/src/vec/exec/format/parquet/schema_desc.cpp
@@ -191,16 +191,19 @@ void FieldDescriptor::parse_physical_field(const 
tparquet::SchemaElement& physic
     physical_field->physical_type = physical_schema.type;
     _physical_fields.push_back(physical_field);
     physical_field->physical_column_index = _physical_fields.size() - 1;
-    physical_field->type = get_doris_type(physical_schema);
+    auto type = get_doris_type(physical_schema);
+    physical_field->type = type.first;
+    physical_field->is_type_compatibility = type.second;
 }
 
-TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& 
physical_schema) {
-    TypeDescriptor type;
-    type.type = INVALID_TYPE;
+std::pair<TypeDescriptor, bool> FieldDescriptor::get_doris_type(
+        const tparquet::SchemaElement& physical_schema) {
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
     if (physical_schema.__isset.logicalType) {
-        type = convert_to_doris_type(physical_schema.logicalType);
+        ans = convert_to_doris_type(physical_schema.logicalType);
     } else if (physical_schema.__isset.converted_type) {
-        type = convert_to_doris_type(physical_schema);
+        ans = convert_to_doris_type(physical_schema);
     }
     // use physical type instead
     if (type.type == INVALID_TYPE) {
@@ -233,7 +236,7 @@ TypeDescriptor FieldDescriptor::get_doris_type(const 
tparquet::SchemaElement& ph
             break;
         }
     }
-    return type;
+    return ans;
 }
 
 // Copy from org.apache.iceberg.avro.AvroSchemaUtil#validAvroName
@@ -302,8 +305,11 @@ void FieldDescriptor::iceberg_sanitize(const 
std::vector<std::string>& read_colu
     }
 }
 
-TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType 
logicalType) {
-    TypeDescriptor type;
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
+        tparquet::LogicalType logicalType) {
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
+    bool& is_type_compatibility = ans.second;
     if (logicalType.__isset.STRING) {
         type = TypeDescriptor(TYPE_STRING);
     } else if (logicalType.__isset.DECIMAL) {
@@ -313,16 +319,25 @@ TypeDescriptor 
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
         type = TypeDescriptor(TYPE_DATEV2);
     } else if (logicalType.__isset.INTEGER) {
         if (logicalType.INTEGER.isSigned) {
-            if (logicalType.INTEGER.bitWidth <= 32) {
+            if (logicalType.INTEGER.bitWidth <= 8) {
+                type = TypeDescriptor(TYPE_TINYINT);
+            } else if (logicalType.INTEGER.bitWidth <= 16) {
+                type = TypeDescriptor(TYPE_SMALLINT);
+            } else if (logicalType.INTEGER.bitWidth <= 32) {
                 type = TypeDescriptor(TYPE_INT);
             } else {
                 type = TypeDescriptor(TYPE_BIGINT);
             }
         } else {
-            if (logicalType.INTEGER.bitWidth <= 16) {
+            is_type_compatibility = true;
+            if (logicalType.INTEGER.bitWidth <= 8) {
+                type = TypeDescriptor(TYPE_SMALLINT);
+            } else if (logicalType.INTEGER.bitWidth <= 16) {
                 type = TypeDescriptor(TYPE_INT);
-            } else {
+            } else if (logicalType.INTEGER.bitWidth <= 32) {
                 type = TypeDescriptor(TYPE_BIGINT);
+            } else {
+                type = TypeDescriptor(TYPE_LARGEINT);
             }
         }
     } else if (logicalType.__isset.TIME) {
@@ -344,12 +359,14 @@ TypeDescriptor 
FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logi
     } else {
         type = TypeDescriptor(INVALID_TYPE);
     }
-    return type;
+    return ans;
 }
 
-TypeDescriptor FieldDescriptor::convert_to_doris_type(
+std::pair<TypeDescriptor, bool> FieldDescriptor::convert_to_doris_type(
         const tparquet::SchemaElement& physical_schema) {
-    TypeDescriptor type;
+    std::pair<TypeDescriptor, bool> ans = {INVALID_TYPE, false};
+    TypeDescriptor& type = ans.first;
+    bool& is_type_compatibility = ans.second;
     switch (physical_schema.converted_type) {
     case tparquet::ConvertedType::type::UTF8:
         type = TypeDescriptor(TYPE_STRING);
@@ -378,28 +395,33 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(
         type = TypeDescriptor(TYPE_TINYINT);
         break;
     case tparquet::ConvertedType::type::UINT_8:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_16:
         type = TypeDescriptor(TYPE_SMALLINT);
         break;
     case tparquet::ConvertedType::type::UINT_16:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_32:
         type = TypeDescriptor(TYPE_INT);
         break;
     case tparquet::ConvertedType::type::UINT_32:
-        [[fallthrough]];
-    case tparquet::ConvertedType::type::UINT_64:
+        is_type_compatibility = true;
         [[fallthrough]];
     case tparquet::ConvertedType::type::INT_64:
         type = TypeDescriptor(TYPE_BIGINT);
         break;
+    case tparquet::ConvertedType::type::UINT_64:
+        is_type_compatibility = true;
+        type = TypeDescriptor(TYPE_LARGEINT);
+        break;
     default:
         LOG(WARNING) << "Not supported parquet ConvertedType: " << 
physical_schema.converted_type;
         type = TypeDescriptor(INVALID_TYPE);
         break;
     }
-    return type;
+    return ans;
 }
 
 Status FieldDescriptor::parse_group_field(const 
std::vector<tparquet::SchemaElement>& t_schemas,
diff --git a/be/src/vec/exec/format/parquet/schema_desc.h 
b/be/src/vec/exec/format/parquet/schema_desc.h
index 50e526bd730..ca726ef1b57 100644
--- a/be/src/vec/exec/format/parquet/schema_desc.h
+++ b/be/src/vec/exec/format/parquet/schema_desc.h
@@ -49,6 +49,9 @@ struct FieldSchema {
     int16_t repeated_parent_def_level = 0;
     std::vector<FieldSchema> children;
 
+    //For UInt8 -> Int16,UInt16 -> Int32,UInt32 -> Int64,UInt64 -> Int128.
+    bool is_type_compatibility = false;
+
     FieldSchema() = default;
     ~FieldSchema() = default;
     FieldSchema(const FieldSchema& fieldSchema) = default;
@@ -84,12 +87,13 @@ private:
     Status parse_node_field(const std::vector<tparquet::SchemaElement>& 
t_schemas, size_t curr_pos,
                             FieldSchema* node_field);
 
-    TypeDescriptor convert_to_doris_type(tparquet::LogicalType logicalType);
+    std::pair<TypeDescriptor, bool> 
convert_to_doris_type(tparquet::LogicalType logicalType);
 
-    TypeDescriptor convert_to_doris_type(const tparquet::SchemaElement& 
physical_schema);
+    std::pair<TypeDescriptor, bool> convert_to_doris_type(
+            const tparquet::SchemaElement& physical_schema);
 
 public:
-    TypeDescriptor get_doris_type(const tparquet::SchemaElement& 
physical_schema);
+    std::pair<TypeDescriptor, bool> get_doris_type(const 
tparquet::SchemaElement& physical_schema);
 
     // org.apache.iceberg.avro.AvroSchemaUtil#sanitize will encode special 
characters,
     // we have to decode these characters
diff --git 
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
 
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
index 406bc7660ff..5748cedd228 100644
--- 
a/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
+++ 
b/regression-test/data/export_p0/outfile/outfile_expr/test_outfile_expr_generate_col_name.out
@@ -100,12 +100,12 @@ id        int     Yes     false   \N      NONE
 9      1       string  27      false   5       true    1
 
 -- !desc_s3 --
-__add_5        int     Yes     false   \N      NONE
-__bit_or_7     int     Yes     false   \N      NONE
+__add_5        smallint        Yes     false   \N      NONE
+__bit_or_7     tinyint Yes     false   \N      NONE
 __cast_3       bigint  Yes     false   \N      NONE
 __greater_than_4       boolean Yes     false   \N      NONE
 __in_predicate_6       boolean Yes     false   \N      NONE
-__literal_1    int     Yes     false   \N      NONE
+__literal_1    tinyint Yes     false   \N      NONE
 __literal_2    text    Yes     false   \N      NONE
 id     int     Yes     false   \N      NONE
 
diff --git 
a/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
 
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
new file mode 100644
index 00000000000..4cda2746a00
--- /dev/null
+++ 
b/regression-test/data/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.out
@@ -0,0 +1,440 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !test_1 --
+0      254     54979   3876010132      12292188520939801104
+1      194     41087   2468877118      11276751473207154134
+2      204     40618   3112417582      12531644712494408881
+3      252     42885   2959241092      12524633273560617315
+4      168     34898   3576407414      13096188234563161622
+5      252     39188   3127662355      16817880057727309281
+6      146     45695   2457180674      13258241842726882717
+7      223     54096   3818049848      13392705476834798352
+8      247     49103   4235043353      14882039847048390015
+9      171     37711   4121304297      12690575475523254418
+10     208     45522   2849886325      9887016810088790216
+11     140     57808   4249890046      11248669065541052626
+12     154     63353   2494834510      10992021804074820099
+13     169     62034   3442632722      13741258842709060503
+14     201     39884   2544963866      13079528300040741505
+15     208     48887   2865890129      15763071531082484590
+16     200     58596   2907876388      13993629507485604147
+17     210     50277   3777672898      9348401130412952441
+18     212     51965   3618857131      16174811648348878672
+19     161     39818   2368776725      15511547001336881917
+20     160     41750   2980821608      11023149785626200227
+21     141     57340   4284126112      15552326510762976216
+22     139     58391   4117149789      15878585387857525976
+23     212     59593   2459376464      15173162289502083954
+24     165     53957   3427228999      17619046273898992624
+25     229     49948   2600007878      15455673680657024177
+26     134     64599   3738904119      16329279862825806683
+27     205     41073   2442068534      14375087414295500534
+28     204     60681   3422219729      18264453353631472102
+29     136     47361   3588730082      13238158010948418606
+30     252     42761   4175633691      15734976010511914446
+31     150     45199   4078820644      15090555688768508964
+32     210     57428   4009891336      16805687728956091249
+33     235     34379   3963852622      13291314989308428602
+34     226     33850   3643451160      15146799101548124057
+35     133     54253   2734990478      12652028700805136484
+36     166     61697   3958278249      9924263812189602832
+37     137     55610   2782254392      18017977508881163787
+38     166     41727   3012921589      12285785983985756200
+39     194     46987   2351267776      18191890018150428322
+40     201     61160   3877183539      17825711874965469887
+41     209     54646   3739634163      12447543494188025837
+42     157     65086   2250645811      15257822118590463416
+43     223     56812   4101782180      9308776360130037320
+44     135     61061   2806848998      15345311951702932353
+45     147     39862   3521569945      17679765098696012133
+46     229     38351   2299955463      11589155413771055105
+47     141     49268   2740241757      16646921996087125692
+48     144     53152   3733884127      12946127273932789697
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_2 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_3 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_4 --
+id     int     Yes     false   \N      NONE
+uint8_column   smallint        Yes     false   \N      NONE
+uint16_column  int     Yes     false   \N      NONE
+uint32_column  bigint  Yes     false   \N      NONE
+uint64_column  largeint        Yes     false   \N      NONE
+
+-- !test_5 --
+0      200     48438   3775964178      9957179629640951554
+1      197     37330   2548711656      11127988488306710308
+2      200     65117   3419624570      11652168873218179696
+3      223     39174   3669818470      17675778338328486831
+4      196     60825   3064802389      18435946529637287047
+5      148     58165   3414732886      10219427643252484377
+6      217     61318   4075782121      10508828167797335037
+7      185     46040   2798591473      16764958584953558745
+8      148     50700   2541953946      11228894632595899400
+9      235     41262   3398155979      14622381114650660637
+10     189     49552   3754066331      16077913453789464999
+11     138     41102   2488033055      11226600460306403745
+12     132     62435   3745892606      17429590483490742063
+13     164     34858   3380810793      15447251686769296941
+14     246     44518   4213847006      13168159509128150529
+15     128     34475   2584892403      14470296482506793549
+16     180     40732   2667143993      13453719924509014943
+17     206     49824   3079026622      9242535560261271930
+18     250     61860   3723105814      9754643556067415401
+19     248     51724   4229819070      15295668751829551718
+20     237     38710   2951615403      14702842917502593482
+21     133     46707   2464573031      15860122391678970590
+22     249     41930   4210407904      12677893784378713520
+23     218     34250   3657784995      14768434928936445583
+24     134     52064   4274319068      16672757830801563734
+25     179     54232   3587434213      10613547124477746521
+26     217     63838   2348239122      17998346098073086386
+27     190     52362   3442840997      13122160861538572795
+28     131     39957   2321439682      16792774292797573856
+29     143     50902   2914375790      10557924491128546903
+30     228     33365   2438054546      14302876531585763284
+31     251     59126   2308219390      12753986538521770387
+32     197     58579   2647214662      16916351783057256258
+33     246     62028   4202894981      16869494938800942234
+34     229     47800   3963077237      12000510020655611310
+35     195     43221   2980563838      17324881735239531242
+36     210     46389   3302751013      10936691069329977133
+37     190     60582   4294458835      13465474203101539625
+38     234     58068   2575498858      18264769986785189204
+39     244     55282   2242973260      14725356984149511589
+40     224     33882   3169401634      9641824149700693760
+41     210     48824   2805949235      17877575698845246971
+42     250     49871   4015422133      11571502995585068959
+43     253     53173   3315146396      15527276320077174431
+44     205     48824   2153923483      18227485227719579199
+45     202     65101   2286014232      10596463304445669324
+46     144     39272   3664679383      14619550616500589262
+47     207     58788   2459180916      15787272910593406395
+48     236     61302   2814354943      15797302386492157450
+49     222     44025   2267428102      16226182608093468653
+
+-- !test_6 --
+0      254     65535   4294967294      18446744073709551614
+1      254     65534   4294967295      18446744073709551615
+2      254     65535   4294967294      18446744073709551615
+3      255     65535   4294967294      18446744073709551615
+4      255     65534   4294967295      18446744073709551615
+5      254     65534   4294967295      18446744073709551614
+6      254     65534   4294967294      18446744073709551614
+7      255     65534   4294967294      18446744073709551614
+8      255     65534   4294967295      18446744073709551614
+9      255     65535   4294967294      18446744073709551614
+
+-- !test_7 --
+id     bigint  Yes     false   \N      NONE
+mediumint_unsigned     bigint  Yes     false   \N      NONE
+int_unsigned   bigint  Yes     false   \N      NONE
+
+-- !test_8 --
+1      111     111
+2      222     222
+
+-- !test_9 --
+16     200     58596   2907876388      13993629507485604147
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_10 --
+38     166     41727   3012921589      12285785983985756200
+
+-- !test_11 --
+46     229     38351   2299955463      11589155413771055105
+
+-- !test_12 --
+49     200     48517   4024062219      15103440093398422258
+
+-- !test_13 --
+49     222     44025   2267428102      16226182608093468653
+
+-- !test_14 --
+38     234     58068   2575498858      18264769986785189204
+
+-- !test_15 --
+14     246     44518   4213847006      13168159509128150529
+
+-- !test_16 --
+25     179     54232   3587434213      10613547124477746521
+
+-- !test_17 --
+500
+
+-- !test_18 --
+500
+
+-- !test_19 --
+500
+
+-- !test_20 --
+500
+
+-- !test_21 --
+500
+
+-- !test_22 --
+0      254     65535   4294967294      18446744073709551614
+5      254     65534   4294967295      18446744073709551614
+6      254     65534   4294967294      18446744073709551614
+7      255     65534   4294967294      18446744073709551614
+8      255     65534   4294967295      18446744073709551614
+9      255     65535   4294967294      18446744073709551614
+10     255     65535   4294967294      18446744073709551614
+11     254     65535   4294967294      18446744073709551614
+12     255     65535   4294967294      18446744073709551614
+14     254     65535   4294967295      18446744073709551614
+16     255     65535   4294967295      18446744073709551614
+17     255     65535   4294967294      18446744073709551614
+18     255     65534   4294967295      18446744073709551614
+20     255     65535   4294967294      18446744073709551614
+21     255     65534   4294967294      18446744073709551614
+22     255     65534   4294967294      18446744073709551614
+24     255     65534   4294967294      18446744073709551614
+25     254     65535   4294967295      18446744073709551614
+27     255     65534   4294967294      18446744073709551614
+29     255     65534   4294967295      18446744073709551614
+30     255     65534   4294967295      18446744073709551614
+32     254     65534   4294967294      18446744073709551614
+37     254     65534   4294967295      18446744073709551614
+38     254     65535   4294967294      18446744073709551614
+40     255     65535   4294967294      18446744073709551614
+41     255     65534   4294967294      18446744073709551614
+43     254     65534   4294967295      18446744073709551614
+44     254     65534   4294967294      18446744073709551614
+45     255     65534   4294967295      18446744073709551614
+46     254     65534   4294967295      18446744073709551614
+53     254     65534   4294967294      18446744073709551614
+56     254     65535   4294967294      18446744073709551614
+58     255     65534   4294967295      18446744073709551614
+60     254     65535   4294967295      18446744073709551614
+62     255     65534   4294967295      18446744073709551614
+63     255     65535   4294967294      18446744073709551614
+66     255     65534   4294967295      18446744073709551614
+68     254     65535   4294967295      18446744073709551614
+71     254     65535   4294967295      18446744073709551614
+73     254     65535   4294967295      18446744073709551614
+75     254     65534   4294967294      18446744073709551614
+76     255     65534   4294967294      18446744073709551614
+77     254     65535   4294967295      18446744073709551614
+80     254     65534   4294967295      18446744073709551614
+81     255     65534   4294967294      18446744073709551614
+84     255     65534   4294967295      18446744073709551614
+86     255     65535   4294967295      18446744073709551614
+87     254     65534   4294967295      18446744073709551614
+89     255     65535   4294967294      18446744073709551614
+91     254     65534   4294967294      18446744073709551614
+92     255     65534   4294967294      18446744073709551614
+94     254     65535   4294967294      18446744073709551614
+98     255     65534   4294967295      18446744073709551614
+99     254     65535   4294967294      18446744073709551614
+100    255     65534   4294967295      18446744073709551614
+102    255     65534   4294967295      18446744073709551614
+103    255     65534   4294967295      18446744073709551614
+106    254     65534   4294967295      18446744073709551614
+108    254     65535   4294967295      18446744073709551614
+109    255     65535   4294967294      18446744073709551614
+110    254     65534   4294967295      18446744073709551614
+112    255     65535   4294967295      18446744073709551614
+115    255     65534   4294967295      18446744073709551614
+117    255     65535   4294967294      18446744073709551614
+120    254     65535   4294967294      18446744073709551614
+121    254     65535   4294967295      18446744073709551614
+123    254     65535   4294967294      18446744073709551614
+124    254     65535   4294967295      18446744073709551614
+130    255     65534   4294967294      18446744073709551614
+131    254     65535   4294967295      18446744073709551614
+132    254     65535   4294967294      18446744073709551614
+133    254     65534   4294967295      18446744073709551614
+139    254     65535   4294967295      18446744073709551614
+141    255     65535   4294967295      18446744073709551614
+148    255     65535   4294967294      18446744073709551614
+149    254     65534   4294967295      18446744073709551614
+150    254     65534   4294967295      18446744073709551614
+151    254     65534   4294967294      18446744073709551614
+152    254     65535   4294967295      18446744073709551614
+157    255     65534   4294967295      18446744073709551614
+158    255     65535   4294967294      18446744073709551614
+167    255     65534   4294967295      18446744073709551614
+168    254     65535   4294967294      18446744073709551614
+169    254     65534   4294967294      18446744073709551614
+173    255     65534   4294967294      18446744073709551614
+174    254     65535   4294967295      18446744073709551614
+177    254     65535   4294967295      18446744073709551614
+184    255     65535   4294967295      18446744073709551614
+186    254     65535   4294967295      18446744073709551614
+187    255     65535   4294967295      18446744073709551614
+189    254     65534   4294967295      18446744073709551614
+190    254     65535   4294967295      18446744073709551614
+191    254     65535   4294967294      18446744073709551614
+192    254     65535   4294967294      18446744073709551614
+194    255     65534   4294967294      18446744073709551614
+195    255     65534   4294967295      18446744073709551614
+196    254     65535   4294967295      18446744073709551614
+197    255     65535   4294967295      18446744073709551614
+204    255     65535   4294967295      18446744073709551614
+207    255     65535   4294967295      18446744073709551614
+210    255     65535   4294967294      18446744073709551614
+211    255     65535   4294967294      18446744073709551614
+213    254     65534   4294967295      18446744073709551614
+214    255     65534   4294967295      18446744073709551614
+216    254     65535   4294967295      18446744073709551614
+217    255     65535   4294967295      18446744073709551614
+222    255     65534   4294967295      18446744073709551614
+226    255     65535   4294967294      18446744073709551614
+227    254     65534   4294967294      18446744073709551614
+228    254     65535   4294967295      18446744073709551614
+229    255     65535   4294967294      18446744073709551614
+231    254     65534   4294967294      18446744073709551614
+232    254     65534   4294967294      18446744073709551614
+233    255     65535   4294967295      18446744073709551614
+240    255     65534   4294967295      18446744073709551614
+241    254     65535   4294967294      18446744073709551614
+243    254     65534   4294967294      18446744073709551614
+244    255     65534   4294967295      18446744073709551614
+246    255     65534   4294967294      18446744073709551614
+248    255     65535   4294967295      18446744073709551614
+251    254     65535   4294967294      18446744073709551614
+253    255     65534   4294967294      18446744073709551614
+256    255     65534   4294967294      18446744073709551614
+257    255     65535   4294967294      18446744073709551614
+258    254     65534   4294967294      18446744073709551614
+261    254     65535   4294967295      18446744073709551614
+262    254     65535   4294967295      18446744073709551614
+264    255     65534   4294967294      18446744073709551614
+266    254     65535   4294967295      18446744073709551614
+267    255     65534   4294967294      18446744073709551614
+269    254     65535   4294967294      18446744073709551614
+270    255     65534   4294967294      18446744073709551614
+273    254     65535   4294967294      18446744073709551614
+279    255     65535   4294967295      18446744073709551614
+281    254     65534   4294967294      18446744073709551614
+283    255     65534   4294967295      18446744073709551614
+285    254     65535   4294967294      18446744073709551614
+287    255     65535   4294967294      18446744073709551614
+288    255     65534   4294967294      18446744073709551614
+290    254     65534   4294967295      18446744073709551614
+291    255     65535   4294967295      18446744073709551614
+292    255     65534   4294967295      18446744073709551614
+295    254     65535   4294967294      18446744073709551614
+296    255     65534   4294967295      18446744073709551614
+298    255     65535   4294967294      18446744073709551614
+301    255     65534   4294967295      18446744073709551614
+302    254     65534   4294967294      18446744073709551614
+307    254     65535   4294967294      18446744073709551614
+308    254     65535   4294967294      18446744073709551614
+309    254     65535   4294967295      18446744073709551614
+313    254     65534   4294967295      18446744073709551614
+317    254     65534   4294967294      18446744073709551614
+319    255     65535   4294967295      18446744073709551614
+320    254     65535   4294967295      18446744073709551614
+321    254     65535   4294967295      18446744073709551614
+322    254     65535   4294967295      18446744073709551614
+325    255     65535   4294967295      18446744073709551614
+326    254     65534   4294967295      18446744073709551614
+327    255     65535   4294967295      18446744073709551614
+328    255     65534   4294967294      18446744073709551614
+331    254     65535   4294967294      18446744073709551614
+332    255     65535   4294967294      18446744073709551614
+334    255     65534   4294967294      18446744073709551614
+336    254     65535   4294967295      18446744073709551614
+337    254     65535   4294967295      18446744073709551614
+339    255     65535   4294967295      18446744073709551614
+341    254     65535   4294967294      18446744073709551614
+342    255     65534   4294967294      18446744073709551614
+345    254     65534   4294967294      18446744073709551614
+346    255     65535   4294967294      18446744073709551614
+347    255     65534   4294967295      18446744073709551614
+349    255     65534   4294967295      18446744073709551614
+350    254     65535   4294967295      18446744073709551614
+353    255     65534   4294967294      18446744073709551614
+355    255     65535   4294967294      18446744073709551614
+356    254     65535   4294967295      18446744073709551614
+357    254     65535   4294967295      18446744073709551614
+361    254     65535   4294967295      18446744073709551614
+363    254     65535   4294967295      18446744073709551614
+367    255     65535   4294967294      18446744073709551614
+368    254     65534   4294967295      18446744073709551614
+372    254     65535   4294967294      18446744073709551614
+373    255     65535   4294967294      18446744073709551614
+376    255     65535   4294967294      18446744073709551614
+377    254     65535   4294967295      18446744073709551614
+379    255     65535   4294967294      18446744073709551614
+382    254     65534   4294967295      18446744073709551614
+385    254     65535   4294967294      18446744073709551614
+389    254     65534   4294967294      18446744073709551614
+390    255     65535   4294967295      18446744073709551614
+391    254     65535   4294967294      18446744073709551614
+393    255     65534   4294967295      18446744073709551614
+395    254     65535   4294967294      18446744073709551614
+396    254     65534   4294967294      18446744073709551614
+398    254     65535   4294967294      18446744073709551614
+400    254     65534   4294967294      18446744073709551614
+401    254     65534   4294967294      18446744073709551614
+402    255     65534   4294967295      18446744073709551614
+403    254     65534   4294967295      18446744073709551614
+408    255     65535   4294967294      18446744073709551614
+409    254     65534   4294967295      18446744073709551614
+410    254     65534   4294967294      18446744073709551614
+411    255     65535   4294967294      18446744073709551614
+412    254     65535   4294967294      18446744073709551614
+413    255     65534   4294967294      18446744073709551614
+415    254     65534   4294967294      18446744073709551614
+417    254     65534   4294967295      18446744073709551614
+418    254     65535   4294967294      18446744073709551614
+420    255     65535   4294967294      18446744073709551614
+424    255     65535   4294967294      18446744073709551614
+426    254     65535   4294967295      18446744073709551614
+430    255     65535   4294967294      18446744073709551614
+431    255     65534   4294967295      18446744073709551614
+432    255     65535   4294967294      18446744073709551614
+434    254     65535   4294967294      18446744073709551614
+440    255     65534   4294967294      18446744073709551614
+441    254     65535   4294967294      18446744073709551614
+443    254     65535   4294967295      18446744073709551614
+444    255     65535   4294967295      18446744073709551614
+450    254     65534   4294967294      18446744073709551614
+454    255     65534   4294967294      18446744073709551614
+456    255     65535   4294967294      18446744073709551614
+458    255     65535   4294967295      18446744073709551614
+459    254     65534   4294967294      18446744073709551614
+461    255     65535   4294967294      18446744073709551614
+462    255     65535   4294967295      18446744073709551614
+463    254     65534   4294967294      18446744073709551614
+464    255     65534   4294967295      18446744073709551614
+465    254     65535   4294967294      18446744073709551614
+466    255     65534   4294967295      18446744073709551614
+468    254     65534   4294967295      18446744073709551614
+470    254     65535   4294967294      18446744073709551614
+471    255     65534   4294967294      18446744073709551614
+472    254     65535   4294967295      18446744073709551614
+473    255     65534   4294967295      18446744073709551614
+474    255     65535   4294967295      18446744073709551614
+475    255     65535   4294967294      18446744073709551614
+477    254     65534   4294967294      18446744073709551614
+480    255     65535   4294967295      18446744073709551614
+483    254     65535   4294967294      18446744073709551614
+485    255     65535   4294967294      18446744073709551614
+486    254     65534   4294967294      18446744073709551614
+487    254     65534   4294967295      18446744073709551614
+488    255     65535   4294967295      18446744073709551614
+489    254     65535   4294967295      18446744073709551614
+490    255     65535   4294967294      18446744073709551614
+494    254     65534   4294967294      18446744073709551614
+495    255     65534   4294967295      18446744073709551614
+496    254     65534   4294967295      18446744073709551614
+497    255     65535   4294967294      18446744073709551614
+499    255     65534   4294967295      18446744073709551614
+
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet
new file mode 100644
index 00000000000..8f87903255f
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_1.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet
new file mode 100644
index 00000000000..fe48ab71842
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_2.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet
new file mode 100644
index 00000000000..ff8dae5ecec
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_3.parquet differ
diff --git 
a/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet
new file mode 100644
index 00000000000..010d15497ec
Binary files /dev/null and 
b/regression-test/data/external_table_p0/tvf/unsigned_integers_4.parquet differ
diff --git 
a/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
 
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
new file mode 100644
index 00000000000..24cfb5f2ac2
--- /dev/null
+++ 
b/regression-test/suites/external_table_p0/tvf/test_local_tvf_parquet_unsigned_integers.groovy
@@ -0,0 +1,102 @@
+import org.junit.Assert
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This suite tests reading parquet unsigned integer columns through the `local` tvf
+suite("test_local_tvf_parquet_unsigned_integers", "p0") {
+    List<List<Object>> backends =  sql """ show backends """
+    def dataFilePath = context.config.dataPath + "/external_table_p0/tvf/"
+
+    assertTrue(backends.size() > 0)
+
+    def be_id = backends[0][0]
+    // In cluster mode, make sure every BE has a copy of these data files
+    def outFilePath="/"
+    def transFile01="${dataFilePath}/unsigned_integers_1.parquet"
+    def transFile02="${dataFilePath}/unsigned_integers_2.parquet"
+    def transFile03="${dataFilePath}/unsigned_integers_3.parquet"
+    def transFile04="${dataFilePath}/unsigned_integers_4.parquet"
+
+    for (List<Object> backend : backends) {
+        def be_host = backend[1]
+        scpFiles ("root", be_host, transFile01, outFilePath, false);
+        scpFiles ("root", be_host, transFile02, outFilePath, false);
+        scpFiles ("root", be_host, transFile03, outFilePath, false);
+        scpFiles ("root", be_host, transFile04, outFilePath, false);
+    }
+
+    def file1 = outFilePath + "unsigned_integers_1.parquet";
+    def file2 = outFilePath + "unsigned_integers_2.parquet";
+    def file3 = outFilePath + "unsigned_integers_3.parquet";
+    def file4 = outFilePath + "unsigned_integers_4.parquet";
+
+
+
+
+    qt_test_1 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_2 """ desc function local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_3 """ desc function local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+    
+    qt_test_4 """ desc function local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_5 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+    qt_test_6 """ select * from local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id limit 10;"""
+
+    qt_test_7 """ desc function local( "file_path" = "${file4}", "backend_id" 
= "${be_id}", "format" = "parquet");"""
+
+    qt_test_8 """ select * from local( "file_path" = "${file4}", "backend_id" 
= "${be_id}", "format" = "parquet") order by id ;"""
+
+
+
+    qt_test_9 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint8_column = 200 order by id ;"""
+
+    qt_test_10 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint16_column = 41727 order by id ;"""
+
+    qt_test_11 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint32_column = 2299955463 order by 
id ;"""
+
+    qt_test_12 """ select * from local( "file_path" = "${file1}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 15103440093398422258 
order by id ;"""
+
+
+
+    qt_test_13 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint8_column = 222 order by id ;"""
+
+    qt_test_14 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint16_column = 58068 order by id ;"""
+
+    qt_test_15 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint32_column = 4213847006 order by 
id ;"""
+
+    qt_test_16 """ select * from local( "file_path" = "${file2}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 10613547124477746521 
order by id ;"""
+
+
+    qt_test_17 """ select count(id) from local( "file_path" = "${file3}", 
"backend_id" = "${be_id}", "format" = "parquet")  ;"""
+
+    qt_test_18 """ select count(uint8_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+
+    qt_test_19 """ select count(uint16_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+    
+    qt_test_20 """ select count(uint32_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+ 
+    qt_test_21 """ select count(uint64_column) from local( "file_path" = 
"${file3}", "backend_id" = "${be_id}", "format" = "parquet")  ;"""
+    
+    qt_test_22 """ select * from local( "file_path" = "${file3}", "backend_id" 
= "${be_id}", "format" = "parquet") where uint64_column = 18446744073709551614 
order by id ;"""
+
+
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to