This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new be1e0dcd27 [new-feature](complex-type) support read nested parquet and orc file with complex type (#22793)
be1e0dcd27 is described below

commit be1e0dcd275ccc4762f48a0b87ad67d056d24f0e
Author: amory <[email protected]>
AuthorDate: Thu Aug 10 18:23:07 2023 +0800

    [new-feature](complex-type) support read nested parquet and orc file with complex type (#22793)
---
 .../exec/format/parquet/vparquet_column_reader.cpp | 34 ++++++----------------
 .../exec/format/parquet/vparquet_column_reader.h   |  4 +++
 2 files changed, 13 insertions(+), 25 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 7499a63675..0a688f6ed3 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -123,25 +123,13 @@ Status ParquetColumnReader::create(io::FileReaderSPtr 
file, FieldSchema* field,
                                    size_t max_buf_size) {
     if (field->type.type == TYPE_ARRAY) {
         std::unique_ptr<ParquetColumnReader> element_reader;
-        if (field->children[0].type.type == TYPE_MAP ||
-            field->children[0].type.type == TYPE_STRUCT) {
-            return Status::InternalError(
-                    "Array does not support nested map/struct type in column 
{}", field->name);
-        }
         RETURN_IF_ERROR(create(file, &field->children[0], row_group, 
row_ranges, ctz, io_ctx,
                                element_reader, max_buf_size));
         element_reader->set_nested_column();
-        ArrayColumnReader* array_reader = new ArrayColumnReader(row_ranges, 
ctz, io_ctx);
+        auto array_reader = ArrayColumnReader::create_unique(row_ranges, ctz, 
io_ctx);
         RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field));
-        reader.reset(array_reader);
+        reader.reset(array_reader.release());
     } else if (field->type.type == TYPE_MAP) {
-        auto key_type = field->children[0].children[0].type.type;
-        auto value_type = field->children[0].children[1].type.type;
-        if (key_type == TYPE_ARRAY || key_type == TYPE_MAP || key_type == 
TYPE_STRUCT ||
-            value_type == TYPE_ARRAY || value_type == TYPE_MAP || value_type 
== TYPE_STRUCT) {
-            return Status::InternalError("Map does not support nested complex 
type in column {}",
-                                         field->name);
-        }
         std::unique_ptr<ParquetColumnReader> key_reader;
         std::unique_ptr<ParquetColumnReader> value_reader;
         RETURN_IF_ERROR(create(file, &field->children[0].children[0], 
row_group, row_ranges, ctz,
@@ -150,31 +138,27 @@ Status ParquetColumnReader::create(io::FileReaderSPtr 
file, FieldSchema* field,
                                io_ctx, value_reader, max_buf_size));
         key_reader->set_nested_column();
         value_reader->set_nested_column();
-        MapColumnReader* map_reader = new MapColumnReader(row_ranges, ctz, 
io_ctx);
+        auto map_reader = MapColumnReader::create_unique(row_ranges, ctz, 
io_ctx);
         RETURN_IF_ERROR(map_reader->init(std::move(key_reader), 
std::move(value_reader), field));
-        reader.reset(map_reader);
+        reader.reset(map_reader.release());
     } else if (field->type.type == TYPE_STRUCT) {
         std::vector<std::unique_ptr<ParquetColumnReader>> child_readers;
+        child_readers.reserve(field->children.size());
         for (int i = 0; i < field->children.size(); ++i) {
-            auto child_type = field->children[i].type.type;
-            if (child_type == TYPE_ARRAY || child_type == TYPE_MAP || 
child_type == TYPE_STRUCT) {
-                return Status::InternalError(
-                        "Struct does not support nested complex type in column 
{}", field->name);
-            }
             std::unique_ptr<ParquetColumnReader> child_reader;
             RETURN_IF_ERROR(create(file, &field->children[i], row_group, 
row_ranges, ctz, io_ctx,
                                    child_reader, max_buf_size));
             child_reader->set_nested_column();
             child_readers.emplace_back(std::move(child_reader));
         }
-        StructColumnReader* struct_reader = new StructColumnReader(row_ranges, 
ctz, io_ctx);
+        auto struct_reader = StructColumnReader::create_unique(row_ranges, 
ctz, io_ctx);
         RETURN_IF_ERROR(struct_reader->init(std::move(child_readers), field));
-        reader.reset(struct_reader);
+        reader.reset(struct_reader.release());
     } else {
         const tparquet::ColumnChunk& chunk = 
row_group.columns[field->physical_column_index];
-        ScalarColumnReader* scalar_reader = new ScalarColumnReader(row_ranges, 
chunk, ctz, io_ctx);
+        auto scalar_reader = ScalarColumnReader::create_unique(row_ranges, 
chunk, ctz, io_ctx);
         RETURN_IF_ERROR(scalar_reader->init(file, field, max_buf_size));
-        reader.reset(scalar_reader);
+        reader.reset(scalar_reader.release());
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.h b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
index f60958b252..6d0e2e3f6e 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.h
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.h
@@ -156,6 +156,7 @@ protected:
 };
 
 class ScalarColumnReader : public ParquetColumnReader {
+    ENABLE_FACTORY_CREATOR(ScalarColumnReader)
 public:
     ScalarColumnReader(const std::vector<RowRange>& row_ranges,
                        const tparquet::ColumnChunk& chunk_meta, 
cctz::time_zone* ctz,
@@ -195,6 +196,7 @@ private:
 };
 
 class ArrayColumnReader : public ParquetColumnReader {
+    ENABLE_FACTORY_CREATOR(ArrayColumnReader)
 public:
     ArrayColumnReader(const std::vector<RowRange>& row_ranges, 
cctz::time_zone* ctz,
                       io::IOContext* io_ctx)
@@ -218,6 +220,7 @@ private:
 };
 
 class MapColumnReader : public ParquetColumnReader {
+    ENABLE_FACTORY_CREATOR(MapColumnReader)
 public:
     MapColumnReader(const std::vector<RowRange>& row_ranges, cctz::time_zone* 
ctz,
                     io::IOContext* io_ctx)
@@ -252,6 +255,7 @@ private:
 };
 
 class StructColumnReader : public ParquetColumnReader {
+    ENABLE_FACTORY_CREATOR(StructColumnReader)
 public:
     StructColumnReader(const std::vector<RowRange>& row_ranges, 
cctz::time_zone* ctz,
                        io::IOContext* io_ctx)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to