pitrou commented on code in PR #38850:
URL: https://github.com/apache/arrow/pull/38850#discussion_r1565906087


##########
cpp/src/parquet/arrow/arrow_schema_test.cc:
##########
@@ -999,6 +999,17 @@ TEST_F(TestConvertArrowSchema, ParquetLists) {
     auto arrow_list = ::arrow::list(arrow_element);
     arrow_fields.push_back(::arrow::field("my_list", arrow_list, false));
   }
+  // Same as above but using list_view as arrow type.

Review Comment:
   The only thing that changes below is the list factory function, this could 
perhaps be factored out (also with `list_cast_fns` below), e.g.:
   ```c++
   auto list_type_factory = [](const std::shared_ptr<::arrow::Field> field) {
       return ::arrow::list(field->type());
   };
   
   ...
   
   for (const auto& list_factory : {list_type_factory, list_view_type_factory}) 
{ ... }
   ```



##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -870,6 +872,20 @@ 
std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
           return ::arrow::fixed_size_list(std::move(fields[0]), list_size);
         };
       }
+      if (origin_type.id() == ::arrow::Type::LIST_VIEW) {
+        // Reading LIST_VIEW as LIST as a workaround.
+        return [](FieldVector fields) {
+          DCHECK_EQ(fields.size(), 1);
+          return ::arrow::list(std::move(fields[0]));
+        };
+      }
+      if (origin_type.id() == ::arrow::Type::LARGE_LIST_VIEW) {
+        // Reading LARGE_LIST_VIEW as LARGE_LIST as a workaround.
+        return [](FieldVector fields) {
+          DCHECK_EQ(fields.size(), 1);
+          return ::arrow::large_list_view(std::move(fields[0]));

Review Comment:
   This contradicts the above?



##########
cpp/src/parquet/arrow/path_internal_test.cc:
##########
@@ -34,6 +34,7 @@ namespace parquet::arrow {
 using ::arrow::default_memory_pool;
 using ::arrow::field;
 using ::arrow::fixed_size_list;
+using ::arrow::list_view;

Review Comment:
   Why do you need this? You seem to always write `::arrow::list_view` 
explicitly below?



##########
cpp/src/parquet/arrow/path_internal_test.cc:
##########
@@ -113,9 +114,50 @@ class MultipathLevelBuilderTest : public testing::Test {
       ArrowWriteContext(default_memory_pool(), arrow_properties_.get());
 };
 
-TEST_F(MultipathLevelBuilderTest, NonNullableSingleListNonNullableEntries) {
+class MultipathLevelListTool {
+ public:
+  static std::shared_ptr<::arrow::DataType> get_list(
+      const std::shared_ptr<::arrow::Field>& element) {
+    return ::arrow::list(element);
+  }
+  static std::shared_ptr<::arrow::DataType> get_list(
+      const std::shared_ptr<::arrow::DataType>& element) {
+    return ::arrow::list(element);
+  }
+  static std::shared_ptr<::arrow::DataType> get_large_list(
+      const std::shared_ptr<::arrow::Field>& element) {
+    return ::arrow::large_list(element);
+  }
+};
+
+class MultipathLevelListViewTool {
+ public:
+  static std::shared_ptr<::arrow::DataType> get_list(
+      const std::shared_ptr<::arrow::Field>& element) {
+    return ::arrow::list_view(element);
+  }
+  static std::shared_ptr<::arrow::DataType> get_list(
+      const std::shared_ptr<::arrow::DataType>& element) {
+    return ::arrow::list_view(element);
+  }
+  static std::shared_ptr<::arrow::DataType> get_large_list(
+      const std::shared_ptr<::arrow::Field>& element) {
+    return ::arrow::large_list_view(element);
+  }
+};
+
+template <typename ListTool>

Review Comment:
   You don't need to template this by type, since all factories have the same 
signature anyway.



##########
cpp/src/parquet/arrow/path_internal.cc:
##########
@@ -724,19 +745,31 @@ class PathBuilder {
 
   template <typename T>
   ::arrow::enable_if_t<std::is_same<::arrow::ListArray, T>::value ||
-                           std::is_same<::arrow::LargeListArray, T>::value,
+                           std::is_same<::arrow::LargeListArray, T>::value ||
+                           std::is_same<::arrow::ListViewArray, T>::value ||
+                           std::is_same<::arrow::LargeListViewArray, T>::value,
                        Status>
   Visit(const T& array) {
     MaybeAddNullable(array);
     // Increment necessary due to empty lists.
     info_.max_def_level++;
     info_.max_rep_level++;
-    // raw_value_offsets() accounts for any slice offset.
-    ListPathNode<VarRangeSelector<typename T::offset_type>> node(
-        VarRangeSelector<typename T::offset_type>{array.raw_value_offsets()},
-        info_.max_rep_level, info_.max_def_level - 1);
-    info_.path.emplace_back(std::move(node));
-    nullable_in_parent_ = array.list_type()->value_field()->nullable();
+    // raw_value_offsets() and raw_value_sizes() accounts for any slice 
offset/size.
+    if constexpr (std::is_same<::arrow::ListViewArray, T>::value ||

Review Comment:
   You can use `is_list_view_type` from `arrow/type_traits.h`.



##########
cpp/src/parquet/arrow/schema.cc:
##########
@@ -870,6 +872,20 @@ 
std::function<std::shared_ptr<::arrow::DataType>(FieldVector)> GetNestedFactory(
           return ::arrow::fixed_size_list(std::move(fields[0]), list_size);
         };
       }
+      if (origin_type.id() == ::arrow::Type::LIST_VIEW) {
+        // Reading LIST_VIEW as LIST as a workaround.

Review Comment:
   Why the workaround?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to