HuaHuaY commented on code in PR #461:
URL: https://github.com/apache/iceberg-cpp/pull/461#discussion_r2654997993


##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,
+                               Args&&... args) {
+  return std::unexpected<Error>(
+      {error_kind, std::format(fmt, std::forward<Args>(args)...)});
+}
+
+[[nodiscard]] Status MakeNullError(ErrorKind error_kind, std::string_view 
field_name,
+                                   int64_t row_idx) {
+  return MakeError(error_kind, "Required field '{}' is null at row {}", 
field_name,
+                   row_idx);
+}
+
+[[nodiscard]] Status AssertViewType(const ArrowArrayView* view, ArrowType 
expected_type,
+                                    std::string_view field_name, ErrorKind 
error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  return {};
+}
+
+[[nodiscard]] Status AssertViewTypeAndChildren(const ArrowArrayView* view,
+                                               ArrowType expected_type,
+                                               int64_t expected_children,
+                                               std::string_view field_name,
+                                               ErrorKind error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  if (view->n_children != expected_children) {
+    return MakeError(error_kind, "Field '{}' should have {} children, got {}", 
field_name,
+                     expected_children, view->n_children);
+  }
+  return {};
+}
+
+std::vector<uint8_t> ParseInt8VectorField(const ArrowArrayView* view,
+                                          int64_t offset_idx) {
   auto buffer = ArrowArrayViewGetBytesUnsafe(view, offset_idx);
   return {buffer.data.as_char, buffer.data.as_char + buffer.size_bytes};
 }
 
-Status ParsePartitionFieldSummaryList(ArrowArrayView* view_of_column,
+template <typename T, typename Container, typename Accessor>
+Status ParseIntegerField(const ArrowArrayView* array_view, Container& 
container,

Review Comment:
   After offline discussion, I will open a new PR to modify this.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to