HuaHuaY commented on code in PR #461:
URL: https://github.com/apache/iceberg-cpp/pull/461#discussion_r2654918310


##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,
+                               Args&&... args) {
+  return std::unexpected<Error>(
+      {error_kind, std::format(fmt, std::forward<Args>(args)...)});
+}
+
+[[nodiscard]] Status MakeNullError(ErrorKind error_kind, std::string_view 
field_name,
+                                   int64_t row_idx) {
+  return MakeError(error_kind, "Required field '{}' is null at row {}", 
field_name,
+                   row_idx);
+}
+
+[[nodiscard]] Status AssertViewType(const ArrowArrayView* view, ArrowType 
expected_type,
+                                    std::string_view field_name, ErrorKind 
error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  return {};
+}
+
+[[nodiscard]] Status AssertViewTypeAndChildren(const ArrowArrayView* view,
+                                               ArrowType expected_type,
+                                               int64_t expected_children,
+                                               std::string_view field_name,
+                                               ErrorKind error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  if (view->n_children != expected_children) {
+    return MakeError(error_kind, "Field '{}' should have {} children, got {}", 
field_name,
+                     expected_children, view->n_children);
+  }
+  return {};
+}
+
+std::vector<uint8_t> ParseInt8VectorField(const ArrowArrayView* view,
+                                          int64_t offset_idx) {
   auto buffer = ArrowArrayViewGetBytesUnsafe(view, offset_idx);
   return {buffer.data.as_char, buffer.data.as_char + buffer.size_bytes};
 }
 
-Status ParsePartitionFieldSummaryList(ArrowArrayView* view_of_column,
+template <typename T, typename Container, typename Accessor>
+Status ParseIntegerField(const ArrowArrayView* array_view, Container& 
container,
+                         Accessor&& accessor, std::string_view field_name, 
bool required,
+                         ErrorKind error_kind) {
+  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {
+    if (!ArrowArrayViewIsNull(array_view, row_idx)) {
+      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);
+      accessor(container, row_idx) = static_cast<T>(value);
+    } else if (required) {
+      return MakeNullError(error_kind, field_name, row_idx);
+    }
+  }
+  return {};
+}
+
+template <typename Container, typename Accessor>
+Status ParseStringField(const ArrowArrayView* array_view, Container& container,
+                        Accessor&& accessor, std::string_view field_name, bool 
required,
+                        ErrorKind error_kind) {
+  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {
+    if (!ArrowArrayViewIsNull(array_view, row_idx)) {
+      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);
+      accessor(container, row_idx) = std::string(value.data, value.size_bytes);
+    } else if (required) {
+      return MakeNullError(error_kind, field_name, row_idx);
+    }
+  }
+  return {};
+}
+
+template <typename Container, typename Accessor>
+Status ParseBinaryField(const ArrowArrayView* array_view, Container& container,
+                        Accessor&& accessor, std::string_view field_name, bool 
required,
+                        ErrorKind error_kind) {
+  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {
+    if (!ArrowArrayViewIsNull(array_view, row_idx)) {
+      accessor(container, row_idx) = ParseInt8VectorField(array_view, row_idx);
+    } else if (required) {
+      return MakeNullError(error_kind, field_name, row_idx);
+    }
+  }
+  return {};
+}
+
+template <typename T, typename Container, typename Accessor>
+void ParseIntegerVectorField(const ArrowArrayView* array_view, int64_t length,
+                             Container& container, Accessor&& accessor) {
+  for (int64_t row_idx = 0; row_idx < length; row_idx++) {
+    auto begin_offset = ArrowArrayViewListChildOffset(array_view, row_idx);
+    auto end_offset = ArrowArrayViewListChildOffset(array_view, row_idx + 1);
+    for (int64_t offset = begin_offset; offset < end_offset; offset++) {
+      auto value = ArrowArrayViewGetIntUnsafe(array_view->children[0], offset);
+      accessor(container, row_idx).emplace_back(static_cast<T>(value));
+    }
+  }
+}
+
+template <typename Container, typename Accessor>
+Status ParseIntLongMapField(const ArrowArrayView* array_view, int64_t length,
+                            Container& container, Accessor&& accessor,
+                            std::string_view field_name, ErrorKind error_kind) 
{
+  ICEBERG_RETURN_UNEXPECTED(
+      AssertViewType(array_view, ArrowType::NANOARROW_TYPE_MAP, field_name, 
error_kind));
+
+  auto map_array_view = array_view->children[0];
+  ICEBERG_RETURN_UNEXPECTED(AssertViewTypeAndChildren(
+      map_array_view, ArrowType::NANOARROW_TYPE_STRUCT, 2, field_name, 
error_kind));
+
+  auto key_array_view = map_array_view->children[0];
+  auto value_array_view = map_array_view->children[1];
+  ICEBERG_RETURN_UNEXPECTED(AssertViewType(
+      key_array_view, ArrowType::NANOARROW_TYPE_INT32, field_name, 
error_kind));
+  ICEBERG_RETURN_UNEXPECTED(AssertViewType(
+      value_array_view, ArrowType::NANOARROW_TYPE_INT64, field_name, 
error_kind));
+
+  for (int64_t row_idx = 0; row_idx < length; row_idx++) {
+    auto begin_offset = array_view->buffer_views[1].data.as_int32[row_idx];
+    auto end_offset = array_view->buffer_views[1].data.as_int32[row_idx + 1];
+    for (int32_t offset = begin_offset; offset < end_offset; offset++) {
+      auto key = ArrowArrayViewGetIntUnsafe(key_array_view, offset);
+      accessor(container, row_idx)[key] =
+          ArrowArrayViewGetIntUnsafe(value_array_view, offset);
+    }
+  }
+  return {};
+}
+
+template <typename Container, typename Accessor>
+Status ParseIntBinaryMapField(const ArrowArrayView* array_view, int64_t length,

Review Comment:
   ditto



##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,
+                               Args&&... args) {
+  return std::unexpected<Error>(
+      {error_kind, std::format(fmt, std::forward<Args>(args)...)});
+}
+
+[[nodiscard]] Status MakeNullError(ErrorKind error_kind, std::string_view 
field_name,

Review Comment:
   Is it necessary to provide the first argument `error_kind`? Will any error 
kind except `ErrorKind::kInvalidManifestList` be used here?



##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,

Review Comment:
   This method is universal, and maybe it shouldn’t be placed here.



##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,
+                               Args&&... args) {
+  return std::unexpected<Error>(
+      {error_kind, std::format(fmt, std::forward<Args>(args)...)});
+}
+
+[[nodiscard]] Status MakeNullError(ErrorKind error_kind, std::string_view 
field_name,
+                                   int64_t row_idx) {
+  return MakeError(error_kind, "Required field '{}' is null at row {}", 
field_name,
+                   row_idx);
+}
+
+[[nodiscard]] Status AssertViewType(const ArrowArrayView* view, ArrowType 
expected_type,
+                                    std::string_view field_name, ErrorKind 
error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  return {};
+}
+
+[[nodiscard]] Status AssertViewTypeAndChildren(const ArrowArrayView* view,
+                                               ArrowType expected_type,
+                                               int64_t expected_children,
+                                               std::string_view field_name,
+                                               ErrorKind error_kind) {
+  if (view->storage_type != expected_type) {
+    return MakeError(error_kind, "Field '{}' should be of type {}, got {}", 
field_name,
+                     ArrowTypeString(expected_type), 
ArrowTypeString(view->storage_type));
+  }
+  if (view->n_children != expected_children) {
+    return MakeError(error_kind, "Field '{}' should have {} children, got {}", 
field_name,
+                     expected_children, view->n_children);
+  }
+  return {};
+}
+
+std::vector<uint8_t> ParseInt8VectorField(const ArrowArrayView* view,
+                                          int64_t offset_idx) {
   auto buffer = ArrowArrayViewGetBytesUnsafe(view, offset_idx);
   return {buffer.data.as_char, buffer.data.as_char + buffer.size_bytes};
 }
 
-Status ParsePartitionFieldSummaryList(ArrowArrayView* view_of_column,
+template <typename T, typename Container, typename Accessor>
+Status ParseIntegerField(const ArrowArrayView* array_view, Container& 
container,

Review Comment:
   I suggest to use a template to merge `ParseIntegerField`, `ParseStringField` 
and `ParseBinaryField` and simplify `Accessor` to just receive a projection.
   
   ```cpp
   template <typename Container, typename Accessor, typename Transfer>
     requires std::ranges::forward_range<Container> &&
              requires(Accessor& a, Container& c, Transfer& t, const 
ArrowArrayView* v,
                       int64_t i) {
                std::invoke(a, *std::ranges::begin(c)) = std::invoke(t, v, i);
              }
   Status ParseField(const ArrowArrayView* array_view, Container& container,
                     Accessor accessor, std::string_view field_name, bool 
required,
                     ErrorKind error_kind, Transfer transfer) {
     auto iter = std::ranges::begin(container);
     for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++, iter++) 
{
       if (!ArrowArrayViewIsNull(array_view, row_idx)) {
         std::invoke(accessor, *iter) = std::invoke(transfer, array_view, 
row_idx);
       } else if (required) {
         return MakeNullError(error_kind, field_name, row_idx);
       }
     }
     return {};
   }
   
   template <typename T, typename Container, typename Accessor>
   Status ParseIntegerField(const ArrowArrayView* array_view, Container& 
container,
                            Accessor accessor, std::string_view field_name, 
bool required,
                            ErrorKind error_kind) {
     return ParseField(array_view, container, accessor, field_name, required, 
error_kind,
                       [](const ArrowArrayView* view, int64_t row_idx) {
                         return static_cast<T>(ArrowArrayViewGetIntUnsafe(view, 
row_idx));
                       });
   }
   
   template <typename Container, typename Accessor>
   Status ParseStringField(const ArrowArrayView* array_view, Container& 
container,
                           Accessor accessor, std::string_view field_name, bool 
required,
                           ErrorKind error_kind) {
     return ParseField(array_view, container, accessor, field_name, required, 
error_kind,
                       [](const ArrowArrayView* view, int64_t row_idx) {
                         auto value = ArrowArrayViewGetStringUnsafe(view, 
row_idx);
                         return std::string(value.data, value.size_bytes);
                       });
   }
   
   template <typename Container, typename Accessor>
   Status ParseBinaryField(const ArrowArrayView* array_view, Container& 
container,
                           Accessor accessor, std::string_view field_name, bool 
required,
                           ErrorKind error_kind) {
     return ParseField(array_view, container, accessor, field_name, required, 
error_kind,
                       ParseInt8VectorField);
   }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to