wgtmac commented on code in PR #461:
URL: https://github.com/apache/iceberg-cpp/pull/461#discussion_r2654940261


##########
src/iceberg/manifest/manifest_reader.cc:
##########
@@ -33,270 +33,357 @@
 #include "iceberg/manifest/manifest_entry.h"
 #include "iceberg/manifest/manifest_list.h"
 #include "iceberg/manifest/manifest_reader_internal.h"
-#include "iceberg/metadata_columns.h"
 #include "iceberg/partition_spec.h"
 #include "iceberg/schema.h"
 #include "iceberg/schema_field.h"
 #include "iceberg/type.h"
 #include "iceberg/util/checked_cast.h"
 #include "iceberg/util/content_file_util.h"
 #include "iceberg/util/macros.h"
+#include "nanoarrow/common/inline_types.h"
 
 namespace iceberg {
 
 namespace {
 
-// TODO(gangwu): refactor these macros with template functions.
-#define PARSE_PRIMITIVE_FIELD(item, array_view, type)                          
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetIntUnsafe(array_view, row_idx);            
         \
-      item = static_cast<type>(value);                                         
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_STRING_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(array_view, row_idx)) {                          
         \
-      auto value = ArrowArrayViewGetStringUnsafe(array_view, row_idx);         
         \
-      item = std::string(value.data, value.size_bytes);                        
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_BINARY_FIELD(item, array_view)                                   
         \
-  for (int64_t row_idx = 0; row_idx < array_view->length; row_idx++) {         
         \
-    if (!ArrowArrayViewIsNull(view_of_column, row_idx)) {                      
         \
-      item = ArrowArrayViewGetInt8Vector(array_view, row_idx);                 
         \
-    } else if (required) {                                                     
         \
-      return InvalidManifestList("Field {} is required but null at row {}", 
field_name, \
-                                 row_idx);                                     
         \
-    }                                                                          
         \
-  }
-
-#define PARSE_INTEGER_VECTOR_FIELD(item, count, array_view, type)              
     \
-  for (int64_t manifest_idx = 0; manifest_idx < count; manifest_idx++) {       
     \
-    auto offset = ArrowArrayViewListChildOffset(array_view, manifest_idx);     
     \
-    auto next_offset = ArrowArrayViewListChildOffset(array_view, manifest_idx 
+ 1); \
-    for (int64_t offset_idx = offset; offset_idx < next_offset; offset_idx++) 
{     \
-      item.emplace_back(static_cast<type>(                                     
     \
-          ArrowArrayViewGetIntUnsafe(array_view->children[0], offset_idx)));   
     \
-    }                                                                          
     \
-  }
-
-#define PARSE_MAP_FIELD(item, count, array_view, key_type, value_type, 
assignment)   \
-  do {                                                                         
      \
-    if (array_view->storage_type != ArrowType::NANOARROW_TYPE_MAP) {           
      \
-      return InvalidManifest("Field:{} should be a map.", field_name);         
      \
-    }                                                                          
      \
-    auto view_of_map = array_view->children[0];                                
      \
-    ASSERT_VIEW_TYPE_AND_CHILDREN(view_of_map, 
ArrowType::NANOARROW_TYPE_STRUCT, 2); \
-    auto view_of_map_key = view_of_map->children[0];                           
      \
-    ASSERT_VIEW_TYPE(view_of_map_key, key_type);                               
      \
-    auto view_of_map_value = view_of_map->children[1];                         
      \
-    ASSERT_VIEW_TYPE(view_of_map_value, value_type);                           
      \
-    for (int64_t row_idx = 0; row_idx < count; row_idx++) {                    
      \
-      auto offset = array_view->buffer_views[1].data.as_int32[row_idx];        
      \
-      auto next_offset = array_view->buffer_views[1].data.as_int32[row_idx + 
1];     \
-      for (int32_t offset_idx = offset; offset_idx < next_offset; 
offset_idx++) {    \
-        auto key = ArrowArrayViewGetIntUnsafe(view_of_map_key, offset_idx);    
      \
-        item[key] = assignment;                                                
      \
-      }                                                                        
      \
-    }                                                                          
      \
-  } while (0)
-
-#define PARSE_INT_LONG_MAP_FIELD(item, count, array_view)                   \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_INT64,                          \
-                  ArrowArrayViewGetIntUnsafe(view_of_map_value, offset_idx));
-
-#define PARSE_INT_BINARY_MAP_FIELD(item, count, array_view)                 \
-  PARSE_MAP_FIELD(item, count, array_view, ArrowType::NANOARROW_TYPE_INT32, \
-                  ArrowType::NANOARROW_TYPE_BINARY,                         \
-                  ArrowArrayViewGetInt8Vector(view_of_map_value, offset_idx));
-
-#define ASSERT_VIEW_TYPE(view, type)                                           
\
-  if (view->storage_type != type) {                                            
\
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
\
-  }
-
-#define ASSERT_VIEW_TYPE_AND_CHILDREN(view, type, n_child)                     
  \
-  if (view->storage_type != type) {                                            
  \
-    return InvalidManifest("Sub Field:{} should be a {}.", field_name, #type); 
  \
-  }                                                                            
  \
-  if (view->n_children != n_child) {                                           
  \
-    return InvalidManifest("Sub Field for:{} should have key&value:{} 
columns.", \
-                           field_name, n_child);                               
  \
-  }
-
-std::vector<uint8_t> ArrowArrayViewGetInt8Vector(const ArrowArrayView* view,
-                                                 int32_t offset_idx) {
+template <typename... Args>
+[[nodiscard]] Status MakeError(ErrorKind error_kind, 
std::format_string<Args...> fmt,
+                               Args&&... args) {
+  return std::unexpected<Error>(
+      {error_kind, std::format(fmt, std::forward<Args>(args)...)});
+}
+
+[[nodiscard]] Status MakeNullError(ErrorKind error_kind, std::string_view 
field_name,

Review Comment:
   For now, `ErrorKind::kInvalidManifestList` is the only value passed. But 
this function is generic so perhaps it is ok to keep it to be consistent?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to