lidavidm commented on code in PR #159:
URL: https://github.com/apache/iceberg-cpp/pull/159#discussion_r2253468625


##########
test/parquet_schema_test.cc:
##########
@@ -17,50 +17,494 @@
  * under the License.
  */
 
+#include <arrow/type.h>
 #include <gtest/gtest.h>
+#include <parquet/arrow/reader.h>
+#include <parquet/arrow/schema.h>
 #include <parquet/schema.h>
-#include <parquet/types.h>
 
+#include "iceberg/metadata_columns.h"
 #include "iceberg/parquet/parquet_schema_util_internal.h"
+#include "iceberg/schema.h"
+#include "matchers.h"
 
 namespace iceberg::parquet {
 
 namespace {
 
-::parquet::schema::NodePtr MakeInt32Node(const std::string& name, int field_id 
= -1) {
+constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id";
+
+::parquet::schema::NodePtr MakeInt32Node(const std::string& name, int field_id 
= -1,
+                                         bool optional = true) {
+  return ::parquet::schema::PrimitiveNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      ::parquet::LogicalType::None(), ::parquet::Type::INT32, 
/*primitive_length=*/-1,
+      field_id);
+}
+
+::parquet::schema::NodePtr MakeInt64Node(const std::string& name, int field_id 
= -1,
+                                         bool optional = true) {
+  return ::parquet::schema::PrimitiveNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      ::parquet::LogicalType::None(), ::parquet::Type::INT64, 
/*primitive_length=*/-1,
+      field_id);
+}
+
+::parquet::schema::NodePtr MakeStringNode(const std::string& name, int 
field_id = -1,
+                                          bool optional = true) {
+  return ::parquet::schema::PrimitiveNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      ::parquet::LogicalType::String(), ::parquet::Type::BYTE_ARRAY,
+      /*primitive_length=*/-1, field_id);
+}
+
+::parquet::schema::NodePtr MakeDoubleNode(const std::string& name, int 
field_id = -1,
+                                          bool optional = true) {
+  return ::parquet::schema::PrimitiveNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      ::parquet::LogicalType::None(), ::parquet::Type::DOUBLE, 
/*primitive_length=*/-1,
+      field_id);
+}
+
+::parquet::schema::NodePtr MakeFloatNode(const std::string& name, int field_id 
= -1,
+                                         bool optional = true) {
   return ::parquet::schema::PrimitiveNode::Make(
-      name, ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::None(),
-      ::parquet::Type::INT32, /*primitive_length=*/-1, field_id);
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      ::parquet::LogicalType::None(), ::parquet::Type::FLOAT, 
/*primitive_length=*/-1,
+      field_id);
 }
 
 ::parquet::schema::NodePtr MakeGroupNode(const std::string& name,
                                          const ::parquet::schema::NodeVector& 
fields,
-                                         int field_id = -1) {
-  return ::parquet::schema::GroupNode::Make(name, 
::parquet::Repetition::REQUIRED, fields,
-                                            /*logical_type=*/nullptr, 
field_id);
+                                         int field_id = -1, bool optional = 
true) {
+  return ::parquet::schema::GroupNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      fields, /*logical_type=*/nullptr, field_id);
 }
 
+::parquet::schema::NodePtr MakeListNode(const std::string& name,
+                                        const ::parquet::schema::NodePtr& 
element_node,
+                                        int field_id = -1, bool optional = 
true) {
+  auto list_group = ::parquet::schema::GroupNode::Make(
+      "element", ::parquet::Repetition::REPEATED, {element_node});
+  return ::parquet::schema::GroupNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      {list_group}, ::parquet::LogicalType::List(), field_id);
+}
+
+::parquet::schema::NodePtr MakeMapNode(const std::string& name,
+                                       const ::parquet::schema::NodePtr& 
key_node,
+                                       const ::parquet::schema::NodePtr& 
value_node,
+                                       int field_id = -1, bool optional = 
true) {
+  auto key_value_group = ::parquet::schema::GroupNode::Make(
+      "key_value", ::parquet::Repetition::REPEATED, {key_node, value_node});
+  return ::parquet::schema::GroupNode::Make(
+      name, optional ? ::parquet::Repetition::OPTIONAL : 
::parquet::Repetition::REQUIRED,
+      {key_value_group}, ::parquet::LogicalType::Map(), field_id);
+}
+
+// Helper to create SchemaManifest from Parquet schema
+::parquet::arrow::SchemaManifest MakeSchemaManifest(
+    const ::parquet::schema::NodePtr& parquet_schema) {
+  auto parquet_schema_descriptor = 
std::make_shared<::parquet::SchemaDescriptor>();
+  parquet_schema_descriptor->Init(parquet_schema);
+
+  ::parquet::arrow::SchemaManifest manifest;
+  auto status = ::parquet::arrow::SchemaManifest::Make(
+      parquet_schema_descriptor.get(), /*key_value_metadata=*/nullptr,
+      ::parquet::default_arrow_reader_properties(), &manifest);
+  if (!status.ok()) {
+    throw std::runtime_error("Failed to create SchemaManifest: " + 
status.ToString());
+  }
+  return manifest;
+}
+
+#define ASSERT_PROJECTED_FIELD(field_projection, index)                \
+  ASSERT_EQ(field_projection.kind, FieldProjection::Kind::kProjected); \
+  ASSERT_EQ(std::get<1>(field_projection.from), index);
+
+#define ASSERT_PROJECTED_NULL_FIELD(field_projection) \
+  ASSERT_EQ(field_projection.kind, FieldProjection::Kind::kNull);
+
 }  // namespace
 
 TEST(HasFieldIds, PrimitiveNode) {
   EXPECT_FALSE(HasFieldIds(MakeInt32Node("test_field")));
   EXPECT_TRUE(HasFieldIds(MakeInt32Node("test_field", /*field_id=*/1)));
 }
 
-TEST(HasFieldIds, GroupNode) {
-  auto group_node_without_field_id =
-      MakeGroupNode("test_group", {MakeInt32Node("c1"), MakeInt32Node("c2")});
-  EXPECT_FALSE(HasFieldIds(group_node_without_field_id));
+// TEST(HasFieldIds, GroupNode) {

Review Comment:
   Weird, I'm not sure what's going on here... `NodePtr` is a `shared_ptr`, and
   this seems to work elsewhere.
   
   https://github.com/llvm/llvm-project/issues/122405 also seems to be related,
   so maybe this check is a bit flaky. Could we uncomment the test and use
   `NOLINT` on the specific line instead?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to