wgtmac commented on code in PR #159: URL: https://github.com/apache/iceberg-cpp/pull/159#discussion_r2253266405
########## test/parquet_schema_test.cc: ########## @@ -17,50 +17,494 @@ * under the License. */ +#include <arrow/type.h> #include <gtest/gtest.h> +#include <parquet/arrow/reader.h> +#include <parquet/arrow/schema.h> #include <parquet/schema.h> -#include <parquet/types.h> +#include "iceberg/metadata_columns.h" #include "iceberg/parquet/parquet_schema_util_internal.h" +#include "iceberg/schema.h" +#include "matchers.h" namespace iceberg::parquet { namespace { -::parquet::schema::NodePtr MakeInt32Node(const std::string& name, int field_id = -1) { +constexpr std::string_view kParquetFieldIdKey = "PARQUET:field_id"; + +::parquet::schema::NodePtr MakeInt32Node(const std::string& name, int field_id = -1, + bool optional = true) { + return ::parquet::schema::PrimitiveNode::Make( + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::None(), ::parquet::Type::INT32, /*primitive_length=*/-1, + field_id); +} + +::parquet::schema::NodePtr MakeInt64Node(const std::string& name, int field_id = -1, + bool optional = true) { + return ::parquet::schema::PrimitiveNode::Make( + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::None(), ::parquet::Type::INT64, /*primitive_length=*/-1, + field_id); +} + +::parquet::schema::NodePtr MakeStringNode(const std::string& name, int field_id = -1, + bool optional = true) { + return ::parquet::schema::PrimitiveNode::Make( + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::String(), ::parquet::Type::BYTE_ARRAY, + /*primitive_length=*/-1, field_id); +} + +::parquet::schema::NodePtr MakeDoubleNode(const std::string& name, int field_id = -1, + bool optional = true) { + return ::parquet::schema::PrimitiveNode::Make( + name, optional ? 
::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::None(), ::parquet::Type::DOUBLE, /*primitive_length=*/-1, + field_id); +} + +::parquet::schema::NodePtr MakeFloatNode(const std::string& name, int field_id = -1, + bool optional = true) { return ::parquet::schema::PrimitiveNode::Make( - name, ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::None(), - ::parquet::Type::INT32, /*primitive_length=*/-1, field_id); + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + ::parquet::LogicalType::None(), ::parquet::Type::FLOAT, /*primitive_length=*/-1, + field_id); } ::parquet::schema::NodePtr MakeGroupNode(const std::string& name, const ::parquet::schema::NodeVector& fields, - int field_id = -1) { - return ::parquet::schema::GroupNode::Make(name, ::parquet::Repetition::REQUIRED, fields, - /*logical_type=*/nullptr, field_id); + int field_id = -1, bool optional = true) { + return ::parquet::schema::GroupNode::Make( + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + fields, /*logical_type=*/nullptr, field_id); } +::parquet::schema::NodePtr MakeListNode(const std::string& name, + const ::parquet::schema::NodePtr& element_node, + int field_id = -1, bool optional = true) { + auto list_group = ::parquet::schema::GroupNode::Make( + "element", ::parquet::Repetition::REPEATED, {element_node}); + return ::parquet::schema::GroupNode::Make( + name, optional ? 
::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + {list_group}, ::parquet::LogicalType::List(), field_id); +} + +::parquet::schema::NodePtr MakeMapNode(const std::string& name, + const ::parquet::schema::NodePtr& key_node, + const ::parquet::schema::NodePtr& value_node, + int field_id = -1, bool optional = true) { + auto key_value_group = ::parquet::schema::GroupNode::Make( + "key_value", ::parquet::Repetition::REPEATED, {key_node, value_node}); + return ::parquet::schema::GroupNode::Make( + name, optional ? ::parquet::Repetition::OPTIONAL : ::parquet::Repetition::REQUIRED, + {key_value_group}, ::parquet::LogicalType::Map(), field_id); +} + +// Helper to create SchemaManifest from Parquet schema +::parquet::arrow::SchemaManifest MakeSchemaManifest( + const ::parquet::schema::NodePtr& parquet_schema) { + auto parquet_schema_descriptor = std::make_shared<::parquet::SchemaDescriptor>(); + parquet_schema_descriptor->Init(parquet_schema); + + ::parquet::arrow::SchemaManifest manifest; + auto status = ::parquet::arrow::SchemaManifest::Make( + parquet_schema_descriptor.get(), /*key_value_metadata=*/nullptr, + ::parquet::default_arrow_reader_properties(), &manifest); + if (!status.ok()) { + throw std::runtime_error("Failed to create SchemaManifest: " + status.ToString()); + } + return manifest; +} + +#define ASSERT_PROJECTED_FIELD(field_projection, index) \ + ASSERT_EQ(field_projection.kind, FieldProjection::Kind::kProjected); \ + ASSERT_EQ(std::get<1>(field_projection.from), index); + +#define ASSERT_PROJECTED_NULL_FIELD(field_projection) \ + ASSERT_EQ(field_projection.kind, FieldProjection::Kind::kNull); + } // namespace TEST(HasFieldIds, PrimitiveNode) { EXPECT_FALSE(HasFieldIds(MakeInt32Node("test_field"))); EXPECT_TRUE(HasFieldIds(MakeInt32Node("test_field", /*field_id=*/1))); } -TEST(HasFieldIds, GroupNode) { - auto group_node_without_field_id = - MakeGroupNode("test_group", {MakeInt32Node("c1"), MakeInt32Node("c2")}); - 
EXPECT_FALSE(HasFieldIds(group_node_without_field_id));
+// TEST(HasFieldIds, GroupNode) {

Review Comment:
The linter is satisfied if I comment out these lines. @lidavidm

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org