wgtmac commented on code in PR #180:
URL: https://github.com/apache/iceberg-cpp/pull/180#discussion_r2298212667


##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,57 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
+#include <functional>
+
+#include <iceberg/util/string_utils.h>

Review Comment:
   ```suggestion
   #include "iceberg/util/string_utils.h"
   ```
   
   By convention, `#include <xxx>` for third-party libraries and `#include 
"xxx"` for current library.



##########
src/iceberg/schema.h:
##########
@@ -54,13 +55,43 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  /// \brief Find thd SchemaField By field name

Review Comment:
   ```suggestion
     /// \brief Find the SchemaField by field name.
   ```



##########
test/schema_test.cc:
##########
@@ -81,3 +81,443 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+class BasicShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType(
+        std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
+
+    auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+        4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+    auto maptype =
+        iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key", 
iceberg::int32()),
+                         iceberg::SchemaField::MakeRequired(
+                             6, "value", 
std::make_shared<iceberg::ListType>(listype)));
+
+    field4_ = std::make_unique<iceberg::SchemaField>(
+        4, "element", std::make_shared<iceberg::StructType>(structtype), 
false);
+    field5_ = std::make_unique<iceberg::SchemaField>(5, "key", 
iceberg::int32(), false);
+    field6_ = std::make_unique<iceberg::SchemaField>(
+        6, "value", std::make_shared<iceberg::ListType>(listype), false);
+    field7_ = std::make_unique<iceberg::SchemaField>(
+        7, "Value", std::make_shared<iceberg::MapType>(maptype), false);
+
+    schema_ =
+        
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field7_}, 
1);
+  }
+
+  std::shared_ptr<iceberg::Schema> schema_;

Review Comment:
   ```suggestion
           
std::make_unique<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field7_}, 
1);
     }
   
     std::unique_ptr<iceberg::Schema> schema_;
   ```
   
   We don't need shared_ptr here.



##########
test/schema_test.cc:
##########
@@ -81,3 +81,443 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+class BasicShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType(
+        std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
+
+    auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+        4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+    auto maptype =
+        iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key", 
iceberg::int32()),
+                         iceberg::SchemaField::MakeRequired(
+                             6, "value", 
std::make_shared<iceberg::ListType>(listype)));
+
+    field4_ = std::make_unique<iceberg::SchemaField>(
+        4, "element", std::make_shared<iceberg::StructType>(structtype), 
false);
+    field5_ = std::make_unique<iceberg::SchemaField>(5, "key", 
iceberg::int32(), false);
+    field6_ = std::make_unique<iceberg::SchemaField>(
+        6, "value", std::make_shared<iceberg::ListType>(listype), false);
+    field7_ = std::make_unique<iceberg::SchemaField>(
+        7, "Value", std::make_shared<iceberg::MapType>(maptype), false);
+
+    schema_ =
+        
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field7_}, 
1);
+  }
+
+  std::shared_ptr<iceberg::Schema> schema_;
+  std::unique_ptr<iceberg::SchemaField> field1_;
+  std::unique_ptr<iceberg::SchemaField> field2_;
+  std::unique_ptr<iceberg::SchemaField> field3_;
+  std::unique_ptr<iceberg::SchemaField> field4_;
+  std::unique_ptr<iceberg::SchemaField> field5_;
+  std::unique_ptr<iceberg::SchemaField> field6_;
+  std::unique_ptr<iceberg::SchemaField> field7_;
+};
+
+TEST_F(BasicShortNameTest, TestFindById) {
+  ASSERT_THAT(schema_->FindFieldById(7), ::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldById(6), ::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldById(5), ::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldById(4), ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldById(3), ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldById(2), ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldById(1), ::testing::Optional(*field1_));
+
+  ASSERT_THAT(schema_->FindFieldById(10), ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByName) {
+  ASSERT_THAT(schema_->FindFieldByName("Value"), 
::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value"), 
::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.key"), 
::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element"),
+              ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foobar"),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Bar"),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foo"),
+              ::testing::Optional(*field1_));
+
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.FoO"),
+              ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByNameCaseInsensitive) {
+  ASSERT_THAT(schema_->FindFieldByName("vALue", false), 
::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldByName("vALue.VALUE", false),
+              ::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.kEy", false),
+              ::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.elEment", false),
+              ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.eLement.fOObar", false),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.vaLUe.elemEnt.Bar", false),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FOO", false),
+              ::testing::Optional(*field1_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FO", false),
+              ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByShortNameCaseInsensitive) {
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.value.FOO", false),
+              ::testing::Optional(*field1_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.Bar", false),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR", false),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR.a", false),
+              ::testing::Optional(std::nullopt));
+}
+
+class ComplexShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType({*field1_, *field2_, *field3_});

Review Comment:
   ditto, use shared_ptr



##########
test/schema_test.cc:
##########
@@ -81,3 +81,443 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+class BasicShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType(

Review Comment:
   Can we use a shared_ptr for `structtype`? Then we don't need to make an 
unneccesary  copy it at line 104.



##########
src/iceberg/schema.cc:
##########
@@ -19,13 +19,57 @@
 
 #include "iceberg/schema.h"
 
+#include <algorithm>
 #include <format>
+#include <functional>
+
+#include <iceberg/util/string_utils.h>
 
 #include "iceberg/type.h"
 #include "iceberg/util/formatter.h"  // IWYU pragma: keep
+#include "iceberg/util/macros.h"
+#include "iceberg/util/visit_type.h"
 
 namespace iceberg {
 
+class IdToFieldVisitor {
+ public:
+  explicit IdToFieldVisitor(
+      std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>&
+          id_to_field);
+  Status Visit(const Type& type);
+  Status VisitNestedType(const Type& type);
+
+ private:
+  std::unordered_map<int32_t, std::reference_wrapper<const SchemaField>>& 
id_to_field_;
+};
+
+class NameToIdVisitor {
+ public:
+  explicit NameToIdVisitor(
+      std::unordered_map<std::string, int32_t>& name_to_id, bool 
case_sensitive_ = true,

Review Comment:
   ```suggestion
         std::unordered_map<std::string, int32_t>& name_to_id, bool 
case_sensitive = true,
   ```



##########
src/iceberg/schema.h:
##########
@@ -54,13 +55,43 @@ class ICEBERG_EXPORT Schema : public StructType {
 
   [[nodiscard]] std::string ToString() const override;
 
+  /// \brief Find thd SchemaField By field name
+  ///
+  /// Short names for maps and lists are included for any name that does not 
conflict with
+  /// a canonical name. For example, a list, 'l', of structs with field 'x' 
will produce
+  /// short name 'l.x' in addition to canonical name 'l.element.x'. a map 'm', 
if its
+  /// value include a structs with field 'x' wil produce short name 'm.x' in 
addition to
+  /// canonical name 'm.value.x'
+  /// FIXME: Currently only handles ASCII lowercase conversion; extend to 
support
+  /// non-ASCII characters (e.g., using std::towlower or ICU)
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
+  FindFieldByName(std::string_view name, bool case_sensitive = true) const;
+
+  [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>

Review Comment:
   ```suggestion
     /// \brief Find the SchemaField by field id.
     [[nodiscard]] Result<std::optional<std::reference_wrapper<const 
SchemaField>>>
   ```



##########
test/schema_test.cc:
##########
@@ -81,3 +81,443 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+class BasicShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType(
+        std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
+
+    auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+        4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+    auto maptype =
+        iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key", 
iceberg::int32()),
+                         iceberg::SchemaField::MakeRequired(
+                             6, "value", 
std::make_shared<iceberg::ListType>(listype)));
+
+    field4_ = std::make_unique<iceberg::SchemaField>(
+        4, "element", std::make_shared<iceberg::StructType>(structtype), 
false);
+    field5_ = std::make_unique<iceberg::SchemaField>(5, "key", 
iceberg::int32(), false);

Review Comment:
   Can we move `field4_` and `field5_` initialization before line 98? Then we 
can simply write it as 
   
   ```cpp
       auto maptype = std::make_shared<iceberg::MapType>(*field4_, *field5_);
   ``` 
   
   The point is avoid creating duplicate objects.



##########
test/schema_test.cc:
##########
@@ -81,3 +81,443 @@ TEST(SchemaTest, Equality) {
   ASSERT_EQ(schema1, schema5);
   ASSERT_EQ(schema5, schema1);
 }
+
+class BasicShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType(
+        std::vector<iceberg::SchemaField>{*field1_, *field2_, *field3_});
+
+    auto listype = iceberg::ListType(iceberg::SchemaField::MakeRequired(
+        4, "element", std::make_shared<iceberg::StructType>(structtype)));
+
+    auto maptype =
+        iceberg::MapType(iceberg::SchemaField::MakeRequired(5, "key", 
iceberg::int32()),
+                         iceberg::SchemaField::MakeRequired(
+                             6, "value", 
std::make_shared<iceberg::ListType>(listype)));
+
+    field4_ = std::make_unique<iceberg::SchemaField>(
+        4, "element", std::make_shared<iceberg::StructType>(structtype), 
false);
+    field5_ = std::make_unique<iceberg::SchemaField>(5, "key", 
iceberg::int32(), false);
+    field6_ = std::make_unique<iceberg::SchemaField>(
+        6, "value", std::make_shared<iceberg::ListType>(listype), false);
+    field7_ = std::make_unique<iceberg::SchemaField>(
+        7, "Value", std::make_shared<iceberg::MapType>(maptype), false);
+
+    schema_ =
+        
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field7_}, 
1);
+  }
+
+  std::shared_ptr<iceberg::Schema> schema_;
+  std::unique_ptr<iceberg::SchemaField> field1_;
+  std::unique_ptr<iceberg::SchemaField> field2_;
+  std::unique_ptr<iceberg::SchemaField> field3_;
+  std::unique_ptr<iceberg::SchemaField> field4_;
+  std::unique_ptr<iceberg::SchemaField> field5_;
+  std::unique_ptr<iceberg::SchemaField> field6_;
+  std::unique_ptr<iceberg::SchemaField> field7_;
+};
+
+TEST_F(BasicShortNameTest, TestFindById) {
+  ASSERT_THAT(schema_->FindFieldById(7), ::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldById(6), ::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldById(5), ::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldById(4), ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldById(3), ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldById(2), ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldById(1), ::testing::Optional(*field1_));
+
+  ASSERT_THAT(schema_->FindFieldById(10), ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByName) {
+  ASSERT_THAT(schema_->FindFieldByName("Value"), 
::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value"), 
::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.key"), 
::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element"),
+              ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foobar"),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Bar"),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.Foo"),
+              ::testing::Optional(*field1_));
+
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.element.FoO"),
+              ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByNameCaseInsensitive) {
+  ASSERT_THAT(schema_->FindFieldByName("vALue", false), 
::testing::Optional(*field7_));
+  ASSERT_THAT(schema_->FindFieldByName("vALue.VALUE", false),
+              ::testing::Optional(*field6_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.kEy", false),
+              ::testing::Optional(*field5_));
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.elEment", false),
+              ::testing::Optional(*field4_));
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.vAlue.eLement.fOObar", false),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.vaLUe.elemEnt.Bar", false),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FOO", false),
+              ::testing::Optional(*field1_));
+  ASSERT_THAT(schema_->FindFieldByName("valUe.valUe.ELEMENT.FO", false),
+              ::testing::Optional(std::nullopt));
+}
+
+TEST_F(BasicShortNameTest, TestFindByShortNameCaseInsensitive) {
+  ASSERT_THAT(schema_->FindFieldByName("vaLue.value.FOO", false),
+              ::testing::Optional(*field1_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.Bar", false),
+              ::testing::Optional(*field2_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR", false),
+              ::testing::Optional(*field3_));
+  ASSERT_THAT(schema_->FindFieldByName("Value.value.FooBAR.a", false),
+              ::testing::Optional(std::nullopt));
+}
+
+class ComplexShortNameTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    field1_ = std::make_unique<iceberg::SchemaField>(1, "Foo", 
iceberg::int32(), true);
+    field2_ = std::make_unique<iceberg::SchemaField>(2, "Bar", 
iceberg::string(), true);
+    field3_ = std::make_unique<iceberg::SchemaField>(3, "Foobar", 
iceberg::int32(), true);
+
+    auto structtype = iceberg::StructType({*field1_, *field2_, *field3_});
+
+    field4_ = std::make_unique<iceberg::SchemaField>(
+        4, "element", std::make_shared<iceberg::StructType>(structtype), 
false);
+
+    auto listype = iceberg::ListType(*field4_);
+
+    auto structtype2 = iceberg::StructType(
+        {iceberg::SchemaField::MakeRequired(5, "First_child", 
iceberg::int32()),
+         iceberg::SchemaField::MakeRequired(
+             6, "Second_child", 
std::make_shared<iceberg::ListType>(listype))});
+
+    auto maptype = iceberg::MapType(
+        iceberg::SchemaField::MakeRequired(7, "key", iceberg::int32()),
+        iceberg::SchemaField::MakeRequired(
+            8, "value", std::make_shared<iceberg::StructType>(structtype2)));
+
+    field5_ =
+        std::make_unique<iceberg::SchemaField>(5, "First_child", 
iceberg::int32(), false);
+    field6_ = std::make_unique<iceberg::SchemaField>(
+        6, "Second_child", std::make_shared<iceberg::ListType>(listype), 
false);
+    field7_ = std::make_unique<iceberg::SchemaField>(7, "key", 
iceberg::int32(), false);
+    field8_ = std::make_unique<iceberg::SchemaField>(
+        8, "value", std::make_shared<iceberg::StructType>(structtype2), false);
+    field9_ = std::make_unique<iceberg::SchemaField>(
+        9, "Map", std::make_shared<iceberg::MapType>(maptype), false);
+
+    schema_ =
+        
std::make_shared<iceberg::Schema>(std::vector<iceberg::SchemaField>{*field9_}, 
1);
+  }
+
+  std::shared_ptr<iceberg::Schema> schema_;

Review Comment:
   ditto



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to