wgtmac commented on code in PR #207:
URL: https://github.com/apache/iceberg-cpp/pull/207#discussion_r2335192080


##########
src/iceberg/schema.cc:
##########
@@ -257,4 +258,242 @@ void NameToIdVisitor::Finish() {
   }
 }
 
+/// \brief Visitor class for pruning schema columns based on selected field 
IDs.
+///
+/// This visitor traverses a schema and creates a projected version containing 
only
+/// the specified fields. It handles different projection modes:
+/// - select_full_types=true: Include entire fields when their ID is selected
+/// - select_full_types=false: Recursively project nested fields within 
selected structs
+///
+/// \warning Error conditions that will cause projection to fail:
+/// - Project or Select a Map with just key or value (returns InvalidArgument)
+class PruneColumnVisitor {
+ public:
+  PruneColumnVisitor(const std::unordered_set<int32_t>& selected_ids,

Review Comment:
   ```cpp
   class PruneColumnVisitor {
    public:
     PruneColumnVisitor(const std::unordered_set<int32_t>& selected_ids,
                        bool select_full_types)
         : selected_ids_(selected_ids), select_full_types_(select_full_types) {}
   
     Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<Type>& type) 
const {
       switch (type->type_id()) {
         case TypeId::kStruct:
           return Visit(internal::checked_pointer_cast<StructType>(type));
         case TypeId::kList:
           return Visit(internal::checked_pointer_cast<ListType>(type));
         case TypeId::kMap:
           return Visit(internal::checked_pointer_cast<MapType>(type));
         default:
           return nullptr;
       }
     }
   
     Result<std::shared_ptr<Type>> Visit(const SchemaField& field) const {
       if (selected_ids_.contains(field.field_id())) {
         return (select_full_types_ || field.type()->is_primitive()) ? 
field.type()
                                                                     : 
Visit(field.type());
       }
       return Visit(field.type());
     }
   
     static SchemaField MakeField(const SchemaField& field, 
std::shared_ptr<Type> type) {
       return {field.field_id(), std::string(field.name()), std::move(type),
               field.optional(), std::string(field.doc())};
     }
   
     Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<StructType>& 
type) const {
       std::vector<SchemaField> selected_fields;
       bool same_types = true;
       for (const auto& field : type->fields()) {
         ICEBERG_ASSIGN_OR_RAISE(auto child_type, Visit(field.type()));
         if (child_type != nullptr) {
           same_types = same_types && (child_type == field.type());
           selected_fields.push_back(MakeField(field, std::move(child_type)));
         }
       }
       if (selected_fields.empty()) {
         return nullptr;
       } else if (same_types && selected_fields.size() == 
type->fields().size()) {
         return type;
       }
       return std::make_shared<StructType>(std::move(selected_fields));
     }
   
     Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<ListType>& type) 
const {
       const auto& elem_field = type->fields()[0];
       ICEBERG_ASSIGN_OR_RAISE(auto elem_type, Visit(elem_field));
       if (elem_type == nullptr) {
         return nullptr;
       } else if (elem_type == elem_field.type()) {
         return type;
       }
       return std::make_shared<ListType>(MakeField(elem_field, 
std::move(elem_type)));
     }
   
     Result<std::shared_ptr<Type>> Visit(const std::shared_ptr<MapType>& type) 
const {
       const auto& key_field = type->fields()[0];
       const auto& value_field = type->fields()[1];
       ICEBERG_ASSIGN_OR_RAISE(auto key_type, Visit(key_field.type()));
       ICEBERG_ASSIGN_OR_RAISE(auto value_type, Visit(value_field.type()));
   
       if (key_type == nullptr && value_type == nullptr) {
         return nullptr;
       } else if (value_type == value_field.type() &&
                  (key_type == key_field.type() || key_type == nullptr)) {
         return type;
       } else if (value_type == nullptr) {
         return InvalidArgument("Cannot project Map without value field");
       }
       return std::make_shared<MapType>(
           (key_type == nullptr ? key_field : MakeField(key_field, 
std::move(key_type))),
           MakeField(value_field, std::move(value_type)));
     }
   
    private:
     const std::unordered_set<int32_t>& selected_ids_;
     const bool select_full_types_;
   };
   ```
   
   I think we need `Result<std::shared_ptr<Type>> Visit(const SchemaField& 
field) const` to make it look simpler by eliminating duplicate codes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to