wgtmac commented on code in PR #177: URL: https://github.com/apache/iceberg-cpp/pull/177#discussion_r2281354030
########## src/iceberg/expression/term.h: ########## @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/expression/term.h +/// Term interface for Iceberg expressions - represents values that can be evaluated. + +#include <memory> +#include <string> +#include <string_view> + +#include "iceberg/arrow_c_data.h" +#include "iceberg/expression/literal.h" +#include "iceberg/iceberg_export.h" +#include "iceberg/result.h" +#include "iceberg/type_fwd.h" + +namespace iceberg { + +// TODO(gangwu): add a struct-like interface to wrap a row of data from ArrowArray or +// structs like ManifestFile and ManifestEntry to facilitate generalization of the +// evaluation of expressions on top of different data structures. +class StructLike; + +/// \brief A term is an expression node that produces a typed value when evaluated. +class ICEBERG_EXPORT Term { + public: + virtual ~Term() = default; + + /// \brief Returns a string representation of this term. + virtual std::string ToString() const = 0; +}; + +/// \brief Interface for unbound expressions that need schema binding. 
+/// +/// Unbound expressions contain string-based references that must be resolved +/// against a concrete schema to produce bound expressions that can be evaluated. +/// +/// \tparam B The bound type this term produces when binding is successful +template <typename B> +class ICEBERG_EXPORT Unbound { + public: + virtual ~Unbound() = default; + + /// \brief Bind this expression to a concrete schema. + /// + /// \param schema The schema to bind against + /// \param case_sensitive Whether field name matching should be case sensitive + /// \return A bound expression or an error if binding fails + virtual Result<std::unique_ptr<B>> Bind(const Schema& schema, + bool case_sensitive) const = 0; + + /// \brief Overloaded Bind method that uses case-sensitive matching by default. + Result<std::unique_ptr<B>> Bind(const Schema& schema) const; + + /// \brief Returns the underlying named reference for this unbound term. + virtual std::shared_ptr<class NamedReference> reference() = 0; +}; + +/// \brief Interface for bound expressions that can be evaluated. +/// +/// Bound expressions have been resolved against a concrete schema and contain +/// all necessary information to evaluate against data structures. +class ICEBERG_EXPORT Bound { + public: + virtual ~Bound() = default; + + /// \brief Evaluate this expression against row-based data. + virtual Result<Literal::Value> Evaluate(const StructLike& data) const = 0; + + /// \brief Evaluate this expression against an Arrow array. + virtual Result<std::vector<Literal::Value>> Evaluate(const ArrowArray& data) const = 0; + + /// \brief Returns the underlying bound reference for this term. + virtual std::shared_ptr<class BoundReference> reference() = 0; +}; + +/// \brief Base class for unbound terms. +/// +/// \tparam B The bound type this term produces when binding is successful. 
+template <typename B> +class ICEBERG_EXPORT UnboundTerm : public Unbound<B>, public Term { + public: + using BoundType = B; + + ~UnboundTerm() override = default; +}; + +/// \brief Base class for bound terms. +class ICEBERG_EXPORT BoundTerm : public Bound, public Term { + public: + ~BoundTerm() override = default; + + /// \brief Returns the type produced by this term. + virtual const std::shared_ptr<Type>& type() const = 0; + + /// \brief Returns whether this term may produce null values. + virtual bool MayProduceNull() const = 0; + + // TODO(gangwu): add a comparator function to Literal and BoundTerm. + + /// \brief Returns whether this term is equivalent to another. + /// + /// Two terms are equivalent if they produce the same values when evaluated. + /// + /// \param other Another bound term to compare against + /// \return true if the terms are equivalent, false otherwise + virtual bool Equals(const BoundTerm& other) const = 0; +}; + +/// \brief A reference represents a named field in an expression. +class ICEBERG_EXPORT Reference { + public: + virtual ~Reference() = default; + + /// \brief Returns the name of the referenced field. + virtual std::string_view name() const = 0; +}; + +/// \brief A reference to an unbound named field. +class ICEBERG_EXPORT NamedReference + : public Reference, + public UnboundTerm<BoundReference>, + public std::enable_shared_from_this<NamedReference> { + public: + /// \brief Create a named reference to a field. 
+ /// + /// \param field_name The name of the field to reference + explicit NamedReference(std::string field_name); + + ~NamedReference() override = default; + + std::string_view name() const override { return field_name_; } + + Result<std::unique_ptr<BoundReference>> Bind(const Schema& schema, + bool case_sensitive) const override; + + std::shared_ptr<NamedReference> reference() override { return shared_from_this(); } + + std::string ToString() const override; + + private: + std::string field_name_; +}; + +/// \brief A reference to a bound field. +class ICEBERG_EXPORT BoundReference + : public Reference, + public BoundTerm, + public std::enable_shared_from_this<BoundReference> { + public: + /// \brief Create a bound reference. + /// + /// \param field The schema field + explicit BoundReference(std::shared_ptr<SchemaField> field); + + ~BoundReference() override = default; + + const SchemaField& field() const { return *field_; } + + std::string_view name() const override { return field_->name(); } + + std::string ToString() const override; + + Result<Literal::Value> Evaluate(const StructLike& data) const override; + + Result<std::vector<Literal::Value>> Evaluate(const ArrowArray& data) const override; Review Comment: Building an `ArrowArray` is way too complicated compared with a Literal. @gty404 has explored this already when adding the transform function. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
