bkietz commented on a change in pull request #7026:
URL: https://github.com/apache/arrow/pull/7026#discussion_r416750345



##########
File path: python/pyarrow/_dataset.pyx
##########
@@ -41,6 +42,167 @@ def _forbid_instantiation(klass, subclasses_instead=True):
     raise TypeError(msg)
 
 
+cdef class Expression:
+
+    cdef:
+        shared_ptr[CExpression] wrapped
+        CExpression* expr
+
+    def __init__(self, Buffer buffer=None):
+        if buffer is not None:
+            c_buffer = pyarrow_unwrap_buffer(buffer)
+            expr = GetResultValue(CExpression.Deserialize(deref(c_buffer)))
+            self.init(expr)
+
+    cdef void init(self, const shared_ptr[CExpression]& sp):
+        self.wrapped = sp
+        self.expr = sp.get()
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CExpression]& sp):
+        self = Expression()
+        self.init(sp)
+        return self
+
+    cdef inline shared_ptr[CExpression] unwrap(self):
+        return self.wrapped
+
+    def equals(self, Expression other):
+        return self.expr.Equals(other.unwrap())
+
+    def __str__(self):
+        return frombytes(self.expr.ToString())
+
+    def __repr__(self):
+        return "<pyarrow.dataset.{0} {1}>".format(
+            self.__class__.__name__, str(self)
+        )
+
+    def __reduce__(self):
+        buffer = pyarrow_wrap_buffer(GetResultValue(self.expr.Serialize()))
+        return Expression, (buffer,)
+
+    def validate(self, Schema schema not None):
+        """Validate this expression for execution against a schema.
+
+        This will check that all reference fields are present (fields not in
+        the schema will be replaced with null) and all subexpressions are
+        executable. Returns the type to which this expression will evaluate.
+
+        Parameters
+        ----------
+        schema : Schema
+            Schema to execute the expression on.
+
+        Returns
+        -------
+        type : DataType
+        """
+        cdef:
+            shared_ptr[CSchema] sp_schema
+            CResult[shared_ptr[CDataType]] result
+        sp_schema = pyarrow_unwrap_schema(schema)
+        result = self.expr.Validate(deref(sp_schema))
+        return pyarrow_wrap_data_type(GetResultValue(result))
+
+    def assume(self, Expression given):
+        """Simplify to an equivalent Expression given assumed constraints."""
+        return Expression.wrap(self.expr.Assume(given.unwrap()))
+
+    def __invert__(self):
+        return Expression.wrap(CMakeNotExpression(self.unwrap()))
+
+    @staticmethod
+    cdef shared_ptr[CExpression] _expr_or_scalar(object expr) except *:
+        if isinstance(expr, Expression):
+            return (<Expression> expr).unwrap()
+        return (<Expression> Expression.scalar(expr)).unwrap()
+
+    @staticmethod
+    def wtf():
+        return Expression.wrap(Expression._expr_or_scalar([]))
+
+    def __richcmp__(self, other, int op):
+        cdef:
+            shared_ptr[CExpression] c_expr
+            shared_ptr[CExpression] c_left
+            shared_ptr[CExpression] c_right
+
+        c_left = self.unwrap()
+        c_right = Expression._expr_or_scalar(other)
+
+        if op == Py_EQ:
+            c_expr = CMakeEqualExpression(move(c_left), move(c_right))
+        elif op == Py_NE:
+            c_expr = CMakeNotEqualExpression(move(c_left), move(c_right))
+        elif op == Py_GT:
+            c_expr = CMakeGreaterExpression(move(c_left), move(c_right))
+        elif op == Py_GE:
+            c_expr = CMakeGreaterEqualExpression(move(c_left), move(c_right))
+        elif op == Py_LT:
+            c_expr = CMakeLessExpression(move(c_left), move(c_right))
+        elif op == Py_LE:
+            c_expr = CMakeLessEqualExpression(move(c_left), move(c_right))
+
+        return Expression.wrap(c_expr)
+
+    def __and__(Expression self, other):
+        c_other = Expression._expr_or_scalar(other)
+        return Expression.wrap(CMakeAndExpression(self.wrapped,
+                                                  move(c_other)))
+
+    def __or__(Expression self, other):
+        c_other = Expression._expr_or_scalar(other)
+        return Expression.wrap(CMakeOrExpression(self.wrapped,
+                                                 move(c_other)))
+
+    def is_valid(self):
+        """Checks whether the expression is not-null (valid)"""
+        return Expression.wrap(self.expr.IsValid().Copy())
+
+    def cast(self, type, bint safe=True):
+        """Explicitly change the expression's data type"""
+        cdef CastOptions options
+        options = CastOptions.safe() if safe else CastOptions.unsafe()
+        c_type = pyarrow_unwrap_data_type(ensure_type(type))
+        return Expression.wrap(self.expr.CastTo(c_type,
+                                                options.unwrap()).Copy())
+
+    def isin(self, values):
+        """Checks whether the expression is contained in values"""
+        if not isinstance(values, pa.Array):
+            values = pa.array(values)
+        c_values = pyarrow_unwrap_array(values)
+        return Expression.wrap(self.expr.In(c_values).Copy())
+
+    @staticmethod
+    def field(str name not None):
+        return Expression.wrap(CMakeFieldExpression(tobytes(name)))
+
+    @staticmethod
+    def scalar(value):

Review comment:
       will do




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to