This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 03e80dc1a7 ARROW-11341: [Python] [Gandiva] Add NULL/None checks to 
Gandiva builder functions (#9289)
03e80dc1a7 is described below

commit 03e80dc1a7e037658a0644d1c5ad1e49027b4ec5
Author: Will Jones <[email protected]>
AuthorDate: Wed Jul 13 10:07:52 2022 -0700

    ARROW-11341: [Python] [Gandiva] Add NULL/None checks to Gandiva builder 
functions (#9289)
    
    If these functions were passed None as an argument, they caused segfaults. 
As an example:
    
    ```python
    import pyarrow
    import pyarrow.gandiva as gandiva
    
    builder = gandiva.TreeExprBuilder()
    field = pyarrow.field('whatever', type=pyarrow.date64())
    date_col = builder.make_field(field)
    
    func = builder.make_function('less_than_or_equal_to', [date_col, None], 
pyarrow.bool_())
    
    condition = builder.make_condition(func)
    
    # Will segfault on this line:
    gandiva.make_filter(pyarrow.schema([field]), condition)
    ```
    
    Lead-authored-by: Will Jones <[email protected]>
    Co-authored-by: Will Jones <[email protected]>
    Signed-off-by: David Li <[email protected]>
---
 python/pyarrow/gandiva.pyx           | 41 +++++++++++++++++++++++-------------
 python/pyarrow/tests/test_gandiva.py | 41 ++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 15 deletions(-)

diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index dba83bb334..a69ac0657b 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -292,7 +292,8 @@ cdef class TreeExprBuilder(_Weakrefable):
 
         return Node.create(r)
 
-    def make_expression(self, Node root_node, Field return_field):
+    def make_expression(self, Node root_node not None,
+                        Field return_field not None):
         cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
             root_node.node, return_field.sp_field)
         cdef Expression expression = Expression()
@@ -303,17 +304,19 @@ cdef class TreeExprBuilder(_Weakrefable):
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
+            if child is None:
+                raise TypeError("Child nodes must not be None")
             c_children.push_back(child.node)
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeFunction(
             name.encode(), c_children, return_type.sp_type)
         return Node.create(r)
 
-    def make_field(self, Field field):
+    def make_field(self, Field field not None):
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
         return Node.create(r)
 
-    def make_if(self, Node condition, Node this_node,
-                Node else_node, DataType return_type):
+    def make_if(self, Node condition not None, Node this_node not None,
+                Node else_node not None, DataType return_type not None):
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
             condition.node, this_node.node, else_node.node,
             return_type.sp_type)
@@ -323,6 +326,8 @@ cdef class TreeExprBuilder(_Weakrefable):
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
+            if child is None:
+                raise TypeError("Child nodes must not be None")
             c_children.push_back(child.node)
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeAnd(c_children)
         return Node.create(r)
@@ -331,11 +336,13 @@ cdef class TreeExprBuilder(_Weakrefable):
         cdef c_vector[shared_ptr[CNode]] c_children
         cdef Node child
         for child in children:
+            if child is None:
+                raise TypeError("Child nodes must not be None")
             c_children.push_back(child.node)
         cdef shared_ptr[CNode] r = TreeExprBuilder_MakeOr(c_children)
         return Node.create(r)
 
-    def _make_in_expression_int32(self, Node node, values):
+    def _make_in_expression_int32(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int32_t] c_values
         cdef int32_t v
@@ -344,7 +351,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionInt32(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_int64(self, Node node, values):
+    def _make_in_expression_int64(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int64_t] c_values
         cdef int64_t v
@@ -353,7 +360,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionInt64(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_time32(self, Node node, values):
+    def _make_in_expression_time32(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int32_t] c_values
         cdef int32_t v
@@ -362,7 +369,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionTime32(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_time64(self, Node node, values):
+    def _make_in_expression_time64(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int64_t] c_values
         cdef int64_t v
@@ -371,7 +378,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionTime64(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_date32(self, Node node, values):
+    def _make_in_expression_date32(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int32_t] c_values
         cdef int32_t v
@@ -380,7 +387,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionDate32(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_date64(self, Node node, values):
+    def _make_in_expression_date64(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int64_t] c_values
         cdef int64_t v
@@ -389,7 +396,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionDate64(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_timestamp(self, Node node, values):
+    def _make_in_expression_timestamp(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[int64_t] c_values
         cdef int64_t v
@@ -398,7 +405,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionTimeStamp(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_binary(self, Node node, values):
+    def _make_in_expression_binary(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[c_string] c_values
         cdef c_string v
@@ -407,7 +414,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
         return Node.create(r)
 
-    def _make_in_expression_string(self, Node node, values):
+    def _make_in_expression_string(self, Node node not None, values):
         cdef shared_ptr[CNode] r
         cdef c_unordered_set[c_string] c_values
         cdef c_string _v
@@ -417,7 +424,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
         return Node.create(r)
 
-    def make_in_expression(self, Node node, values, dtype):
+    def make_in_expression(self, Node node not None, values, dtype):
         cdef DataType type = ensure_type(dtype)
 
         if type.id == _Type_INT32:
@@ -441,7 +448,7 @@ cdef class TreeExprBuilder(_Weakrefable):
         else:
             raise TypeError("Data type " + str(dtype) + " not supported.")
 
-    def make_condition(self, Node condition):
+    def make_condition(self, Node condition not None):
         cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
             condition.node)
         return Condition.create(r)
@@ -476,6 +483,8 @@ cpdef make_projector(Schema schema, children, MemoryPool 
pool,
         shared_ptr[CProjector] result
 
     for child in children:
+        if child is None:
+            raise TypeError("Expressions must not be None")
         c_children.push_back(child.expression)
 
     check_status(
@@ -505,6 +514,8 @@ cpdef make_filter(Schema schema, Condition condition):
     Filter instance
     """
     cdef shared_ptr[CFilter] result
+    if condition is None:
+        raise TypeError("Condition must not be None")
     check_status(
         Filter_Make(schema.sp_schema, condition.condition, &result))
     return Filter.create(result)
diff --git a/python/pyarrow/tests/test_gandiva.py 
b/python/pyarrow/tests/test_gandiva.py
index 6522c233a1..241cac4d83 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -389,3 +389,44 @@ def test_to_string():
     field_y = builder.make_field(pa.field('y', pa.bool_()))
     and_node = builder.make_and([func_node, field_y])
     assert str(and_node) == 'bool not((bool) z) && (bool) y'
+
+
[email protected]
+def test_rejects_none():
+    import pyarrow.gandiva as gandiva
+
+    builder = gandiva.TreeExprBuilder()
+
+    field_x = pa.field('x', pa.int32())
+    schema = pa.schema([field_x])
+    literal_true = builder.make_literal(True, pa.bool_())
+
+    with pytest.raises(TypeError):
+        builder.make_field(None)
+
+    with pytest.raises(TypeError):
+        builder.make_if(literal_true, None, None, None)
+
+    with pytest.raises(TypeError):
+        builder.make_and([literal_true, None])
+
+    with pytest.raises(TypeError):
+        builder.make_or([None, literal_true])
+
+    with pytest.raises(TypeError):
+        builder.make_in_expression(None, [1, 2, 3], pa.int32())
+
+    with pytest.raises(TypeError):
+        builder.make_expression(None, field_x)
+
+    with pytest.raises(TypeError):
+        builder.make_condition(None)
+
+    with pytest.raises(TypeError):
+        builder.make_function('less_than', [literal_true, None], pa.bool_())
+
+    with pytest.raises(TypeError):
+        gandiva.make_projector(schema, [None])
+
+    with pytest.raises(TypeError):
+        gandiva.make_filter(schema, None)

Reply via email to