This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 03e80dc1a7 ARROW-11341: [Python] [Gandiva] Add NULL/None checks to
Gandiva builder functions (#9289)
03e80dc1a7 is described below
commit 03e80dc1a7e037658a0644d1c5ad1e49027b4ec5
Author: Will Jones <[email protected]>
AuthorDate: Wed Jul 13 10:07:52 2022 -0700
ARROW-11341: [Python] [Gandiva] Add NULL/None checks to Gandiva builder
functions (#9289)
If these functions were passed None as an argument, they caused segfaults.
As an example:
```python
import pyarrow
import pyarrow.gandiva as gandiva
builder = gandiva.TreeExprBuilder()
field = pyarrow.field('whatever', type=pyarrow.date64())
date_col = builder.make_field(field)
func = builder.make_function('less_than_or_equal_to', [date_col, None],
pyarrow.bool_())
condition = builder.make_condition(func)
# Will segfault on this line:
gandiva.make_filter(pyarrow.schema([field]), condition)
```
Lead-authored-by: Will Jones <[email protected]>
Co-authored-by: Will Jones <[email protected]>
Signed-off-by: David Li <[email protected]>
---
python/pyarrow/gandiva.pyx | 41 +++++++++++++++++++++++-------------
python/pyarrow/tests/test_gandiva.py | 41 ++++++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/python/pyarrow/gandiva.pyx b/python/pyarrow/gandiva.pyx
index dba83bb334..a69ac0657b 100644
--- a/python/pyarrow/gandiva.pyx
+++ b/python/pyarrow/gandiva.pyx
@@ -292,7 +292,8 @@ cdef class TreeExprBuilder(_Weakrefable):
return Node.create(r)
- def make_expression(self, Node root_node, Field return_field):
+ def make_expression(self, Node root_node not None,
+ Field return_field not None):
cdef shared_ptr[CGandivaExpression] r = TreeExprBuilder_MakeExpression(
root_node.node, return_field.sp_field)
cdef Expression expression = Expression()
@@ -303,17 +304,19 @@ cdef class TreeExprBuilder(_Weakrefable):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
+ if child is None:
+ raise TypeError("Child nodes must not be None")
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeFunction(
name.encode(), c_children, return_type.sp_type)
return Node.create(r)
- def make_field(self, Field field):
+ def make_field(self, Field field not None):
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeField(field.sp_field)
return Node.create(r)
- def make_if(self, Node condition, Node this_node,
- Node else_node, DataType return_type):
+ def make_if(self, Node condition not None, Node this_node not None,
+ Node else_node not None, DataType return_type not None):
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeIf(
condition.node, this_node.node, else_node.node,
return_type.sp_type)
@@ -323,6 +326,8 @@ cdef class TreeExprBuilder(_Weakrefable):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
+ if child is None:
+ raise TypeError("Child nodes must not be None")
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeAnd(c_children)
return Node.create(r)
@@ -331,11 +336,13 @@ cdef class TreeExprBuilder(_Weakrefable):
cdef c_vector[shared_ptr[CNode]] c_children
cdef Node child
for child in children:
+ if child is None:
+ raise TypeError("Child nodes must not be None")
c_children.push_back(child.node)
cdef shared_ptr[CNode] r = TreeExprBuilder_MakeOr(c_children)
return Node.create(r)
- def _make_in_expression_int32(self, Node node, values):
+ def _make_in_expression_int32(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
@@ -344,7 +351,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionInt32(node.node, c_values)
return Node.create(r)
- def _make_in_expression_int64(self, Node node, values):
+ def _make_in_expression_int64(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
@@ -353,7 +360,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionInt64(node.node, c_values)
return Node.create(r)
- def _make_in_expression_time32(self, Node node, values):
+ def _make_in_expression_time32(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
@@ -362,7 +369,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionTime32(node.node, c_values)
return Node.create(r)
- def _make_in_expression_time64(self, Node node, values):
+ def _make_in_expression_time64(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
@@ -371,7 +378,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionTime64(node.node, c_values)
return Node.create(r)
- def _make_in_expression_date32(self, Node node, values):
+ def _make_in_expression_date32(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int32_t] c_values
cdef int32_t v
@@ -380,7 +387,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionDate32(node.node, c_values)
return Node.create(r)
- def _make_in_expression_date64(self, Node node, values):
+ def _make_in_expression_date64(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
@@ -389,7 +396,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionDate64(node.node, c_values)
return Node.create(r)
- def _make_in_expression_timestamp(self, Node node, values):
+ def _make_in_expression_timestamp(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[int64_t] c_values
cdef int64_t v
@@ -398,7 +405,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionTimeStamp(node.node, c_values)
return Node.create(r)
- def _make_in_expression_binary(self, Node node, values):
+ def _make_in_expression_binary(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[c_string] c_values
cdef c_string v
@@ -407,7 +414,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
return Node.create(r)
- def _make_in_expression_string(self, Node node, values):
+ def _make_in_expression_string(self, Node node not None, values):
cdef shared_ptr[CNode] r
cdef c_unordered_set[c_string] c_values
cdef c_string _v
@@ -417,7 +424,7 @@ cdef class TreeExprBuilder(_Weakrefable):
r = TreeExprBuilder_MakeInExpressionString(node.node, c_values)
return Node.create(r)
- def make_in_expression(self, Node node, values, dtype):
+ def make_in_expression(self, Node node not None, values, dtype):
cdef DataType type = ensure_type(dtype)
if type.id == _Type_INT32:
@@ -441,7 +448,7 @@ cdef class TreeExprBuilder(_Weakrefable):
else:
raise TypeError("Data type " + str(dtype) + " not supported.")
- def make_condition(self, Node condition):
+ def make_condition(self, Node condition not None):
cdef shared_ptr[CCondition] r = TreeExprBuilder_MakeCondition(
condition.node)
return Condition.create(r)
@@ -476,6 +483,8 @@ cpdef make_projector(Schema schema, children, MemoryPool
pool,
shared_ptr[CProjector] result
for child in children:
+ if child is None:
+ raise TypeError("Expressions must not be None")
c_children.push_back(child.expression)
check_status(
@@ -505,6 +514,8 @@ cpdef make_filter(Schema schema, Condition condition):
Filter instance
"""
cdef shared_ptr[CFilter] result
+ if condition is None:
+ raise TypeError("Condition must not be None")
check_status(
Filter_Make(schema.sp_schema, condition.condition, &result))
return Filter.create(result)
diff --git a/python/pyarrow/tests/test_gandiva.py
b/python/pyarrow/tests/test_gandiva.py
index 6522c233a1..241cac4d83 100644
--- a/python/pyarrow/tests/test_gandiva.py
+++ b/python/pyarrow/tests/test_gandiva.py
@@ -389,3 +389,44 @@ def test_to_string():
field_y = builder.make_field(pa.field('y', pa.bool_()))
and_node = builder.make_and([func_node, field_y])
assert str(and_node) == 'bool not((bool) z) && (bool) y'
+
+
[email protected]
+def test_rejects_none():
+ import pyarrow.gandiva as gandiva
+
+ builder = gandiva.TreeExprBuilder()
+
+ field_x = pa.field('x', pa.int32())
+ schema = pa.schema([field_x])
+ literal_true = builder.make_literal(True, pa.bool_())
+
+ with pytest.raises(TypeError):
+ builder.make_field(None)
+
+ with pytest.raises(TypeError):
+ builder.make_if(literal_true, None, None, None)
+
+ with pytest.raises(TypeError):
+ builder.make_and([literal_true, None])
+
+ with pytest.raises(TypeError):
+ builder.make_or([None, literal_true])
+
+ with pytest.raises(TypeError):
+ builder.make_in_expression(None, [1, 2, 3], pa.int32())
+
+ with pytest.raises(TypeError):
+ builder.make_expression(None, field_x)
+
+ with pytest.raises(TypeError):
+ builder.make_condition(None)
+
+ with pytest.raises(TypeError):
+ builder.make_function('less_than', [literal_true, None], pa.bool_())
+
+ with pytest.raises(TypeError):
+ gandiva.make_projector(schema, [None])
+
+ with pytest.raises(TypeError):
+ gandiva.make_filter(schema, None)