This is an automated email from the ASF dual-hosted git repository.
jayzhan pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 89aea0ad40 Support LargeList for ListIndex (#9424)
89aea0ad40 is described below
commit 89aea0ad403ccde70f2a0792f68a4ab703b62900
Author: Chojan Shang <[email protected]>
AuthorDate: Sun Mar 3 08:37:33 2024 +0800
Support LargeList for ListIndex (#9424)
Signed-off-by: Chojan Shang <[email protected]>
---
datafusion/expr/src/field_util.rs | 9 +++++----
datafusion/physical-expr/src/expressions/get_indexed_field.rs | 8 ++++----
datafusion/sqllogictest/test_files/array.slt | 5 +++++
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/datafusion/expr/src/field_util.rs b/datafusion/expr/src/field_util.rs
index 8039a211c9..f0ce61ee9b 100644
--- a/datafusion/expr/src/field_util.rs
+++ b/datafusion/expr/src/field_util.rs
@@ -78,10 +78,11 @@ impl GetFieldAccessSchema {
Self::ListIndex{ key_dt } => {
match (data_type, key_dt) {
(DataType::List(lt), DataType::Int64) =>
Ok(Field::new("list", lt.data_type().clone(), true)),
- (DataType::List(_), _) => plan_err!(
- "Only ints are valid as an indexed field in a list"
+ (DataType::LargeList(lt), DataType::Int64) =>
Ok(Field::new("large_list", lt.data_type().clone(), true)),
+ (DataType::List(_), _) | (DataType::LargeList(_), _) =>
plan_err!(
+ "Only ints are valid as an indexed field in a List/LargeList"
),
- (other, _) => plan_err!("The expression to get an indexed field is only valid for `List` or `Struct` types, got {other}"),
+ (other, _) => plan_err!("The expression to get an indexed field is only valid for `List`, `LargeList` or `Struct` types, got {other}"),
}
}
Self::ListRange { start_dt, stop_dt, stride_dt } => {
@@ -89,7 +90,7 @@ impl GetFieldAccessSchema {
(DataType::List(_), DataType::Int64, DataType::Int64,
DataType::Int64) => Ok(Field::new("list", data_type.clone(), true)),
(DataType::LargeList(_), DataType::Int64, DataType::Int64,
DataType::Int64) => Ok(Field::new("large_list", data_type.clone(), true)),
(DataType::List(_), _, _, _) | (DataType::LargeList(_), _,
_, _)=> plan_err!(
- "Only ints are valid as an indexed field in a list"
+ "Only ints are valid as an indexed field in a List/LargeList"
),
(other, _, _, _) => plan_err!("The expression to get an indexed field is only valid for `List`, `LargeList` or `Struct` types, got {other}"),
}
diff --git a/datafusion/physical-expr/src/expressions/get_indexed_field.rs b/datafusion/physical-expr/src/expressions/get_indexed_field.rs
index 773387bf74..c93090c494 100644
--- a/datafusion/physical-expr/src/expressions/get_indexed_field.rs
+++ b/datafusion/physical-expr/src/expressions/get_indexed_field.rs
@@ -252,14 +252,14 @@ impl PhysicalExpr for GetIndexedFieldExpr {
GetFieldAccessExpr::ListIndex{key} => {
let key = key.evaluate(batch)?.into_array(batch.num_rows())?;
match (array.data_type(), key.data_type()) {
- (DataType::List(_), DataType::Int64) =>
Ok(ColumnarValue::Array(array_element(&[
+ (DataType::List(_), DataType::Int64) |
(DataType::LargeList(_), DataType::Int64) =>
Ok(ColumnarValue::Array(array_element(&[
array, key
])?)),
- (DataType::List(_), key) => exec_err!(
- "get indexed field is only possible on lists with int64 indexes. \
+ (DataType::List(_), key) | (DataType::LargeList(_), key) =>
exec_err!(
+ "get indexed field is only possible on List/LargeList with int64 indexes. \
Tried with {key:?} index"),
(dt, key) => exec_err!(
- "get indexed field is only possible on lists with int64 indexes or struct \
+ "get indexed field is only possible on List/LargeList with int64 indexes or struct \
with utf8 indexes. Tried {dt:?} with {key:?} index"),
}
},
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 4e6cb4d59d..5065d9b9a7 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -883,6 +883,11 @@ select arrow_cast([1, 2, 3], 'LargeList(Int64)')[0:0],
----
[] [1, 2] [h, e, l, l, o]
+query I
+select arrow_cast([1, 2, 3], 'LargeList(Int64)')[1];
+----
+1
+
# TODO: support multiple negative index
# multiple index with columns #3 (negative index)
# query II