goldmedal commented on code in PR #11712:
URL: https://github.com/apache/datafusion/pull/11712#discussion_r1702612042


##########
datafusion/functions-nested/src/map.rs:
##########
@@ -161,7 +174,19 @@ impl ScalarUDFImpl for MapFunc {
         &self.signature
     }
 
-    fn return_type(&self, arg_types: &[DataType]) -> 
datafusion_common::Result<DataType> {
+    fn return_type(
+        &self,
+        _arg_types: &[DataType],
+    ) -> datafusion_common::Result<DataType> {
+        internal_err!("map: return_type called instead of 
return_type_from_exprs")
+    }
+
+    fn return_type_from_exprs(
+        &self,
+        _args: &[Expr],
+        _schema: &dyn ExprSchema,
+        arg_types: &[DataType],
+    ) -> datafusion_common::Result<DataType> {

Review Comment:
   Curiously, why are you using `return_type_from_exprs` instead? It seems that 
neither `args` nor `schema` will be used. I guess we can just use `return_type`.



##########
datafusion/functions-nested/src/map.rs:
##########
@@ -202,3 +226,128 @@ fn get_element_type(data_type: &DataType) -> 
datafusion_common::Result<&DataType
         ),
     }
 }
+
+/// Helper function to create MapArray from array of values to support arrays 
for Map scalar function
+///
+/// ``` text
+/// Format of input KEYS and VALUES column
+///         keys                        values
+/// +---------------------+       +---------------------+
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k11, k12, k13] | |       | | [v11, v12, v13] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k21, k22, k23] | |       | | [v21, v22, v23] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | |[k31, k32, k33]  | |       | |[v31, v32, v33]  | |
+/// | +-----------------+ |       | +-----------------+ |
+/// +---------------------+       +---------------------+
+/// ```
+/// Flattened keys and values array to user create `StructArray`,
+/// which serves as inner child for `MapArray`
+///
+/// ``` text
+/// Flattened           Flattened
+/// Keys                Values
+/// +-----------+      +-----------+
+/// | +-------+ |      | +-------+ |
+/// | |  k11  | |      | |  v11  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k12  | |      | |  v12  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k13  | |      | |  v13  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k21  | |      | |  v21  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k22  | |      | |  v22  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k23  | |      | |  v23  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k31  | |      | |  v31  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k32  | |      | |  v32  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k33  | |      | |  v33  | |
+/// | +-------+ |      | +-------+ |
+/// +-----------+      +-----------+
+/// ```text
+
+fn make_map_array_internal<O: OffsetSizeTrait>(
+    keys: ArrayRef,
+    values: ArrayRef,
+) -> datafusion_common::Result<ColumnarValue> {
+    let mut offset_buffer = vec![O::usize_as(0)];
+    let mut running_offset = O::usize_as(0);
+
+    let keys = collect_array_ref::<O>(keys);
+    let values = collect_array_ref::<O>(values);
+
+    let mut key_array_vec = vec![];
+    let mut value_array_vec = vec![];
+    for (k, v) in keys.iter().zip(values.iter()) {
+        running_offset = running_offset.add(O::usize_as(k.len()));
+        offset_buffer.push(running_offset);
+        key_array_vec.push(k.as_ref());
+        value_array_vec.push(v.as_ref());
+    }
+
+    // concatenate all the arrays
+    let flattened_keys = 
arrow::compute::concat(key_array_vec.as_ref()).unwrap();
+    let flattened_values = 
arrow::compute::concat(value_array_vec.as_ref()).unwrap();
+
+    let fields = vec![
+        Arc::new(Field::new("key", flattened_keys.data_type().clone(), false)),
+        Arc::new(Field::new(
+            "value",
+            flattened_values.data_type().clone(),
+            true,
+        )),
+    ];
+
+    let struct_data = ArrayData::builder(DataType::Struct(fields.into()))
+        .len(flattened_keys.len())
+        .add_child_data(flattened_keys.to_data())
+        .add_child_data(flattened_values.to_data())
+        .build()
+        .unwrap();
+
+    let map_data = ArrayData::builder(DataType::Map(
+        Arc::new(Field::new(
+            "entries",
+            struct_data.data_type().clone(),
+            false,
+        )),
+        false,
+    ))
+    .len(keys.len())
+    .add_child_data(struct_data)
+    .add_buffer(Buffer::from_slice_ref(offset_buffer.as_slice()))
+    .build()
+    .unwrap();

Review Comment:
   ```suggestion
       .build()?;
   ```
   Instead of panic here, we can use `?` to return the error.



##########
datafusion/functions-nested/src/map.rs:
##########
@@ -202,3 +226,128 @@ fn get_element_type(data_type: &DataType) -> 
datafusion_common::Result<&DataType
         ),
     }
 }
+
+/// Helper function to create MapArray from array of values to support arrays 
for Map scalar function
+///
+/// ``` text
+/// Format of input KEYS and VALUES column
+///         keys                        values
+/// +---------------------+       +---------------------+
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k11, k12, k13] | |       | | [v11, v12, v13] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k21, k22, k23] | |       | | [v21, v22, v23] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | |[k31, k32, k33]  | |       | |[v31, v32, v33]  | |
+/// | +-----------------+ |       | +-----------------+ |
+/// +---------------------+       +---------------------+
+/// ```
+/// Flattened keys and values array to user create `StructArray`,
+/// which serves as inner child for `MapArray`
+///
+/// ``` text
+/// Flattened           Flattened
+/// Keys                Values
+/// +-----------+      +-----------+
+/// | +-------+ |      | +-------+ |
+/// | |  k11  | |      | |  v11  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k12  | |      | |  v12  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k13  | |      | |  v13  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k21  | |      | |  v21  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k22  | |      | |  v22  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k23  | |      | |  v23  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k31  | |      | |  v31  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k32  | |      | |  v32  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k33  | |      | |  v33  | |
+/// | +-------+ |      | +-------+ |
+/// +-----------+      +-----------+
+/// ```text
+
+fn make_map_array_internal<O: OffsetSizeTrait>(
+    keys: ArrayRef,
+    values: ArrayRef,
+) -> datafusion_common::Result<ColumnarValue> {
+    let mut offset_buffer = vec![O::usize_as(0)];
+    let mut running_offset = O::usize_as(0);
+
+    let keys = collect_array_ref::<O>(keys);
+    let values = collect_array_ref::<O>(values);
+
+    let mut key_array_vec = vec![];
+    let mut value_array_vec = vec![];
+    for (k, v) in keys.iter().zip(values.iter()) {
+        running_offset = running_offset.add(O::usize_as(k.len()));
+        offset_buffer.push(running_offset);
+        key_array_vec.push(k.as_ref());
+        value_array_vec.push(v.as_ref());
+    }
+
+    // concatenate all the arrays
+    let flattened_keys = 
arrow::compute::concat(key_array_vec.as_ref()).unwrap();
+    let flattened_values = 
arrow::compute::concat(value_array_vec.as_ref()).unwrap();
+
+    let fields = vec![
+        Arc::new(Field::new("key", flattened_keys.data_type().clone(), false)),
+        Arc::new(Field::new(
+            "value",
+            flattened_values.data_type().clone(),
+            true,
+        )),
+    ];
+
+    let struct_data = ArrayData::builder(DataType::Struct(fields.into()))
+        .len(flattened_keys.len())
+        .add_child_data(flattened_keys.to_data())
+        .add_child_data(flattened_values.to_data())
+        .build()
+        .unwrap();

Review Comment:
   ```suggestion
           .build()?;
   ```
   Same as above.



##########
datafusion/functions-nested/src/map.rs:
##########
@@ -202,3 +226,128 @@ fn get_element_type(data_type: &DataType) -> 
datafusion_common::Result<&DataType
         ),
     }
 }
+
+/// Helper function to create MapArray from array of values to support arrays 
for Map scalar function
+///
+/// ``` text
+/// Format of input KEYS and VALUES column
+///         keys                        values
+/// +---------------------+       +---------------------+
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k11, k12, k13] | |       | | [v11, v12, v13] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | | [k21, k22, k23] | |       | | [v21, v22, v23] | |
+/// | +-----------------+ |       | +-----------------+ |
+/// |                     |       |                     |
+/// | +-----------------+ |       | +-----------------+ |
+/// | |[k31, k32, k33]  | |       | |[v31, v32, v33]  | |
+/// | +-----------------+ |       | +-----------------+ |
+/// +---------------------+       +---------------------+
+/// ```
+/// Flattened keys and values array to user create `StructArray`,
+/// which serves as inner child for `MapArray`
+///
+/// ``` text
+/// Flattened           Flattened
+/// Keys                Values
+/// +-----------+      +-----------+
+/// | +-------+ |      | +-------+ |
+/// | |  k11  | |      | |  v11  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k12  | |      | |  v12  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k13  | |      | |  v13  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k21  | |      | |  v21  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k22  | |      | |  v22  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k23  | |      | |  v23  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k31  | |      | |  v31  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k32  | |      | |  v32  | |
+/// | +-------+ |      | +-------+ |
+/// | +-------+ |      | +-------+ |
+/// | |  k33  | |      | |  v33  | |
+/// | +-------+ |      | +-------+ |
+/// +-----------+      +-----------+
+/// ```text
+
+fn make_map_array_internal<O: OffsetSizeTrait>(
+    keys: ArrayRef,
+    values: ArrayRef,
+) -> datafusion_common::Result<ColumnarValue> {
+    let mut offset_buffer = vec![O::usize_as(0)];
+    let mut running_offset = O::usize_as(0);
+
+    let keys = collect_array_ref::<O>(keys);
+    let values = collect_array_ref::<O>(values);
+
+    let mut key_array_vec = vec![];
+    let mut value_array_vec = vec![];
+    for (k, v) in keys.iter().zip(values.iter()) {
+        running_offset = running_offset.add(O::usize_as(k.len()));
+        offset_buffer.push(running_offset);
+        key_array_vec.push(k.as_ref());
+        value_array_vec.push(v.as_ref());
+    }
+
+    // concatenate all the arrays
+    let flattened_keys = 
arrow::compute::concat(key_array_vec.as_ref()).unwrap();
+    let flattened_values = 
arrow::compute::concat(value_array_vec.as_ref()).unwrap();

Review Comment:
   ```suggestion
       let flattened_keys = arrow::compute::concat(key_array_vec.as_ref())?;
       let flattened_values = arrow::compute::concat(value_array_vec.as_ref())?;
   ```
   Same as above.



##########
datafusion/sqllogictest/test_files/map.slt:
##########


Review Comment:
   I think we can add more complex type tests or negative tests for it. I tried 
a case `Utf8 Array as keys`as below but it failed.
   ```
   statement ok
   create table t as values
   ('a', 1, 'k1', 10, ['k1', 'k2'], [1, 2], [['a'], ['b']]),
   ('b', 2, 'k3', 30, ['k3'], [3], [['c']]),
   ('d', 4, 'k5', 50, ['k5'], [5], [['d']]);
   
   query ?
   SELECT map(column7, column6) FROM t;
   ----
   {[a]: 1, [b]: 2}
   {[c]: 3}
   {[d]: 5}
   ```
   
   The error message:
   ```
   External error: query failed: DataFusion error: Arrow error: Invalid 
argument error: column types must match schema types, expected Map(Field { 
name: "entries", data_type: Struct([Field { name: "key", data_type: List(Field 
{ name: "item", data_type: Utf8, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: {} }), nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: {} }, Field { name: "value", data_type: Int64, nullable: true, 
dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 
0, dict_is_ordered: false, metadata: {} }, false) but found Map(Field { name: 
"entries", data_type: Struct([Field { name: "key", data_type: Utf8, nullable: 
false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: 
"value", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, 
metadata: {} }, false) at column index 0
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to