ueshin commented on code in PR #41603:
URL: https://github.com/apache/spark/pull/41603#discussion_r1231539888


##########
python/pyspark/sql/pandas/types.py:
##########
@@ -578,29 +578,44 @@ def _converter(
         if isinstance(dt, ArrayType):
             _element_conv = _converter(dt.elementType, _struct_in_pandas, 
_ndarray_as_list)
 
-            def convert_array(value: Any) -> Any:
-                if value is None:
-                    return None
-                elif isinstance(value, np.ndarray):
-                    element_list = (
-                        [_element_conv(v) for v in value]
-                        if _element_conv is not None
-                        else [v for v in value]
-                    )
-
-                    if _ndarray_as_list:
+            if _ndarray_as_list:
+
+                def convert_array_ndarray_as_list(value: Any) -> Any:
+                    if value is None:
+                        return None
+                    elif isinstance(value, np.ndarray):
                         # In Arrow Python UDF, ArrayType is converted to 
`np.ndarray`
                         # whereas a list is expected.
-                        return element_list
+                        return (
+                            [_element_conv(v) for v in value]
+                            if _element_conv is not None
+                            else [v for v in value]
+                        )
                     else:
+                        assert isinstance(value, list)
+                        # otherwise, `list` should be used.
+                        return [_element_conv(v) for v in value]  # type: 
ignore[misc]
+
+                return convert_array_ndarray_as_list
+            else:
+
+                def convert_array_ndarray_as_ndarray(value: Any) -> Any:

Review Comment:
   In this branch (when `_ndarray_as_list` is false), can `_converter` simply return `None` if `_element_conv is None`, as the previous code did?



##########
python/pyspark/sql/pandas/types.py:
##########
@@ -578,29 +578,44 @@ def _converter(
         if isinstance(dt, ArrayType):
             _element_conv = _converter(dt.elementType, _struct_in_pandas, 
_ndarray_as_list)
 
-            def convert_array(value: Any) -> Any:
-                if value is None:
-                    return None
-                elif isinstance(value, np.ndarray):
-                    element_list = (
-                        [_element_conv(v) for v in value]
-                        if _element_conv is not None
-                        else [v for v in value]
-                    )
-
-                    if _ndarray_as_list:
+            if _ndarray_as_list:
+
+                def convert_array_ndarray_as_list(value: Any) -> Any:
+                    if value is None:
+                        return None
+                    elif isinstance(value, np.ndarray):
                         # In Arrow Python UDF, ArrayType is converted to 
`np.ndarray`
                         # whereas a list is expected.
-                        return element_list
+                        return (
+                            [_element_conv(v) for v in value]
+                            if _element_conv is not None

Review Comment:
   In this case, could we set `_element_conv = _element_conv or (lambda x: x)` 
outside of the function, so the inner function no longer needs the `None` check?



##########
python/pyspark/sql/pandas/types.py:
##########
@@ -569,30 +572,56 @@ def correct_dtype(pser: pd.Series) -> pd.Series:
         return correct_dtype
 
     def _converter(
-        dt: DataType, _struct_in_pandas: Optional[str]
+        dt: DataType, _struct_in_pandas: Optional[str], _ndarray_as_list: bool
     ) -> Optional[Callable[[Any], Any]]:
 
         if isinstance(dt, ArrayType):
-            _element_conv = _converter(dt.elementType, _struct_in_pandas)
-            if _element_conv is None:
-                return None
+            _element_conv = _converter(dt.elementType, _struct_in_pandas, 
_ndarray_as_list)
 
-            def convert_array(value: Any) -> Any:
-                if value is None:
-                    return None
-                elif isinstance(value, np.ndarray):
-                    # `pyarrow.Table.to_pandas` uses `np.ndarray`.
-                    return np.array([_element_conv(v) for v in value])  # 
type: ignore[misc]
-                else:
-                    assert isinstance(value, list)
-                    # otherwise, `list` should be used.
-                    return [_element_conv(v) for v in value]  # type: 
ignore[misc]
+            if _ndarray_as_list:
 
-            return convert_array
+                def convert_array_ndarray_as_list(value: Any) -> Any:
+                    if value is None:
+                        return None
+                    elif isinstance(value, np.ndarray):

Review Comment:
   Do we still need this `isinstance(value, np.ndarray)` check?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to