Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22807#discussion_r227784077
  
    --- Diff: python/pyspark/sql/tests.py ---
    @@ -4961,6 +4961,31 @@ def foofoo(x, y):
                 ).collect
             )
     
    +    def test_pandas_udf_detect_unsafe_type_conversion(self):
    +        from distutils.version import LooseVersion
    +        from pyspark.sql.functions import pandas_udf
    +        import pandas as pd
    +        import numpy as np
    +        import pyarrow as pa
    +
    +        values = [1.0] * 3
    +        pdf = pd.DataFrame({'A': values})
    +        df = self.spark.createDataFrame(pdf).repartition(1)
    +
    +        @pandas_udf(returnType="int")
    +        def udf(column):
    +            return pd.Series(np.linspace(0, 1, 3))
    +
    +        udf_boolean = df.select(['A']).withColumn('udf', udf('A'))
    +
    +        # Since 0.11.0, PyArrow supports the feature to raise an error for 
unsafe cast.
    +        if LooseVersion(pa.__version__) >= LooseVersion("0.11.0"):
    --- End diff --
    
    BTW, let's bump up the minimum required PyArrow and Pandas versions, if 
possible, in 3.0 :-)


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to