Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20567#discussion_r167713865

    --- Diff: python/pyspark/sql/dataframe.py ---
    @@ -1941,12 +1941,24 @@ def toPandas(self):
                 timezone = None

             if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", "false").lower() == "true":
    +            should_fall_back = False
                 try:
    -                from pyspark.sql.types import _check_dataframe_convert_date, \
    -                    _check_dataframe_localize_timestamps
    +                from pyspark.sql.types import to_arrow_schema
                     from pyspark.sql.utils import require_minimum_pyarrow_version
    -                import pyarrow
                     require_minimum_pyarrow_version()
    +                # Check if its schema is convertible in Arrow format.
    +                to_arrow_schema(self.schema)
    +            except Exception as e:
    +                # Fallback to convert to Pandas DataFrame without arrow if raise some exception
    +                should_fall_back = True
    --- End diff --

    Yup.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org