Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20567#discussion_r167423077 --- Diff: python/pyspark/sql/dataframe.py --- @@ -1941,12 +1941,24 @@ def toPandas(self): timezone = None if self.sql_ctx.getConf("spark.sql.execution.arrow.enabled", "false").lower() == "true": + should_fall_back = False try: - from pyspark.sql.types import _check_dataframe_convert_date, \ - _check_dataframe_localize_timestamps + from pyspark.sql.types import to_arrow_schema from pyspark.sql.utils import require_minimum_pyarrow_version - import pyarrow require_minimum_pyarrow_version() + # Check if its schema is convertible in Arrow format. + to_arrow_schema(self.schema) + except Exception as e: + # Fallback to convert to Pandas DataFrame without arrow if raise some exception --- End diff -- Yup. It does fall back for an unsupported schema, a PyArrow version mismatch, and PyArrow missing. Will add a note in the PR description.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org