Github user felixcheung commented on a diff in the pull request:
https://github.com/apache/spark/pull/20487#discussion_r165855092
--- Diff: python/pyspark/sql/utils.py ---
@@ -115,18 +115,30 @@ def toJArray(gateway, jtype, arr):
def require_minimum_pandas_version():
""" Raise ImportError if minimum version of Pandas is not installed
"""
+ minimum_pandas_version = "0.19.2"
+
from distutils.version import LooseVersion
- import pandas
- if LooseVersion(pandas.__version__) < LooseVersion('0.19.2'):
- raise ImportError("Pandas >= 0.19.2 must be installed on calling
Python process; "
- "however, your version was %s." %
pandas.__version__)
+ try:
+ import pandas
+ except ImportError:
+ raise ImportError("Pandas >= %s must be installed; however, "
+ "it was not found." % minimum_pandas_version)
+ if LooseVersion(pandas.__version__) <
LooseVersion(minimum_pandas_version):
+ raise ImportError("Pandas >= %s must be installed; however, "
+ "your version was %s." %
(minimum_pandas_version, pandas.__version__))
def require_minimum_pyarrow_version():
""" Raise ImportError if minimum version of pyarrow is not installed
"""
+ minimum_pyarrow_version = "0.8.0"
--- End diff --
Maybe add a comment in
https://github.com/apache/spark/blob/master/pom.xml#L188
that points to this minimum version; otherwise it will be hard to remember
to change it here as well.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]