This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 6d9c54b  [SPARK-26645][PYTHON] Support decimals with negative scale when parsing datatype
6d9c54b is described below

commit 6d9c54b62cee6fdf396f507caf7eb7f2e3f35b0a
Author: Marco Gaido <marcogaid...@gmail.com>
AuthorDate: Sun Jan 20 17:43:50 2019 +0800

    [SPARK-26645][PYTHON] Support decimals with negative scale when parsing datatype

    ## What changes were proposed in this pull request?

    When parsing datatypes from the json internal representation, PySpark doesn't support decimals with negative scales. Since they are allowed and can actually happen, PySpark should be able to successfully parse them.

    ## How was this patch tested?

    added test

    Closes #23575 from mgaido91/SPARK-26645.

    Authored-by: Marco Gaido <marcogaid...@gmail.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 python/pyspark/sql/tests/test_types.py | 8 +++++++-
 python/pyspark/sql/types.py            | 4 +++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py
index fb673f2..3afb88c 100644
--- a/python/pyspark/sql/tests/test_types.py
+++ b/python/pyspark/sql/tests/test_types.py
@@ -24,7 +24,7 @@ import sys
 import unittest
 
 from pyspark.sql import Row
-from pyspark.sql.functions import UserDefinedFunction
+from pyspark.sql.functions import col, UserDefinedFunction
 from pyspark.sql.types import *
 from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings, \
     _array_unsigned_int_typecode_ctype_mappings, _infer_type, _make_type_verifier, _merge_type
@@ -202,6 +202,12 @@ class TypesTests(ReusedSQLTestCase):
         df = self.spark.createDataFrame([{'a': 1}], ["b"])
         self.assertEqual(df.columns, ['b'])
 
+    def test_negative_decimal(self):
+        df = self.spark.createDataFrame([(1, ), (11, )], ["value"])
+        ret = df.select(col("value").cast(DecimalType(1, -1))).collect()
+        actual = list(map(lambda r: int(r.value), ret))
+        self.assertEqual(actual, [0, 10])
+
     def test_create_dataframe_from_objects(self):
         data = [MyObject(1, "1"), MyObject(2, "2")]
         df = self.spark.createDataFrame(data)
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 22ee5d3..00e90fc 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -752,7 +752,7 @@ _all_complex_types = dict((v.typeName(), v)
                           for v in [ArrayType, MapType, StructType])
 
 
-_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(\d+)\s*\)")
+_FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)")
 
 
 def _parse_datatype_string(s):
@@ -865,6 +865,8 @@ def _parse_datatype_json_string(json_string):
     >>> complex_maptype = MapType(complex_structtype,
     ...                           complex_arraytype, False)
     >>> check_datatype(complex_maptype)
+    >>> # Decimal with negative scale.
+    >>> check_datatype(DecimalType(1,-1))
     """
     return _parse_datatype_json_value(json.loads(json_string))
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org