simplylizz commented on issue #25117: [SPARK-28454][Python] Validate LongType in _make_type_verifier
URL: https://github.com/apache/spark/pull/25117#issuecomment-515682710

Unpatched version:

```
In [23]: s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', LongType())])).collect()
Out[23]: [Row(x=None)]
```

Patched:

```
In [5]: s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', LongType())])).collect()
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-5-c1740fcadbf9> in <module>
----> 1 s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', LongType())])).collect()

/usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in createDataFrame(self, data, schema, samplingRatio, verifySchema)
    689             rdd, schema = self._createFromRDD(data.map(prepare), schema, samplingRatio)
    690         else:
--> 691             rdd, schema = self._createFromLocal(map(prepare, data), schema)
    692         jrdd = self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
    693         jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), schema.json())

/usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in _createFromLocal(self, data, schema)
    405         # make sure data could consumed multiple times
    406         if not isinstance(data, list):
--> 407             data = list(data)
    408
    409         if schema is None or isinstance(schema, (list, tuple)):

/usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in prepare(obj)
    671
    672             def prepare(obj):
--> 673                 verify_func(obj)
    674                 return obj
    675         elif isinstance(schema, DataType):

/usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify(obj)
   1427     def verify(obj):
   1428         if not verify_nullability(obj):
-> 1429             verify_value(obj)
   1430
   1431     return verify

/usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify_struct(obj)
   1397             if isinstance(obj, dict):
   1398                 for f, verifier in verifiers:
-> 1399                     verifier(obj.get(f))
   1400             elif isinstance(obj, Row) and getattr(obj, "__from_dict__", False):
   1401                 # the order in obj could be different than dataType.fields

/usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify(obj)
   1427     def verify(obj):
   1428         if not verify_nullability(obj):
-> 1429             verify_value(obj)
   1430
   1431     return verify

/usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify_long(obj)
   1356             if obj < -9223372036854775808 or obj > 9223372036854775807:
   1357                 raise ValueError(
-> 1358                     new_msg("object of LongType out of range, got: %s" % obj))
   1359
   1360         verify_value = verify_long

ValueError: field x: object of LongType out of range, got: 18446744073709551616
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]

With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
