simplylizz commented on issue #25117: [SPARK-28454][Python] Validate LongType in _make_type_verifier
URL: https://github.com/apache/spark/pull/25117#issuecomment-515682710
 
 
   Unpatched version:
   ```
   In [23]: s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', LongType())])).collect()
   Out[23]: [Row(x=None)]
   ```
   
   Patched:
   ```
   In [5]: s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', LongType())])).collect()
   ---------------------------------------------------------------------------
   ValueError                                Traceback (most recent call last)
   <ipython-input-5-c1740fcadbf9> in <module>
   ----> 1 s.createDataFrame([{'x': 1 << 64}], StructType([StructField('x', 
LongType())])).collect()
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in 
createDataFrame(self, data, schema, samplingRatio, verifySchema)
       689             rdd, schema = self._createFromRDD(data.map(prepare), 
schema, samplingRatio)
       690         else:
   --> 691             rdd, schema = self._createFromLocal(map(prepare, data), 
schema)
       692         jrdd = 
self._jvm.SerDeUtil.toJavaArray(rdd._to_java_object_rdd())
       693         jdf = self._jsparkSession.applySchemaToPythonRDD(jrdd.rdd(), 
schema.json())
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in 
_createFromLocal(self, data, schema)
       405         # make sure data could consumed multiple times
       406         if not isinstance(data, list):
   --> 407             data = list(data)
       408
       409         if schema is None or isinstance(schema, (list, tuple)):
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/session.py in prepare(obj)
       671
       672             def prepare(obj):
   --> 673                 verify_func(obj)
       674                 return obj
       675         elif isinstance(schema, DataType):
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify(obj)
      1427     def verify(obj):
      1428         if not verify_nullability(obj):
   -> 1429             verify_value(obj)
      1430
      1431     return verify
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in 
verify_struct(obj)
      1397             if isinstance(obj, dict):
      1398                 for f, verifier in verifiers:
   -> 1399                     verifier(obj.get(f))
      1400             elif isinstance(obj, Row) and getattr(obj, 
"__from_dict__", False):
      1401                 # the order in obj could be different than 
dataType.fields
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in verify(obj)
      1427     def verify(obj):
      1428         if not verify_nullability(obj):
   -> 1429             verify_value(obj)
      1430
      1431     return verify
   
   /usr/local/lib/python3.5/site-packages/pyspark/sql/types.py in 
verify_long(obj)
      1356             if obj < -9223372036854775808 or obj > 
9223372036854775807:
      1357                 raise ValueError(
   -> 1358                     new_msg("object of LongType out of range, got: 
%s" % obj))
      1359
      1360         verify_value = verify_long
   
   ValueError: field x: object of LongType out of range, got: 18446744073709551616
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to