Github user dgingrich commented on a diff in the pull request:

    https://github.com/apache/spark/pull/17213#discussion_r105246855
  
    --- Diff: python/pyspark/sql/types.py ---
    @@ -1300,70 +1300,71 @@ def _verify_type(obj, dataType, nullable=True):
             if nullable:
                 return
             else:
    -            raise ValueError("This field is not nullable, but got None")
    +            raise ValueError("This field ({}, of type {}) is not nullable, 
but got None".format(
    +                name, dataType))
     
         # StringType can work with any types
         if isinstance(dataType, StringType):
             return
     
         if isinstance(dataType, UserDefinedType):
             if not (hasattr(obj, '__UDT__') and obj.__UDT__ == dataType):
    -            raise ValueError("%r is not an instance of type %r" % (obj, 
dataType))
    +            raise ValueError("%r is not an instance of type %r for field 
%s" % (obj, dataType, name))
             _verify_type(dataType.toInternal(obj), dataType.sqlType())
             return
     
         _type = type(dataType)
    -    assert _type in _acceptable_types, "unknown datatype: %s for object 
%r" % (dataType, obj)
    +    assert _type in _acceptable_types, "unknown datatype: %s for object %r 
for field %s" % (dataType, obj, name)
     
         if _type is StructType:
             # check the type and fields later
             pass
         else:
             # subclass of them can not be fromInternal in JVM
             if type(obj) not in _acceptable_types[_type]:
    -            raise TypeError("%s can not accept object %r in type %s" % 
(dataType, obj, type(obj)))
    +            raise TypeError("%s can not accept object %r in type %s for 
field %s" % (dataType, obj, type(obj), name))
     
         if isinstance(dataType, ByteType):
             if obj < -128 or obj > 127:
    -            raise ValueError("object of ByteType out of range, got: %s" % 
obj)
    +            raise ValueError("object of ByteType out of range, got: %s for 
field %s" % (obj, name))
     
         elif isinstance(dataType, ShortType):
             if obj < -32768 or obj > 32767:
    -            raise ValueError("object of ShortType out of range, got: %s" % 
obj)
    +            raise ValueError("object of ShortType out of range, got: %s 
for field %s" % (obj, name))
     
         elif isinstance(dataType, IntegerType):
             if obj < -2147483648 or obj > 2147483647:
    -            raise ValueError("object of IntegerType out of range, got: %s" 
% obj)
    +            raise ValueError("object of IntegerType out of range, got: %s 
for field %s" % (obj, name))
     
         elif isinstance(dataType, ArrayType):
             for i in obj:
    -            _verify_type(i, dataType.elementType, dataType.containsNull)
    +            _verify_type(i, dataType.elementType, dataType.containsNull, 
name)
     
         elif isinstance(dataType, MapType):
             for k, v in obj.items():
    -            _verify_type(k, dataType.keyType, False)
    -            _verify_type(v, dataType.valueType, dataType.valueContainsNull)
    +            _verify_type(k, dataType.keyType, False, name)
    +            _verify_type(v, dataType.valueType, 
dataType.valueContainsNull, name)
     
         elif isinstance(dataType, StructType):
             if isinstance(obj, dict):
                 for f in dataType.fields:
    -                _verify_type(obj.get(f.name), f.dataType, f.nullable)
    +                _verify_type(obj.get(f.name), f.dataType, f.nullable, 
f.name)
    --- End diff --
    
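    This doesn't work well for nested structs. The error reports only the innermost field name, so fields with the same name in different sub-structs are indistinguishable: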
    
    ```python
    MySubType = StructType([StructField('value', StringType(), nullable=False)])
    MyType = StructType([
        StructField('one', MySubType),
        StructField('two', MySubType)])
    
    _verify_type({'one': {'value': 'good'}, 'two': {'value': None}}, MyType)
    # "This field (value, of type StringType) is not nullable, but got None"
    # But is it one.value or two.value?
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to