Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/22653#discussion_r223520528
--- Diff: python/pyspark/sql/tests.py ---
@@ -1149,6 +1149,75 @@ def test_infer_schema(self):
result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d
= '2'")
self.assertEqual(1, result.head()[0])
+ def test_infer_schema_specification(self):
+ from decimal import Decimal
+
+ class A(object):
+ def __init__(self):
+ self.a = 1
+
+ data = [
+ True,
+ 1,
+ "a",
+ u"a",
+ datetime.date(1970, 1, 1),
+ datetime.datetime(1970, 1, 1, 0, 0),
+ 1.0,
+ array.array("d", [1]),
+ [1],
+ (1, ),
+ {"a": 1},
+ bytearray(1),
+ Decimal(1),
+ Row(a=1),
+ Row("a")(1),
+ A(),
--- End diff --
Yeah, it uses the `__dict__` attribute. It looks like that's not possible in
UDFs, which is possibly an issue.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]