This is an automated email from the ASF dual-hosted git repository.
jincheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/flink.git
The following commit(s) were added to refs/heads/master by this push:
new ef3e201 [hotfix][python] Align the signature of type utility methods with Java
ef3e201 is described below
commit ef3e201e180aa943ee25c645622aa4b5d7bdaed8
Author: Dian Fu <[email protected]>
AuthorDate: Wed Jun 19 20:36:05 2019 +0800
[hotfix][python] Align the signature of type utility methods with Java
This closes #8893
---
flink-python/pyflink/table/tests/test_calc.py | 6 +-
flink-python/pyflink/table/tests/test_types.py | 160 +++++-----
flink-python/pyflink/table/types.py | 391 ++++++++++++++++++-------
3 files changed, 359 insertions(+), 198 deletions(-)
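For orientation (not part of the commit), a minimal sketch of how call sites change under the aligned signatures shown in the diffs below; it only uses factory methods that appear in this patch:

    from pyflink.table.types import DataTypes

    # Before this change (Python-only defaults with no Java counterpart):
    #   DataTypes.VARCHAR()                         # length defaulted to 1
    #   DataTypes.DECIMAL()                         # precision/scale defaulted to (10, 0)
    #   DataTypes.TIMESTAMP(TimestampKind.REGULAR)  # kind parameter
    #
    # After this change the signatures mirror the Java DataTypes factory:
    field_types = [
        DataTypes.STRING(),        # shortcut for VARCHAR(2147483647)
        DataTypes.BYTES(),         # shortcut for VARBINARY(2147483647)
        DataTypes.DECIMAL(10, 0),  # precision and scale are now required
        DataTypes.VARCHAR(100),    # length is now required
        DataTypes.TIMESTAMP(),     # kind parameter removed, precision defaults to 6
    ]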
diff --git a/flink-python/pyflink/table/tests/test_calc.py b/flink-python/pyflink/table/tests/test_calc.py
index 147b3c2..edf430f 100644
--- a/flink-python/pyflink/table/tests/test_calc.py
+++ b/flink-python/pyflink/table/tests/test_calc.py
@@ -81,11 +81,11 @@ class StreamTableCalcTests(PyFlinkStreamTableTestCase):
DataTypes.ARRAY(DataTypes.DOUBLE(False)),
DataTypes.ARRAY(DataTypes.STRING()),
DataTypes.ARRAY(DataTypes.DATE()),
- DataTypes.DECIMAL(),
+ DataTypes.DECIMAL(10, 0),
DataTypes.ROW([DataTypes.FIELD("a", DataTypes.BIGINT()),
DataTypes.FIELD("b",
DataTypes.DOUBLE())]),
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.DOUBLE()),
- DataTypes.VARBINARY(), ExamplePointUDT(),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
+ DataTypes.BYTES(), ExamplePointUDT(),
PythonOnlyUDT()]
table_sink = source_sink_utils.TestAppendSink(field_names, field_types)
t_env.register_table_sink("Results", table_sink)
diff --git a/flink-python/pyflink/table/tests/test_types.py b/flink-python/pyflink/table/tests/test_types.py
index ed4f19f..4888583 100644
--- a/flink-python/pyflink/table/tests/test_types.py
+++ b/flink-python/pyflink/table/tests/test_types.py
@@ -30,7 +30,7 @@ from pyflink.table.types import (_infer_schema_from_data,
_infer_type,
_array_type_mappings, _merge_type,
_create_type_verifier, UserDefinedType,
DataTypes, Row, RowField,
RowType, ArrayType, BigIntType, VarCharType,
MapType, DataType,
- _to_java_type, _from_java_type, TimestampKind)
+ _to_java_type, _from_java_type)
class ExamplePointUDT(UserDefinedType):
@@ -145,7 +145,7 @@ class TypesTests(unittest.TestCase):
'VarCharType(2147483647, true)',
'DateType(true)',
'TimeType(0, true)',
- 'TimestampType(0, 6, true)',
+ 'TimestampType(6, true)',
'DoubleType(true)',
"ArrayType(DoubleType(false), true)",
"ArrayType(BigIntType(true), true)",
@@ -242,46 +242,46 @@ class TypesTests(unittest.TestCase):
self.assertEqual(expected_schema, _infer_type(p))
def test_struct_type(self):
- row1 = DataTypes.ROW().add("f1", DataTypes.VARCHAR(nullable=True)) \
- .add("f2", DataTypes.VARCHAR(nullable=True))
- row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True)),
- DataTypes.FIELD("f2", DataTypes.VARCHAR(nullable=True), None)])
+ row1 = DataTypes.ROW().add("f1", DataTypes.STRING(nullable=True)) \
+ .add("f2", DataTypes.STRING(nullable=True))
+ row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.STRING(nullable=True)),
+ DataTypes.FIELD("f2", DataTypes.STRING(nullable=True), None)])
self.assertEqual(row1.field_names(), row2.names)
self.assertEqual(row1, row2)
- row1 = DataTypes.ROW().add("f1", DataTypes.VARCHAR(nullable=True)) \
- .add("f2", DataTypes.VARCHAR(nullable=True))
- row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True))])
+ row1 = DataTypes.ROW().add("f1", DataTypes.STRING(nullable=True)) \
+ .add("f2", DataTypes.STRING(nullable=True))
+ row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.STRING(nullable=True))])
self.assertNotEqual(row1.field_names(), row2.names)
self.assertNotEqual(row1, row2)
- row1 = (DataTypes.ROW().add(DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True)))
- .add("f2", DataTypes.VARCHAR(nullable=True)))
- row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True)),
- DataTypes.FIELD("f2", DataTypes.VARCHAR(nullable=True))])
+ row1 = (DataTypes.ROW().add(DataTypes.FIELD("f1", DataTypes.STRING(nullable=True)))
+ .add("f2", DataTypes.STRING(nullable=True)))
+ row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.STRING(nullable=True)),
+ DataTypes.FIELD("f2", DataTypes.STRING(nullable=True))])
self.assertEqual(row1.field_names(), row2.names)
self.assertEqual(row1, row2)
- row1 = (DataTypes.ROW().add(DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True)))
- .add("f2", DataTypes.VARCHAR(nullable=True)))
- row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.VARCHAR(nullable=True))])
+ row1 = (DataTypes.ROW().add(DataTypes.FIELD("f1", DataTypes.STRING(nullable=True)))
+ .add("f2", DataTypes.STRING(nullable=True)))
+ row2 = DataTypes.ROW([DataTypes.FIELD("f1", DataTypes.STRING(nullable=True))])
self.assertNotEqual(row1.field_names(), row2.names)
self.assertNotEqual(row1, row2)
# Catch exception raised during improper construction
self.assertRaises(ValueError, lambda: DataTypes.ROW().add("name"))
- row1 = DataTypes.ROW().add("f1", DataTypes.VARCHAR(nullable=True)) \
- .add("f2", DataTypes.VARCHAR(nullable=True))
+ row1 = DataTypes.ROW().add("f1", DataTypes.STRING(nullable=True)) \
+ .add("f2", DataTypes.STRING(nullable=True))
for field in row1:
self.assertIsInstance(field, RowField)
- row1 = DataTypes.ROW().add("f1", DataTypes.VARCHAR(nullable=True)) \
- .add("f2", DataTypes.VARCHAR(nullable=True))
+ row1 = DataTypes.ROW().add("f1", DataTypes.STRING(nullable=True)) \
+ .add("f2", DataTypes.STRING(nullable=True))
self.assertEqual(len(row1), 2)
- row1 = DataTypes.ROW().add("f1", DataTypes.VARCHAR(nullable=True)) \
- .add("f2", DataTypes.VARCHAR(nullable=True))
+ row1 = DataTypes.ROW().add("f1", DataTypes.STRING(nullable=True)) \
+ .add("f2", DataTypes.STRING(nullable=True))
self.assertIs(row1["f1"], row1.fields[0])
self.assertIs(row1[0], row1.fields[0])
self.assertEqual(row1[0:1], DataTypes.ROW(row1.fields[0:1]))
@@ -315,31 +315,31 @@ class TypesTests(unittest.TestCase):
_merge_type(DataTypes.ARRAY(DataTypes.BIGINT()),
DataTypes.ARRAY(DataTypes.DOUBLE()))
self.assertEqual(_merge_type(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT()),
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())
- ), DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT()))
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT()),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())
+ ), DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT()))
with self.assertRaises(TypeError):
_merge_type(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT()),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT()),
DataTypes.MAP(DataTypes.DOUBLE(), DataTypes.BIGINT()))
with self.assertRaises(TypeError):
_merge_type(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT()),
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.DOUBLE()))
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT()),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()))
self.assertEqual(_merge_type(
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.BIGINT()),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.BIGINT()),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())])
+ DataTypes.FIELD('f2', DataTypes.STRING())])
), DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.BIGINT()),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
with self.assertRaises(TypeError):
_merge_type(
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.BIGINT()),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.DOUBLE()),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
self.assertEqual(_merge_type(
DataTypes.ROW([DataTypes.FIELD(
@@ -353,54 +353,54 @@ class TypesTests(unittest.TestCase):
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ROW(
[DataTypes.FIELD('f2', DataTypes.BIGINT())]))]),
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ROW(
- [DataTypes.FIELD('f2', DataTypes.VARCHAR())]))]))
+ [DataTypes.FIELD('f2', DataTypes.STRING())]))]))
self.assertEqual(_merge_type(
DataTypes.ROW([DataTypes.FIELD('f1',
DataTypes.ARRAY(DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([DataTypes.FIELD('f1',
DataTypes.ARRAY(DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())])
+ DataTypes.FIELD('f2', DataTypes.STRING())])
), DataTypes.ROW([DataTypes.FIELD('f1',
DataTypes.ARRAY(DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
with self.assertRaises(TypeError):
_merge_type(
DataTypes.ROW([
DataTypes.FIELD('f1', DataTypes.ARRAY(DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([
DataTypes.FIELD('f1', DataTypes.ARRAY(DataTypes.DOUBLE())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
self.assertEqual(_merge_type(
DataTypes.ROW([
- DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([
- DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())])
+ DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())),
+ DataTypes.FIELD('f2', DataTypes.STRING())])
), DataTypes.ROW([
- DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())),
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
with self.assertRaises(TypeError):
_merge_type(
DataTypes.ROW([
- DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]),
+ DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())),
+ DataTypes.FIELD('f2', DataTypes.STRING())]),
DataTypes.ROW([
- DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.DOUBLE())),
- DataTypes.FIELD('f2', DataTypes.VARCHAR())]))
+ DataTypes.FIELD('f1', DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE())),
+ DataTypes.FIELD('f2', DataTypes.STRING())]))
self.assertEqual(_merge_type(
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ARRAY(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())))]),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())))]),
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ARRAY(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())))])
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())))])
), DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ARRAY(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())))]))
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())))]))
with self.assertRaises(TypeError):
_merge_type(
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ARRAY(
- DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.BIGINT())))]),
+ DataTypes.MAP(DataTypes.STRING(), DataTypes.BIGINT())))]),
DataTypes.ROW([DataTypes.FIELD('f1', DataTypes.ARRAY(
DataTypes.MAP(DataTypes.DOUBLE(), DataTypes.BIGINT())))])
)
@@ -510,12 +510,10 @@ class TypesTests(unittest.TestCase):
self.assertEqual(lt, lt2)
def test_decimal_type(self):
- t1 = DataTypes.DECIMAL()
+ t1 = DataTypes.DECIMAL(10, 0)
t2 = DataTypes.DECIMAL(10, 2)
self.assertTrue(t2 is not t1)
self.assertNotEqual(t1, t2)
- t3 = DataTypes.DECIMAL(8)
- self.assertNotEqual(t2, t3)
def test_datetype_equal_zero(self):
dt = DataTypes.DATE()
@@ -554,7 +552,7 @@ class DataTypeVerificationTests(unittest.TestCase):
self.assertRaises(
ValueError,
lambda: _create_type_verifier(
- DataTypes.VARCHAR(nullable=False), name="test_name")(None))
+ DataTypes.STRING(nullable=False), name="test_name")(None))
schema = DataTypes.ROW(
[DataTypes.FIELD('a', DataTypes.ROW([DataTypes.FIELD('b',
DataTypes.INT())]))])
@@ -564,7 +562,7 @@ class DataTypeVerificationTests(unittest.TestCase):
def test_verify_type_ok_nullable(self):
obj = None
- types = [DataTypes.INT(), DataTypes.FLOAT(), DataTypes.VARCHAR(), DataTypes.ROW([])]
+ types = [DataTypes.INT(), DataTypes.FLOAT(), DataTypes.STRING(), DataTypes.ROW([])]
for data_type in types:
try:
_create_type_verifier(data_type)(obj)
@@ -577,7 +575,7 @@ class DataTypeVerificationTests(unittest.TestCase):
import decimal
schema = DataTypes.ROW([
- DataTypes.FIELD('s', DataTypes.VARCHAR(nullable=False)),
+ DataTypes.FIELD('s', DataTypes.STRING(nullable=False)),
DataTypes.FIELD('i', DataTypes.INT(True))])
class MyObj:
@@ -588,8 +586,8 @@ class DataTypeVerificationTests(unittest.TestCase):
# obj, data_type
success_spec = [
# String
- ("", DataTypes.VARCHAR()),
- (u"", DataTypes.VARCHAR()),
+ ("", DataTypes.STRING()),
+ (u"", DataTypes.STRING()),
# UDT
(ExamplePoint(1.0, 2.0), ExamplePointUDT()),
@@ -617,10 +615,10 @@ class DataTypeVerificationTests(unittest.TestCase):
(1.0, DataTypes.DOUBLE()),
# Decimal
- (decimal.Decimal("1.0"), DataTypes.DECIMAL()),
+ (decimal.Decimal("1.0"), DataTypes.DECIMAL(10, 0)),
# Binary
- (bytearray([1]), DataTypes.BINARY()),
+ (bytearray([1]), DataTypes.BINARY(1)),
# Date/Time/Timestamp
(datetime.date(2000, 1, 2), DataTypes.DATE()),
@@ -630,15 +628,15 @@ class DataTypeVerificationTests(unittest.TestCase):
# Array
([], DataTypes.ARRAY(DataTypes.INT())),
- (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(nullable=True))),
+ (["1", None], DataTypes.ARRAY(DataTypes.STRING(nullable=True))),
([1, 2], DataTypes.ARRAY(DataTypes.INT())),
((1, 2), DataTypes.ARRAY(DataTypes.INT())),
(array.array('h', [1, 2]), DataTypes.ARRAY(DataTypes.INT())),
# Map
- ({}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT())),
- ({"a": 1}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT())),
- ({"a": None}, DataTypes.MAP(DataTypes.VARCHAR(nullable=False), DataTypes.INT(True))),
+ ({}, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
+ ({"a": 1}, DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
+ ({"a": None}, DataTypes.MAP(DataTypes.STRING(nullable=False), DataTypes.INT(True))),
# Struct
({"s": "a", "i": 1}, schema),
@@ -659,13 +657,13 @@ class DataTypeVerificationTests(unittest.TestCase):
# obj, data_type, exception class
failure_spec = [
# Char/VarChar (match anything but None)
- (None, DataTypes.VARCHAR(), ValueError),
- (None, DataTypes.CHAR(), ValueError),
+ (None, DataTypes.VARCHAR(1), ValueError),
+ (None, DataTypes.CHAR(1), ValueError),
# VarChar (length exceeds maximum length)
- ("abc", DataTypes.VARCHAR(), ValueError),
+ ("abc", DataTypes.VARCHAR(1), ValueError),
# Char (length exceeds length)
- ("abc", DataTypes.CHAR(), ValueError),
+ ("abc", DataTypes.CHAR(1), ValueError),
# UDT
(ExamplePoint(1.0, 2.0), PythonOnlyUDT(), ValueError),
@@ -694,16 +692,16 @@ class DataTypeVerificationTests(unittest.TestCase):
(1, DataTypes.DOUBLE(), TypeError),
# Decimal
- (1.0, DataTypes.DECIMAL(), TypeError),
- (1, DataTypes.DECIMAL(), TypeError),
- ("1.0", DataTypes.DECIMAL(), TypeError),
+ (1.0, DataTypes.DECIMAL(10, 0), TypeError),
+ (1, DataTypes.DECIMAL(10, 0), TypeError),
+ ("1.0", DataTypes.DECIMAL(10, 0), TypeError),
# Binary
- (1, DataTypes.BINARY(), TypeError),
+ (1, DataTypes.BINARY(1), TypeError),
# VarBinary (length exceeds maximum length)
- (bytearray([1, 2]), DataTypes.VARBINARY(), ValueError),
+ (bytearray([1, 2]), DataTypes.VARBINARY(1), ValueError),
# Char (length exceeds length)
- (bytearray([1, 2]), DataTypes.BINARY(), ValueError),
+ (bytearray([1, 2]), DataTypes.BINARY(1), ValueError),
# Date/Time/Timestamp
("2000-01-02", DataTypes.DATE(), TypeError),
@@ -711,13 +709,13 @@ class DataTypeVerificationTests(unittest.TestCase):
(946811040, DataTypes.TIMESTAMP(), TypeError),
# Array
- (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(nullable=False)), ValueError),
+ (["1", None], DataTypes.ARRAY(DataTypes.VARCHAR(1, nullable=False)), ValueError),
([1, "2"], DataTypes.ARRAY(DataTypes.INT()), TypeError),
# Map
({"a": 1}, DataTypes.MAP(DataTypes.INT(), DataTypes.INT()),
TypeError),
- ({"a": "1"}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT()), TypeError),
- ({"a": None}, DataTypes.MAP(DataTypes.VARCHAR(), DataTypes.INT(False)), ValueError),
+ ({"a": "1"}, DataTypes.MAP(DataTypes.VARCHAR(1), DataTypes.INT()), TypeError),
+ ({"a": None}, DataTypes.MAP(DataTypes.VARCHAR(1), DataTypes.INT(False)), ValueError),
# Struct
({"s": "a", "i": "1"}, schema, TypeError),
@@ -779,7 +777,7 @@ class DataTypeConvertTests(unittest.TestCase):
converted_python_types = [_from_java_type(item) for item in java_types]
expected = [DataTypes.TIME(3, False),
- DataTypes.TIMESTAMP(TimestampKind.REGULAR).not_null(),
+ DataTypes.TIMESTAMP().not_null(),
DataTypes.VARBINARY(100, False),
DataTypes.BINARY(2, False),
DataTypes.VARCHAR(30, False),
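For context on the verification tests above, a minimal sketch of the behaviour they exercise, calling the internal _create_type_verifier helper exactly as the tests do (not part of the commit):

    from pyflink.table.types import DataTypes, _create_type_verifier

    # Values within bounds pass silently (success_spec pairs "" with STRING()).
    _create_type_verifier(DataTypes.STRING())("")

    # VARCHAR now takes an explicit length; the verifier rejects values that
    # exceed it (failure_spec pairs "abc" with VARCHAR(1) and expects ValueError).
    try:
        _create_type_verifier(DataTypes.VARCHAR(1))("abc")
    except ValueError as e:
        print("rejected:", e)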
diff --git a/flink-python/pyflink/table/types.py b/flink-python/pyflink/table/types.py
index 25050c2..c6092ab 100644
--- a/flink-python/pyflink/table/types.py
+++ b/flink-python/pyflink/table/types.py
@@ -173,8 +173,8 @@ class CharType(AtomicType):
"""
Char data type. SQL CHAR(n)
- The serialized string representation is 'char(n)' where 'n' (default: 1) is the number of
- bytes. 'n' must have a value between 1 and 255 (both inclusive).
+ The serialized string representation is ``char(n)`` where ``n`` (default: 1) is the number of
+ code points. ``n`` must have a value between 1 and 2147483647(0x7fffffff) (both inclusive).
:param length: int, the string representation length.
:param nullable: boolean, whether the type can be null (None) or not.
@@ -192,8 +192,9 @@ class VarCharType(AtomicType):
"""
Varchar data type. SQL VARCHAR(n)
- The serialized string representation is 'varchar(n)' where 'n' (default: 1) is the number of
- characters. 'n' must have a value between 1 and 0x7fffffff (both inclusive).
+ The serialized string representation is ``varchar(n)`` where 'n' (default: 1) is the maximum
+ number of code points. 'n' must have a value between 1 and 2147483647(0x7fffffff)
+ (both inclusive).
:param length: int, the maximum string representation length.
:param nullable: boolean, whether the type can be null (None) or not.
@@ -211,8 +212,8 @@ class BinaryType(AtomicType):
"""
Binary (byte array) data type. SQL BINARY(n)
- The serialized string representation is 'binary(n)' where 'n' (default: 1) is the number of
- bytes. 'n' must have a value between 1 and 0x7fffffff (both inclusive).
+ The serialized string representation is ``binary(n)`` where ``n`` (default: 1) is the number of
+ bytes. ``n`` must have a value between 1 and 2147483647(0x7fffffff) (both inclusive).
:param length: int, the number of bytes.
:param nullable: boolean, whether the type can be null (None) or not.
@@ -230,8 +231,8 @@ class VarBinaryType(AtomicType):
"""
Binary (byte array) data type. SQL VARBINARY(n)
- The serialized string representation is 'varbinary(n)' where 'n' (default: 1) is the
- maximum number of bytes. 'n' must have a value between 1 and 0x7fffffff (both inclusive).
+ The serialized string representation is ``varbinary(n)`` where ``n`` (default: 1) is the
+ maximum number of bytes. ``n`` must have a value between 1 and 0x7fffffff (both inclusive).
:param length: int, the maximum number of bytes.
:param nullable: boolean, whether the type can be null (None) or not.
@@ -335,7 +336,7 @@ class DecimalType(FractionalType):
When create a DecimalType, the default precision and scale is (10, 0).
When infer
schema from decimal.Decimal objects, it will be DecimalType(38, 18).
- :param precision: the maximum total number of digits (default: 10)
+ :param precision: the number of digits in a number (default: 10)
:param scale: the number of digits on right side of dot. (default: 0)
:param nullable: boolean, whether the field can be null (None) or not.
"""
@@ -419,36 +420,23 @@ class TimeType(AtomicType):
return datetime.time(hours, minutes, seconds, microseconds)
-class TimestampKind(object):
- """
- Timestamp kind for the time attribute metadata to timestamps.
- """
- REGULAR = 0
- ROWTIME = 1
- PROCTIME = 2
-
-
class TimestampType(AtomicType):
"""
Timestamp data type. SQL TIMESTAMP
The precision must be greater than or equal to 0 and less than or equal to
9.
- :param kind, the time attribute metadata (default: TimestampKind.REGULAR)
:param precision: int, the number of digits of fractional seconds
(default: 6)
:param nullable: boolean, whether the field can be null (None) or not.
"""
- def __init__(self, kind=TimestampKind.REGULAR, precision=6, nullable=True):
+ def __init__(self, precision=6, nullable=True):
super(TimestampType, self).__init__(nullable)
- assert 0 <= kind <= 2
assert 0 <= precision <= 9
- self.kind = kind
self.precision = precision
def __repr__(self):
- return "TimestampType(%s, %s, %s)" % (
- self.kind, self.precision, str(self._nullable).lower())
+ return "TimestampType(%s, %s)" % (self.precision, str(self._nullable).lower())
def need_conversion(self):
return True
@@ -486,9 +474,9 @@ class ArrayType(DataType):
def __init__(self, element_type, nullable=True):
"""
- >>> ArrayType(VarCharType()) == ArrayType(VarCharType())
+ >>> ArrayType(VarCharType(100)) == ArrayType(VarCharType(100))
True
- >>> ArrayType(VarCharType()) == ArrayType(BigIntType())
+ >>> ArrayType(VarCharType(100)) == ArrayType(BigIntType())
False
"""
assert isinstance(element_type, DataType), \
@@ -526,11 +514,11 @@ class MapType(DataType):
def __init__(self, key_type, value_type, nullable=True):
"""
- >>> (MapType(VarCharType(nullable=False), IntType())
- ... == MapType(VarCharType(nullable=False), IntType()))
+ >>> (MapType(VarCharType(100, nullable=False), IntType())
+ ... == MapType(VarCharType(100, nullable=False), IntType()))
True
- >>> (MapType(VarCharType(nullable=False), IntType())
- ... == MapType(VarCharType(nullable=False), FloatType()))
+ >>> (MapType(VarCharType(100, nullable=False), IntType())
+ ... == MapType(VarCharType(100, nullable=False), FloatType()))
False
"""
assert isinstance(key_type, DataType), \
@@ -571,9 +559,9 @@ class MultisetType(DataType):
def __init__(self, element_type, nullable=True):
"""
- >>> MultisetType(VarCharType()) == MultisetType(VarCharType())
+ >>> MultisetType(VarCharType(100)) == MultisetType(VarCharType(100))
True
- >>> MultisetType(VarCharType()) == MultisetType(BigIntType())
+ >>> MultisetType(VarCharType(100)) == MultisetType(BigIntType())
False
"""
assert isinstance(element_type, DataType), \
@@ -609,11 +597,9 @@ class RowField(object):
def __init__(self, name, data_type, description=None):
"""
- >>> (RowField("f1", VarCharType())
- ... == RowField("f1", VarCharType()))
+ >>> (RowField("f1", VarCharType(100)) == RowField("f1", VarCharType(100)))
True
- >>> (RowField("f1", VarCharType())
- ... == RowField("f2", VarCharType()))
+ >>> (RowField("f1", VarCharType(100)) == RowField("f2", VarCharType(100)))
False
"""
assert isinstance(data_type, DataType), \
@@ -658,22 +644,21 @@ class RowType(DataType):
Iterating a :class:`RowType` will iterate its :class:`RowField`\\s.
A contained :class:`RowField` can be accessed by name or position.
- >>> row1 = RowType([RowField("f1", VarCharType())])
+ >>> row1 = RowType([RowField("f1", VarCharType(100))])
>>> row1["f1"]
- RowField(f1, VarCharType(1))
+ RowField(f1, VarCharType(100))
>>> row1[0]
- RowField(f1, VarCharType(1))
+ RowField(f1, VarCharType(100))
"""
def __init__(self, fields=None, nullable=True):
"""
- >>> row1 = RowType([RowField("f1", VarCharType())])
- >>> row2 = RowType([RowField("f1", VarCharType())])
+ >>> row1 = RowType([RowField("f1", VarCharType(100))])
+ >>> row2 = RowType([RowField("f1", VarCharType(100))])
>>> row1 == row2
True
- >>> row1 = RowType([RowField("f1", VarCharType())])
- >>> row2 = RowType([RowField("f1", VarCharType()),
- ... RowField("f2", IntType())])
+ >>> row1 = RowType([RowField("f1", VarCharType(100))])
+ >>> row2 = RowType([RowField("f1", VarCharType(100)), RowField("f2", IntType())])
>>> row1 == row2
False
"""
@@ -700,15 +685,15 @@ class RowType(DataType):
b) 2 parameters as (name, data_type). The data_type parameter may
be either a String
or a DataType object.
- >>> row1 = RowType().add("f1", VarCharType()).add("f2", VarCharType())
- >>> row2 = RowType([RowField("f1", VarCharType()), RowField("f2", VarCharType())])
+ >>> row1 = RowType().add("f1", VarCharType(100)).add("f2", VarCharType(100))
+ >>> row2 = RowType([RowField("f1", VarCharType(100)), RowField("f2", VarCharType(100))])
>>> row1 == row2
True
- >>> row1 = RowType().add(RowField("f1", VarCharType()))
- >>> row2 = RowType([RowField("f1", VarCharType())])
+ >>> row1 = RowType().add(RowField("f1", VarCharType(100)))
+ >>> row2 = RowType([RowField("f1", VarCharType(100))])
>>> row1 == row2
True
- >>> row2 = RowType([RowField("f1", VarCharType())])
+ >>> row2 = RowType([RowField("f1", VarCharType(100))])
>>> row1 == row2
True
@@ -769,7 +754,7 @@ class RowType(DataType):
"""
Returns all field names in a list.
- >>> row = RowType([RowField("f1", VarCharType())])
+ >>> row = RowType([RowField("f1", VarCharType(100))])
>>> row.field_names()
['f1']
"""
@@ -1339,18 +1324,7 @@ def _from_java_type(j_data_type):
elif _is_instance_of(logical_type, gateway.jvm.TimeType):
data_type = DataTypes.TIME(logical_type.getPrecision(),
logical_type.isNullable())
elif _is_instance_of(logical_type, gateway.jvm.TimestampType):
- j_kind = logical_type.getKind()
- kind = None
- if j_kind == gateway.jvm.TimestampKind.REGULAR:
- kind = TimestampKind.REGULAR
- elif j_kind == gateway.jvm.TimestampKind.ROWTIME:
- kind = TimestampKind.ROWTIME
- elif j_kind == gateway.jvm.TimestampKind.PROCTIME:
- kind = TimestampKind.PROCTIME
- if kind is None:
- raise Exception("Unsupported java timestamp kind %s" % j_kind)
- data_type = DataTypes.TIMESTAMP(kind,
- nullable=logical_type.isNullable())
+ data_type = DataTypes.TIMESTAMP(nullable=logical_type.isNullable())
elif _is_instance_of(logical_type, gateway.jvm.BooleanType):
data_type = DataTypes.BOOLEAN(logical_type.isNullable())
elif _is_instance_of(logical_type, gateway.jvm.TinyIntType):
@@ -1649,14 +1623,14 @@ def _create_type_verifier(data_type, name=None):
float is not checked, so it will become infinity when cast to Java float
if it overflows.
>>> _create_type_verifier(RowType([]))(None)
- >>> _create_type_verifier(VarCharType())("")
+ >>> _create_type_verifier(VarCharType(100))("")
>>> _create_type_verifier(BigIntType())(0)
>>> _create_type_verifier(ArrayType(SmallIntType()))(list(range(3)))
- >>> _create_type_verifier(ArrayType(VarCharType()))(set()) # doctest: +IGNORE_EXCEPTION_DETAIL
+ >>> _create_type_verifier(ArrayType(VarCharType(10)))(set()) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
TypeError:...
- >>> _create_type_verifier(MapType(VarCharType(), IntType()))({})
+ >>> _create_type_verifier(MapType(VarCharType(100), IntType()))({})
>>> _create_type_verifier(RowType([]))(())
>>> _create_type_verifier(RowType([]))([])
>>> _create_type_verifier(RowType([]))([1]) # doctest: +IGNORE_EXCEPTION_DETAIL
@@ -1678,11 +1652,11 @@ def _create_type_verifier(data_type, name=None):
Traceback (most recent call last):
...
ValueError:...
- >>> _create_type_verifier(MapType(VarCharType(), IntType()))({None: 1})
+ >>> _create_type_verifier(MapType(VarCharType(100), IntType()))({None: 1})
Traceback (most recent call last):
...
ValueError:...
- >>> schema = RowType().add("a", IntType()).add("b", VarCharType(), False)
+ >>> schema = RowType().add("a", IntType()).add("b", VarCharType(100), False)
>>> _create_type_verifier(schema)((1, None)) # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
@@ -1856,95 +1830,284 @@ def _create_type_verifier(data_type, name=None):
class DataTypes(object):
+ """
+ A :class:`DataType` can be used to declare input and/or output types of operations.
+ This class enumerates all supported data types of the Table & SQL API.
+ """
- @classmethod
- def NULL(cls):
+ @staticmethod
+ def NULL():
+ """
+ Data type for representing untyped null (None) values. A null type has no
+ other value except null (None), thus, it can be cast to any nullable type.
+
+ This type helps in representing unknown types in API calls that use a null
+ (None) literal as well as bridging to formats such as JSON or Avro that
+ define such a type as well.
+
+ The null type is an extension to the SQL standard.
+ """
return NullType()
- @classmethod
- def CHAR(cls, length=1, nullable=True):
+ @staticmethod
+ def CHAR(length, nullable=True):
+ """
+ Data type of a fixed-length character string.
+
+ :param length: int, the string representation length. It must have a value
+ between 1 and 2147483647(0x7fffffff) (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return CharType(length, nullable)
- @classmethod
- def VARCHAR(cls, length=1, nullable=True):
+ @staticmethod
+ def VARCHAR(length, nullable=True):
+ """
+ Data type of a variable-length character string.
+
+ :param length: int, the maximum string representation length. It must have a
+ value between 1 and 2147483647(0x7fffffff) (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return VarCharType(length, nullable)
- @classmethod
- def STRING(cls, nullable=True):
+ @staticmethod
+ def STRING(nullable=True):
+ """
+ Data type of a variable-length character string with defined maximum length.
+ This is a shortcut for ``DataTypes.VARCHAR(2147483647)``.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return DataTypes.VARCHAR(0x7fffffff, nullable)
- @classmethod
- def BOOLEAN(cls, nullable=True):
+ @staticmethod
+ def BOOLEAN(nullable=True):
+ """
+ Data type of a boolean with a (possibly) three-valued logic of
+ TRUE, FALSE, UNKNOWN.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return BooleanType(nullable)
- @classmethod
- def BINARY(cls, length=1, nullable=True):
+ @staticmethod
+ def BINARY(length, nullable=True):
+ """
+ Data type of a fixed-length binary string (=a sequence of bytes).
+
+ :param length: int, the number of bytes. It must have a value between
+ 1 and 2147483647(0x7fffffff) (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return BinaryType(length, nullable)
- @classmethod
- def VARBINARY(cls, length=1, nullable=True):
+ @staticmethod
+ def VARBINARY(length, nullable=True):
+ """
+ Data type of a variable-length binary string (=a sequence of bytes)
+
+ :param length: int, the maximum number of bytes. It must have a value
+ between 1 and 2147483647(0x7fffffff) (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return VarBinaryType(length, nullable)
- @classmethod
- def BYTES(cls, nullable=True):
+ @staticmethod
+ def BYTES(nullable=True):
+ """
+ Data type of a variable-length binary string (=a sequence of bytes) with
+ defined maximum length. This is a shortcut for ``DataTypes.VARBINARY(2147483647)``.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return DataTypes.VARBINARY(0x7fffffff, nullable)
- @classmethod
- def DECIMAL(cls, precision=10, scale=0, nullable=True):
+ @staticmethod
+ def DECIMAL(precision, scale, nullable=True):
+ """
+ Data type of a decimal number with fixed precision and scale.
+
+ :param precision: the number of digits in a number. It must have a value
+ between 1 and 38 (both inclusive).
+ :param scale: the number of digits on right side of dot. It must have
+ a value between 0 and precision (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return DecimalType(precision, scale, nullable)
- @classmethod
- def TINYINT(cls, nullable=True):
+ @staticmethod
+ def TINYINT(nullable=True):
+ """
+ Data type of a 1-byte signed integer with values from -128 to 127.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return TinyIntType(nullable)
- @classmethod
- def SMALLINT(cls, nullable=True):
+ @staticmethod
+ def SMALLINT(nullable=True):
+ """
+ Data type of a 2-byte signed integer with values from -32,768 to 32,767.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return SmallIntType(nullable)
- @classmethod
- def INT(cls, nullable=True):
+ @staticmethod
+ def INT(nullable=True):
+ """
+ Data type of a 4-byte signed integer with values from -2,147,483,648
+ to 2,147,483,647.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return IntType(nullable)
- @classmethod
- def BIGINT(cls, nullable=True):
+ @staticmethod
+ def BIGINT(nullable=True):
+ """
+ Data type of an 8-byte signed integer with values from
+ -9,223,372,036,854,775,808 to 9,223,372,036,854,775,807.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return BigIntType(nullable)
- @classmethod
- def FLOAT(cls, nullable=True):
+ @staticmethod
+ def FLOAT(nullable=True):
+ """
+ Data type of a 4-byte single precision floating point number.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return FloatType(nullable)
- @classmethod
- def DOUBLE(cls, nullable=True):
+ @staticmethod
+ def DOUBLE(nullable=True):
+ """
+ Data type of an 8-byte double precision floating point number.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return DoubleType(nullable)
- @classmethod
- def DATE(cls, nullable=True):
+ @staticmethod
+ def DATE(nullable=True):
+ """
+ Data type of a date consisting of year-month-day with values ranging
+ from ``0000-01-01`` to ``9999-12-31``.
+
+ Compared to the SQL standard, the range starts at year 0000.
+
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return DateType(nullable)
- @classmethod
- def TIME(cls, precision=0, nullable=True):
+ @staticmethod
+ def TIME(precision=0, nullable=True):
+ """
+ Data type of a time WITHOUT time zone.
+
+ An instance consists of hour:minute:second[.fractional] with up to nanosecond
+ precision and values ranging from ``00:00:00.000000000`` to ``23:59:59.999999999``.
+
+ Compared to the SQL standard, leap seconds (23:59:60 and 23:59:61)
+ are not supported.
+
+ :param precision: int, the number of digits of fractional seconds. It must
+ have a value between 0 and 9 (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return TimeType(precision, nullable)
- @classmethod
- def TIMESTAMP(cls, kind=TimestampKind.REGULAR, precision=6, nullable=True):
- return TimestampType(kind, precision, nullable)
+ @staticmethod
+ def TIMESTAMP(precision=6, nullable=True):
+ """
+ Data type of a timestamp WITHOUT time zone.
- @classmethod
- def ARRAY(cls, element_type, nullable=True):
+ An instance consists of year-month-day hour:minute:second[.fractional]
+ with up to nanosecond precision and values ranging from
+ ``0000-01-01 00:00:00.000000000`` to ``9999-12-31 23:59:59.999999999``.
+
+ Compared to the SQL standard, leap seconds (``23:59:60`` and ``23:59:61``)
+ are not supported.
+
+ :param precision: int, the number of digits of fractional seconds.
+ It must have a value between 0 and 9 (both inclusive).
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
+ return TimestampType(precision, nullable)
+
+ @staticmethod
+ def ARRAY(element_type, nullable=True):
+ """
+ Data type of an array of elements with same subtype.
+
+ Compared to the SQL standard, the maximum cardinality of an array cannot
+ be specified but is fixed at 2147483647(0x7fffffff). Also, any valid
+ type is supported as a subtype.
+
+ :param element_type: :class:`DataType` of each element in the array.
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return ArrayType(element_type, nullable)
- @classmethod
- def MAP(cls, key_type, value_type, nullable=True):
+ @staticmethod
+ def MAP(key_type, value_type, nullable=True):
+ """
+ Data type of an associative array that maps keys to values. A map
+ cannot contain duplicate keys; each key can map to at most one value.
+
+ There is no restriction of key types; it is the responsibility of the
+ user to ensure uniqueness. The map type is an extension to the SQL standard.
+
+ :param key_type: :class:`DataType` of the keys in the map.
+ :param value_type: :class:`DataType` of the values in the map.
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return MapType(key_type, value_type, nullable)
- @classmethod
- def MULTISET(cls, element_type, nullable=True):
+ @staticmethod
+ def MULTISET(element_type, nullable=True):
+ """
+ Data type of a multiset (=bag). Unlike a set, it allows for multiple
+ instances for each of its elements with a common subtype. Each unique
+ value is mapped to some multiplicity.
+
+ There is no restriction of element types; it is the responsibility
+ of the user to ensure uniqueness.
+
+ :param element_type: :class:`DataType` of each element in the multiset.
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return MultisetType(element_type, nullable)
- @classmethod
- def ROW(cls, row_fields=[], nullable=True):
+ @staticmethod
+ def ROW(row_fields=[], nullable=True):
+ """
+ Data type of a sequence of fields. A field consists of a field name,
+ field type, and an optional description. The most specific type of
+ a row of a table is a row type. In this case, each column of the row
+ corresponds to the field of the row type that has the same ordinal
+ position as the column.
+
+ Compared to the SQL standard, an optional field description simplifies
+ the handling with complex structures.
+
+ :param row_fields: a list of row field types which can be created via
+ :func:`DataTypes.FIELD`.
+ :param nullable: boolean, whether the type can be null (None) or not.
+ """
return RowType(row_fields, nullable)
- @classmethod
- def FIELD(cls, name, data_type, description=None):
+ @staticmethod
+ def FIELD(name, data_type, description=None):
+ """
+ Field definition with field name, data type, and a description.
+
+ :param name: string, name of the field.
+ :param data_type: :class:`DataType` of the field.
+ :param description: string, description of the field.
+ """
return RowField(name, data_type, description)
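As a closing illustration (not part of the commit), a short sketch of the aligned factory methods in use; the field names are made up:

    from pyflink.table.types import DataTypes

    # Illustrative schema built with the factory methods documented above.
    schema = DataTypes.ROW([
        DataTypes.FIELD("id", DataTypes.BIGINT()),
        DataTypes.FIELD("name", DataTypes.STRING()),        # VARCHAR(2147483647)
        DataTypes.FIELD("price", DataTypes.DECIMAL(10, 2)), # precision and scale required
        DataTypes.FIELD("tags", DataTypes.ARRAY(DataTypes.STRING())),
        DataTypes.FIELD("attrs", DataTypes.MAP(DataTypes.STRING(), DataTypes.INT())),
        DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)),      # precision must be in [0, 9]
    ])
    print(schema.field_names())  # ['id', 'name', 'price', 'tags', 'attrs', 'ts']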