This is an automated email from the ASF dual-hosted git repository. mgreber pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/kudu.git
commit cec31d1e6cbeaf8bfadd85258a0c9cc8bfc5c39b Author: Marton Greber <[email protected]> AuthorDate: Mon Oct 6 15:33:52 2025 +0200 KUDU-1261 Python array datatype fixes - When setting an array, added reserve since the size is known beforehand. - Added support for decimal type: - If precision <= 9 → use Int32 SetArrayUnscaledDecimal. - Else If precision <= 18 → use Int64 SetArrayUnscaledDecimal. - Added type_to_string function to ColumnSchema (new addition). - Updated the Schema.__repr__ function to properly cover array types. - Added tests for stringification. - Updated test cases to cover decimal types. - Added a test case for schemas with array columns containing only None values. - Added decimal array tests for both Int32 and Int64 code paths. Change-Id: I77292b07a854b4f48a75d4e1293f302e8bad9129 Reviewed-on: http://gerrit.cloudera.org:8080/23494 Reviewed-by: Alexey Serbin <[email protected]> Reviewed-by: Gabriella Lotz <[email protected]> Tested-by: Marton Greber <[email protected]> --- python/kudu/client.pyx | 67 +++++++++++++++++++ python/kudu/libkudu_client.pxd | 20 ++++++ python/kudu/schema.pyx | 54 ++++++++++++--- python/kudu/tests/test_array_datatype.py | 111 ++++++++++++++++++++++++++++--- python/kudu/tests/test_schema.py | 85 +++++++++++++++++++++-- 5 files changed, 317 insertions(+), 20 deletions(-) diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx index 16e2ab3e0..041405d54 100644 --- a/python/kudu/client.pyx +++ b/python/kudu/client.pyx @@ -34,6 +34,7 @@ from kudu.errors cimport check_status from kudu.util import to_unixtime_micros, from_unixtime_micros, \ from_hybridtime, to_unscaled_decimal, from_unscaled_decimal, \ unix_epoch_days_to_date, date_to_unix_epoch_days +from decimal import Decimal from errors import KuduException import six @@ -1969,6 +1970,8 @@ cdef class Row: vector[double] cpp_data_double vector[Slice] cpp_data_slice vector[c_bool] cpp_validity + int8_t precision + int8_t scale size_t j list result @@ -2042,6 +2045,26 @@ cdef class Row: result.append(frombytes(cpp_data_slice[j].ToString()) if cpp_validity[j] else None) return result + elif elem_type == KUDU_DECIMAL: + # Determine element precision/scale from projection schema + precision = self.parent.batch.projection_schema().Column(i).type_attributes().precision() + scale = self.parent.batch.projection_schema().Column(i).type_attributes().scale() + + if precision <= 9: + check_status(self.row.GetArrayUnscaledDecimal(i, &cpp_data_int32, &cpp_validity)) + result = [] + for j in range(cpp_data_int32.size()): + result.append(from_unscaled_decimal(cpp_data_int32[j], scale) if cpp_validity[j] else None) + return result + elif precision <= 18: + check_status(self.row.GetArrayUnscaledDecimal(i, &cpp_data_int64, &cpp_validity)) + result = [] + for j in range(cpp_data_int64.size()): + result.append(from_unscaled_decimal(cpp_data_int64[j], scale) if cpp_validity[j] else None) + return result + else: + raise TypeError("Unsupported DECIMAL array precision: {0}".format(precision)) + elif elem_type == KUDU_UNIXTIME_MICROS: check_status(self.row.GetArrayUnixTimeMicros(i, &cpp_data_int64, &cpp_validity)) result = [] @@ -3337,10 +3360,17 @@ cdef class PartialRow: Slice slc bytes encoded_str DataType elem_type + int8_t precision + int8_t scale + ColumnSchema py_col + size_t array_len if not isinstance(value, (list, tuple)): raise TypeError("Array values must be a list or tuple, got {0}".format(type(value))) + array_len = len(value) + cpp_validity.reserve(array_len) + for elem in value: cpp_validity.push_back(elem is not None) @@ -3348,6 +3378,7 @@ cdef class PartialRow: elem_type = self._get_array_element_type(i) if elem_type == KUDU_BOOL: + cpp_values_bool.reserve(array_len) for elem in value: if elem is None: # Dummy value for NULL @@ -3357,6 +3388,7 @@ cdef class PartialRow: check_status(self.row.SetArrayBool(i, cpp_values_bool, cpp_validity)) elif elem_type == KUDU_INT8: + cpp_values_int8.reserve(array_len) for elem in value: if elem is None: cpp_values_int8.push_back(0) @@ -3365,6 +3397,7 @@ cdef class PartialRow: check_status(self.row.SetArrayInt8(i, cpp_values_int8, cpp_validity)) elif elem_type == KUDU_INT16: + cpp_values_int16.reserve(array_len) for elem in value: if elem is None: cpp_values_int16.push_back(0) @@ -3373,6 +3406,7 @@ cdef class PartialRow: check_status(self.row.SetArrayInt16(i, cpp_values_int16, cpp_validity)) elif elem_type == KUDU_INT32: + cpp_values_int32.reserve(array_len) for elem in value: if elem is None: cpp_values_int32.push_back(0) @@ -3381,6 +3415,7 @@ cdef class PartialRow: check_status(self.row.SetArrayInt32(i, cpp_values_int32, cpp_validity)) elif elem_type == KUDU_INT64: + cpp_values_int64.reserve(array_len) for elem in value: if elem is None: cpp_values_int64.push_back(0) @@ -3389,6 +3424,7 @@ cdef class PartialRow: check_status(self.row.SetArrayInt64(i, cpp_values_int64, cpp_validity)) elif elem_type == KUDU_FLOAT: + cpp_values_float.reserve(array_len) for elem in value: if elem is None: cpp_values_float.push_back(0.0) @@ -3397,6 +3433,7 @@ cdef class PartialRow: check_status(self.row.SetArrayFloat(i, cpp_values_float, cpp_validity)) elif elem_type == KUDU_DOUBLE: + cpp_values_double.reserve(array_len) for elem in value: if elem is None: cpp_values_double.push_back(0.0) @@ -3405,6 +3442,7 @@ cdef class PartialRow: check_status(self.row.SetArrayDouble(i, cpp_values_double, cpp_validity)) elif elem_type == KUDU_STRING: + cpp_values_slice.reserve(array_len) encoded_strings = [] for elem in value: if elem is None: @@ -3422,6 +3460,7 @@ cdef class PartialRow: check_status(self.row.SetArrayString(i, cpp_values_slice, cpp_validity)) elif elem_type == KUDU_BINARY: + cpp_values_slice.reserve(array_len) binary_data = [] for elem in value: if elem is None: @@ -3437,6 +3476,7 @@ cdef class PartialRow: check_status(self.row.SetArrayBinary(i, cpp_values_slice, cpp_validity)) elif elem_type == KUDU_VARCHAR: + cpp_values_slice.reserve(array_len) encoded_strings = [] for elem in value: if elem is None: @@ -3454,6 +3494,7 @@ cdef class PartialRow: check_status(self.row.SetArrayVarchar(i, cpp_values_slice, cpp_validity)) elif elem_type == KUDU_UNIXTIME_MICROS: + cpp_values_int64.reserve(array_len) for elem in value: if elem is None: cpp_values_int64.push_back(0) @@ -3462,6 +3503,7 @@ cdef class PartialRow: check_status(self.row.SetArrayUnixTimeMicros(i, cpp_values_int64, cpp_validity)) elif elem_type == KUDU_DATE: + cpp_values_int32.reserve(array_len) for elem in value: if elem is None: cpp_values_int32.push_back(0) @@ -3469,6 +3511,31 @@ cdef class PartialRow: val = date_to_unix_epoch_days(elem) cpp_values_int32.push_back(<int32_t>val) check_status(self.row.SetArrayDate(i, cpp_values_int32, cpp_validity)) + elif elem_type == KUDU_DECIMAL: + # Determine element precision/scale from schema + py_col = self.schema[i] + precision = py_col.type_attributes.precision + scale = py_col.type_attributes.scale + if precision <= 9: + cpp_values_int32.reserve(array_len) + for elem in value: + if elem is None: + cpp_values_int32.push_back(0) + else: + # Match scalar decimal semantics: rely on the value's own exponent + cpp_values_int32.push_back(<int32_t>to_unscaled_decimal(elem)) + check_status(self.row.SetArrayUnscaledDecimal(i, cpp_values_int32, cpp_validity)) + elif precision <= 18: + cpp_values_int64.reserve(array_len) + for elem in value: + if elem is None: + cpp_values_int64.push_back(0) + else: + # Match scalar decimal semantics: rely on the value's own exponent + cpp_values_int64.push_back(<int64_t>to_unscaled_decimal(elem)) + check_status(self.row.SetArrayUnscaledDecimal(i, cpp_values_int64, cpp_validity)) + else: + raise TypeError("Unsupported DECIMAL array precision: {0}".format(precision)) else: raise TypeError("Unsupported array element type: {0}".format(_type_names.get(elem_type, elem_type))) diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd index 6f1fd5f23..77dea2a3f 100644 --- a/python/kudu/libkudu_client.pxd +++ b/python/kudu/libkudu_client.pxd @@ -325,6 +325,16 @@ cdef extern from "kudu/client/scan_batch.h" namespace "kudu::client" nogil: Status GetArrayInt64(int col_idx, vector[int64_t]* data, vector[c_bool]* validity) + # DECIMAL array getters (unscaled values) + Status GetArrayUnscaledDecimal(const Slice& col_name, vector[int32_t]* data, + vector[c_bool]* validity) + Status GetArrayUnscaledDecimal(int col_idx, vector[int32_t]* data, + vector[c_bool]* validity) + Status GetArrayUnscaledDecimal(const Slice& col_name, vector[int64_t]* data, + vector[c_bool]* validity) + Status GetArrayUnscaledDecimal(int col_idx, vector[int64_t]* data, + vector[c_bool]* validity) + Status GetArrayFloat(const Slice& col_name, vector[float]* data, vector[c_bool]* validity) Status GetArrayFloat(int col_idx, vector[float]* data, @@ -495,6 +505,16 @@ cdef extern from "kudu/common/partial_row.h" namespace "kudu" nogil: Status SetArrayInt64(int col_idx, const vector[int64_t]& val, const vector[c_bool]& validity) + # DECIMAL array setters (unscaled values) + Status SetArrayUnscaledDecimal(const Slice& col_name, const vector[int32_t]& val, + const vector[c_bool]& validity) + Status SetArrayUnscaledDecimal(int col_idx, const vector[int32_t]& val, + const vector[c_bool]& validity) + Status SetArrayUnscaledDecimal(const Slice& col_name, const vector[int64_t]& val, + const vector[c_bool]& validity) + Status SetArrayUnscaledDecimal(int col_idx, const vector[int64_t]& val, + const vector[c_bool]& validity) + Status SetArrayFloat(const Slice& col_name, const vector[float]& val, const vector[c_bool]& validity) Status SetArrayFloat(int col_idx, const vector[float]& val, diff --git a/python/kudu/schema.pyx b/python/kudu/schema.pyx index bc98dfe32..8714aceeb 100644 --- a/python/kudu/schema.pyx +++ b/python/kudu/schema.pyx @@ -367,6 +367,46 @@ cdef class ColumnSchema: return False return self.schema.Equals(deref((<ColumnSchema> other).schema)) + def type_to_string(self): + """ + Return a string representation of the column type including nullability. + For arrays, includes element type and '1D-ARRAY' marker. + Examples: 'INT32 NOT NULL', 'STRING 1D-ARRAY NULLABLE', 'DECIMAL(8, 2) 1D-ARRAY NOT NULL' + """ + cdef: + const KuduNestedTypeDescriptor* nested_desc + const KuduArrayTypeDescriptor* array_desc + DataType elem_type + + type_name = self.type.name.upper() + nullable_str = "NULLABLE" if self.nullable else "NOT NULL" + + if self.schema.type() == KUDU_NESTED: + nested_desc = self.schema.nested_type() + if nested_desc != NULL and nested_desc.is_array(): + array_desc = nested_desc.array() + if array_desc != NULL: + elem_type = array_desc.type() + elem_type_name = _type_names.get(elem_type, 'UNKNOWN').replace('KUDU_', '').upper() + type_attrs = self.type_attributes + + if elem_type == KUDU_DECIMAL: + return '{0}({1}, {2}) 1D-ARRAY {3}'.format( + elem_type_name, type_attrs.precision, type_attrs.scale, nullable_str) + elif elem_type == KUDU_VARCHAR: + return '{0}({1}) 1D-ARRAY {2}'.format( + elem_type_name, type_attrs.length, nullable_str) + else: + return '{0} 1D-ARRAY {1}'.format(elem_type_name, nullable_str) + + type_attrs = self.type_attributes + if self.schema.type() == KUDU_DECIMAL: + return '{0}({1}, {2}) {3}'.format(type_name, type_attrs.precision, type_attrs.scale, nullable_str) + elif self.schema.type() == KUDU_VARCHAR: + return '{0}({1}) {2}'.format(type_name, type_attrs.length, nullable_str) + else: + return '{0} {1}'.format(type_name, nullable_str) + def __repr__(self): return ('ColumnSchema(name=%s, type=%s, nullable=%s)' % (self.name, self.type.name, @@ -900,25 +940,23 @@ cdef class Schema: return result def __repr__(self): + # Python-style schema representation following Python best practices. # Got to be careful with huge schemas, maybe some kind of summary repr # when more than 20-30 columns? - buf = six.StringIO() + lines = [] col_names = self.names space = 2 + max(len(x) for x in col_names) for i in range(len(self)): col = self.at(i) - not_null = '' if col.nullable else ' NOT NULL' - - buf.write('\n{0}{1}{2}' - .format(col.name.ljust(space), - col.type.name, not_null)) + lines.append(' {0}{1}'.format(col.name.ljust(space), + col.type_to_string())) pk_string = ', '.join(col_names[i] for i in self.primary_key_indices()) - buf.write('\nPRIMARY KEY ({0})'.format(pk_string)) + lines.append(' PRIMARY KEY ({0})'.format(pk_string)) - return "kudu.Schema {{{0}\n}}".format(util.indent(buf.getvalue(), 2)) + return "kudu.Schema {{\n{0}\n}}".format('\n'.join(lines)) def __len__(self): return self.schema.num_columns() diff --git a/python/kudu/tests/test_array_datatype.py b/python/kudu/tests/test_array_datatype.py index 31485808f..dbf69f7b6 100644 --- a/python/kudu/tests/test_array_datatype.py +++ b/python/kudu/tests/test_array_datatype.py @@ -20,6 +20,7 @@ from kudu.compat import CompatUnitTest from kudu.tests.common import KuduTestBase from kudu.client import Partitioning import datetime +from decimal import Decimal from pytz import utc class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): @@ -42,11 +43,7 @@ class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): # Types that require special parameters SPECIAL_PARAM_TYPES = [ ('varchar', kudu.varchar, {'length': 50}), - # TODO: Decimal arrays out of scope for this patch - # - C++ API has overloaded methods (int32/int64) but no int128 version - # - Regular decimals use int128, creating API inconsistency - # - Will be addressed in future patch with proper int128 support - # ('decimal', kudu.decimal, {'precision': 8, 'scale': 2}), + ('decimal', kudu.decimal, {'precision': 8, 'scale': 2}), ] @classmethod @@ -115,6 +112,7 @@ class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): ('arr_unixtime_micros', [v if v is None else datetime.datetime(2020, 1, min(v, 28), tzinfo=utc) for v in values]), ('arr_date', [v if v is None else datetime.date(2020, 1, min(v, 28)) for v in values]), ('arr_varchar', [v if v is None else 'varchar{0}'.format(v) for v in values]), + ('arr_decimal', [None if v is None else Decimal(v / 100).quantize(Decimal('0.01')) for v in values]), ] def test_insert_all_array_types(self): @@ -218,13 +216,11 @@ class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): insert['arr_string'] = ['row', str(row_id)] insert['arr_double'] = [row_id * 1.1] insert['arr_bool'] = [True] - remaining_types = ['int8', 'int16', 'int32', 'float', 'binary', 'unixtime_micros', 'date'] + remaining_types = ['int8', 'int16', 'int32', 'float', 'binary', 'unixtime_micros', 'date', 'decimal'] for type_name in remaining_types: col_name = 'arr_' + type_name insert[col_name] = [] insert['arr_varchar'] = [] - # TODO: Add decimal arrays once Cython overloading issue is resolved - # insert['arr_decimal'] = [] session.apply(insert) session.flush() @@ -259,6 +255,7 @@ class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): col_name = 'arr_' + type_name insert[col_name] = [] insert['arr_varchar'] = [] + insert['arr_decimal'] = [] session.apply(insert) session.flush() @@ -313,3 +310,101 @@ class TestArrayDataTypeIntegration(KuduTestBase, CompatUnitTest): actual_array = row[idx] self.assertEqual(actual_array, expected_array, "Row {0} {1} mismatch".format(row_id, col_name)) + + def test_decimal32_array(self): + builder = kudu.schema_builder() + builder.add_column('key', kudu.int32, nullable=False).primary_key() + builder.add_column('decimal32_arr').nested_type(kudu.array_type(kudu.decimal)).precision(9).scale(4) + + schema = builder.build() + partitioning = Partitioning().set_range_partition_columns(['key']) + table_name = 'decimal32-array-test' + + if self.client.table_exists(table_name): + self.client.delete_table(table_name) + + self.client.create_table(table_name, schema, partitioning) + table = self.client.table(table_name) + session = self.client.new_session() + + mixed_values = [ + Decimal('12345.6789'), + Decimal('0.0001'), + Decimal('-9999.9999'), + None, + Decimal('0.0000'), + ] + all_nulls = [None, None, None] + + insert1 = table.new_insert() + insert1['key'] = 1 + insert1['decimal32_arr'] = mixed_values + session.apply(insert1) + + insert2 = table.new_insert() + insert2['key'] = 2 + insert2['decimal32_arr'] = all_nulls + session.apply(insert2) + + session.flush() + + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + self.assertEqual(len(tuples), 2) + + for row in tuples: + if row[0] == 1: + self.assertEqual(row[1], mixed_values) + elif row[0] == 2: + self.assertEqual(row[1], all_nulls) + + self.client.delete_table(table_name) + + def test_decimal64_array(self): + builder = kudu.schema_builder() + builder.add_column('key', kudu.int32, nullable=False).primary_key() + builder.add_column('decimal64_arr').nested_type(kudu.array_type(kudu.decimal)).precision(18).scale(6) + + schema = builder.build() + partitioning = Partitioning().set_range_partition_columns(['key']) + table_name = 'decimal64-array-test' + + if self.client.table_exists(table_name): + self.client.delete_table(table_name) + + self.client.create_table(table_name, schema, partitioning) + table = self.client.table(table_name) + session = self.client.new_session() + + mixed_values = [ + Decimal('123456789012.123456'), + Decimal('0.000001'), + Decimal('-999999999999.999999'), + None, + Decimal('1.000000'), + ] + all_nulls = [None, None, None, None] + + insert1 = table.new_insert() + insert1['key'] = 1 + insert1['decimal64_arr'] = mixed_values + session.apply(insert1) + + insert2 = table.new_insert() + insert2['key'] = 2 + insert2['decimal64_arr'] = all_nulls + session.apply(insert2) + + session.flush() + + scanner = table.scanner().open() + tuples = scanner.read_all_tuples() + self.assertEqual(len(tuples), 2) + + for row in tuples: + if row[0] == 1: + self.assertEqual(row[1], mixed_values) + elif row[0] == 2: + self.assertEqual(row[1], all_nulls) + + self.client.delete_table(table_name) diff --git a/python/kudu/tests/test_schema.py b/python/kudu/tests/test_schema.py index 6204da72c..6e37a7e53 100644 --- a/python/kudu/tests/test_schema.py +++ b/python/kudu/tests/test_schema.py @@ -24,6 +24,7 @@ import kudu from kudu.schema import Schema import datetime +from decimal import Decimal class TestSchema(CompatUnitTest): @@ -566,9 +567,26 @@ class TestArrayDataTypeSchema(CompatUnitTest): ] SPECIAL_PARAM_TYPES = [ - ('varchar', kudu.varchar, {'length': 50}) + ('varchar', kudu.varchar, {'length': 50}), + ('decimal', kudu.decimal, {'precision': 8, 'scale': 2}), ] + EXPECTED_ARRAY_TYPE_STRINGS = { + 'arr_int8': 'INT8 1D-ARRAY NULLABLE', + 'arr_int16': 'INT16 1D-ARRAY NULLABLE', + 'arr_int32': 'INT32 1D-ARRAY NULLABLE', + 'arr_int64': 'INT64 1D-ARRAY NULLABLE', + 'arr_float': 'FLOAT 1D-ARRAY NULLABLE', + 'arr_double': 'DOUBLE 1D-ARRAY NULLABLE', + 'arr_bool': 'BOOL 1D-ARRAY NULLABLE', + 'arr_string': 'STRING 1D-ARRAY NULLABLE', + 'arr_binary': 'BINARY 1D-ARRAY NULLABLE', + 'arr_unixtime_micros': 'UNIXTIME_MICROS 1D-ARRAY NULLABLE', + 'arr_date': 'DATE 1D-ARRAY NULLABLE', + 'arr_varchar': 'VARCHAR(50) 1D-ARRAY NULLABLE', + 'arr_decimal': 'DECIMAL(8, 2) 1D-ARRAY NULLABLE', + } + def test_array_type_descriptors_all_types(self): for type_name, kudu_type in self.SUPPORTED_ARRAY_TYPES: arr = kudu.array_type(kudu_type) @@ -606,8 +624,30 @@ class TestArrayDataTypeSchema(CompatUnitTest): schema = builder.build() # Verify schema structure - # 3 scalar + 11 basic arrays + 1 special array (varchar) - self.assertEqual(len(schema), 15) + # 3 scalar + 11 basic arrays + 2 special arrays (varchar, decimal) + self.assertEqual(len(schema), 16) + + # Verify complete schema string representation + expected_schema_str = """kudu.Schema { + id INT32 NOT NULL + name STRING NULLABLE + age INT32 NULLABLE + arr_int8 INT8 1D-ARRAY NULLABLE + arr_int16 INT16 1D-ARRAY NULLABLE + arr_int32 INT32 1D-ARRAY NULLABLE + arr_int64 INT64 1D-ARRAY NULLABLE + arr_float FLOAT 1D-ARRAY NULLABLE + arr_double DOUBLE 1D-ARRAY NULLABLE + arr_bool BOOL 1D-ARRAY NULLABLE + arr_string STRING 1D-ARRAY NULLABLE + arr_binary BINARY 1D-ARRAY NULLABLE + arr_unixtime_micros UNIXTIME_MICROS 1D-ARRAY NULLABLE + arr_date DATE 1D-ARRAY NULLABLE + arr_varchar VARCHAR(50) 1D-ARRAY NULLABLE + arr_decimal DECIMAL(8, 2) 1D-ARRAY NULLABLE + PRIMARY KEY (id) +}""" + self.assertEqual(str(schema), expected_schema_str) self.assertEqual(schema[0].name, 'id') self.assertEqual(schema[0].type.name, 'int32') @@ -628,11 +668,22 @@ class TestArrayDataTypeSchema(CompatUnitTest): self.assertEqual(col.type.name, 'nested') # Arrays nullable by default self.assertTrue(col.nullable) + # Verify type_to_string matches expected format + self.assertEqual(col.type_to_string(), self.EXPECTED_ARRAY_TYPE_STRINGS[expected_name]) - # Verify special parameter types + # Verify special parameter types (with element type attributes) varchar_col = schema[14] self.assertEqual(varchar_col.name, 'arr_varchar') self.assertEqual(varchar_col.type.name, 'nested') + self.assertEqual(varchar_col.type_attributes.length, 50) + self.assertEqual(varchar_col.type_to_string(), 'VARCHAR(50) 1D-ARRAY NULLABLE') + + decimal_col = schema[15] + self.assertEqual(decimal_col.name, 'arr_decimal') + self.assertEqual(decimal_col.type.name, 'nested') + self.assertEqual(decimal_col.type_attributes.precision, 8) + self.assertEqual(decimal_col.type_attributes.scale, 2) + self.assertEqual(decimal_col.type_to_string(), 'DECIMAL(8, 2) 1D-ARRAY NULLABLE') def test_array_schema_introspection_and_writing(self): builder = kudu.schema_builder() @@ -670,6 +721,7 @@ class TestArrayDataTypeSchema(CompatUnitTest): ('arr_unixtime_micros', [datetime.datetime(2020, 1, 1), datetime.datetime(2020, 1, 2), None]), ('arr_date', [datetime.date(2020, 1, 1), datetime.date(2020, 1, 2), None]), ('arr_varchar', ['short', 'text', None]), + ('arr_decimal', [Decimal('1.23'), Decimal('4.56'), None]), ] for col_name, data in test_data: @@ -693,9 +745,34 @@ class TestArrayDataTypeSchema(CompatUnitTest): ('arr_unixtime_micros', []), ('arr_date', []), ('arr_varchar', []), + ('arr_decimal', []), ] for col_name, data in empty_test_data: row2[col_name] = data self.assertIsNotNone(row2) + + row3 = schema.new_row() + row3['key'] = 3 + + null_test_data = [ + ('arr_int8', None), + ('arr_int16', None), + ('arr_int32', None), + ('arr_int64', None), + ('arr_float', None), + ('arr_double', None), + ('arr_bool', None), + ('arr_string', None), + ('arr_binary', None), + ('arr_unixtime_micros', None), + ('arr_date', None), + ('arr_varchar', None), + ('arr_decimal', None), + ] + + for col_name, data in null_test_data: + row3[col_name] = data + + self.assertIsNotNone(row3)
