KUDU-1690 - [python] Enable setting of default values. This patch enables the Python client to set default column values when adding columns to a SchemaBuilder. Some KuduValue logic needed to be decoupled from scan predicates to enable this. Existing tests were modified for this patch.
Change-Id: I78008c5ca3646e0b1fdf47bbc0bc14af1b88166e Reviewed-on: http://gerrit.cloudera.org:8080/4818 Reviewed-by: Jean-Daniel Cryans <[email protected]> Tested-by: Kudu Jenkins Project: http://git-wip-us.apache.org/repos/asf/kudu/repo Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/8389f482 Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/8389f482 Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/8389f482 Branch: refs/heads/master Commit: 8389f482b4a8eb8464781bda967537da608765a9 Parents: 0f87b04 Author: Jordan Birdsell <[email protected]> Authored: Sun Oct 23 16:02:34 2016 -0400 Committer: Jean-Daniel Cryans <[email protected]> Committed: Mon Nov 7 20:24:27 2016 +0000 ---------------------------------------------------------------------- python/kudu/client.pyx | 42 +++++---------------------- python/kudu/libkudu_client.pxd | 18 ++++++------ python/kudu/schema.pxd | 7 +++++ python/kudu/schema.pyx | 58 +++++++++++++++++++++++++++++++++++-- python/kudu/tests/common.py | 2 +- python/kudu/tests/util.py | 10 +++++-- 6 files changed, 87 insertions(+), 50 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/client.pyx ---------------------------------------------------------------------- diff --git a/python/kudu/client.pyx b/python/kudu/client.pyx index 0977b30..5cfb5b7 100644 --- a/python/kudu/client.pyx +++ b/python/kudu/client.pyx @@ -27,7 +27,7 @@ from cython.operator cimport dereference as deref from libkudu_client cimport * from kudu.compat import tobytes, frombytes, dict_iter -from kudu.schema cimport Schema, ColumnSchema +from kudu.schema cimport Schema, ColumnSchema, KuduValue, KuduType from kudu.errors cimport check_status from kudu.util import to_unixtime_micros, from_unixtime_micros, from_hybridtime from errors import KuduException @@ -888,38 +888,10 @@ cdef class Column: self.spec.type.name)) return result - cdef KuduValue* 
box_value(self, object obj) except NULL: - cdef: - KuduValue* val - Slice slc - - if (self.spec.type.name[:3] == 'int'): - val = KuduValue.FromInt(obj) - elif (self.spec.type.name in ['string', 'binary']): - if isinstance(obj, unicode): - obj = obj.encode('utf8') - - slc = Slice(<char*> obj, len(obj)) - val = KuduValue.CopyString(slc) - elif (self.spec.type.name == 'bool'): - val = KuduValue.FromBool(obj) - elif (self.spec.type.name == 'float'): - val = KuduValue.FromFloat(obj) - elif (self.spec.type.name == 'double'): - val = KuduValue.FromDouble(obj) - elif (self.spec.type.name == 'unixtime_micros'): - obj = to_unixtime_micros(obj) - val = KuduValue.FromInt(obj) - else: - raise TypeError("Cannot add predicate for kudu type <{0}>" - .format(self.spec.type.name)) - - return val - def __richcmp__(Column self, value, int op): cdef: KuduPredicate* pred - KuduValue* val + KuduValue val Slice col_name_slice ComparisonOp cmp_op Predicate result @@ -939,10 +911,10 @@ cdef class Column: else: raise NotImplementedError - val = self.box_value(value) + val = self.spec.type.new_value(value) pred = (self.parent.ptr() .NewComparisonPredicate(col_name_slice, - cmp_op, val)) + cmp_op, val._value)) result = Predicate() result.init(pred) @@ -968,7 +940,8 @@ cdef class Column: """ cdef: KuduPredicate* pred - vector[KuduValue*] vals + KuduValue kval + vector[C_KuduValue*] vals Slice col_name_slice Predicate result object _name = tobytes(self.name) @@ -977,7 +950,8 @@ cdef class Column: try: for val in values: - vals.push_back(self.box_value(val)) + kval = self.spec.type.new_value(val) + vals.push_back(kval._value) except TypeError: while not vals.empty(): _val = vals.back() http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/libkudu_client.pxd ---------------------------------------------------------------------- diff --git a/python/kudu/libkudu_client.pxd b/python/kudu/libkudu_client.pxd index 9f2bf09..b8cbb60 100644 --- a/python/kudu/libkudu_client.pxd +++ 
b/python/kudu/libkudu_client.pxd @@ -170,7 +170,7 @@ cdef extern from "kudu/client/schema.h" namespace "kudu::client" nogil: cdef cppclass KuduColumnSpec: - KuduColumnSpec* Default(KuduValue* value) + KuduColumnSpec* Default(C_KuduValue* value) KuduColumnSpec* RemoveDefault() KuduColumnSpec* Compression(CompressionType compression) @@ -436,21 +436,21 @@ cdef extern from "kudu/client/scan_predicate.h" namespace "kudu::client" nogil: cdef extern from "kudu/client/value.h" namespace "kudu::client" nogil: - cdef cppclass KuduValue: + cdef cppclass C_KuduValue "kudu::client::KuduValue": @staticmethod - KuduValue* FromInt(int64_t val); + C_KuduValue* FromInt(int64_t val); @staticmethod - KuduValue* FromFloat(float val); + C_KuduValue* FromFloat(float val); @staticmethod - KuduValue* FromDouble(double val); + C_KuduValue* FromDouble(double val); @staticmethod - KuduValue* FromBool(c_bool val); + C_KuduValue* FromBool(c_bool val); @staticmethod - KuduValue* CopyString(const Slice& s); + C_KuduValue* CopyString(const Slice& s); cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: @@ -571,9 +571,9 @@ cdef extern from "kudu/client/client.h" namespace "kudu::client" nogil: KuduPredicate* NewComparisonPredicate(const Slice& col_name, ComparisonOp op, - KuduValue* value); + C_KuduValue* value); KuduPredicate* NewInListPredicate(const Slice& col_name, - vector[KuduValue*]* values) + vector[C_KuduValue*]* values) KuduClient* client() # const PartitionSchema& partition_schema() http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/schema.pxd ---------------------------------------------------------------------- diff --git a/python/kudu/schema.pxd b/python/kudu/schema.pxd index b70f8ad..c1cfc2e 100644 --- a/python/kudu/schema.pxd +++ b/python/kudu/schema.pxd @@ -38,6 +38,7 @@ cdef class ColumnSchema: cdef class ColumnSpec: cdef: KuduColumnSpec* spec + KuduType _type cdef class SchemaBuilder: @@ -57,3 +58,9 @@ cdef class Schema: cdef inline DataType 
loc_type(self, int i): return self.schema.Column(i).type() + + +cdef class KuduValue: + cdef: + C_KuduValue* _value + http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/schema.pyx ---------------------------------------------------------------------- diff --git a/python/kudu/schema.pyx b/python/kudu/schema.pyx index 69de477..f24d42b 100644 --- a/python/kudu/schema.pyx +++ b/python/kudu/schema.pyx @@ -24,6 +24,7 @@ from kudu.compat import tobytes, frombytes from kudu.schema cimport * from kudu.errors cimport check_status from kudu.client cimport PartialRow +from kudu.util import to_unixtime_micros import six @@ -101,6 +102,9 @@ cdef class KuduType(object): def __repr__(self): return 'KuduType({0})'.format(self.name) + def new_value(self, value): + return KuduValue(self, value) + int8 = KuduType(KUDU_INT8) int16 = KuduType(KUDU_INT16) @@ -204,14 +208,24 @@ cdef class ColumnSpec: """ def type(self, type_): - self.spec.Type(to_data_type(type_).type) + self._type = to_data_type(type_) + self.spec.Type(self._type.type) return self def default(self, value): """ Set a default value for the column """ - raise NotImplementedError + cdef: + KuduValue kval + + if not self._type: + raise ValueError("You must set the Column type before setting " + + "the default value.") + else: + kval = self._type.new_value(value) + self.spec.Default(kval._value) + return self def clear_default(self): """ @@ -370,7 +384,8 @@ cdef class SchemaBuilder: colschema.nullable) def add_column(self, name, type_=None, nullable=None, compression=None, - encoding=None, primary_key=False, block_size=None): + encoding=None, primary_key=False, block_size=None, + default= None): """ Add a new column to the schema. Returns a ColumnSpec object for further configuration and use in a fluid programming style. @@ -393,6 +408,8 @@ cdef class SchemaBuilder: Use this column as the table primary key block_size : int, optional Block size (in bytes) to use for the target column. 
+ default : obj + Use this to set the column default value Examples -------- @@ -428,6 +445,9 @@ cdef class SchemaBuilder: if block_size: result.block_size(block_size) + if default: + result.default(default) + return result def set_primary_keys(self, key_names): @@ -621,3 +641,35 @@ cdef class Schema: """ indices = self.primary_key_indices() return [self.at(i).name for i in indices] + + +cdef class KuduValue: + + def __cinit__(self, KuduType type_, value): + cdef: + Slice slc + + if (type_.name[:3] == 'int'): + self._value = C_KuduValue.FromInt(value) + elif (type_.name in ['string', 'binary']): + if isinstance(value, unicode): + value = value.encode('utf8') + + slc = Slice(<char*> value, len(value)) + self._value = C_KuduValue.CopyString(slc) + elif (type_.name == 'bool'): + self._value = C_KuduValue.FromBool(value) + elif (type_.name == 'float'): + self._value = C_KuduValue.FromFloat(value) + elif (type_.name == 'double'): + self._value = C_KuduValue.FromDouble(value) + elif (type_.name == 'unixtime_micros'): + value = to_unixtime_micros(value) + self._value = C_KuduValue.FromInt(value) + else: + raise TypeError("Cannot initialize KuduValue for kudu type <{0}>" + .format(type_.name)) + + def __dealloc__(self): + # We don't own this. 
+ pass http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/tests/common.py ---------------------------------------------------------------------- diff --git a/python/kudu/tests/common.py b/python/kudu/tests/common.py index 69738e8..1b606aa 100644 --- a/python/kudu/tests/common.py +++ b/python/kudu/tests/common.py @@ -165,7 +165,7 @@ class KuduTestBase(object): builder = kudu.schema_builder() builder.add_column('key', kudu.int32, nullable=False) builder.add_column('int_val', kudu.int32) - builder.add_column('string_val', kudu.string) + builder.add_column('string_val', kudu.string, default='nothing') builder.add_column('unixtime_micros_val', kudu.unixtime_micros) builder.set_primary_keys(['key']) http://git-wip-us.apache.org/repos/asf/kudu/blob/8389f482/python/kudu/tests/util.py ---------------------------------------------------------------------- diff --git a/python/kudu/tests/util.py b/python/kudu/tests/util.py index 2783111..be7f6d8 100644 --- a/python/kudu/tests/util.py +++ b/python/kudu/tests/util.py @@ -50,15 +50,19 @@ class TestScanBase(KuduTestBase, unittest.TestCase): op['int_val'] = tup[1] if i % 2 == 0: op['string_val'] = tup[2] - elif i % 3 == 0: - op['string_val'] = None op['unixtime_micros_val'] = tup[3] session.apply(op) tuples.append(tup) session.flush() self.table = table - self.tuples = tuples + self.tuples = [] + + # Replace missing values w/ defaults to test default values. + for tuple in tuples: + if tuple[2] == None: + tuple = (tuple[0], tuple[1], 'nothing', tuple[3]) + self.tuples.append(tuple) # Create table to test all types # for various predicate tests
