This is an automated email from the ASF dual-hosted git repository.

haonan pushed a commit to branch numpy_tablet_none
in repository https://gitbox.apache.org/repos/asf/iotdb.git
commit 89a219b1c621f65f66ec59fbb0db466fa149b4e4
Author: HTHou <[email protected]>
AuthorDate: Mon Dec 19 14:45:39 2022 +0800

    NumpyTablet support null
---
 client-py/SessionExample.py          | 18 ++++++++++++++++
 client-py/iotdb/utils/NumpyTablet.py | 31 +++++++++++++++++++++-----
 client-py/tests/test_numpy_tablet.py | 42 ++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/client-py/SessionExample.py b/client-py/SessionExample.py
index a91b456ca0..372a4ea457 100644
--- a/client-py/SessionExample.py
+++ b/client-py/SessionExample.py
@@ -218,6 +218,24 @@ np_tablet_unsorted = NumpyTablet(
     np_values_unsorted,
     np_timestamps_unsorted,
 )
+
+# insert one numpy tablet into the database.
+np_values_ = [
+    np.array([False, True, False, True], TSDataType.BOOLEAN.np_dtype()),
+    np.array([10, 100, 100, 0], TSDataType.INT32.np_dtype()),
+    np.array([11, 11111, 1, 0], TSDataType.INT64.np_dtype()),
+    np.array([1.1, 1.25, 188.1, 0], TSDataType.FLOAT.np_dtype()),
+    np.array([10011.1, 101.0, 688.25, 6.25], TSDataType.DOUBLE.np_dtype()),
+    np.array(["test01", "test02", "test03", "test04"]),
+]
+np_timestamps_ = np.array([98, 99, 100, 101], TSDataType.INT64.np_dtype())
+np_tablet_with_none = NumpyTablet(
+    "root.sg_test_01.d_02", measurements_, data_types_, np_values_, np_timestamps_
+)
+np_tablet_with_none.mark_none_value(0,0)
+session.insert_tablet(np_tablet_with_none)
+
+
 session.insert_tablet(np_tablet_unsorted)
 print(np_tablet_unsorted.get_timestamps())
 for value in np_tablet_unsorted.get_values():
diff --git a/client-py/iotdb/utils/NumpyTablet.py b/client-py/iotdb/utils/NumpyTablet.py
index b81a172a40..20dc958943 100644
--- a/client-py/iotdb/utils/NumpyTablet.py
+++ b/client-py/iotdb/utils/NumpyTablet.py
@@ -38,6 +38,7 @@ class NumpyTablet(object):
         :param values: List of numpy array, the values of each column should be the inner numpy array
         :param timestamps: Numpy array, the timestamps
         """
+        self.bitmaps = None
         if len(values) > 0 and len(values[0]) != len(timestamps):
             raise RuntimeError(
                 "Input error! len(timestamps) does not equal to len(values[0])!"
@@ -116,6 +117,23 @@ class NumpyTablet(object):
                 bs = value.tobytes()
             bs_list.append(bs)
             bs_len += len(bs)
+        if self.bitmaps is not None:
+            format_str_list = [">"]
+            values_tobe_packed = []
+            for i in range(self.__column_number):
+                format_str_list.append("?")
+                if self.bitmaps[i] is None:
+                    values_tobe_packed.append(False)
+                else:
+                    values_tobe_packed.append(True)
+                    format_str_list.append(str(self.__row_number // 8 + 1))
+                    format_str_list.append("c")
+                    for j in range(self.__row_number // 8 + 1):
+                        values_tobe_packed.append(bytes([self.bitmaps[i].bits[j]]))
+            format_str = "".join(format_str_list)
+            bs = struct.pack(format_str, *values_tobe_packed)
+            bs_list.append(bs)
+            bs_len += len(bs)
         ret = memoryview(bytearray(bs_len))
         offset = 0
         for bs in bs_list:
@@ -124,8 +142,11 @@ class NumpyTablet(object):
             offset += _l
         return ret
 
-    def __mark_none_value(self, bitmaps, bitmap, column, row):
-        if bitmap is None:
-            bitmap = BitMap(self.__row_number)
-            bitmaps.insert(column, bitmap)
-        bitmap.mark(row)
+    def mark_none_value(self, column, row):
+        if self.bitmaps is None:
+            self.bitmaps = []
+            for i in range(self.__column_number):
+                self.bitmaps.append(None)
+        if self.bitmaps[column] is None:
+            self.bitmaps[column] = BitMap(self.__row_number)
+        self.bitmaps[column].mark(row)
diff --git a/client-py/tests/test_numpy_tablet.py b/client-py/tests/test_numpy_tablet.py
index b984193975..74fe5bfe73 100644
--- a/client-py/tests/test_numpy_tablet.py
+++ b/client-py/tests/test_numpy_tablet.py
@@ -59,6 +59,48 @@ def test_numpy_tablet_serialization():
     assert tablet_.get_binary_values() == np_tablet_.get_binary_values()
 
 
+def test_numpy_tablet_serialization2():
+
+    measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
+    data_types_ = [
+        TSDataType.BOOLEAN,
+        TSDataType.INT32,
+        TSDataType.INT64,
+        TSDataType.FLOAT,
+        TSDataType.DOUBLE,
+        TSDataType.TEXT,
+    ]
+    values_ = [
+        [None, 10, 11, 1.1, 10011.1, "test01"],
+        [True, None, 11111, 1.25, 101.0, "test02"],
+        [False, 100, None, 188.1, 688.25, "test03"],
+        [True, 0, 0, 0, None, None],
+    ]
+    timestamps_ = [16, 17, 18, 19]
+    tablet_ = Tablet(
+        "root.sg_test_01.d_01", measurements_, data_types_, values_, timestamps_
+    )
+    np_values_ = [
+        np.array([False, True, False, True], np.dtype(">?")),
+        np.array([10, 0, 100, 0], np.dtype(">i4")),
+        np.array([11, 11111, 0, 0], np.dtype(">i8")),
+        np.array([1.1, 1.25, 188.1, 0], np.dtype(">f4")),
+        np.array([10011.1, 101.0, 688.25, 0], np.dtype(">f8")),
+        np.array(["test01", "test02", "test03", ""]),
+    ]
+    np_timestamps_ = np.array([16, 17, 18, 19], np.dtype(">i8"))
+    np_tablet_ = NumpyTablet(
+        "root.sg_test_01.d_01", measurements_, data_types_, np_values_, np_timestamps_
+    )
+    np_tablet_.mark_none_value(0, 0)
+    np_tablet_.mark_none_value(1, 1)
+    np_tablet_.mark_none_value(2, 2)
+    np_tablet_.mark_none_value(4, 3)
+    np_tablet_.mark_none_value(5, 3)
+    assert tablet_.get_binary_timestamps() == np_tablet_.get_binary_timestamps()
+    assert tablet_.get_binary_values() == np_tablet_.get_binary_values()
+
+
 def test_sort_numpy_tablet():
 
     measurements_ = ["s_01", "s_02", "s_03", "s_04", "s_05", "s_06"]
