This is an automated email from the ASF dual-hosted git repository.
haonan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iotdb.git
The following commit(s) were added to refs/heads/master by this push:
new d1f2ce3792 [CLIENT-PY]Optimize the performance of filling null values (#6555)
d1f2ce3792 is described below
commit d1f2ce3792bbbbe9c831f4ebeed6b234bee72125
Author: Wei Fu <[email protected]>
AuthorDate: Mon Jul 11 12:06:01 2022 +0800
[CLIENT-PY]Optimize the performance of filling null values (#6555)
---
client-py/iotdb/utils/IoTDBRpcDataSet.py | 36 ++++++++++++++++++--------------
1 file changed, 20 insertions(+), 16 deletions(-)
diff --git a/client-py/iotdb/utils/IoTDBRpcDataSet.py b/client-py/iotdb/utils/IoTDBRpcDataSet.py
index 67b907140c..de0fe7728c 100644
--- a/client-py/iotdb/utils/IoTDBRpcDataSet.py
+++ b/client-py/iotdb/utils/IoTDBRpcDataSet.py
@@ -243,34 +243,38 @@ class IoTDBRpcDataSet(object):
if len(data_array) < total_length:
if data_type == TSDataType.INT32 or data_type ==
TSDataType.INT64:
tmp_array = np.full(total_length, np.nan, np.float32)
- if data_array.dtype == np.int32:
- tmp_array = pd.Series(tmp_array).astype("Int32")
- else:
- tmp_array = pd.Series(tmp_array).astype("Int64")
- elif (
- data_type == TSDataType.FLOAT or data_type ==
TSDataType.DOUBLE
- ):
+ elif data_type == TSDataType.FLOAT or data_type ==
TSDataType.DOUBLE:
tmp_array = np.full(total_length, np.nan,
data_array.dtype)
elif data_type == TSDataType.BOOLEAN:
tmp_array = np.full(total_length, np.nan, np.float32)
- tmp_array = pd.Series(tmp_array).astype("boolean")
elif data_type == TSDataType.TEXT:
tmp_array = np.full(total_length, None,
dtype=data_array.dtype)
+
bitmap_buffer = self.__query_data_set.bitmapList[location]
bitmap_str = self._to_bitstring(bitmap_buffer)
- j = 0
- for index in range(total_length):
- if bitmap_str[index] == "1":
- tmp_array[index] = data_array[j]
- j += 1
+ bit_mask = (np.fromstring(bitmap_str, 'u1') -
ord('0')).astype(np.bool)
+ if len(bit_mask) != total_length:
+ bit_mask = bit_mask[:total_length]
+ tmp_array[bit_mask] = data_array
+
+ if data_type == TSDataType.INT32:
+ tmp_array = pd.Series(tmp_array).astype("Int32")
+ elif data_type == TSDataType.INT64:
+ tmp_array = pd.Series(tmp_array).astype("Int64")
+ elif data_type == TSDataType.BOOLEAN:
+ tmp_array = pd.Series(tmp_array).astype("boolean")
+
data_array = tmp_array
if result[column_name] is None:
result[column_name] = data_array
else:
- result[column_name] = np.concatenate(
- (result[column_name], data_array), axis=0
- )
+ if isinstance(result[column_name], pd.Series):
+ result[column_name] =
result[column_name].append(data_array)
+ else:
+ result[column_name] = np.concatenate(
+ (result[column_name], data_array), axis=0
+ )
for k, v in result.items():
if v is None:
result[k] = []