This is an automated email from the ASF dual-hosted git repository.
jiangtian pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/tsfile.git
The following commit(s) were added to refs/heads/develop by this push:
new 956d5982 implement RLE DICTIONARY ZIGZAG codec (#542)
956d5982 is described below
commit 956d59822ac1b95995b5c18894e98c4b3ea9eb0b
Author: Hongzhi Gao <[email protected]>
AuthorDate: Thu Jul 17 09:40:31 2025 +0800
implement RLE DICTIONARY ZIGZAG codec (#542)
* Implement datatype TIMESTAMP BLOB TEXT
* format cpp code style
* format code style
* fix compilation error
* fix compilation error
* fix compilation error
* fix memory leak
* format code style
* implement zigzag codec
* implement dictionary codec AND fix zigzag codec
* add some unittests
* implement rle codec
* Added some notes
* add license
* fix rle decoder
* fix compilation error
* fix some code comments
* removed unused class "Fire"(for sprintz)
* fix some codec issues
* fix some unittests
---
cpp/src/common/allocator/my_string.h | 8 +-
cpp/src/common/container/byte_buffer.h | 2 +
cpp/src/common/datatype/date_converter.h | 1 +
cpp/src/common/tablet.cc | 12 +-
cpp/src/encoding/decoder_factory.h | 120 ++++++---
cpp/src/encoding/dictionary_decoder.h | 33 ++-
cpp/src/encoding/dictionary_encoder.h | 52 +++-
cpp/src/encoding/encoder_factory.h | 141 ++++++----
cpp/src/encoding/gorilla_decoder.h | 8 +-
cpp/src/encoding/gorilla_encoder.h | 2 +-
cpp/src/encoding/{intpacker.h => int32_packer.h} | 42 +--
.../{bitpack_decoder.h => int32_rle_decoder.h} | 114 ++++++--
.../{bitpack_encoder.h => int32_rle_encoder.h} | 89 ++++---
cpp/src/encoding/{intpacker.h => int64_packer.h} | 71 ++---
.../{bitpack_decoder.h => int64_rle_decoder.h} | 68 ++++-
.../{bitpack_encoder.h => int64_rle_encoder.h} | 83 +++---
cpp/src/encoding/plain_decoder.h | 1 +
cpp/src/encoding/ts2diff_decoder.h | 2 +-
cpp/src/encoding/zigzag_decoder.h | 50 +++-
cpp/src/encoding/zigzag_encoder.h | 86 ++++--
cpp/src/reader/qds_without_timegenerator.cc | 5 +-
cpp/src/utils/errno_define.h | 2 +
cpp/test/encoding/bitpack_codec_test.cc | 289 ---------------------
cpp/test/encoding/dictionary_codec_test.cc | 82 +++++-
cpp/test/encoding/gorilla_codec_test.cc | 100 +++++++
cpp/test/encoding/inpacker_test.cc | 57 ----
cpp/test/encoding/int32_packer_test.cc | 194 ++++++++++++++
cpp/test/encoding/int32_rle_codec_test.cc | 167 ++++++++++++
cpp/test/encoding/int64_packer_test.cc | 250 ++++++++++++++++++
cpp/test/encoding/int64_rle_codec_test.cc | 133 ++++++++++
.../reader/table_view/tsfile_reader_table_test.cc | 2 -
.../writer/table_view/tsfile_writer_table_test.cc | 104 ++++++++
32 files changed, 1720 insertions(+), 650 deletions(-)
diff --git a/cpp/src/common/allocator/my_string.h
b/cpp/src/common/allocator/my_string.h
index 1f7bf00a..ef27f2d3 100644
--- a/cpp/src/common/allocator/my_string.h
+++ b/cpp/src/common/allocator/my_string.h
@@ -125,9 +125,15 @@ struct String {
// return < 0, if this < that
// return > 0, if this > that
FORCE_INLINE int compare(const String &that) const {
- if (len_ == 0 || that.len_ == 0) {
+ if (len_ == 0 && that.len_ == 0) {
return 0;
}
+ if (len_ == 0) {
+ return -1;
+ }
+ if (that.len_ == 0) {
+ return 1;
+ }
uint32_t min_len = std::min(len_, that.len_);
int cmp_res = memcmp(buf_, that.buf_, min_len);
if (cmp_res == 0) {
diff --git a/cpp/src/common/container/byte_buffer.h
b/cpp/src/common/container/byte_buffer.h
index 9e8ecb48..becf4a4d 100644
--- a/cpp/src/common/container/byte_buffer.h
+++ b/cpp/src/common/container/byte_buffer.h
@@ -108,6 +108,8 @@ class ByteBuffer {
// for variable len value
FORCE_INLINE char *read(uint32_t offset, uint32_t *len) {
uint32_t tmp;
+ // Copy the length with memcpy: data_ + offset may be unaligned, so
+ // casting it to a uint32_t pointer and dereferencing would be unsafe.
std::memcpy(&tmp, data_ + offset, sizeof(tmp));
*len = tmp;
char *p = &data_[offset + variable_type_len_];
diff --git a/cpp/src/common/datatype/date_converter.h
b/cpp/src/common/datatype/date_converter.h
index 3234c775..82d76441 100644
--- a/cpp/src/common/datatype/date_converter.h
+++ b/cpp/src/common/datatype/date_converter.h
@@ -43,6 +43,7 @@ class DateConverter {
return E_INVALID_ARG;
}
+ // Normalize the tm structure and validate the date
std::tm tmp = tm_date;
tmp.tm_hour = 12;
tmp.tm_isdst = -1;
diff --git a/cpp/src/common/tablet.cc b/cpp/src/common/tablet.cc
index 26be1336..dcb5e063 100644
--- a/cpp/src/common/tablet.cc
+++ b/cpp/src/common/tablet.cc
@@ -53,24 +53,34 @@ int Tablet::init() {
case BOOLEAN:
value_matrix_[c].bool_data = (bool *)malloc(
get_data_type_size(schema.data_type_) * max_row_num_);
+ memset(value_matrix_[c].bool_data, 0,
+ get_data_type_size(schema.data_type_) * max_row_num_);
break;
case DATE:
case INT32:
value_matrix_[c].int32_data = (int32_t *)malloc(
get_data_type_size(schema.data_type_) * max_row_num_);
+ memset(value_matrix_[c].int32_data, 0,
+ get_data_type_size(schema.data_type_) * max_row_num_);
break;
case TIMESTAMP:
case INT64:
value_matrix_[c].int64_data = (int64_t *)malloc(
get_data_type_size(schema.data_type_) * max_row_num_);
+ memset(value_matrix_[c].int64_data, 0,
+ get_data_type_size(schema.data_type_) * max_row_num_);
break;
case FLOAT:
value_matrix_[c].float_data = (float *)malloc(
get_data_type_size(schema.data_type_) * max_row_num_);
+ memset(value_matrix_[c].float_data, 0,
+ get_data_type_size(schema.data_type_) * max_row_num_);
break;
case DOUBLE:
value_matrix_[c].double_data = (double *)malloc(
get_data_type_size(schema.data_type_) * max_row_num_);
+ memset(value_matrix_[c].double_data, 0,
+ get_data_type_size(schema.data_type_) * max_row_num_);
break;
case BLOB:
case TEXT:
@@ -246,7 +256,7 @@ int Tablet::add_value(uint32_t row_index, uint32_t
schema_index, T val) {
} else {
const MeasurementSchema &schema = schema_vec_->at(schema_index);
auto dic = GetDataTypesFromTemplateType<T>();
- if (!GetDataTypesFromTemplateType<T>().count(schema.data_type_)) {
+ if (dic.find(schema.data_type_) == dic.end()) {
return E_TYPE_NOT_MATCH;
}
process_val(row_index, schema_index, val);
diff --git a/cpp/src/encoding/decoder_factory.h
b/cpp/src/encoding/decoder_factory.h
index 12fdf766..a9e725aa 100644
--- a/cpp/src/encoding/decoder_factory.h
+++ b/cpp/src/encoding/decoder_factory.h
@@ -21,17 +21,20 @@
#define ENCODING_DECODER_FACTORY_H
#include "decoder.h"
+#include "dictionary_decoder.h"
+#include "encoding/int32_rle_decoder.h"
+#include "encoding/int64_rle_decoder.h"
#include "gorilla_decoder.h"
#include "plain_decoder.h"
#include "ts2diff_decoder.h"
+#include "zigzag_decoder.h"
namespace storage {
-
#define ALLOC_AND_RETURN_DECODER(DecoderType) \
do { \
- void *buf = \
+ void* buf = \
common::mem_alloc(sizeof(DecoderType), common::MOD_DECODER_OBJ); \
- DecoderType *decoder = nullptr; \
+ DecoderType* decoder = nullptr; \
if (buf != nullptr) { \
decoder = new (buf) DecoderType; \
} \
@@ -40,7 +43,7 @@ namespace storage {
class DecoderFactory {
public:
- static Decoder *alloc_time_decoder() {
+ static Decoder* alloc_time_decoder() {
if (common::g_config_value_.time_encoding_type_ == common::PLAIN) {
ALLOC_AND_RETURN_DECODER(PlainDecoder);
} else if (common::g_config_value_.time_encoding_type_ ==
@@ -48,52 +51,89 @@ class DecoderFactory {
ALLOC_AND_RETURN_DECODER(LongTS2DIFFDecoder);
} else {
// not support now
- ASSERT(false);
return nullptr;
}
}
- static Decoder *alloc_value_decoder(common::TSEncoding encoding,
+ static Decoder* alloc_value_decoder(common::TSEncoding encoding,
common::TSDataType data_type) {
- if (encoding == common::PLAIN) {
- ALLOC_AND_RETURN_DECODER(PlainDecoder);
- } else if (encoding == common::GORILLA) {
- if (data_type == common::INT32 || data_type == common::DATE) {
- ALLOC_AND_RETURN_DECODER(IntGorillaDecoder);
- } else if (data_type == common::INT64 ||
- data_type == common::TIMESTAMP) {
- ALLOC_AND_RETURN_DECODER(LongGorillaDecoder);
- } else if (data_type == common::FLOAT) {
- ALLOC_AND_RETURN_DECODER(FloatGorillaDecoder);
- } else if (data_type == common::DOUBLE) {
- ALLOC_AND_RETURN_DECODER(DoubleGorillaDecoder);
- } else {
- ASSERT(false);
+ using namespace common;
+
+ switch (encoding) {
+ case PLAIN:
+ ALLOC_AND_RETURN_DECODER(PlainDecoder);
+
+ case DICTIONARY:
+ switch (data_type) {
+ case STRING:
+ case TEXT:
+ ALLOC_AND_RETURN_DECODER(DictionaryDecoder);
+ default:
+ return nullptr;
+ }
+
+ case RLE:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_DECODER(Int32RleDecoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_DECODER(Int64RleDecoder);
+ default:
+ return nullptr;
+ }
+
+ case GORILLA:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_DECODER(IntGorillaDecoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_DECODER(LongGorillaDecoder);
+ case FLOAT:
+ ALLOC_AND_RETURN_DECODER(FloatGorillaDecoder);
+ case DOUBLE:
+ ALLOC_AND_RETURN_DECODER(DoubleGorillaDecoder);
+ default:
+ return nullptr;
+ }
+
+ case TS_2DIFF:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_DECODER(IntTS2DIFFDecoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_DECODER(LongTS2DIFFDecoder);
+ case FLOAT:
+ ALLOC_AND_RETURN_DECODER(FloatTS2DIFFDecoder);
+ case DOUBLE:
+ ALLOC_AND_RETURN_DECODER(DoubleTS2DIFFDecoder);
+ default:
+ return nullptr;
+ }
+
+ case ZIGZAG:
+ switch (data_type) {
+ case INT32:
+ ALLOC_AND_RETURN_DECODER(IntZigzagDecoder);
+ case INT64:
+ ALLOC_AND_RETURN_DECODER(LongZigzagDecoder);
+ default:
+ return nullptr;
+ }
+
+ default:
+ // Unsupported encoding
return nullptr;
- }
- } else if (encoding == common::TS_2DIFF) {
- if (data_type == common::INT32 || data_type == common::DATE) {
- ALLOC_AND_RETURN_DECODER(IntTS2DIFFDecoder);
- } else if (data_type == common::INT64 ||
- data_type == common::TIMESTAMP) {
- ALLOC_AND_RETURN_DECODER(LongTS2DIFFDecoder);
- } else if (data_type == common::FLOAT) {
- ALLOC_AND_RETURN_DECODER(FloatTS2DIFFDecoder);
- } else if (data_type == common::DOUBLE) {
- ALLOC_AND_RETURN_DECODER(DoubleTS2DIFFDecoder);
- } else {
- ASSERT(false);
- }
- } else {
- // not support now
- ASSERT(false);
- return nullptr;
}
return nullptr;
}
- static void free(Decoder *decoder) { common::mem_free(decoder); }
+ static void free(Decoder* decoder) { common::mem_free(decoder); }
};
-
} // end namespace storage
#endif // ENCODING_DECODER_FACTORY_H
diff --git a/cpp/src/encoding/dictionary_decoder.h
b/cpp/src/encoding/dictionary_decoder.h
index 4ac846db..a6ee4a51 100644
--- a/cpp/src/encoding/dictionary_decoder.h
+++ b/cpp/src/encoding/dictionary_decoder.h
@@ -24,17 +24,44 @@
#include <vector>
#include "common/allocator/byte_stream.h"
+#include "decoder.h"
#include "encoder.h"
-#include "encoding/bitpack_decoder.h"
+#include "encoding/int32_rle_decoder.h"
namespace storage {
-class DictionaryDecoder {
+class DictionaryDecoder : public Decoder {
private:
- BitPackDecoder value_decoder_;
+ Int32RleDecoder value_decoder_;
std::vector<std::string> entry_index_;
public:
+ ~DictionaryDecoder() override = default;
+ bool has_remaining() {
+ return !entry_index_.empty() && value_decoder_.has_next_package();
+ }
+ int read_boolean(bool &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_int32(int32_t &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_int64(int64_t &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_float(float &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_double(double &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_String(common::String &ret_value, common::PageArena &pa,
+ common::ByteStream &in) {
+ int ret = common::E_OK;
+ auto std_str = read_string(in);
+ return ret_value.dup_from(std_str, pa);
+ }
+
void init() { value_decoder_.init(); }
void reset() {
diff --git a/cpp/src/encoding/dictionary_encoder.h
b/cpp/src/encoding/dictionary_encoder.h
index f40df548..16b73458 100644
--- a/cpp/src/encoding/dictionary_encoder.h
+++ b/cpp/src/encoding/dictionary_encoder.h
@@ -26,43 +26,70 @@
#include "common/allocator/byte_stream.h"
#include "encoder.h"
-#include "encoding/bitpack_encoder.h"
+#include "encoding/int32_rle_encoder.h"
namespace storage {
-class DictionaryEncoder {
+class DictionaryEncoder : public Encoder {
private:
std::map<std::string, int> entry_index_;
std::vector<std::string> index_entry_;
- BitPackEncoder values_encoder_;
+ Int32RleEncoder values_encoder_;
int map_size_;
public:
DictionaryEncoder() {}
- ~DictionaryEncoder() {}
+ ~DictionaryEncoder() override {}
+
+ int encode(bool value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(int32_t value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(int64_t value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(float value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(double value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(common::String value, common::ByteStream &out_stream) override {
+ encode(value.to_std_string(), out_stream);
+ return common::E_OK;
+ }
void init() {
map_size_ = 0;
values_encoder_.init();
}
- void reset() {
+ void destroy() override {}
+
+ void reset() override {
entry_index_.clear();
index_entry_.clear();
map_size_ = 0;
values_encoder_.reset();
}
- void encode(std::string value, common::ByteStream &out) {
+ int encode(const char *value, common::ByteStream &out) {
+ return encode(std::string(value), out);
+ }
+
+ int encode(std::string value, common::ByteStream &out) {
if (entry_index_.count(value) == 0) {
index_entry_.push_back(value);
map_size_ = map_size_ + value.length();
entry_index_[value] = entry_index_.size();
}
values_encoder_.encode(entry_index_[value], out);
+ return common::E_OK;
}
- int flush(common::ByteStream &out) {
+ int flush(common::ByteStream &out) override {
int ret = common::E_OK;
ret = write_map(out);
if (ret != common::E_OK) {
@@ -70,10 +97,7 @@ class DictionaryEncoder {
} else {
write_encoded_data(out);
}
- if (ret != common::E_OK) {
- return ret;
- }
- return common::E_OK;
+ return ret;
}
int write_map(common::ByteStream &out) {
@@ -93,14 +117,14 @@ class DictionaryEncoder {
}
void write_encoded_data(common::ByteStream &out) {
- values_encoder_.encode_flush(out);
+ values_encoder_.flush(out);
}
- int get_max_byte_size() {
+ int get_max_byte_size() override {
// 4 bytes for storing dictionary size
return 4 + map_size_ + values_encoder_.get_max_byte_size();
}
};
} // end namespace storage
-#endif // ENCODING_DICTIONARY_ENCODER_H
+#endif // ENCODING_DICTIONARY_ENCODER_H
\ No newline at end of file
diff --git a/cpp/src/encoding/encoder_factory.h
b/cpp/src/encoding/encoder_factory.h
index 1f86bb6c..ab132389 100644
--- a/cpp/src/encoding/encoder_factory.h
+++ b/cpp/src/encoding/encoder_factory.h
@@ -21,10 +21,14 @@
#define ENCODING_ENCODER_FACTORY_H
#include "common/global.h"
+#include "dictionary_encoder.h"
#include "encoder.h"
+#include "encoding/int32_rle_encoder.h"
+#include "encoding/int64_rle_encoder.h"
#include "gorilla_encoder.h"
#include "plain_encoder.h"
#include "ts2diff_encoder.h"
+#include "zigzag_encoder.h"
namespace storage {
@@ -50,7 +54,6 @@ class EncoderFactory {
ALLOC_AND_RETURN_ENCODER(LongTS2DIFFEncoder);
} else {
// not support now
- ASSERT(false);
return nullptr;
}
}
@@ -62,70 +65,98 @@ class EncoderFactory {
ALLOC_AND_RETURN_ENCODER(LongTS2DIFFEncoder);
} else {
// not support now
- ASSERT(false);
return nullptr;
}
}
static Encoder *alloc_value_encoder(common::TSEncoding encoding,
common::TSDataType data_type) {
- if (encoding == common::PLAIN) {
- ALLOC_AND_RETURN_ENCODER(PlainEncoder);
- } else if (encoding == common::DICTIONARY) {
- return nullptr;
- } else if (encoding == common::RLE) {
- return nullptr;
- } else if (encoding == common::DIFF) {
- return nullptr;
- } else if (encoding == common::TS_2DIFF) {
- if (data_type == common::INT32 || data_type == common::DATE) {
- ALLOC_AND_RETURN_ENCODER(IntTS2DIFFEncoder);
- } else if (data_type == common::INT64 ||
- data_type == common::TIMESTAMP) {
- ALLOC_AND_RETURN_ENCODER(LongTS2DIFFEncoder);
- } else if (data_type == common::FLOAT) {
- ALLOC_AND_RETURN_ENCODER(FloatTS2DIFFEncoder);
- } else if (data_type == common::DOUBLE) {
- ALLOC_AND_RETURN_ENCODER(DoubleTS2DIFFEncoder);
- } else if (data_type == common::TIMESTAMP) {
- ALLOC_AND_RETURN_ENCODER(LongTS2DIFFEncoder);
- } else {
- ASSERT(false);
- }
- } else if (encoding == common::BITMAP) {
- return nullptr;
- } else if (encoding == common::GORILLA_V1) {
- return nullptr;
- } else if (encoding == common::REGULAR) {
- return nullptr;
- } else if (encoding == common::GORILLA) {
- if (data_type == common::INT32 || data_type == common::DATE) {
- ALLOC_AND_RETURN_ENCODER(IntGorillaEncoder);
- } else if (data_type == common::INT64) {
- ALLOC_AND_RETURN_ENCODER(LongGorillaEncoder);
- } else if (data_type == common::FLOAT) {
- ALLOC_AND_RETURN_ENCODER(FloatGorillaEncoder);
- } else if (data_type == common::DOUBLE) {
- ALLOC_AND_RETURN_ENCODER(DoubleGorillaEncoder);
- } else if (data_type == common::INT64 ||
- data_type == common::TIMESTAMP) {
- ALLOC_AND_RETURN_ENCODER(LongGorillaEncoder);
- } else {
- ASSERT(false);
- }
- } else if (encoding == common::ZIGZAG) {
- return nullptr;
- } else if (encoding == common::FREQ) {
- return nullptr;
- } else {
- // not support now
- ASSERT(false);
- return nullptr;
+ using namespace common;
+
+ switch (encoding) {
+ case PLAIN:
+ ALLOC_AND_RETURN_ENCODER(PlainEncoder);
+
+ case DICTIONARY:
+ switch (data_type) {
+ case STRING:
+ case TEXT:
+ ALLOC_AND_RETURN_ENCODER(DictionaryEncoder);
+ default:
+ return nullptr;
+ }
+
+ case RLE:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_ENCODER(Int32RleEncoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_ENCODER(Int64RleEncoder);
+ default:
+ return nullptr;
+ }
+
+ case TS_2DIFF:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_ENCODER(IntTS2DIFFEncoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_ENCODER(LongTS2DIFFEncoder);
+ case FLOAT:
+ ALLOC_AND_RETURN_ENCODER(FloatTS2DIFFEncoder);
+ case DOUBLE:
+ ALLOC_AND_RETURN_ENCODER(DoubleTS2DIFFEncoder);
+ default:
+ return nullptr;
+ }
+
+ case GORILLA:
+ switch (data_type) {
+ case INT32:
+ case DATE:
+ ALLOC_AND_RETURN_ENCODER(IntGorillaEncoder);
+ case INT64:
+ case TIMESTAMP:
+ ALLOC_AND_RETURN_ENCODER(LongGorillaEncoder);
+ case FLOAT:
+ ALLOC_AND_RETURN_ENCODER(FloatGorillaEncoder);
+ case DOUBLE:
+ ALLOC_AND_RETURN_ENCODER(DoubleGorillaEncoder);
+ default:
+ return nullptr;
+ }
+
+ case ZIGZAG:
+ switch (data_type) {
+ case INT32:
+ ALLOC_AND_RETURN_ENCODER(IntZigzagEncoder);
+ case INT64:
+ ALLOC_AND_RETURN_ENCODER(LongZigzagEncoder);
+ default:
+ return nullptr;
+ }
+
+ case DIFF:
+ case BITMAP:
+ case GORILLA_V1:
+ case REGULAR:
+ case FREQ:
+ return nullptr;
+
+ default:
+ return nullptr;
}
return nullptr;
}
- static void free(Encoder *encoder) { common::mem_free(encoder); }
+ static void free(Encoder *encoder) {
+ encoder->~Encoder();
+ common::mem_free(encoder);
+ }
};
} // end namespace storage
diff --git a/cpp/src/encoding/gorilla_decoder.h
b/cpp/src/encoding/gorilla_decoder.h
index f374b32e..726bbf30 100644
--- a/cpp/src/encoding/gorilla_decoder.h
+++ b/cpp/src/encoding/gorilla_decoder.h
@@ -35,7 +35,7 @@ class GorillaDecoder : public Decoder {
public:
GorillaDecoder() { reset(); }
- ~GorillaDecoder() {}
+ ~GorillaDecoder() override = default;
void reset() {
type_ = common::GORILLA;
@@ -158,7 +158,8 @@ GorillaDecoder<int32_t>::read_next(common::ByteStream &in) {
stored_leading_zeros_ -
stored_trailing_zeros_,
in);
- xor_value <<= stored_trailing_zeros_;
+ xor_value = static_cast<uint32_t>(xor_value)
+ << stored_trailing_zeros_;
stored_value_ ^= xor_value;
// missing break is intentional, we want to overflow to next one
default: // case '0': use stored value
@@ -191,7 +192,8 @@ GorillaDecoder<int64_t>::read_next(common::ByteStream &in) {
read_long(VALUE_BITS_LENGTH_64BIT - stored_leading_zeros_ -
stored_trailing_zeros_,
in);
- xor_value <<= stored_trailing_zeros_;
+ xor_value = static_cast<uint64_t>(xor_value)
+ << stored_trailing_zeros_;
stored_value_ ^= xor_value;
// missing break is intentional, we want to overflow to next one
}
diff --git a/cpp/src/encoding/gorilla_encoder.h
b/cpp/src/encoding/gorilla_encoder.h
index 5ae47011..3fa6eccf 100644
--- a/cpp/src/encoding/gorilla_encoder.h
+++ b/cpp/src/encoding/gorilla_encoder.h
@@ -111,7 +111,7 @@ class GorillaEncoder : public Encoder {
bits_left_ = 0;
} else {
shift = bits_left_ - bits;
- buffer_ |= (uint8_t)(value << shift);
+ buffer_ |= (uint8_t)(static_cast<uint64_t>(value) << shift);
bits_left_ -= bits;
bits = 0;
}
diff --git a/cpp/src/encoding/intpacker.h b/cpp/src/encoding/int32_packer.h
similarity index 79%
copy from cpp/src/encoding/intpacker.h
copy to cpp/src/encoding/int32_packer.h
index 483d0c92..ecb42b24 100644
--- a/cpp/src/encoding/intpacker.h
+++ b/cpp/src/encoding/int32_packer.h
@@ -17,8 +17,8 @@
* under the License.
*/
-#ifndef ENCODING_IntPacker_ENCODER_H
-#define ENCODING_IntPacker_ENCODER_H
+#ifndef ENCODING_INT32PACKER_ENCODER_H
+#define ENCODING_INT32PACKER_ENCODER_H
#define NUM_OF_INTS 8
@@ -26,20 +26,20 @@
namespace storage {
-class IntPacker {
+class Int32Packer {
private:
int width_;
public:
- IntPacker(int width_) { this->width_ = width_; }
- ~IntPacker() { destroy(); }
+ Int32Packer(int width_) { this->width_ = width_; }
+ ~Int32Packer() { destroy(); }
void destroy() { /* do nothing for IntPacker */
}
void reset() { /* do thing for IntPacker */
}
- void pack_8values(int64_t values[], int offset, unsigned char buf[]) {
+ void pack_8values(const int32_t values[], int offset, unsigned char buf[])
{
int buf_idx = 0;
int value_idx = offset;
// remaining bits for the current unfinished Integer
@@ -47,13 +47,14 @@ class IntPacker {
while (value_idx < NUM_OF_INTS + offset) {
// buffer is used for saving 32 bits as a part of result
- int64_t buffer = 0;
+ int32_t buffer = 0;
// remaining size of bits in the 'buffer'
- int left_size = 64;
+ int left_size = 32;
// encode the left bits of current Integer to 'buffer'
if (left_bit > 0) {
- buffer |= (values[value_idx] << (64 - left_bit));
+ buffer |= (static_cast<uint32_t>(values[value_idx])
+ << (32 - left_bit));
left_size -= left_bit;
left_bit = 0;
value_idx++;
@@ -61,7 +62,8 @@ class IntPacker {
while (left_size >= width_ && value_idx < NUM_OF_INTS + offset) {
// encode one Integer to the 'buffer'
- buffer |= (values[value_idx] << (left_size - width_));
+ buffer |= (static_cast<uint32_t>(values[value_idx])
+ << (left_size - width_));
left_size -= width_;
value_idx++;
}
@@ -70,19 +72,19 @@ class IntPacker {
if (left_size > 0 && value_idx < NUM_OF_INTS + offset) {
// put the first 'left_size' bits of the Integer into remaining
// space of the buffer
- buffer |= ((uint64_t)values[value_idx] >> (width_ -
left_size));
+ buffer |= (static_cast<uint32_t>(values[value_idx]) >>
+ (width_ - left_size));
left_bit = width_ - left_size;
}
// put the buffer into the final result
- for (int j = 0; j < 8; j++) {
+ for (int j = 0; j < 4; j++) {
buf[buf_idx] =
- (unsigned char)(((uint64_t)buffer >> ((8 - j - 1) * 8)) &
- 0xFF);
+ (unsigned char)((buffer >> ((3 - j) * 8)) & 0xFF);
buf_idx++;
// width_ is the bit num of each value, but here is means the
// max byte num
- if (buf_idx >= width_ * 8 / 8) {
+ if (buf_idx >= width_) {
return;
}
}
@@ -97,7 +99,8 @@ class IntPacker {
* @param values - decoded result , the length of 'values' should be @{link
* IntPacker#NUM_OF_INTS}
*/
- void unpack_8values(unsigned char buf[], int offset, int64_t values[]) {
+ void unpack_8values(const unsigned char buf[], int offset,
+ int32_t values[]) {
int byte_idx = offset;
uint64_t buffer = 0;
// total bits which have reader from 'buf' to 'buffer'. i.e.,
@@ -134,16 +137,17 @@ class IntPacker {
* @param length length of bytes to be decoded in buf.
* @param values decoded result.
*/
- void unpack_all_values(unsigned char buf[], int length, int64_t values[]) {
+ void unpack_all_values(const unsigned char buf[], int length,
+ int32_t values[]) {
int idx = 0;
int k = 0;
while (idx < length) {
- int64_t tv[8];
+ int32_t tv[8];
// decode 8 values one time, current result will be saved in the
// array named 'tv'
unpack_8values(buf, idx, tv);
// System.arraycopy(tv, 0, values, k, 8);
- std::memmove(values + k, tv, 8 * sizeof(int64_t));
+ std::memmove(values + k, tv, 8 * sizeof(int32_t));
idx += width_;
k += 8;
}
diff --git a/cpp/src/encoding/bitpack_decoder.h
b/cpp/src/encoding/int32_rle_decoder.h
similarity index 63%
copy from cpp/src/encoding/bitpack_decoder.h
copy to cpp/src/encoding/int32_rle_decoder.h
index 61c30d1a..d80269f7 100644
--- a/cpp/src/encoding/bitpack_decoder.h
+++ b/cpp/src/encoding/int32_rle_decoder.h
@@ -17,19 +17,20 @@
* under the License.
*/
-#ifndef ENCODING_BITPACK_DECODER_H
-#define ENCODING_BITPACK_DECODER_H
+#ifndef ENCODING_INT32RLE_DECODER_H
+#define ENCODING_INT32RLE_DECODER_H
#include <vector>
#include "common/allocator/alloc_base.h"
+#include "decoder.h"
#include "encoder.h"
#include "encoding/encode_utils.h"
-#include "encoding/intpacker.h"
+#include "encoding/int32_packer.h"
namespace storage {
-class BitPackDecoder {
+class Int32RleDecoder : public Decoder {
private:
uint32_t length_;
uint32_t bit_width_;
@@ -37,18 +38,47 @@ class BitPackDecoder {
bool is_length_and_bitwidth_readed_;
int current_count_;
common::ByteStream byte_cache_;
- int64_t *current_buffer_;
- IntPacker *packer_;
+ int32_t *current_buffer_;
+ Int32Packer *packer_;
uint8_t *tmp_buf_;
public:
- BitPackDecoder()
- : current_count_(0),
+ Int32RleDecoder()
+ : length_(0),
+ bit_width_(0),
+ bitpacking_num_(0),
+ is_length_and_bitwidth_readed_(false),
+ current_count_(0),
byte_cache_(1024, common::MOD_DECODER_OBJ),
current_buffer_(nullptr),
packer_(nullptr),
tmp_buf_(nullptr) {}
- ~BitPackDecoder() { destroy(); }
+ ~Int32RleDecoder() override { destroy(); }
+
+ bool has_remaining() override { return has_next_package(); }
+ int read_boolean(bool &ret_value, common::ByteStream &in) {
+ int32_t bool_value;
+ read_int32(bool_value, in);
+ ret_value = bool_value == 0 ? false : true;
+ return common::E_OK;
+ }
+ int read_int32(int32_t &ret_value, common::ByteStream &in) override {
+ ret_value = static_cast<int32_t>(read_int(in));
+ return common::E_OK;
+ }
+ int read_int64(int64_t &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_float(float &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_double(double &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_String(common::String &ret_value, common::PageArena &pa,
+ common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
void init() {
packer_ = nullptr;
@@ -71,7 +101,7 @@ class BitPackDecoder {
return current_count_ > 0 || byte_cache_.remaining_size() > 0;
}
- int64_t read_int(common::ByteStream &buffer) {
+ int32_t read_int(common::ByteStream &buffer) {
if (!is_length_and_bitwidth_readed_) {
// start to reader a new rle+bit-packing pattern
read_length_and_bitwidth(buffer);
@@ -86,7 +116,7 @@ class BitPackDecoder {
call_read_bit_packing_buffer(header);
}
--current_count_;
- int64_t result = current_buffer_[bitpacking_num_ - current_count_ - 1];
+ int32_t result = current_buffer_[bitpacking_num_ - current_count_ - 1];
if (!has_next_package()) {
is_length_and_bitwidth_readed_ = false;
}
@@ -108,22 +138,25 @@ class BitPackDecoder {
(bit_packed_group_count - 1) * 8 + last_bit_packed_num;
bitpacking_num_ = current_count_;
} else {
- printf(
- "tsfile-encoding IntRleDecoder: bit_packed_group_count %d, "
- "smaller "
- "than 1",
- bit_packed_group_count);
+ return common::E_DECODE_ERR;
}
- read_bit_packing_buffer(bit_packed_group_count, last_bit_packed_num);
+ ret = read_bit_packing_buffer(bit_packed_group_count,
+ last_bit_packed_num);
return ret;
}
- void read_bit_packing_buffer(int bit_packed_group_count,
- int last_bit_packed_num) {
+ int read_bit_packing_buffer(int bit_packed_group_count,
+ int last_bit_packed_num) {
+ int ret = common::E_OK;
if (current_buffer_ != nullptr) {
- delete[] current_buffer_;
+ common::mem_free(current_buffer_);
+ }
+ current_buffer_ = static_cast<int32_t *>(
+ common::mem_alloc(sizeof(int32_t) * bit_packed_group_count * 8,
+ common::MOD_DECODER_OBJ));
+ if (IS_NULL(current_buffer_)) {
+ return common::E_OOM;
}
- current_buffer_ = new int64_t[bit_packed_group_count * 8];
int bytes_to_read = bit_packed_group_count * bit_width_;
if (bytes_to_read > (int)byte_cache_.remaining_size()) {
bytes_to_read = byte_cache_.remaining_size();
@@ -131,13 +164,17 @@ class BitPackDecoder {
std::vector<unsigned char> bytes(bytes_to_read);
for (int i = 0; i < bytes_to_read; i++) {
- common::SerializationUtil::read_ui8(bytes[i], byte_cache_);
+ if (RET_FAIL(common::SerializationUtil::read_ui8(bytes[i],
+ byte_cache_))) {
+ return ret;
+ }
}
// save all int values in currentBuffer
packer_->unpack_all_values(
bytes.data(), bytes_to_read,
current_buffer_); // decode from bytes, save in currentBuffer
+ return ret;
}
int read_length_and_bitwidth(common::ByteStream &buffer) {
@@ -146,6 +183,9 @@ class BitPackDecoder {
common::SerializationUtil::read_var_uint(length_, buffer))) {
return common::E_PARTIAL_READ;
} else {
+ if (tmp_buf_) {
+ common::mem_free(tmp_buf_);
+ }
tmp_buf_ =
(uint8_t *)common::mem_alloc(length_, common::MOD_DECODER_OBJ);
if (tmp_buf_ == nullptr) {
@@ -163,29 +203,49 @@ class BitPackDecoder {
uint8_t tmp_bit_width;
common::SerializationUtil::read_ui8(tmp_bit_width, byte_cache_);
bit_width_ = tmp_bit_width;
+ if (packer_ != nullptr) {
+ delete packer_;
+ }
init_packer();
}
return ret;
}
- void init_packer() { packer_ = new IntPacker(bit_width_); }
+ void init_packer() { packer_ = new Int32Packer(bit_width_); }
void destroy() { /* do nothing for BitpackEncoder */
if (packer_) {
delete (packer_);
+ packer_ = nullptr;
}
if (current_buffer_) {
- delete[] current_buffer_;
+ common::mem_free(current_buffer_);
+ current_buffer_ = nullptr;
}
if (tmp_buf_) {
common::mem_free(tmp_buf_);
+ tmp_buf_ = nullptr;
}
}
- void reset() {
- current_count_ = 0;
- is_length_and_bitwidth_readed_ = false;
+ void reset() override {
+ length_ = 0;
+ bit_width_ = 0;
bitpacking_num_ = 0;
+ is_length_and_bitwidth_readed_ = false;
+ current_count_ = 0;
+ if (current_buffer_) {
+ delete[] current_buffer_;
+ current_buffer_ = nullptr;
+ }
+ if (packer_) {
+ delete (packer_);
+ packer_ = nullptr;
+ }
+ if (tmp_buf_) {
+ common::mem_free(tmp_buf_);
+ tmp_buf_ = nullptr;
+ }
}
};
diff --git a/cpp/src/encoding/bitpack_encoder.h
b/cpp/src/encoding/int32_rle_encoder.h
similarity index 75%
copy from cpp/src/encoding/bitpack_encoder.h
copy to cpp/src/encoding/int32_rle_encoder.h
index d169c73e..69ffc1b5 100644
--- a/cpp/src/encoding/bitpack_encoder.h
+++ b/cpp/src/encoding/int32_rle_encoder.h
@@ -17,39 +17,68 @@
* under the License.
*/
-#ifndef ENCODING_BITPACK_ENCODER_H
-#define ENCODING_BITPACK_ENCODER_H
+#ifndef ENCODING_INT32RLE_ENCODER_H
+#define ENCODING_INT32RLE_ENCODER_H
#include <vector>
#include "common/allocator/alloc_base.h"
#include "encoder.h"
#include "encoding/encode_utils.h"
-#include "encoding/intpacker.h"
+#include "encoding/int32_packer.h"
#include "utils/errno_define.h"
namespace storage {
-class BitPackEncoder {
+class Int32RleEncoder : public Encoder {
private:
int bitpacked_group_count_;
int num_buffered_values_;
int bit_width_;
- IntPacker *packer_;
+ Int32Packer *packer_;
common::ByteStream byte_cache_;
- std::vector<int64_t> values_; // all data tobe encoded
- int64_t buffered_values_[8]; // encode each 8 values
+ std::vector<int32_t> values_; // all data tobe encoded
+ int32_t buffered_values_[8]; // encode each 8 values
std::vector<unsigned char> bytes_buffer_;
+ void inner_flush(common::ByteStream &out) {  // Close the current bit-packed run and emit byte_cache_ to `out`, length-prefixed.
+ int last_bitpacked_num = num_buffered_values_;  // remember the count before the helpers below run
+ if (num_buffered_values_ > 0) {
+ clear_buffer();  // zero the unused tail slots of buffered_values_
+ write_or_append_bitpacked_run();
+ end_previous_bitpacked_run(last_bitpacked_num);
+ } else {
+ end_previous_bitpacked_run(8);  // nothing pending: the last group is reported as a full 8
+ }
+ uint32_t b_length = byte_cache_.total_size();
+ common::SerializationUtil::write_var_uint(b_length, out);  // var-uint length prefix, then the payload
+ merge_byte_stream(out, byte_cache_);
+ reset();  // clear encoder state after the flush
+ }
+
public:
// BitPackEncoder() :byte_cache_(1024,common::MOD_ENCODER_OBJ){}
- BitPackEncoder()
+ Int32RleEncoder()
: bitpacked_group_count_(0),
num_buffered_values_(0),
bit_width_(0),
packer_(nullptr),
byte_cache_(1024, common::MOD_ENCODER_OBJ) {}
- ~BitPackEncoder() { destroy(); }
+ ~Int32RleEncoder() override { destroy(); }
+
+ int encode(bool value, common::ByteStream &out_stream) override {
+ int32_t bool_value = value == true ? 1 : 0;
+ return encode(bool_value, out_stream);
+ }
+ int encode(float value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(double value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(common::String value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
void init() {
bitpacked_group_count_ = 0;
@@ -58,9 +87,9 @@ class BitPackEncoder {
packer_ = nullptr;
}
- void destroy() { delete (packer_); }
+ void destroy() override { delete (packer_); }
- void reset() {
+ void reset() override {
num_buffered_values_ = 0;
bitpacked_group_count_ = 0;
bit_width_ = 0;
@@ -71,17 +100,25 @@ class BitPackEncoder {
packer_ = nullptr;
}
- FORCE_INLINE void encode(int64_t value, common::ByteStream &out) {
+ FORCE_INLINE int encode(int64_t value, common::ByteStream &out) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+
+ FORCE_INLINE int encode(int32_t value, common::ByteStream &out) override {
values_.push_back(value);
- int current_bit_width = 64 - number_of_leading_zeros(value);
+ // The current_bit_width must be at least 1, even if value is 0.
+ int current_bit_width =
+ std::max(1, 32 - number_of_leading_zeros(value));
if (current_bit_width > bit_width_) {
bit_width_ = current_bit_width;
}
+ return common::E_OK;
}
- void encode_flush(common::ByteStream &out) {
+ int flush(common::ByteStream &out) override {
ASSERT(packer_ == nullptr);
- packer_ = new IntPacker(bit_width_);
+ if (bit_width_ == 0) return common::E_OK;
+ packer_ = new Int32Packer(bit_width_);
common::SerializationUtil::write_i8(bit_width_, byte_cache_);
for (size_t i = 0; i < values_.size(); i++) {
// encodeValue(value);
@@ -91,7 +128,8 @@ class BitPackEncoder {
write_or_append_bitpacked_run();
}
}
- flush(out);
+ inner_flush(out);
+ return common::E_OK;
}
void write_or_append_bitpacked_run() {
@@ -109,7 +147,7 @@ class BitPackEncoder {
// TODO: put the bytes on the stack instead on the heap
unsigned char *bytes = (unsigned char *)common::mem_alloc(
bit_width_, common::MOD_BITENCODE_OBJ);
- int64_t tmp_buffer[8];
+ int32_t tmp_buffer[8];
for (int i = 0; i < 8; i++) {
tmp_buffer[i] = (int64_t)buffered_values_[i];
}
@@ -122,21 +160,6 @@ class BitPackEncoder {
common::mem_free(bytes);
}
- void flush(common::ByteStream &out) {
- int last_bitpacked_num = num_buffered_values_;
- if (num_buffered_values_ > 0) {
- clear_buffer();
- write_or_append_bitpacked_run();
- end_previous_bitpacked_run(last_bitpacked_num);
- } else {
- end_previous_bitpacked_run(8);
- }
- uint32_t b_length = byte_cache_.total_size();
- common::SerializationUtil::write_var_uint(b_length, out);
- merge_byte_stream(out, byte_cache_);
- reset();
- }
-
void clear_buffer() {
for (int i = num_buffered_values_; i < 8; i++) {
buffered_values_[i] = 0;
@@ -156,7 +179,7 @@ class BitPackEncoder {
bitpacked_group_count_ = 0;
}
- int get_max_byte_size() {
+ int get_max_byte_size() override {
if (values_.empty()) {
return 0;
}
diff --git a/cpp/src/encoding/intpacker.h b/cpp/src/encoding/int64_packer.h
similarity index 68%
rename from cpp/src/encoding/intpacker.h
rename to cpp/src/encoding/int64_packer.h
index 483d0c92..23267206 100644
--- a/cpp/src/encoding/intpacker.h
+++ b/cpp/src/encoding/int64_packer.h
@@ -17,8 +17,8 @@
* under the License.
*/
-#ifndef ENCODING_IntPacker_ENCODER_H
-#define ENCODING_IntPacker_ENCODER_H
+#ifndef ENCODING_INT64PACKER_ENCODER_H
+#define ENCODING_INT64PACKER_ENCODER_H
#define NUM_OF_INTS 8
@@ -26,34 +26,35 @@
namespace storage {
-class IntPacker {
+class Int64Packer {
private:
int width_;
public:
- IntPacker(int width_) { this->width_ = width_; }
- ~IntPacker() { destroy(); }
+ Int64Packer(int width_) { this->width_ = width_; }
+ ~Int64Packer() { destroy(); }
void destroy() { /* do nothing for IntPacker */
}
void reset() { /* do thing for IntPacker */
}
- void pack_8values(int64_t values[], int offset, unsigned char buf[]) {
+ void pack_8values(const int64_t values[], int offset, unsigned char buf[])
{
int buf_idx = 0;
int value_idx = offset;
// remaining bits for the current unfinished Integer
int left_bit = 0;
while (value_idx < NUM_OF_INTS + offset) {
- // buffer is used for saving 32 bits as a part of result
+ // buffer is used for saving 64 bits as a part of result
int64_t buffer = 0;
// remaining size of bits in the 'buffer'
int left_size = 64;
// encode the left bits of current Integer to 'buffer'
if (left_bit > 0) {
- buffer |= (values[value_idx] << (64 - left_bit));
+ buffer |= (static_cast<uint64_t>(values[value_idx])
+ << (64 - left_bit));
left_size -= left_bit;
left_bit = 0;
value_idx++;
@@ -61,7 +62,8 @@ class IntPacker {
while (left_size >= width_ && value_idx < NUM_OF_INTS + offset) {
// encode one Integer to the 'buffer'
- buffer |= (values[value_idx] << (left_size - width_));
+ buffer |= (static_cast<uint64_t>(values[value_idx])
+ << (left_size - width_));
left_size -= width_;
value_idx++;
}
@@ -70,7 +72,8 @@ class IntPacker {
if (left_size > 0 && value_idx < NUM_OF_INTS + offset) {
// put the first 'left_size' bits of the Integer into remaining
// space of the buffer
- buffer |= ((uint64_t)values[value_idx] >> (width_ -
left_size));
+ buffer |= ((static_cast<uint64_t>(values[value_idx])) >>
+ (width_ - left_size));
left_bit = width_ - left_size;
}
@@ -97,32 +100,37 @@ class IntPacker {
* @param values - decoded result , the length of 'values' should be @{link
* IntPacker#NUM_OF_INTS}
*/
- void unpack_8values(unsigned char buf[], int offset, int64_t values[]) {
+ void unpack_8values(const unsigned char buf[], int offset,
+ int64_t values[]) {
int byte_idx = offset;
- uint64_t buffer = 0;
- // total bits which have reader from 'buf' to 'buffer'. i.e.,
- // number of available bits to be decoded.
- int total_bits = 0;
int value_idx = 0;
+ int left_bits = 8;
+ int total_bits = 0;
+
+ while (value_idx < 8) {
+ values[value_idx] = 0;
+ total_bits = 0;
- while (value_idx < NUM_OF_INTS) {
- // If current available bits are not enough to decode one Integer,
- // then add next byte from buf to 'buffer' until total_bits >=
width
while (total_bits < width_) {
- buffer = (buffer << 8) | (buf[byte_idx] & 0xFF);
- byte_idx++;
- total_bits += 8;
+ if (width_ - total_bits >= left_bits) {
+ values[value_idx] <<= left_bits;
+ values[value_idx] |= static_cast<int64_t>(
+ buf[byte_idx] & ((1 << left_bits) - 1));
+ total_bits += left_bits;
+ byte_idx++;
+ left_bits = 8;
+ } else {
+ int t = width_ - total_bits;
+ values[value_idx] <<= t;
+ values[value_idx] |= static_cast<int64_t>(
+ (buf[byte_idx] & ((1 << left_bits) - 1)) >>
+ (left_bits - t));
+ left_bits -= t;
+ total_bits += t;
+ }
}
- // If current available bits are enough to decode one Integer,
- // then decode one Integer one by one until left bits in 'buffer'
is
- // not enough to decode one Integer.
- while (total_bits >= width_ && value_idx < 8) {
- values[value_idx] = (int)(buffer >> (total_bits - width_));
- value_idx++;
- total_bits -= width_;
- buffer = buffer & ((1 << total_bits) - 1);
- }
+ value_idx++;
}
}
@@ -134,7 +142,8 @@ class IntPacker {
* @param length length of bytes to be decoded in buf.
* @param values decoded result.
*/
- void unpack_all_values(unsigned char buf[], int length, int64_t values[]) {
+ void unpack_all_values(const unsigned char buf[], int length,
+ int64_t values[]) {
int idx = 0;
int k = 0;
while (idx < length) {
diff --git a/cpp/src/encoding/bitpack_decoder.h
b/cpp/src/encoding/int64_rle_decoder.h
similarity index 76%
rename from cpp/src/encoding/bitpack_decoder.h
rename to cpp/src/encoding/int64_rle_decoder.h
index 61c30d1a..6d54510b 100644
--- a/cpp/src/encoding/bitpack_decoder.h
+++ b/cpp/src/encoding/int64_rle_decoder.h
@@ -17,19 +17,20 @@
* under the License.
*/
-#ifndef ENCODING_BITPACK_DECODER_H
-#define ENCODING_BITPACK_DECODER_H
+#ifndef ENCODING_INT64RLE_DECODER_H
+#define ENCODING_INT64RLE_DECODER_H
#include <vector>
#include "common/allocator/alloc_base.h"
+#include "decoder.h"
#include "encoder.h"
#include "encoding/encode_utils.h"
-#include "encoding/intpacker.h"
+#include "encoding/int64_packer.h"
namespace storage {
-class BitPackDecoder {
+class Int64RleDecoder : public Decoder {
private:
uint32_t length_;
uint32_t bit_width_;
@@ -38,17 +39,43 @@ class BitPackDecoder {
int current_count_;
common::ByteStream byte_cache_;
int64_t *current_buffer_;
- IntPacker *packer_;
+ Int64Packer *packer_;
uint8_t *tmp_buf_;
public:
- BitPackDecoder()
- : current_count_(0),
+ Int64RleDecoder()
+ : length_(0),
+ bit_width_(0),
+ bitpacking_num_(0),
+ is_length_and_bitwidth_readed_(false),
+ current_count_(0),
byte_cache_(1024, common::MOD_DECODER_OBJ),
current_buffer_(nullptr),
packer_(nullptr),
tmp_buf_(nullptr) {}
- ~BitPackDecoder() { destroy(); }
+ ~Int64RleDecoder() override { destroy(); }
+
+ bool has_remaining() override { return has_next_package(); }
+ int read_boolean(bool &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_int32(int32_t &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_int64(int64_t &ret_value, common::ByteStream &in) override {
+ ret_value = read_int(in);
+ return common::E_OK;
+ }
+ int read_float(float &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_double(double &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_String(common::String &ret_value, common::PageArena &pa,
+ common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
void init() {
packer_ = nullptr;
@@ -168,24 +195,41 @@ class BitPackDecoder {
return ret;
}
- void init_packer() { packer_ = new IntPacker(bit_width_); }
+ void init_packer() { packer_ = new Int64Packer(bit_width_); }
void destroy() { /* do nothing for BitpackEncoder */
if (packer_) {
delete (packer_);
+ packer_ = nullptr;
}
if (current_buffer_) {
delete[] current_buffer_;
+ current_buffer_ = nullptr;
}
if (tmp_buf_) {
common::mem_free(tmp_buf_);
+ tmp_buf_ = nullptr;
}
}
- void reset() {
- current_count_ = 0;
- is_length_and_bitwidth_readed_ = false;
+ void reset() override {
+ length_ = 0;
+ bit_width_ = 0;
bitpacking_num_ = 0;
+ is_length_and_bitwidth_readed_ = false;
+ current_count_ = 0;
+ if (current_buffer_) {
+ delete[] current_buffer_;
+ current_buffer_ = nullptr;
+ }
+ if (packer_) {
+ delete (packer_);
+ packer_ = nullptr;
+ }
+ if (tmp_buf_) {
+ common::mem_free(tmp_buf_);
+ tmp_buf_ = nullptr;
+ }
}
};
diff --git a/cpp/src/encoding/bitpack_encoder.h
b/cpp/src/encoding/int64_rle_encoder.h
similarity index 76%
rename from cpp/src/encoding/bitpack_encoder.h
rename to cpp/src/encoding/int64_rle_encoder.h
index d169c73e..abcac68b 100644
--- a/cpp/src/encoding/bitpack_encoder.h
+++ b/cpp/src/encoding/int64_rle_encoder.h
@@ -17,39 +17,67 @@
* under the License.
*/
-#ifndef ENCODING_BITPACK_ENCODER_H
-#define ENCODING_BITPACK_ENCODER_H
+#ifndef ENCODING_INT64RLE_ENCODER_H
+#define ENCODING_INT64RLE_ENCODER_H
#include <vector>
#include "common/allocator/alloc_base.h"
#include "encoder.h"
#include "encoding/encode_utils.h"
-#include "encoding/intpacker.h"
+#include "encoding/int64_packer.h"
#include "utils/errno_define.h"
namespace storage {
-class BitPackEncoder {
+class Int64RleEncoder : public Encoder {
private:
int bitpacked_group_count_;
int num_buffered_values_;
int bit_width_;
- IntPacker *packer_;
+ Int64Packer *packer_;
common::ByteStream byte_cache_;
std::vector<int64_t> values_; // all data tobe encoded
int64_t buffered_values_[8]; // encode each 8 values
std::vector<unsigned char> bytes_buffer_;
+ void inner_flush(common::ByteStream &out) {  // Close the current bit-packed run and emit byte_cache_ to `out`, length-prefixed.
+ int last_bitpacked_num = num_buffered_values_;  // remember the count before the helpers below run
+ if (num_buffered_values_ > 0) {
+ clear_buffer();  // zero the unused tail slots of buffered_values_
+ write_or_append_bitpacked_run();
+ end_previous_bitpacked_run(last_bitpacked_num);
+ } else {
+ end_previous_bitpacked_run(8);  // nothing pending: the last group is reported as a full 8
+ }
+ uint32_t b_length = byte_cache_.total_size();  // uint32_t as in Int32RleEncoder and the flush() this replaces
+ common::SerializationUtil::write_var_uint(b_length, out);  // var-uint length prefix, then the payload
+ merge_byte_stream(out, byte_cache_);
+ reset();  // clear encoder state after the flush
+ }
+
public:
// BitPackEncoder() :byte_cache_(1024,common::MOD_ENCODER_OBJ){}
- BitPackEncoder()
+ Int64RleEncoder()
: bitpacked_group_count_(0),
num_buffered_values_(0),
bit_width_(0),
packer_(nullptr),
byte_cache_(1024, common::MOD_ENCODER_OBJ) {}
- ~BitPackEncoder() { destroy(); }
+ ~Int64RleEncoder() override { destroy(); }
+
+ int encode(bool value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(float value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(double value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(common::String value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
void init() {
bitpacked_group_count_ = 0;
@@ -58,9 +86,9 @@ class BitPackEncoder {
packer_ = nullptr;
}
- void destroy() { delete (packer_); }
+ void destroy() override { delete (packer_); }
- void reset() {
+ void reset() override {
num_buffered_values_ = 0;
bitpacked_group_count_ = 0;
bit_width_ = 0;
@@ -69,19 +97,28 @@ class BitPackEncoder {
values_.clear();
delete (packer_);
packer_ = nullptr;
+ memset(buffered_values_, 0, sizeof(buffered_values_));
}
- FORCE_INLINE void encode(int64_t value, common::ByteStream &out) {
+ FORCE_INLINE int encode(int64_t value, common::ByteStream &out) override {
values_.push_back(value);
- int current_bit_width = 64 - number_of_leading_zeros(value);
+ // The current_bit_width must be at least 1, even if value is 0.
+ int current_bit_width =
+ std::max(1, 64 - number_of_leading_zeros(value));
if (current_bit_width > bit_width_) {
bit_width_ = current_bit_width;
}
+ return common::E_OK;
+ }
+
+ FORCE_INLINE int encode(int32_t value, common::ByteStream &out) override {
+ return common::E_TYPE_NOT_MATCH;
}
- void encode_flush(common::ByteStream &out) {
+ int flush(common::ByteStream &out) override {
ASSERT(packer_ == nullptr);
- packer_ = new IntPacker(bit_width_);
+ if (bit_width_ == 0) return common::E_OK;
+ packer_ = new Int64Packer(bit_width_);
common::SerializationUtil::write_i8(bit_width_, byte_cache_);
for (size_t i = 0; i < values_.size(); i++) {
// encodeValue(value);
@@ -91,7 +128,8 @@ class BitPackEncoder {
write_or_append_bitpacked_run();
}
}
- flush(out);
+ inner_flush(out);
+ return common::E_OK;
}
void write_or_append_bitpacked_run() {
@@ -122,21 +160,6 @@ class BitPackEncoder {
common::mem_free(bytes);
}
- void flush(common::ByteStream &out) {
- int last_bitpacked_num = num_buffered_values_;
- if (num_buffered_values_ > 0) {
- clear_buffer();
- write_or_append_bitpacked_run();
- end_previous_bitpacked_run(last_bitpacked_num);
- } else {
- end_previous_bitpacked_run(8);
- }
- uint32_t b_length = byte_cache_.total_size();
- common::SerializationUtil::write_var_uint(b_length, out);
- merge_byte_stream(out, byte_cache_);
- reset();
- }
-
void clear_buffer() {
for (int i = num_buffered_values_; i < 8; i++) {
buffered_values_[i] = 0;
@@ -156,7 +179,7 @@ class BitPackEncoder {
bitpacked_group_count_ = 0;
}
- int get_max_byte_size() {
+ int get_max_byte_size() override {
if (values_.empty()) {
return 0;
}
diff --git a/cpp/src/encoding/plain_decoder.h b/cpp/src/encoding/plain_decoder.h
index 5c309867..42a555f5 100644
--- a/cpp/src/encoding/plain_decoder.h
+++ b/cpp/src/encoding/plain_decoder.h
@@ -26,6 +26,7 @@ namespace storage {
class PlainDecoder : public Decoder {
public:
+ ~PlainDecoder() override = default;
FORCE_INLINE void reset() { /* do nothing */
}
FORCE_INLINE bool has_remaining() { return false; }
diff --git a/cpp/src/encoding/ts2diff_decoder.h
b/cpp/src/encoding/ts2diff_decoder.h
index 5ad0e89c..2da8b611 100644
--- a/cpp/src/encoding/ts2diff_decoder.h
+++ b/cpp/src/encoding/ts2diff_decoder.h
@@ -34,7 +34,7 @@ template <typename T>
class TS2DIFFDecoder : public Decoder {
public:
TS2DIFFDecoder() { reset(); }
- ~TS2DIFFDecoder() {}
+ ~TS2DIFFDecoder() override {}
void reset() {
write_index_ = -1;
diff --git a/cpp/src/encoding/zigzag_decoder.h
b/cpp/src/encoding/zigzag_decoder.h
index 4a540ee2..5b93951b 100644
--- a/cpp/src/encoding/zigzag_decoder.h
+++ b/cpp/src/encoding/zigzag_decoder.h
@@ -30,10 +30,10 @@
namespace storage {
template <typename T>
-class ZigzagDecoder {
+class ZigzagDecoder : public Decoder {
public:
ZigzagDecoder() { init(); }
- ~ZigzagDecoder() { destroy(); }
+ ~ZigzagDecoder() override { destroy(); }
void init() {
type_ = common::ZIGZAG;
@@ -46,12 +46,46 @@ class ZigzagDecoder {
zigzag_decode_arr_ = nullptr;
}
- void reset() {
+ bool has_remaining() override { return !list_transit_in_zd_.empty(); }
+ int read_boolean(bool &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ inline int read_int32(int32_t &ret_value, common::ByteStream &in) override
{
+ ret_value = decode(in);
+ return common::E_OK;
+ }
+ inline int read_int64(int64_t &ret_value, common::ByteStream &in) override
{
+ ret_value = decode(in);
+ return common::E_OK;
+ }
+ int read_float(float &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_double(double &ret_value, common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int read_String(common::String &ret_value, common::PageArena &pa,
+ common::ByteStream &in) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+
+ void each_encode_reset() {  // Clears only the per-value scratch state; decode() calls this after each value it returns.
+ bits_left_ = 0;
+ buffer_ = 0;
+ stored_value_ = 0;
+ first_bit_of_byte_ = 0;
+ num_of_sorts_of_zigzag_ = 0;
+ }  // unlike reset(), leaves type_ and first_read_ untouched and does not call destroy()
+
+ void reset() override {
+ type_ = common::ZIGZAG;
bits_left_ = 0;
buffer_ = 0;
stored_value_ = 0;
first_bit_of_byte_ = 0;
num_of_sorts_of_zigzag_ = 0;
+ first_read_ = true;
+ destroy();
}
void destroy() {
@@ -111,7 +145,7 @@ class ZigzagDecoder {
return stored_value_;
}
- T decode(common::ByteStream &in);
+ inline T decode(common::ByteStream &in);
public:
common::TSEncoding type_;
@@ -128,7 +162,7 @@ class ZigzagDecoder {
};
template <>
-int32_t ZigzagDecoder<int32_t>::decode(common::ByteStream &in) {
+inline int32_t ZigzagDecoder<int32_t>::decode(common::ByteStream &in) {
if (UNLIKELY(first_read_ == true)) {
read_header(in);
zigzag_decode_arr_ =
@@ -156,12 +190,12 @@ int32_t ZigzagDecoder<int32_t>::decode(common::ByteStream
&in) {
int32_t ret_value = (int32_t)(stored_value_);
ret_value = (int32_t)(zigzag_decoder(stored_value_));
- reset();
+ each_encode_reset();
return ret_value;
}
template <>
-int64_t ZigzagDecoder<int64_t>::decode(common::ByteStream &in) {
+inline int64_t ZigzagDecoder<int64_t>::decode(common::ByteStream &in) {
if (UNLIKELY(first_read_ == true)) {
read_header(in);
zigzag_decode_arr_ =
@@ -189,7 +223,7 @@ int64_t ZigzagDecoder<int64_t>::decode(common::ByteStream
&in) {
int64_t ret_value = (int64_t)(stored_value_);
ret_value = (int64_t)(zigzag_decoder(stored_value_));
- reset();
+ each_encode_reset();
return ret_value;
}
diff --git a/cpp/src/encoding/zigzag_encoder.h
b/cpp/src/encoding/zigzag_encoder.h
index 887ba07d..841b8b05 100644
--- a/cpp/src/encoding/zigzag_encoder.h
+++ b/cpp/src/encoding/zigzag_encoder.h
@@ -29,13 +29,36 @@
namespace storage {
template <typename T>
-class ZigzagEncoder {
+class ZigzagEncoder : public Encoder {
public:
ZigzagEncoder() { init(); }
- ~ZigzagEncoder() {}
+ ~ZigzagEncoder() override = default;
- void destroy() {}
+ void destroy() override {}
+
+ // int init(common::TSDataType data_type) = 0;
+ int encode(bool value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(int32_t value, common::ByteStream &out_stream) override;
+ int encode(int64_t value, common::ByteStream &out_stream) override;
+ int encode(float value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(double value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+ int encode(common::String value, common::ByteStream &out_stream) override {
+ return common::E_TYPE_NOT_MATCH;
+ }
+
+ int get_max_byte_size() override {  // Size estimate for the pending output: 0 when nothing is buffered.
+ if (list_transit_in_ze_.empty()) {
+ return 0;
+ }
+ return 8 + list_transit_in_ze_.size();  // NOTE(review): 8 is presumably headroom for the two var-uint fields flush() writes first; confirm it is enough
+ }
void init() {
type_ = common::ZIGZAG;
@@ -45,7 +68,7 @@ class ZigzagEncoder {
first_read_ = true;
}
- void reset() {
+ void reset() override {
type_ = common::ZIGZAG;
buffer_ = 0;
length_of_input_bytestream_ = 0;
@@ -75,26 +98,42 @@ class ZigzagEncoder {
add_byte_to_trans();
}
- int encode(T value);
- int flush(common::ByteStream &out);
+ inline int encode(T value);
+
+ inline int flush(common::ByteStream &out) override;
public:
common::TSEncoding type_;
- uint8_t buffer_;
- int length_of_input_bytestream_;
- int length_of_encode_bytestream_;
+ uint8_t buffer_ = 0;
+ int length_of_input_bytestream_ = 0;
+ int length_of_encode_bytestream_ = 0;
std::vector<uint8_t> list_transit_in_ze_;
- bool first_read_;
+ bool first_read_{};
};
+template <typename T>
+inline int ZigzagEncoder<T>::encode(int32_t /*value*/,
+ common::ByteStream & /*out*/) {
+ return common::E_TYPE_NOT_MATCH;
+}
+
+template <typename T>
+inline int ZigzagEncoder<T>::encode(int64_t /*value*/,
+ common::ByteStream & /*out*/) {
+ return common::E_TYPE_NOT_MATCH;
+}
+
template <>
-int ZigzagEncoder<int32_t>::encode(int32_t value) {
+inline int ZigzagEncoder<int32_t>::encode(int32_t value) {
if (UNLIKELY(first_read_ == true)) {
reset();
first_read_ = false;
}
length_of_input_bytestream_ += 1;
- int32_t value_zigzag = (value << 1) ^ (value >> 31);
+ int32_t value_zigzag =
+ static_cast<int32_t>((static_cast<uint32_t>(value) << 1) ^
+ static_cast<uint32_t>(value >> 31));
+
if ((value_zigzag & ~0x7F) != 0) {
write_byte_with_subsequence(value_zigzag);
value_zigzag = (uint32_t)value_zigzag >> 7;
@@ -111,13 +150,22 @@ int ZigzagEncoder<int32_t>::encode(int32_t value) {
}
template <>
-int ZigzagEncoder<int64_t>::encode(int64_t value) {
+inline int ZigzagEncoder<int32_t>::encode(int32_t value,
+ common::ByteStream &out_stream) {
+ return encode(value);
+}
+
+template <>
+inline int ZigzagEncoder<int64_t>::encode(int64_t value) {
if (UNLIKELY(first_read_ == true)) {
reset();
first_read_ = false;
}
length_of_input_bytestream_ += 1;
- int64_t value_zigzag = (value << 1) ^ (value >> 63);
+ int64_t value_zigzag =
+ static_cast<int64_t>((static_cast<uint64_t>(value) << 1) ^
+ static_cast<uint64_t>(value >> 63));
+
if ((value_zigzag & ~0x7F) != 0) {
write_byte_with_subsequence(value_zigzag);
value_zigzag = (uint64_t)value_zigzag >> 7;
@@ -134,7 +182,13 @@ int ZigzagEncoder<int64_t>::encode(int64_t value) {
}
template <>
-int ZigzagEncoder<int32_t>::flush(common::ByteStream &out) {
+inline int ZigzagEncoder<int64_t>::encode(int64_t value,
+ common::ByteStream &out_stream) {
+ return encode(value);
+}
+
+template <>
+inline int ZigzagEncoder<int32_t>::flush(common::ByteStream &out) {
common::SerializationUtil::write_var_uint(length_of_encode_bytestream_,
out);
common::SerializationUtil::write_var_uint(length_of_input_bytestream_,
out);
@@ -148,7 +202,7 @@ int ZigzagEncoder<int32_t>::flush(common::ByteStream &out) {
}
template <>
-int ZigzagEncoder<int64_t>::flush(common::ByteStream &out) {
+inline int ZigzagEncoder<int64_t>::flush(common::ByteStream &out) {
common::SerializationUtil::write_var_uint(length_of_encode_bytestream_,
out);
common::SerializationUtil::write_var_uint(length_of_input_bytestream_,
out);
diff --git a/cpp/src/reader/qds_without_timegenerator.cc
b/cpp/src/reader/qds_without_timegenerator.cc
index 575b6b8d..fb2ef6c6 100644
--- a/cpp/src/reader/qds_without_timegenerator.cc
+++ b/cpp/src/reader/qds_without_timegenerator.cc
@@ -124,9 +124,10 @@ int QDSWithoutTimeGenerator::next(bool &has_next) {
std::multimap<int64_t, uint32_t>::iterator iter = heap_time_.find(time);
for (uint32_t i = 0; i < count; ++i) {
uint32_t len = 0;
+ auto val_datatype = value_iters_[iter->second]->get_data_type();
+ void *val_ptr = value_iters_[iter->second]->read(&len);
row_record_->get_field(iter->second + 1)
- ->set_value(value_iters_[iter->second]->get_data_type(),
- value_iters_[iter->second]->read(&len), len, pa_);
+ ->set_value(val_datatype, val_ptr, len, pa_);
value_iters_[iter->second]->next();
if (!time_iters_[iter->second]->end()) {
int64_t timev = *(int64_t
*)(time_iters_[iter->second]->read(&len));
diff --git a/cpp/src/utils/errno_define.h b/cpp/src/utils/errno_define.h
index 8d87ade9..df16c5fe 100644
--- a/cpp/src/utils/errno_define.h
+++ b/cpp/src/utils/errno_define.h
@@ -73,6 +73,8 @@ const int E_TABLE_NOT_EXIST = 49;
const int E_COLUMN_NOT_EXIST = 50;
const int E_UNSUPPORTED_ORDER = 51;
const int E_INVALID_NODE_TYPE = 52;
+const int E_ENCODE_ERR = 53;
+const int E_DECODE_ERR = 54;
} // end namespace common
diff --git a/cpp/test/encoding/bitpack_codec_test.cc
b/cpp/test/encoding/bitpack_codec_test.cc
deleted file mode 100644
index 0f0168b0..00000000
--- a/cpp/test/encoding/bitpack_codec_test.cc
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License a
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include <gtest/gtest.h>
-
-#include "encoding/bitpack_decoder.h"
-#include "encoding/bitpack_encoder.h"
-
-namespace storage {
-
-class BitPackEncoderTest : public ::testing::Test {};
-
-TEST_F(BitPackEncoderTest, EncodeInt32) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
-
- int test_data[] = {5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 9, 9};
- for (int value : test_data) {
- encoder.encode(value, stream);
- }
- encoder.encode_flush(stream);
-
- EXPECT_EQ(stream.total_size(), 12);
-
- uint32_t want_len = 12, read_len;
- uint8_t real_buf[12] = {};
- stream.read_buf(real_buf, want_len, read_len);
- EXPECT_EQ(want_len, read_len);
- // Generated using Java Edition
- uint8_t expected_buf[12] = {11, 4, 5, 7, 85, 86,
- 102, 103, 120, 153, 153, 144};
- for (int i = 0; i < 12; i++) {
- EXPECT_EQ(real_buf[i], expected_buf[i]);
- }
-}
-
-TEST_F(BitPackEncoderTest, EncodeInt64) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
-
- int64_t test_data[] = {10L, 20L, 10L, 10L, 30L, 30L, 30L,
- 20L, 20L, 10L, 10L, INT64_MAX, INT64_MIN};
- for (int64_t value : test_data) {
- encoder.encode(value, stream);
- }
- encoder.encode_flush(stream);
-
- EXPECT_EQ(stream.total_size(), 133);
-
- uint32_t want_len = 133, read_len;
- uint8_t real_buf[133] = {0};
- stream.read_buf(real_buf, want_len, read_len);
-
- EXPECT_EQ(want_len, read_len);
- // Generated using Java Edition
- int8_t expected_buf[133] = {
- -125, 1, 64, 5, 5, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0,
- 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0,
- 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0,
- 0, 30, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0,
- 20, 0, 0, 0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 10,
- 0, 0, 0, 0, 0, 0, 0, 10, 127, -1, -1, -1, -1, -1, -1, -1, -128,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
- for (int i = 0; i < 123; i++) {
- EXPECT_EQ(real_buf[i], (uint8_t)expected_buf[i]);
- }
-}
-
-TEST_F(BitPackEncoderTest, EncodeInt64Num1024) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
-
- for (int64_t i = 0; i < 1024; i++) {
- encoder.encode(i, stream);
- }
- encoder.encode_flush(stream);
-
- EXPECT_EQ(stream.total_size(), 1289);
-
- uint32_t want_len = 1289, read_len;
- uint8_t real_buf[1289] = {0};
- stream.read_buf(real_buf, want_len, read_len);
-
- EXPECT_EQ(want_len, read_len);
- // Generated using Java Edition
- int8_t expected_buf[1289] = {
- -121, 10, 10, 127, 8, 0, 0, 16, 8, 3, 1, 0,
- 80, 24, 7, 2, 0, -112, 40, 11, 3, 0, -48, 56,
- 15, 4, 1, 16, 72, 19, 5, 1, 80, 88, 23, 6,
- 1, -112, 104, 27, 7, 1, -48, 120, 31, 8, 2, 16,
- -120, 35, 9, 2, 80, -104, 39, 10, 2, -112, -88, 43,
- 11, 2, -48, -72, 47, 12, 3, 16, -56, 51, 13, 3,
- 80, -40, 55, 14, 3, -112, -24, 59, 15, 3, -48, -8,
- 63, 16, 4, 17, 8, 67, 17, 4, 81, 24, 71, 18,
- 4, -111, 40, 75, 19, 4, -47, 56, 79, 20, 5, 17,
- 72, 83, 21, 5, 81, 88, 87, 22, 5, -111, 104, 91,
- 23, 5, -47, 120, 95, 24, 6, 17, -120, 99, 25, 6,
- 81, -104, 103, 26, 6, -111, -88, 107, 27, 6, -47, -72,
- 111, 28, 7, 17, -56, 115, 29, 7, 81, -40, 119, 30,
- 7, -111, -24, 123, 31, 7, -47, -8, 127, 32, 8, 18,
- 8, -125, 33, 8, 82, 24, -121, 34, 8, -110, 40, -117,
- 35, 8, -46, 56, -113, 36, 9, 18, 72, -109, 37, 9,
- 82, 88, -105, 38, 9, -110, 104, -101, 39, 9, -46, 120,
- -97, 40, 10, 18, -120, -93, 41, 10, 82, -104, -89, 42,
- 10, -110, -88, -85, 43, 10, -46, -72, -81, 44, 11, 18,
- -56, -77, 45, 11, 82, -40, -73, 46, 11, -110, -24, -69,
- 47, 11, -46, -8, -65, 48, 12, 19, 8, -61, 49, 12,
- 83, 24, -57, 50, 12, -109, 40, -53, 51, 12, -45, 56,
- -49, 52, 13, 19, 72, -45, 53, 13, 83, 88, -41, 54,
- 13, -109, 104, -37, 55, 13, -45, 120, -33, 56, 14, 19,
- -120, -29, 57, 14, 83, -104, -25, 58, 14, -109, -88, -21,
- 59, 14, -45, -72, -17, 60, 15, 19, -56, -13, 61, 15,
- 83, -40, -9, 62, 15, -109, -24, -5, 63, 15, -45, -8,
- -1, 64, 16, 20, 9, 3, 65, 16, 84, 25, 7, 66,
- 16, -108, 41, 11, 67, 16, -44, 57, 15, 68, 17, 20,
- 73, 19, 69, 17, 84, 89, 23, 70, 17, -108, 105, 27,
- 71, 17, -44, 121, 31, 72, 18, 20, -119, 35, 73, 18,
- 84, -103, 39, 74, 18, -108, -87, 43, 75, 18, -44, -71,
- 47, 76, 19, 20, -55, 51, 77, 19, 84, -39, 55, 78,
- 19, -108, -23, 59, 79, 19, -44, -7, 63, 80, 20, 21,
- 9, 67, 81, 20, 85, 25, 71, 82, 20, -107, 41, 75,
- 83, 20, -43, 57, 79, 84, 21, 21, 73, 83, 85, 21,
- 85, 89, 87, 86, 21, -107, 105, 91, 87, 21, -43, 121,
- 95, 88, 22, 21, -119, 99, 89, 22, 85, -103, 103, 90,
- 22, -107, -87, 107, 91, 22, -43, -71, 111, 92, 23, 21,
- -55, 115, 93, 23, 85, -39, 119, 94, 23, -107, -23, 123,
- 95, 23, -43, -7, 127, 96, 24, 22, 9, -125, 97, 24,
- 86, 25, -121, 98, 24, -106, 41, -117, 99, 24, -42, 57,
- -113, 100, 25, 22, 73, -109, 101, 25, 86, 89, -105, 102,
- 25, -106, 105, -101, 103, 25, -42, 121, -97, 104, 26, 22,
- -119, -93, 105, 26, 86, -103, -89, 106, 26, -106, -87, -85,
- 107, 26, -42, -71, -81, 108, 27, 22, -55, -77, 109, 27,
- 86, -39, -73, 110, 27, -106, -23, -69, 111, 27, -42, -7,
- -65, 112, 28, 23, 9, -61, 113, 28, 87, 25, -57, 114,
- 28, -105, 41, -53, 115, 28, -41, 57, -49, 116, 29, 23,
- 73, -45, 117, 29, 87, 89, -41, 118, 29, -105, 105, -37,
- 119, 29, -41, 121, -33, 120, 30, 23, -119, -29, 121, 30,
- 87, -103, -25, 122, 30, -105, -87, -21, 123, 30, -41, -71,
- -17, 124, 31, 23, -55, -13, 125, 31, 87, -39, -9, 127,
- 8, 126, 31, -105, -23, -5, 127, 31, -41, -7, -1, -128,
- 32, 24, 10, 3, -127, 32, 88, 26, 7, -126, 32, -104,
- 42, 11, -125, 32, -40, 58, 15, -124, 33, 24, 74, 19,
- -123, 33, 88, 90, 23, -122, 33, -104, 106, 27, -121, 33,
- -40, 122, 31, -120, 34, 24, -118, 35, -119, 34, 88, -102,
- 39, -118, 34, -104, -86, 43, -117, 34, -40, -70, 47, -116,
- 35, 24, -54, 51, -115, 35, 88, -38, 55, -114, 35, -104,
- -22, 59, -113, 35, -40, -6, 63, -112, 36, 25, 10, 67,
- -111, 36, 89, 26, 71, -110, 36, -103, 42, 75, -109, 36,
- -39, 58, 79, -108, 37, 25, 74, 83, -107, 37, 89, 90,
- 87, -106, 37, -103, 106, 91, -105, 37, -39, 122, 95, -104,
- 38, 25, -118, 99, -103, 38, 89, -102, 103, -102, 38, -103,
- -86, 107, -101, 38, -39, -70, 111, -100, 39, 25, -54, 115,
- -99, 39, 89, -38, 119, -98, 39, -103, -22, 123, -97, 39,
- -39, -6, 127, -96, 40, 26, 10, -125, -95, 40, 90, 26,
- -121, -94, 40, -102, 42, -117, -93, 40, -38, 58, -113, -92,
- 41, 26, 74, -109, -91, 41, 90, 90, -105, -90, 41, -102,
- 106, -101, -89, 41, -38, 122, -97, -88, 42, 26, -118, -93,
- -87, 42, 90, -102, -89, -86, 42, -102, -86, -85, -85, 42,
- -38, -70, -81, -84, 43, 26, -54, -77, -83, 43, 90, -38,
- -73, -82, 43, -102, -22, -69, -81, 43, -38, -6, -65, -80,
- 44, 27, 10, -61, -79, 44, 91, 26, -57, -78, 44, -101,
- 42, -53, -77, 44, -37, 58, -49, -76, 45, 27, 74, -45,
- -75, 45, 91, 90, -41, -74, 45, -101, 106, -37, -73, 45,
- -37, 122, -33, -72, 46, 27, -118, -29, -71, 46, 91, -102,
- -25, -70, 46, -101, -86, -21, -69, 46, -37, -70, -17, -68,
- 47, 27, -54, -13, -67, 47, 91, -38, -9, -66, 47, -101,
- -22, -5, -65, 47, -37, -6, -1, -64, 48, 28, 11, 3,
- -63, 48, 92, 27, 7, -62, 48, -100, 43, 11, -61, 48,
- -36, 59, 15, -60, 49, 28, 75, 19, -59, 49, 92, 91,
- 23, -58, 49, -100, 107, 27, -57, 49, -36, 123, 31, -56,
- 50, 28, -117, 35, -55, 50, 92, -101, 39, -54, 50, -100,
- -85, 43, -53, 50, -36, -69, 47, -52, 51, 28, -53, 51,
- -51, 51, 92, -37, 55, -50, 51, -100, -21, 59, -49, 51,
- -36, -5, 63, -48, 52, 29, 11, 67, -47, 52, 93, 27,
- 71, -46, 52, -99, 43, 75, -45, 52, -35, 59, 79, -44,
- 53, 29, 75, 83, -43, 53, 93, 91, 87, -42, 53, -99,
- 107, 91, -41, 53, -35, 123, 95, -40, 54, 29, -117, 99,
- -39, 54, 93, -101, 103, -38, 54, -99, -85, 107, -37, 54,
- -35, -69, 111, -36, 55, 29, -53, 115, -35, 55, 93, -37,
- 119, -34, 55, -99, -21, 123, -33, 55, -35, -5, 127, -32,
- 56, 30, 11, -125, -31, 56, 94, 27, -121, -30, 56, -98,
- 43, -117, -29, 56, -34, 59, -113, -28, 57, 30, 75, -109,
- -27, 57, 94, 91, -105, -26, 57, -98, 107, -101, -25, 57,
- -34, 123, -97, -24, 58, 30, -117, -93, -23, 58, 94, -101,
- -89, -22, 58, -98, -85, -85, -21, 58, -34, -69, -81, -20,
- 59, 30, -53, -77, -19, 59, 94, -37, -73, -18, 59, -98,
- -21, -69, -17, 59, -34, -5, -65, -16, 60, 31, 11, -61,
- -15, 60, 95, 27, -57, -14, 60, -97, 43, -53, -13, 60,
- -33, 59, -49, -12, 61, 31, 75, -45, -11, 61, 95, 91,
- -41, -10, 61, -97, 107, -37, -9, 61, -33, 123, -33, -8,
- 62, 31, -117, -29, -7, 62, 95, -101, -25, -6, 62, -97,
- -85, -21, -5, 62, -33, -69, -17, 5, 8, -4, 63, 31,
- -53, -13, -3, 63, 95, -37, -9, -2, 63, -97, -21, -5,
- -1, 63, -33, -5, -1};
- for (int i = 0; i < 1289; i++) {
- ASSERT_EQ(real_buf[i], (uint8_t)expected_buf[i]);
- }
-}
-
-TEST_F(BitPackEncoderTest, EncodeFlush) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- encoder.encode(1, stream);
- encoder.encode_flush(stream);
- EXPECT_GT(stream.total_size(), 0);
-}
-
-TEST_F(BitPackEncoderTest, ClearBuffer) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- encoder.encode(1, stream);
- encoder.clear_buffer();
-}
-
-TEST_F(BitPackEncoderTest, Flush) {
- BitPackEncoder encoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- encoder.encode(1, stream);
- encoder.flush(stream);
- EXPECT_GT(stream.total_size(), 0);
-}
-
-class BitPackDecoderTest : public ::testing::Test {};
-
-TEST_F(BitPackDecoderTest, HasNext) {
- BitPackDecoder decoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- bool result = decoder.has_next(stream);
- EXPECT_FALSE(result);
-}
-
-TEST_F(BitPackDecoderTest, HasNextPackage) {
- BitPackDecoder decoder;
- bool result = decoder.has_next_package();
- EXPECT_FALSE(result);
-}
-
-TEST_F(BitPackDecoderTest, ReadInt64) {
- BitPackDecoder decoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- BitPackEncoder encoder;
- for (int64_t i = 0; i < 1024; i++) {
- encoder.encode(i, stream);
- }
- encoder.encode_flush(stream);
-
- decoder.init();
- for (int64_t i = 0; i < 1024; i++) {
- ASSERT_EQ(i, decoder.read_int(stream));
- }
-}
-
-TEST_F(BitPackDecoderTest, ReadInt64LargeQuantities) {
- BitPackDecoder decoder;
- common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
- BitPackEncoder encoder;
- for (int64_t i = 0; i < 10000; i++) {
- encoder.encode(i, stream);
- }
- encoder.encode_flush(stream);
-
- decoder.init();
- for (int64_t i = 0; i < 10000; i++) {
- ASSERT_EQ(i, decoder.read_int(stream));
- }
-}
-
-TEST_F(BitPackDecoderTest, Destroy) {
- BitPackDecoder decoder;
- decoder.destroy();
-}
-
-} // namespace storage
diff --git a/cpp/test/encoding/dictionary_codec_test.cc b/cpp/test/encoding/dictionary_codec_test.cc
index f41fc2b5..f9d814cb 100644
--- a/cpp/test/encoding/dictionary_codec_test.cc
+++ b/cpp/test/encoding/dictionary_codec_test.cc
@@ -18,7 +18,9 @@
*/
#include <gtest/gtest.h>
+#include <random>
#include <string>
+#include <unordered_set>
#include <vector>
#include "encoding/dictionary_decoder.h"
@@ -80,21 +82,91 @@ TEST_F(DictionaryTest, DictionaryEncoderAndDecoder) {
ASSERT_EQ(decoder.read_string(stream), "apple");
}
-TEST_F(DictionaryTest, DictionaryEncoderAndDecoderLargeQuantities) {
+TEST_F(DictionaryTest, DictionaryEncoderAndDecoderOneItem) {
DictionaryEncoder encoder;
common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
encoder.init();
- for (int64_t value = 1; value < 10000; value++) {
- encoder.encode(std::to_string(value), stream);
+ encoder.encode("apple", stream);
+ encoder.flush(stream);
+
+ DictionaryDecoder decoder;
+ decoder.init();
+
+ ASSERT_TRUE(decoder.has_next(stream));
+ ASSERT_EQ(decoder.read_string(stream), "apple");
+
+ ASSERT_FALSE(decoder.has_next(stream));
+}
+
+// Round-trips 100 copies each of "aaa" .. "zzz" through the dictionary codec.
+TEST_F(DictionaryTest, DictionaryEncoderAndDecoderRepeatedItems) {
+    DictionaryEncoder encoder;
+    common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
+    encoder.init();
+    for (char c = 'a'; c <= 'z'; c++) {
+        // std::string(count, ch) takes the count first: this is "ccc".
+        const std::string item(3, c);
+        for (int i = 0; i < 100; i++) encoder.encode(item, stream);
+    }
+    encoder.flush(stream);
+
+    DictionaryDecoder decoder;
+    decoder.init();
+    for (char c = 'a'; c <= 'z'; c++) {
+        const std::string item(3, c);
+        for (int i = 0; i < 100; i++) {
+            ASSERT_EQ(decoder.read_string(stream), item);
+        }
+    }
+}
+
+TEST_F(DictionaryTest,
+ DictionaryEncoderAndDecoderLargeQuantitiesWithRandomStrings) {
+ DictionaryEncoder encoder;
+ common::ByteStream stream(1024, common::MOD_DICENCODE_OBJ);
+ encoder.init();
+
+ // Prepare random string generator
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_int_distribution<> length_dist(5, 20); // String length range
+ std::uniform_int_distribution<> char_dist(33,
+ 126); // Printable ASCII range
+
+ // Generate 10000 random strings
+ const int num_strings = 10000;
+ std::vector<std::string> test_strings;
+ std::unordered_set<std::string> string_set; // For ensuring uniqueness
+
+ while (test_strings.size() < num_strings) {
+ int length = length_dist(gen);
+ std::string str;
+ str.reserve(length);
+
+ for (int i = 0; i < length; ++i) {
+ str.push_back(static_cast<char>(char_dist(gen)));
+ }
+
+ // Ensure string uniqueness
+ if (string_set.insert(str).second) {
+ test_strings.push_back(str);
+ }
+ }
+
+ // Encode all strings
+ for (const auto& str : test_strings) {
+ encoder.encode(str, stream);
}
encoder.flush(stream);
DictionaryDecoder decoder;
decoder.init();
- for (int64_t value = 1; value < 10000; value++) {
- ASSERT_EQ(decoder.read_string(stream), std::to_string(value));
+ // Decode and verify all strings
+ for (const auto& expected_str : test_strings) {
+ std::string decoded_str = decoder.read_string(stream);
+ ASSERT_EQ(decoded_str, expected_str);
}
}
diff --git a/cpp/test/encoding/gorilla_codec_test.cc b/cpp/test/encoding/gorilla_codec_test.cc
index 6f685b44..47056a6d 100644
--- a/cpp/test/encoding/gorilla_codec_test.cc
+++ b/cpp/test/encoding/gorilla_codec_test.cc
@@ -18,6 +18,8 @@
*/
#include <gtest/gtest.h>
+#include <limits>
+
#include "encoding/gorilla_decoder.h"
#include "encoding/gorilla_encoder.h"
@@ -107,4 +109,102 @@ TEST_F(GorillaCodecTest, Int64EncodingDecodingLargeQuantities) {
}
}
+// Round-trips IEEE 754 edge cases through the float Gorilla codec.
+TEST_F(GorillaCodecTest, FloatEncodingDecodingBoundaryValues) {
+    storage::FloatGorillaEncoder float_encoder;
+    storage::FloatGorillaDecoder float_decoder;
+    common::ByteStream stream(1024, common::MOD_DEFAULT);
+
+    // Boundary cases and special floating-point values.
+    using limits = std::numeric_limits<float>;
+    std::vector<float> test_values = {
+        0.0f,                  // Zero
+        -0.0f,                 // Negative zero (distinct in IEEE 754)
+        1.0f,                  // Positive one
+        -1.0f,                 // Negative one
+        limits::min(),         // Smallest positive normalized value
+        limits::max(),         // Largest positive finite value
+        limits::lowest(),      // Smallest (most negative) finite value
+        limits::infinity(),    // Positive infinity
+        -limits::infinity(),   // Negative infinity
+        limits::denorm_min(),  // Smallest positive subnormal value
+        limits::epsilon(),     // Gap between 1 and the next float
+        std::nanf("")          // Not-a-Number (NaN)
+    };
+
+    // Encode all test values into the stream
+    for (auto value : test_values) {
+        EXPECT_EQ(float_encoder.encode(value, stream), common::E_OK);
+    }
+    float_encoder.flush(stream);
+
+    // Decode values from the stream and verify correctness
+    for (auto expected : test_values) {
+        float decoded = float_decoder.decode(stream);
+        if (std::isnan(expected)) {
+            // NaN is unordered; must use isnan() to check
+            EXPECT_TRUE(std::isnan(decoded));
+            continue;
+        }
+        // Compare the sign bit for every non-NaN value: EXPECT_FLOAT_EQ
+        // treats -0.0f and 0.0f as equal, so it cannot catch a lost sign.
+        EXPECT_EQ(std::signbit(expected), std::signbit(decoded));
+        if (std::isinf(expected)) {
+            EXPECT_TRUE(std::isinf(decoded));
+        } else {
+            // For finite floats, allow small precision differences
+            EXPECT_FLOAT_EQ(decoded, expected);
+        }
+    }
+}
+
+// Round-trips IEEE 754 edge cases through the double Gorilla codec.
+TEST_F(GorillaCodecTest, DoubleEncodingDecodingBoundaryValues) {
+    storage::DoubleGorillaEncoder double_encoder;
+    storage::DoubleGorillaDecoder double_decoder;
+    common::ByteStream stream(1024, common::MOD_DEFAULT);
+
+    // Boundary cases and special floating-point values for double precision.
+    using limits = std::numeric_limits<double>;
+    std::vector<double> test_values = {
+        0.0,                   // Zero
+        -0.0,                  // Negative zero (distinct in IEEE 754)
+        1.0,                   // Positive one
+        -1.0,                  // Negative one
+        limits::min(),         // Smallest positive normalized value
+        limits::max(),         // Largest positive finite value
+        limits::lowest(),      // Smallest (most negative) finite value
+        limits::infinity(),    // Positive infinity
+        -limits::infinity(),   // Negative infinity
+        limits::denorm_min(),  // Smallest positive subnormal value
+        limits::epsilon(),     // Gap between 1 and the next double
+        std::nan("")           // Not-a-Number (NaN)
+    };
+
+    // Encode all test values into the stream
+    for (auto value : test_values) {
+        EXPECT_EQ(double_encoder.encode(value, stream), common::E_OK);
+    }
+    double_encoder.flush(stream);
+
+    // Decode values from the stream and verify correctness
+    for (auto expected : test_values) {
+        double decoded = double_decoder.decode(stream);
+        if (std::isnan(expected)) {
+            // NaN is unordered; must use isnan() to check
+            EXPECT_TRUE(std::isnan(decoded));
+            continue;
+        }
+        // Compare the sign bit for every non-NaN value: EXPECT_DOUBLE_EQ
+        // treats -0.0 and 0.0 as equal, so it cannot catch a lost sign.
+        EXPECT_EQ(std::signbit(expected), std::signbit(decoded));
+        if (std::isinf(expected)) {
+            EXPECT_TRUE(std::isinf(decoded));
+        } else {
+            // For finite doubles, allow small precision differences
+            EXPECT_DOUBLE_EQ(decoded, expected);
+        }
+    }
+}
+
} // namespace storage
diff --git a/cpp/test/encoding/inpacker_test.cc b/cpp/test/encoding/inpacker_test.cc
deleted file mode 100644
index 432585c6..00000000
--- a/cpp/test/encoding/inpacker_test.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * License); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License a
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-#include <gtest/gtest.h>
-
-#include <bitset>
-
-#include "encoding/intpacker.h"
-
-namespace storage {
-
-TEST(IntPackerTest, Pack8Values) {
- IntPacker int_packer(10);
- int64_t values[NUM_OF_INTS] = {100, 200, 300, 400, 500, 600, 700, 800};
- unsigned char buf[10 * NUM_OF_INTS / 8] = {0};
-
- int_packer.pack_8values(values, 0, buf);
-
- // Generated using Java Edition
- unsigned char expected_buf[10 * NUM_OF_INTS / 8] = {25, 12, 132, 177, 144,
- 125, 37, 138, 243, 32};
- for (int i = 0; i < 10 * NUM_OF_INTS / 8; i++) {
- EXPECT_EQ(buf[i], expected_buf[i]);
- }
-}
-
-TEST(IntPackerTest, Unpack8Values) {
- IntPacker int_packer(10);
- unsigned char buf[4 * NUM_OF_INTS] = {25, 12, 132, 177, 144,
- 125, 37, 138, 243, 32};
- int64_t values[NUM_OF_INTS] = {0};
-
- int_packer.unpack_8values(buf, 0, values);
-
- // Generated using Java Edition
-    int expected_values[NUM_OF_INTS] = {100, 200, 300, 400, 500, 600, 700, 800};
- for (int i = 0; i < NUM_OF_INTS; i++) {
- EXPECT_EQ(values[i], expected_values[i]);
- }
-}
-
-} // namespace storage
\ No newline at end of file
diff --git a/cpp/test/encoding/int32_packer_test.cc b/cpp/test/encoding/int32_packer_test.cc
new file mode 100644
index 00000000..1b072383
--- /dev/null
+++ b/cpp/test/encoding/int32_packer_test.cc
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "encoding/int32_packer.h"
+
+#include <gtest/gtest.h>
+
+#include <bitset>
+#include <random>
+
+namespace storage {
+
+// Packs 0..7 at every width in [3, 31] and expects an exact round trip.
+TEST(IntPackerTest, SequentialValues) {
+    for (int width = 3; width <= 31; ++width) {
+        int32_t input[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+        Int32Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+        int32_t output[8] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int idx = 0; idx != 8; ++idx) {
+            EXPECT_EQ(output[idx], input[idx])
+                << "Width=" << width << " Index=" << idx;
+        }
+    }
+}
+
+// Stress test: pack 100000 random 31-bit groups, unpack all at once, compare.
+TEST(IntPackerStressTest, PackUnpackRandomPositiveValues) {
+    const int width = 31;
+    const int count = 100000;
+    const int total_values = count * 8;
+
+    Int32Packer packer(width);
+    std::vector<int32_t> pre_values;
+    std::vector<unsigned char> buffer;
+    pre_values.reserve(total_values);
+    buffer.resize(count * width);  // each 8-value group is copied as `width` bytes
+    int idx = 0;
+    std::srand(12345);  // Optional: deterministic seed
+    for (int i = 0; i < count; ++i) {
+        int32_t vs[8];
+        for (int j = 0; j < 8; ++j) {
+            vs[j] = std::rand() &
+                    0x7FFFFFFF;  // ensure non-negative (Java `nextInt`)
+            pre_values.push_back(vs[j]);
+        }
+        unsigned char temp_buf[32] = {0};
+        packer.pack_8values(vs, 0, temp_buf);
+        std::memcpy(buffer.data() + idx, temp_buf, width);
+        idx += width;
+    }
+    std::vector<int32_t> res(total_values);
+    packer.unpack_all_values(buffer.data(), static_cast<int>(buffer.size()),
+                             res.data());
+    // Collect every mismatch so one failure reports all differing indices.
+    std::string diff_msg;
+    for (int i = 0; i < total_values; ++i) {
+        if (res[i] != pre_values[i]) {
+            diff_msg += "\nMismatch at index " + std::to_string(i) +
+                        ": expected=" + std::to_string(pre_values[i]) +
+                        ", actual=" + std::to_string(res[i]);
+        }
+    }
+    ASSERT_TRUE(diff_msg.empty()) << diff_msg;
+}
+
+// All-zero input must decode to all zeros for widths 1..31. The output is
+// pre-filled with ones in every slot so a slot left unwritten is detected.
+TEST(Int32PackerTest, AllZeroValues) {
+    for (int width = 1; width <= 31; ++width) {
+        int32_t arr[NUM_OF_INTS] = {0};
+        Int32Packer packer(width);
+        std::vector<unsigned char> buf(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(arr, 0, buf.data());
+        // `= {1}` would set only res[0]; spell out all eight slots.
+        int32_t res[NUM_OF_INTS] = {1, 1, 1, 1, 1, 1, 1, 1};
+        packer.unpack_8values(buf.data(), 0, res);
+        for (int i = 0; i < NUM_OF_INTS; ++i) {
+            EXPECT_EQ(res[i], 0) << "Width=" << width << " Index=" << i;
+        }
+    }
+}
+
+// Width 1 is the smallest packing: eight alternating bits fill one byte.
+TEST(Int32PackerTest, BoundaryWidthOneAlternating) {
+    const int width = 1;
+    Int32Packer packer(width);
+    int32_t bits[NUM_OF_INTS] = {0, 1, 0, 1, 0, 1, 0, 1};
+    std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+    packer.pack_8values(bits, 0, packed.data());
+
+    int32_t unpacked[NUM_OF_INTS] = {};
+    packer.unpack_8values(packed.data(), 0, unpacked);
+    for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+        EXPECT_EQ(unpacked[pos], bits[pos]) << "Index=" << pos;
+    }
+}
+
+// Full-width (32-bit) round trip over 100000 random groups, negatives included.
+TEST(Int32PackerTest, MaxWidth32Random) {
+    const int width = 32;
+    const int times = 100000;
+    std::random_device seed_source;
+    std::mt19937 rng(seed_source());
+    std::uniform_int_distribution<int32_t> any_int32(INT32_MIN, INT32_MAX);
+    for (int round = 0; round < times; ++round) {
+        int32_t input[NUM_OF_INTS];
+        for (int32_t& slot : input) {
+            slot = any_int32(rng);
+        }
+        Int32Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+
+        int32_t output[NUM_OF_INTS] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+            EXPECT_EQ(output[pos], input[pos]) << "Index=" << pos;
+        }
+    }
+}
+
+// Width-32 round trip where every random input lies in [INT32_MIN, -1].
+TEST(Int32PackerTest, AllNegative32Random) {
+    const int width = 32;
+    const int times = 100000;
+    std::random_device seed_source;
+    std::mt19937 rng(seed_source());
+    std::uniform_int_distribution<int32_t> negative_int32(INT32_MIN, -1);
+    for (int round = 0; round < times; ++round) {
+        int32_t input[NUM_OF_INTS];
+        for (int32_t& slot : input) {
+            slot = negative_int32(rng);
+        }
+        Int32Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+        int32_t output[NUM_OF_INTS] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+            EXPECT_EQ(output[pos], input[pos]) << "Index=" << pos;
+        }
+    }
+}
+
+// unpack_all_values must decode several consecutively packed blocks in one call
+TEST(Int32PackerTest, UnpackAllValuesMultipleBlocks) {
+    const int width = 16;
+    // ten 8-value blocks are packed back to back
+    const int blocks = 10;
+    Int32Packer packer(width);
+    std::vector<int32_t> expected(blocks * NUM_OF_INTS);
+    std::vector<unsigned char> packed(blocks * width);
+
+    // Value pattern is block * 16 + index:
+    // block 0 = [0,1,...,7], block 1 = [16,17,...,23], and so on.
+    for (int block = 0; block < blocks; ++block) {
+        int32_t* block_values = expected.data() + block * NUM_OF_INTS;
+        for (int slot = 0; slot < NUM_OF_INTS; ++slot) {
+            block_values[slot] = (block << 4) | slot;
+        }
+        // Each block occupies `width` bytes of the packed buffer.
+        packer.pack_8values(block_values, 0, packed.data() + block * width);
+    }
+
+    // Decode the whole buffer with a single call.
+    std::vector<int32_t> actual(blocks * NUM_OF_INTS, 0);
+    packer.unpack_all_values(packed.data(), static_cast<int>(packed.size()),
+                             actual.data());
+
+    // Every decoded value has to equal the one that was packed.
+    for (size_t pos = 0; pos < expected.size(); ++pos) {
+        EXPECT_EQ(actual[pos], expected[pos]) << "Index=" << pos;
+    }
+}
+
+} // namespace storage
\ No newline at end of file
diff --git a/cpp/test/encoding/int32_rle_codec_test.cc b/cpp/test/encoding/int32_rle_codec_test.cc
new file mode 100644
index 00000000..c580a0eb
--- /dev/null
+++ b/cpp/test/encoding/int32_rle_codec_test.cc
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <gtest/gtest.h>
+
+#include <limits>
+#include <random>
+#include <vector>
+
+#include "encoding/int32_rle_decoder.h"
+#include "encoding/int32_rle_encoder.h"
+
+namespace storage {
+
+// Fixture for Int32 RLE round-trip tests; SetUp() reseeds std::rand().
+class Int32RleEncoderTest : public ::testing::Test {
+   protected:
+    void SetUp() override {
+        std::srand(static_cast<unsigned int>(std::time(nullptr)));
+    }
+
+    // Round-trips `input` through encoder and decoder and compares the results.
+    void encode_and_decode(const std::vector<int32_t>& input) {
+        common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
+        Int32RleEncoder encoder;
+        for (size_t i = 0; i < input.size(); ++i) {
+            encoder.encode(input[i], stream);
+        }
+        encoder.flush(stream);
+
+        Int32RleDecoder decoder;
+        std::vector<int32_t> decoded;
+        while (decoder.has_next(stream)) {
+            int32_t value;
+            decoder.read_int32(value, stream);
+            decoded.push_back(value);
+        }
+
+        ASSERT_EQ(input.size(), decoded.size());
+        for (size_t pos = 0; pos != input.size(); ++pos) {
+            EXPECT_EQ(input[pos], decoded[pos]);
+        }
+    }
+};
+
+// 64 zeros
+TEST_F(Int32RleEncoderTest, EncodeAllZeros) {
+    const std::vector<int32_t> zeros(64, 0);
+    encode_and_decode(zeros);
+}
+
+// 64 copies of INT32_MAX
+TEST_F(Int32RleEncoderTest, EncodeAllMaxValues) {
+    const int32_t max_value = std::numeric_limits<int32_t>::max();
+    encode_and_decode(std::vector<int32_t>(64, max_value));
+}
+
+// 64 copies of INT32_MIN
+TEST_F(Int32RleEncoderTest, EncodeAllMinValues) {
+    const int32_t min_value = std::numeric_limits<int32_t>::min();
+    encode_and_decode(std::vector<int32_t>(64, min_value));
+}
+
+// One value (12345678) repeated 128 times
+TEST_F(Int32RleEncoderTest, EncodeRepeatingValue) {
+    const int32_t repeated = 12345678;
+    encode_and_decode(std::vector<int32_t>(128, repeated));
+}
+
+// Strictly increasing run 0, 1, ..., 127
+TEST_F(Int32RleEncoderTest, EncodeIncrementalValues) {
+    std::vector<int32_t> ascending(128);
+    for (size_t pos = 0; pos < ascending.size(); ++pos) {
+        ascending[pos] = static_cast<int32_t>(pos);
+    }
+    encode_and_decode(ascending);
+}
+
+// Even indices stay positive, odd indices are negated: 0, -1, 2, -3, ...
+TEST_F(Int32RleEncoderTest, EncodeAlternatingSigns) {
+    std::vector<int32_t> zigzag;
+    for (int magnitude = 0; magnitude < 100; ++magnitude) {
+        zigzag.push_back((magnitude & 1) ? -magnitude : magnitude);
+    }
+    encode_and_decode(zigzag);
+}
+
+// 200 non-negative values drawn from std::rand()
+TEST_F(Int32RleEncoderTest, EncodeRandomPositiveValues) {
+    std::vector<int32_t> samples(200);
+    for (int32_t& sample : samples) {
+        sample = std::rand() & 0x7FFFFFFF;
+    }
+    encode_and_decode(samples);
+}
+
+// 200 non-positive values: each is a negated std::rand() result
+TEST_F(Int32RleEncoderTest, EncodeRandomNegativeValues) {
+    std::vector<int32_t> samples(200);
+    for (int32_t& sample : samples) {
+        sample = -(std::rand() & 0x7FFFFFFF);
+    }
+    encode_and_decode(samples);
+}
+
+// Extremes of the int32 range together with the values around zero
+TEST_F(Int32RleEncoderTest, EncodeBoundaryValues) {
+    using limits = std::numeric_limits<int32_t>;
+    const std::vector<int32_t> edges{limits::min(), -1, 0, 1,
+                                     limits::max()};
+    encode_and_decode(edges);
+}
+
+// Three rounds of 8 values, flushing the encoder after each round
+TEST_F(Int32RleEncoderTest, EncodeMultipleFlushes) {
+    common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
+    Int32RleEncoder encoder;
+    std::vector<int32_t> expected;
+
+    for (int round = 0; round < 3; ++round) {
+        const int base = round * 10;
+        for (int offset = 0; offset < 8; ++offset) {
+            encoder.encode(base + offset, stream);
+            expected.push_back(base + offset);
+        }
+        encoder.flush(stream);
+    }
+
+    // Drain the stream through a fresh decoder
+    Int32RleDecoder decoder;
+    std::vector<int32_t> actual;
+    while (decoder.has_next(stream)) {
+        int32_t value;
+        decoder.read_int32(value, stream);
+        actual.push_back(value);
+    }
+
+    ASSERT_EQ(expected.size(), actual.size());
+    for (size_t pos = 0; pos != expected.size(); ++pos) {
+        EXPECT_EQ(expected[pos], actual[pos]);
+    }
+}
+
+// Flushing an encoder that never received a value must not write any bytes
+TEST_F(Int32RleEncoderTest, EncodeFlushWithoutData) {
+    Int32RleEncoder encoder;
+    common::ByteStream stream(1024, common::MOD_ENCODER_OBJ);
+
+    encoder.flush(stream);
+    EXPECT_EQ(stream.total_size(), 0u);
+}
+
+} // namespace storage
diff --git a/cpp/test/encoding/int64_packer_test.cc b/cpp/test/encoding/int64_packer_test.cc
new file mode 100644
index 00000000..846f9a0c
--- /dev/null
+++ b/cpp/test/encoding/int64_packer_test.cc
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include "encoding/int64_packer.h"
+
+#include <gtest/gtest.h>
+
+#include <bitset>
+#include <cmath>
+#include <random>
+
+namespace storage {
+
+// Packs 0..7 at every width in [4, 62] and expects an exact round trip.
+TEST(Int64PackerTest, SequentialValues) {
+    for (int width = 4; width <= 62; ++width) {
+        int64_t input[8] = {0, 1, 2, 3, 4, 5, 6, 7};
+        Int64Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+        int64_t output[8] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int idx = 0; idx != 8; ++idx) {
+            EXPECT_EQ(output[idx], input[idx])
+                << "Width=" << width << " Index=" << idx;
+        }
+    }
+}
+
+// Packs one batch of eight random 63-bit values and checks the round trip.
+TEST(Int64PackerTest, PackUnpackSingleBatchRandomPositiveLongs) {
+    const int width = 63;  // bits per value; 8 values pack into 63 bytes
+    const int count = 1;
+    const int total_values = count * 8;
+
+    Int64Packer packer(width);
+    // Kept as int64_t so the final comparison is signed against signed.
+    std::vector<int64_t> pre_values;
+    std::vector<unsigned char> buffer(count * width);
+    pre_values.reserve(total_values);
+
+    int idx = 0;
+    std::srand(12345);  // optional fixed seed
+    for (int i = 0; i < count; ++i) {
+        int64_t vs[8];
+        for (int j = 0; j < 8; ++j) {
+            // Emulate Java's nextLong() then Math.abs(): remove sign bit
+            uint64_t v = ((uint64_t)std::rand() << 32) | std::rand();
+            vs[j] = v & 0x7FFFFFFFFFFFFFFFULL;  // clear sign bit
+            pre_values.push_back(vs[j]);
+        }
+
+        unsigned char temp_buf[64] = {0};  // temp output buffer
+        packer.pack_8values(vs, 0, temp_buf);
+        std::memcpy(buffer.data() + idx, temp_buf, width);
+        idx += width;
+    }
+
+    std::vector<int64_t> result(total_values);
+    packer.unpack_all_values(buffer.data(), static_cast<int>(buffer.size()),
+                             result.data());
+
+    for (int i = 0; i < total_values; ++i) {
+        ASSERT_EQ(result[i], pre_values[i]) << "Mismatch at index " << i;
+    }
+}
+
+// Returns the number of bits needed to store the largest value (at least 1).
+// Pure integer math: std::log2 rounds inputs above 2^53 and overcounts.
+int get_long_max_bit_width(const std::vector<uint64_t>& values) {
+    uint64_t seen_bits = 0;
+    for (uint64_t v : values) seen_bits |= v;  // same top bit as the max
+    int width = 1;
+    while (seen_bits >>= 1) ++width;
+    return width;
+}
+
+// Packs 16 values in two blocks of 8 at a computed bit width and unpacks both.
+TEST(Int64PackerTest, PackAllManualBitWidth) {
+    std::vector<uint64_t> bp_list;
+    int bp_count = 15;
+    uint64_t bp_start = 11;
+    for (int i = 0; i < bp_count; ++i) {  // 11, 33, 99, ... (times 3 each step)
+        bp_list.push_back(bp_start);
+        bp_start *= 3;
+    }
+    bp_list.push_back(0);  // Add one zero
+    ASSERT_EQ(bp_list.size(), 16u);
+
+    // Calculate max bit width
+    int bp_bit_width = get_long_max_bit_width(bp_list);
+    Int64Packer packer(bp_bit_width);
+    std::ostringstream oss(std::ios::binary);
+
+    // Split into two blocks of 8
+    int64_t value1[8];
+    int64_t value2[8];
+    for (int i = 0; i < 8; ++i) {
+        value1[i] = bp_list[i];
+        value2[i] = bp_list[i + 8];
+    }
+
+    unsigned char bytes1[64] = {0};
+    unsigned char bytes2[64] = {0};
+    packer.pack_8values(value1, 0, bytes1);
+    packer.pack_8values(value2, 0, bytes2);
+    // Only the first bp_bit_width bytes of each scratch buffer are kept.
+    oss.write(reinterpret_cast<const char*>(bytes1), bp_bit_width);
+    oss.write(reinterpret_cast<const char*>(bytes2), bp_bit_width);
+    std::string packed_data = oss.str();
+    ASSERT_EQ(static_cast<int>(packed_data.size()), 2 * bp_bit_width);
+
+    // Decode both blocks with one call
+    int64_t read_array[16] = {0};
+    packer.unpack_all_values(
+        reinterpret_cast<const unsigned char*>(packed_data.data()),
+        2 * bp_bit_width, read_array);
+
+    // Compare against the original values
+    for (int i = 0; i < 16; ++i) {
+        ASSERT_EQ(read_array[i], bp_list[i]) << "Mismatch at index " << i;
+    }
+}
+
+// All-zero input must decode to all zeros for widths 1..31. The output is
+// pre-filled with ones in every slot so a slot left unwritten is detected.
+TEST(Int64PackerTest, AllZeroValues) {
+    for (int width = 1; width <= 31; ++width) {
+        int64_t arr[NUM_OF_INTS] = {0};
+        Int64Packer packer(width);
+        std::vector<unsigned char> buf(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(arr, 0, buf.data());
+        // `= {1}` would set only res[0]; spell out all eight slots.
+        int64_t res[NUM_OF_INTS] = {1, 1, 1, 1, 1, 1, 1, 1};
+        packer.unpack_8values(buf.data(), 0, res);
+        for (int i = 0; i < NUM_OF_INTS; ++i) {
+            EXPECT_EQ(res[i], 0) << "Width=" << width << " Index=" << i;
+        }
+    }
+}
+
+// Width 1 is the smallest packing: eight alternating bits fill one byte.
+TEST(Int64PackerTest, BoundaryWidthOneAlternating) {
+    const int width = 1;
+    Int64Packer packer(width);
+    int64_t bits[NUM_OF_INTS] = {0, 1, 0, 1, 0, 1, 0, 1};
+    std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+    packer.pack_8values(bits, 0, packed.data());
+
+    int64_t unpacked[NUM_OF_INTS] = {};
+    packer.unpack_8values(packed.data(), 0, unpacked);
+    for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+        EXPECT_EQ(unpacked[pos], bits[pos]) << "Index=" << pos;
+    }
+}
+
+// Full-width (64-bit) round trip over 100000 random groups, negatives included.
+TEST(Int64PackerTest, MaxWidth64Random) {
+    const int width = 64;
+    const int times = 100000;
+    std::random_device seed_source;
+    std::mt19937 rng(seed_source());
+    std::uniform_int_distribution<int64_t> any_int64(INT64_MIN, INT64_MAX);
+    for (int round = 0; round < times; ++round) {
+        int64_t input[NUM_OF_INTS];
+        for (int64_t& slot : input) {
+            slot = any_int64(rng);
+        }
+        Int64Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+
+        int64_t output[NUM_OF_INTS] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+            EXPECT_EQ(output[pos], input[pos]) << "Index=" << pos;
+        }
+    }
+}
+
+// Width-64 round trip where every random input lies in [INT64_MIN, -1].
+TEST(Int64PackerTest, AllNegative64Random) {
+    const int width = 64;
+    const int times = 100000;
+    std::random_device seed_source;
+    std::mt19937 rng(seed_source());
+    std::uniform_int_distribution<int64_t> negative_int64(INT64_MIN, -1);
+    for (int round = 0; round < times; ++round) {
+        int64_t input[NUM_OF_INTS];
+        for (int64_t& slot : input) {
+            slot = negative_int64(rng);
+        }
+        Int64Packer packer(width);
+        std::vector<unsigned char> packed(NUM_OF_INTS * width / 8, 0);
+        packer.pack_8values(input, 0, packed.data());
+        int64_t output[NUM_OF_INTS] = {};
+        packer.unpack_8values(packed.data(), 0, output);
+        for (int pos = 0; pos != NUM_OF_INTS; ++pos) {
+            EXPECT_EQ(output[pos], input[pos]) << "Index=" << pos;
+        }
+    }
+}
+
+// unpack_all_values must decode several consecutively packed blocks in one call
+TEST(Int64PackerTest, UnpackAllValuesMultipleBlocks) {
+    const int width = 16;
+    // ten 8-value blocks are packed back to back
+    const int blocks = 10;
+    Int64Packer packer(width);
+    std::vector<int64_t> expected(blocks * NUM_OF_INTS);
+    std::vector<unsigned char> packed(blocks * width);
+
+    // Value pattern is block * 16 + index:
+    // block 0 = [0,1,...,7], block 1 = [16,17,...,23], and so on.
+    for (int block = 0; block < blocks; ++block) {
+        int64_t* block_values = expected.data() + block * NUM_OF_INTS;
+        for (int slot = 0; slot < NUM_OF_INTS; ++slot) {
+            block_values[slot] = (block << 4) | slot;
+        }
+        // Each block occupies `width` bytes of the packed buffer.
+        packer.pack_8values(block_values, 0, packed.data() + block * width);
+    }
+
+    // Decode the whole buffer with a single call.
+    std::vector<int64_t> actual(blocks * NUM_OF_INTS, 0);
+    packer.unpack_all_values(packed.data(), static_cast<int>(packed.size()),
+                             actual.data());
+
+    // Every decoded value has to equal the one that was packed.
+    for (size_t pos = 0; pos < expected.size(); ++pos) {
+        EXPECT_EQ(actual[pos], expected[pos]) << "Index=" << pos;
+    }
+}
+
+} // namespace storage
\ No newline at end of file
diff --git a/cpp/test/encoding/int64_rle_codec_test.cc b/cpp/test/encoding/int64_rle_codec_test.cc
new file mode 100644
index 00000000..7583faad
--- /dev/null
+++ b/cpp/test/encoding/int64_rle_codec_test.cc
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <gtest/gtest.h>
+
+#include <random>
+
+#include "encoding/int64_rle_decoder.h"
+#include "encoding/int64_rle_encoder.h"
+
+namespace storage {
+
+class Int64RleCodecTest : public ::testing::Test {
+   protected:
+    // Seeds std::rand() from the wall clock before every test case.
+    void SetUp() override {
+        std::srand(static_cast<unsigned int>(std::time(nullptr)));
+    }
+
+    // Encodes `input` with Int64RleEncoder into one ByteStream, decodes it
+    // with Int64RleDecoder, and checks that the values come back in order
+    // and that the decoder reports no further data afterwards.
+    void encode_and_decode_check(const std::vector<int64_t>& input) {
+        common::ByteStream stream(4096, common::MOD_ENCODER_OBJ);
+        Int64RleEncoder encoder;
+        for (int64_t v : input) {
+            encoder.encode(v, stream);
+        }
+        encoder.flush(stream);
+        Int64RleDecoder decoder;
+        for (size_t i = 0; i < input.size(); ++i) {
+            ASSERT_TRUE(decoder.has_next(stream));
+            // Zero-initialized so a failed read never compares garbage.
+            int64_t value = 0;
+            decoder.read_int64(value, stream);
+            EXPECT_EQ(value, input[i]) << "Mismatch at index " << i;
+        }
+        EXPECT_FALSE(decoder.has_next(stream));
+    }
+};
+
+// Round-trips a run of 64 zeros.
+TEST_F(Int64RleCodecTest, EncodeAllZeros) {
+    const std::vector<int64_t> zeros(64, 0);
+    encode_and_decode_check(zeros);
+}
+
+// Round-trips 64 copies of the largest representable int64_t.
+TEST_F(Int64RleCodecTest, EncodeAllMaxValues) {
+    const int64_t max_value = std::numeric_limits<int64_t>::max();
+    encode_and_decode_check(std::vector<int64_t>(64, max_value));
+}
+
+// Round-trips 64 copies of the smallest representable int64_t.
+TEST_F(Int64RleCodecTest, EncodeAllMinValues) {
+    const int64_t min_value = std::numeric_limits<int64_t>::min();
+    encode_and_decode_check(std::vector<int64_t>(64, min_value));
+}
+
+// Round-trips 100 copies of one arbitrary constant.
+TEST_F(Int64RleCodecTest, EncodeRepeatingSingleValue) {
+    const int64_t constant = 123456789012345;
+    encode_and_decode_check(std::vector<int64_t>(100, constant));
+}
+
+// Round-trips the strictly increasing sequence 0, 1, ..., 127.
+TEST_F(Int64RleCodecTest, EncodeIncrementalValues) {
+    std::vector<int64_t> ascending(128);
+    for (size_t pos = 0; pos < ascending.size(); ++pos) {
+        ascending[pos] = static_cast<int64_t>(pos);
+    }
+    encode_and_decode_check(ascending);
+}
+
+// Round-trips 0, -1, 2, -3, ...: even values stay as-is, odd ones are negated.
+TEST_F(Int64RleCodecTest, EncodeAlternatingSigns) {
+    std::vector<int64_t> zigzagging;
+    for (int64_t magnitude = 0; magnitude < 100; ++magnitude) {
+        zigzagging.push_back(magnitude % 2 != 0 ? -magnitude : magnitude);
+    }
+    encode_and_decode_check(zigzagging);
+}
+
+// Round-trips 256 non-negative values, each built from two std::rand() draws.
+TEST_F(Int64RleCodecTest, EncodeRandomPositiveValues) {
+    std::vector<int64_t> samples(256);
+    for (int64_t& sample : samples) {
+        sample = (static_cast<int64_t>(std::rand()) << 31) | std::rand();
+    }
+    encode_and_decode_check(samples);
+}
+
+// Round-trips 256 values, each the negation of two combined std::rand() draws.
+TEST_F(Int64RleCodecTest, EncodeRandomNegativeValues) {
+    std::vector<int64_t> samples(256);
+    for (int64_t& sample : samples) {
+        const int64_t high_bits = static_cast<int64_t>(std::rand()) << 31;
+        sample = -(high_bits | std::rand());
+    }
+    encode_and_decode_check(samples);
+}
+
+// Round-trips the extremes of int64_t together with -1, 0 and 1.
+TEST_F(Int64RleCodecTest, EncodeBoundaryValues) {
+    using Limits = std::numeric_limits<int64_t>;
+    const std::vector<int64_t> edges = {Limits::min(), -1, 0, 1, Limits::max()};
+    encode_and_decode_check(edges);
+}
+
+// Flushing an encoder that never received a value must write nothing.
+TEST_F(Int64RleCodecTest, EncodeFlushWithoutData) {
+    Int64RleEncoder idle_encoder;
+    common::ByteStream sink(1024, common::MOD_ENCODER_OBJ);
+    idle_encoder.flush(sink);
+    EXPECT_EQ(sink.total_size(), 0u);
+}
+
+} // namespace storage
diff --git a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
index cac0ce20..72c29aa0 100644
--- a/cpp/test/reader/table_view/tsfile_reader_table_test.cc
+++ b/cpp/test/reader/table_view/tsfile_reader_table_test.cc
@@ -345,7 +345,6 @@ TEST_F(TsFileTableReaderTest,
TableModelQueryWithMultiTabletsMultiFlush) {
auto tablet = gen_tablet(tmp_table_schema, cur_row, 1, tablet_size);
ASSERT_EQ(tsfile_table_writer_->write_table(tablet), common::E_OK);
cur_row += tablet_size;
- std::cout << "finish writing " << cur_row << " rows" << std::endl;
}
ASSERT_EQ(tsfile_table_writer_->flush(), common::E_OK);
ASSERT_EQ(tsfile_table_writer_->close(), common::E_OK);
@@ -356,7 +355,6 @@ TEST_F(TsFileTableReaderTest,
TableModelQueryWithMultiTabletsMultiFlush) {
storage::ResultSet* tmp_result_set = nullptr;
ret = reader.query("testtable0", tmp_table_schema->get_measurement_names(),
0, 1000000000000, tmp_result_set);
- std::cout << "begin to dump data from tsfile ---" << std::endl;
auto* table_result_set = (storage::TableResultSet*)tmp_result_set;
bool has_next = false;
char* literal = new char[std::strlen("device_id") + 1];
diff --git a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
index b187a80f..43b00dbe 100644
--- a/cpp/test/writer/table_view/tsfile_writer_table_test.cc
+++ b/cpp/test/writer/table_view/tsfile_writer_table_test.cc
@@ -875,3 +875,107 @@ TEST_F(TsFileWriterTableTest, MultiDatatypes) {
ASSERT_EQ(reader.close(), common::E_OK);
delete[] literal;
}
+
+TEST_F(TsFileWriterTableTest, DiffCodecTypes) {
+    // Writes 100 rows into ten field columns covering the ZIGZAG, DICTIONARY,
+    // GORILLA, TS_2DIFF and RLE encodings, then reads the file back and
+    // checks every value as well as the number of rows returned.
+    const int row_count = 100;
+    std::vector<MeasurementSchema*> measurement_schemas;
+    std::vector<ColumnCategory> column_categories;
+    std::vector<std::string> measurement_names = {
+        "int32_zigzag",  "int64_zigzag",   "string_dic",    "text_dic",
+        "float_gorilla", "double_gorilla", "int32_ts2diff", "int64_ts2diff",
+        "int32_rle",     "int64_rle"};
+    std::vector<common::TSDataType> data_types = {
+        INT32, INT64, STRING, TEXT, FLOAT, DOUBLE, INT32, INT64, INT32, INT64};
+    std::vector<common::TSEncoding> encodings = {
+        ZIGZAG,  ZIGZAG,   DICTIONARY, DICTIONARY, GORILLA,
+        GORILLA, TS_2DIFF, TS_2DIFF,   RLE,        RLE};
+
+    for (size_t i = 0; i < measurement_names.size(); i++) {
+        measurement_schemas.emplace_back(new MeasurementSchema(
+            measurement_names[i], data_types[i], encodings[i], UNCOMPRESSED));
+        column_categories.emplace_back(ColumnCategory::FIELD);
+    }
+    auto table_schema =
+        new TableSchema("testTable", measurement_schemas, column_categories);
+    auto tsfile_table_writer =
+        std::make_shared<TsFileTableWriter>(&write_file_, table_schema);
+    Tablet tablet = Tablet(table_schema->get_measurement_names(),
+                           table_schema->get_data_types(), row_count);
+
+    char* literal = new char[std::strlen("device_id") + 1];
+    std::strcpy(literal, "device_id");
+    String literal_str(literal, std::strlen("device_id"));
+    for (int i = 0; i < row_count; i++) {
+        tablet.add_timestamp(i, static_cast<int64_t>(i));
+        for (int j = 0; j < static_cast<int>(measurement_schemas.size()); j++) {
+            switch (data_types[j]) {
+                case BOOLEAN:
+                    ASSERT_EQ(tablet.add_value(i, j, true), E_OK);
+                    break;
+                case INT32:
+                    ASSERT_EQ(tablet.add_value(i, j, (int32_t)32), E_OK);
+                    break;
+                case INT64:
+                    ASSERT_EQ(tablet.add_value(i, j, (int64_t)64), E_OK);
+                    break;
+                case FLOAT:
+                    ASSERT_EQ(tablet.add_value(i, j, (float)1.0), E_OK);
+                    break;
+                case DOUBLE:
+                    ASSERT_EQ(tablet.add_value(i, j, (double)2.0), E_OK);
+                    break;
+                case TEXT:
+                case STRING:
+                case BLOB:
+                    ASSERT_EQ(tablet.add_value(i, j, literal_str), E_OK);
+                    break;
+                default:
+                    break;
+            }
+        }
+    }
+    ASSERT_EQ(tsfile_table_writer->write_table(tablet), E_OK);
+    ASSERT_EQ(tsfile_table_writer->flush(), E_OK);
+    ASSERT_EQ(tsfile_table_writer->close(), E_OK);
+
+    delete table_schema;
+
+    auto reader = TsFileReader();
+    ASSERT_EQ(reader.open(write_file_.get_file_path()), common::E_OK);
+    ResultSet* ret = nullptr;
+    int ret_value = reader.query("testTable", measurement_names, 0, 100, ret);
+    ASSERT_EQ(common::E_OK, ret_value);
+    ASSERT_NE(ret, nullptr);
+
+    auto table_result_set = (TableResultSet*)ret;
+    bool has_next = false;
+    int cur_line = 0;
+    while (IS_SUCC(table_result_set->next(has_next)) && has_next) {
+        // Field columns are read from index 2 onwards, in schema order.
+        ASSERT_EQ(table_result_set->get_value<int32_t>(2), 32);
+        ASSERT_EQ(table_result_set->get_value<int64_t>(3), 64);
+
+        ASSERT_EQ(table_result_set->get_value<common::String*>(4)->compare(
+                      literal_str),
+                  0);
+        ASSERT_EQ(table_result_set->get_value<common::String*>(5)->compare(
+                      literal_str),
+                  0);
+
+        ASSERT_EQ(table_result_set->get_value<float>(6), (float)1.0);
+        ASSERT_EQ(table_result_set->get_value<double>(7), (double)2.0);
+
+        ASSERT_EQ(table_result_set->get_value<int32_t>(8), 32);
+        ASSERT_EQ(table_result_set->get_value<int64_t>(9), 64);
+        ASSERT_EQ(table_result_set->get_value<int32_t>(10), 32);
+        ASSERT_EQ(table_result_set->get_value<int64_t>(11), 64);
+        cur_line++;
+    }
+    // Without this check the test would pass even if no row was read back.
+    ASSERT_EQ(cur_line, row_count);
+    reader.destroy_query_data_set(table_result_set);
+    ASSERT_EQ(reader.close(), common::E_OK);
+    delete[] literal;
+}