This is an automated email from the ASF dual-hosted git repository.
jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new af6f847d [Improvement](bkd) improve bkd performance by reuse bkd
reader (#140)
af6f847d is described below
commit af6f847d17d3cff043a3e11d6511d1e14d4333d9
Author: airborne12 <[email protected]>
AuthorDate: Thu Nov 30 16:27:20 2023 +0800
[Improvement](bkd) improve bkd performance by reuse bkd reader (#140)
---
src/core/CLucene/index/IndexFileNames.cpp | 3 +
src/core/CLucene/index/_IndexFileNames.h | 3 +
src/core/CLucene/store/ByteArrayDataInput.cpp | 107 +++----------
src/core/CLucene/store/ByteArrayDataInput.h | 6 +-
src/core/CLucene/util/BytesRef.cpp | 7 +-
src/core/CLucene/util/FutureArrays.cpp | 7 +-
src/core/CLucene/util/bkd/bkd_reader.cpp | 198 ++++++++++++++----------
src/core/CLucene/util/bkd/bkd_reader.h | 74 ++-------
src/core/CLucene/util/bkd/docids_writer.cpp | 23 ++-
src/core/CLucene/util/bkd/index_tree.cpp | 8 +-
src/core/CLucene/util/bkd/index_tree.h | 1 +
src/core/CLucene/util/bkd/legacy_index_tree.cpp | 5 +
src/core/CLucene/util/bkd/legacy_index_tree.h | 1 +
src/core/CLucene/util/bkd/packed_index_tree.cpp | 52 ++++---
src/core/CLucene/util/bkd/packed_index_tree.h | 6 +-
15 files changed, 231 insertions(+), 270 deletions(-)
diff --git a/src/core/CLucene/index/IndexFileNames.cpp
b/src/core/CLucene/index/IndexFileNames.cpp
index 1027ca7f..f5389434 100644
--- a/src/core/CLucene/index/IndexFileNames.cpp
+++ b/src/core/CLucene/index/IndexFileNames.cpp
@@ -34,6 +34,9 @@ CL_NS_DEF(index)
const char* IndexFileNames::PLAIN_NORMS_EXTENSION = "f";
const char* IndexFileNames::SEPARATE_NORMS_EXTENSION = "s";
const char* IndexFileNames::GEN_EXTENSION = "gen";
+ const char* IndexFileNames::BKD_DATA = "bkd";
+ const char* IndexFileNames::BKD_META = "bkd_meta";
+ const char* IndexFileNames::BKD_INDEX = "bkd_index";
const char* IndexFileNames_INDEX_EXTENSIONS_s[] =
{
diff --git a/src/core/CLucene/index/_IndexFileNames.h
b/src/core/CLucene/index/_IndexFileNames.h
index 7838976f..8d631896 100644
--- a/src/core/CLucene/index/_IndexFileNames.h
+++ b/src/core/CLucene/index/_IndexFileNames.h
@@ -45,6 +45,9 @@ public:
static const char *PLAIN_NORMS_EXTENSION;
static const char *SEPARATE_NORMS_EXTENSION;
static const char *GEN_EXTENSION;
+ static const char *BKD_DATA;
+ static const char *BKD_META;
+ static const char *BKD_INDEX;
LUCENE_STATIC_CONSTANT(int32_t, COMPOUND_EXTENSIONS_LENGTH = 7);
LUCENE_STATIC_CONSTANT(int32_t, VECTOR_EXTENSIONS_LENGTH = 3);
diff --git a/src/core/CLucene/store/ByteArrayDataInput.cpp
b/src/core/CLucene/store/ByteArrayDataInput.cpp
index 197bd40e..b9cc2a4c 100644
--- a/src/core/CLucene/store/ByteArrayDataInput.cpp
+++ b/src/core/CLucene/store/ByteArrayDataInput.cpp
@@ -23,7 +23,7 @@ void ByteArrayDataInput::reset(std::vector<uint8_t>& b) {
void ByteArrayDataInput::rewind() { pos = 0; }
-int ByteArrayDataInput::getPosition() { return pos; }
+int ByteArrayDataInput::getPosition() const { return pos; }
void ByteArrayDataInput::setPosition(int p) { pos = p; }
@@ -33,109 +33,46 @@ void ByteArrayDataInput::reset(std::vector<uint8_t> &b,
int offset, int len) {
limit = offset + len;
}
-int ByteArrayDataInput::length() { return limit; }
+int ByteArrayDataInput::length() const { return limit; }
-bool ByteArrayDataInput::eof() { return pos == limit; }
+bool ByteArrayDataInput::eof() const { return pos == limit; }
void ByteArrayDataInput::skipBytes(int64_t count) { pos += count; }
short ByteArrayDataInput::readShort() {
- return static_cast<short>(((bytes.at(pos++) & 0xFF) << 8) |
- (bytes.at(pos++) & 0xFF));
+ return static_cast<short>(((readByte() & 0xFF) << 8) | (readByte() &
0xFF));
}
int ByteArrayDataInput::readInt() {
- return ((bytes.at(pos++) & 0xFF) << 24) | ((bytes.at(pos++) & 0xFF) << 16)
|
- ((bytes.at(pos++) & 0xFF) << 8) | (bytes.at(pos++) & 0xFF);
+ int32_t b = (readByte() << 24);
+ b |= (readByte() << 16);
+ b |= (readByte() << 8);
+ return (b | readByte());
}
int64_t ByteArrayDataInput::readLong() {
- int i1 = ((bytes.at(pos++) & 0xff) << 24) |
- ((bytes.at(pos++) & 0xff) << 16) |
- ((bytes.at(pos++) & 0xff) << 8) | (bytes.at(pos++) & 0xff);
- int i2 = ((bytes.at(pos++) & 0xff) << 24) |
- ((bytes.at(pos++) & 0xff) << 16) |
- ((bytes.at(pos++) & 0xff) << 8) | (bytes.at(pos++) & 0xff);
- return ((static_cast<int64_t>(i1)) << 32) | (i2 & 0xFFFFFFFFLL);
+ int64_t i = ((int64_t)readInt() << 32);
+ return (i | ((int64_t)readInt() & 0xFFFFFFFFL));
}
int ByteArrayDataInput::readVInt() {
- uint8_t b = bytes.at(pos++);
- if (b >= 0) {
- return b;
+ uint8_t b = readByte();
+ int32_t i = b & 0x7F;
+ for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = readByte();
+ i |= (b & 0x7F) << shift;
}
- int i = b & 0x7F;
- b = bytes.at(pos++);
- i |= (b & 0x7F) << 7;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7F) << 14;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7F) << 21;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- // Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
- i |= (b & 0x0F) << 28;
- if ((b & 0xF0) == 0) {
- return i;
- }
- _CLTHROWA(CL_ERR_Runtime, "Invalid vInt detected (too many bits)");
+ return i;
}
int64_t ByteArrayDataInput::readVLong() {
- uint8_t b = bytes.at(pos++);
- if (b >= 0) {
- return b;
- }
- int64_t i = b & 0x7FLL;
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 7;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 14;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 21;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 28;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 35;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 42;
- if (b >= 0) {
- return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 49;
- if (b >= 0) {
+ uint8_t b = readByte();
+ int64_t i = b & 0x7F;
+ for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) {
+ b = readByte();
+ i |= (((int64_t)b) & 0x7FL) << shift;
+ }
return i;
- }
- b = bytes.at(pos++);
- i |= (b & 0x7FLL) << 56;
- if (b >= 0) {
- return i;
- }
- _CLTHROWA(CL_ERR_Runtime, "Invalid vLong detected (negative values
disallowed)");
}
uint8_t ByteArrayDataInput::readByte() { return bytes.at(pos++); }
diff --git a/src/core/CLucene/store/ByteArrayDataInput.h
b/src/core/CLucene/store/ByteArrayDataInput.h
index 9c43d6cb..560cd43f 100644
--- a/src/core/CLucene/store/ByteArrayDataInput.h
+++ b/src/core/CLucene/store/ByteArrayDataInput.h
@@ -35,15 +35,15 @@ public:
// called reset w/ non-zero offset!!
void rewind();
- int getPosition();
+ int getPosition() const;
void setPosition(int pos);
void reset(std::vector<uint8_t> &bytes, int offset, int len);
- int length();
+ int length() const;
- bool eof();
+ bool eof() const;
void skipBytes(int64_t count);
diff --git a/src/core/CLucene/util/BytesRef.cpp
b/src/core/CLucene/util/BytesRef.cpp
index 08cd61d1..1e5de62e 100644
--- a/src/core/CLucene/util/BytesRef.cpp
+++ b/src/core/CLucene/util/BytesRef.cpp
@@ -9,11 +9,8 @@ std::vector<uint8_t> BytesRef::EMPTY_BYTES =
std::vector<uint8_t>(0);
BytesRef::BytesRef() : BytesRef((EMPTY_BYTES)) {}
-BytesRef::BytesRef(std::vector<uint8_t> &bytes, int offset, int length) {
- this->bytes = bytes;
- this->offset = offset;
- this->length = length;
- //assert(isValid());
+BytesRef::BytesRef(std::vector<uint8_t>& bytes, int offset, int length)
+ : bytes(std::move(bytes)), offset(offset), length(length) {
}
BytesRef::BytesRef(std::vector<uint8_t> &bytes) : BytesRef(bytes, 0,
bytes.size()) {
diff --git a/src/core/CLucene/util/FutureArrays.cpp
b/src/core/CLucene/util/FutureArrays.cpp
index 2d7e1b97..30eb1aa4 100644
--- a/src/core/CLucene/util/FutureArrays.cpp
+++ b/src/core/CLucene/util/FutureArrays.cpp
@@ -36,9 +36,7 @@ int FutureArrays::Mismatch(std::vector<uint8_t> &a, int
aFromIndex, int aToIndex
int FutureArrays::CompareUnsigned(const uint8_t *a, int aFromIndex,
int aToIndex, const uint8_t *b,
int bFromIndex, int bToIndex) {
- int aLen = aToIndex - aFromIndex;
- int bLen = bToIndex - bFromIndex;
- int len = std::min(aLen, bLen);
+ int len = std::min(aToIndex - aFromIndex, bToIndex - bFromIndex);
for (int i = 0; i < len; i++) {
int aByte = a[i + aFromIndex] & 0xFF;
int bByte = b[i + bFromIndex] & 0xFF;
@@ -48,8 +46,7 @@ int FutureArrays::CompareUnsigned(const uint8_t *a, int
aFromIndex,
}
}
- // One is a prefix of the other, or, they are equal:
- return aLen - bLen;
+ return (aToIndex - aFromIndex) - (bToIndex - bFromIndex);
}
int FutureArrays::CompareNumeric(const uint8_t *a,
diff --git a/src/core/CLucene/util/bkd/bkd_reader.cpp
b/src/core/CLucene/util/bkd/bkd_reader.cpp
index f9430197..4f22f348 100644
--- a/src/core/CLucene/util/bkd/bkd_reader.cpp
+++ b/src/core/CLucene/util/bkd/bkd_reader.cpp
@@ -4,6 +4,7 @@
#include "CLucene/util/CodecUtil.h"
#include "CLucene/util/FutureArrays.h"
#include "CLucene/util/Time.h"
+#include "CLucene/index/_IndexFileNames.h"
#include "bkd_reader.h"
#include "bkd_writer.h"
#include "docids_writer.h"
@@ -11,13 +12,38 @@
#include "packed_index_tree.h"
#include <cmath>
+#include <iostream>
+#include <iomanip>
+CL_NS_USE(index)
CL_NS_DEF2(util, bkd)
bkd_reader::bkd_reader(store::IndexInput *in) {
in_ = std::unique_ptr<store::IndexInput>(in);
}
+bkd_reader::~bkd_reader() {
+ if(_close_directory && _dir){
+ _dir->close();
+ }
+ _CLDECDELETE(_dir);
+}
+
+bkd_reader::bkd_reader(store::Directory *dir, bool close_directory):
_close_directory(close_directory) {
+ _dir = _CL_POINTER(dir);
+}
+
+bool bkd_reader::open() {
+ in_ =
std::unique_ptr<store::IndexInput>(_dir->openInput(IndexFileNames::BKD_DATA));
+ auto meta_in =
std::unique_ptr<store::IndexInput>(_dir->openInput(IndexFileNames::BKD_META));
+ auto index_in
=std::unique_ptr<store::IndexInput>(_dir->openInput(IndexFileNames::BKD_INDEX));
+ if (0 == read_meta(meta_in.get())) {
+ return false;
+ }
+ read_index(index_in.get());
+ return true;
+}
+
int bkd_reader::read_meta(store::IndexInput* meta_in) {
type = meta_in->readInt();
indexFP = meta_in->readLong();
@@ -73,9 +99,8 @@ void bkd_reader::read_index(store::IndexInput* index_in) {
int32_t numBytes = index_in->readVInt();
metaOffset = index_in->getFilePointer();
- clone_index_input =
std::shared_ptr<store::IndexInput>(index_in->clone());
- packed_index_ = std::make_shared<std::vector<uint8_t>>(numBytes);
- index_in->readBytes(packed_index_->data(), numBytes);
+ packed_index_ = std::vector<uint8_t>(numBytes);
+ index_in->readBytes(packed_index_.data(), numBytes);
leaf_block_fps_.clear();
split_packed_values_.clear();
} else {
@@ -115,7 +140,7 @@ void bkd_reader::read_index(store::IndexInput* index_in) {
}
leaf_block_fps_ = leafBlockFPs;
- packed_index_->clear();
+ packed_index_.clear();
}
}
@@ -125,26 +150,23 @@
bkd_reader::intersect_state::intersect_state(store::IndexInput *in,
int32_t packedIndexBytesLength,
int32_t maxPointsInLeafNode,
bkd_reader::intersect_visitor
*visitor,
- const std::shared_ptr<index_tree>
&indexVisitor) {
- in_ = std::shared_ptr<store::IndexInput>(in);
+ index_tree* indexVisitor) {
+ in_ = std::unique_ptr<store::IndexInput>(in);
visitor_ = visitor;
- common_prefix_lengths_ = std::vector<int32_t>(numDims);
+ common_prefix_lengths_.resize(numDims);
docid_set_iterator =
std::make_unique<bkd_docid_set_iterator>(maxPointsInLeafNode);
- scratch_doc_ids_ = std::vector<int32_t>(maxPointsInLeafNode);
- scratch_data_packed_value_ = std::vector<uint8_t>(packedBytesLength);
- scratch_min_index_packed_value_ =
std::vector<uint8_t>(packedIndexBytesLength);
- scratch_max_index_packed_value_ =
std::vector<uint8_t>(packedIndexBytesLength);
- index_ = indexVisitor;
+ scratch_data_packed_value_.resize(packedBytesLength);
+ scratch_min_index_packed_value_.resize(packedIndexBytesLength);
+ scratch_max_index_packed_value_.resize(packedIndexBytesLength);
+ index_ = std::unique_ptr<index_tree>(indexVisitor);
}
std::shared_ptr<bkd_reader::intersect_state>
bkd_reader::get_intersect_state(bkd_reader::intersect_visitor *visitor) {
- // because we will reuse BKDReader, we need to seek to packed tree index
offset every time.
- clone_index_input->seek(metaOffset);
- std::shared_ptr<index_tree> index;
- if (!packed_index_->empty()) {
- index = std::make_shared<packed_index_tree>(shared_from_this());
+ index_tree* index;
+ if (!packed_index_.empty()) {
+ index = new packed_index_tree(shared_from_this());
} else {
- index = std::make_shared<legacy_index_tree>(shared_from_this());
+ index = new legacy_index_tree(shared_from_this());
}
return std::make_shared<intersect_state>(in_->clone(),
num_data_dims_,
@@ -158,11 +180,20 @@ std::shared_ptr<bkd_reader::intersect_state>
bkd_reader::get_intersect_state(bkd
void bkd_reader::intersect(bkd_reader::intersect_visitor *visitor)
{
- intersect(get_intersect_state(visitor), min_packed_value_,
max_packed_value_);
+ if (indexFP == 0) {
+ return;
+ }
+ // Because intersect modifies the min/max packed values, we copy them first.
+ auto min_packed_value = min_packed_value_;
+ auto max_packed_value = max_packed_value_;
+ intersect(get_intersect_state(visitor), min_packed_value,
max_packed_value);
}
int64_t bkd_reader::estimate_point_count(bkd_reader::intersect_visitor
*visitor) {
- return estimate_point_count(get_intersect_state(visitor),
min_packed_value_, max_packed_value_);
+ // Because intersect modifies the min/max packed values, we copy them first.
+ auto min_packed_value = min_packed_value_;
+ auto max_packed_value = max_packed_value_;
+ return estimate_point_count(get_intersect_state(visitor),
min_packed_value, max_packed_value);
}
int64_t bkd_reader::estimate_point_count(const
std::shared_ptr<bkd_reader::intersect_state> &s, std::vector<uint8_t>
&cellMinPacked, std::vector<uint8_t> &cellMaxPacked)
@@ -253,9 +284,7 @@ void bkd_reader::add_all(const
std::shared_ptr<intersect_state> &state, bool gro
if (state->index_->is_leaf_node()) {
assert(grown);
if (state->index_->node_exists()) {
- auto start = UnixMillis();
visit_doc_ids(state->in_.get(), state->index_->get_leaf_blockFP(),
state->visitor_);
- stats.add_doc_id_visit_time_duration(UnixMillis() - start);
}
} else {
state->index_->push_left();
@@ -317,12 +346,28 @@ void
bkd_reader::visit_compressed_doc_values(std::vector<int32_t> &commonPrefixL
for (i = 0; i < count;) {
scratchPackedValue[compressedByteOffset] = in->readByte();
int32_t runLen = static_cast<int32_t>(in->readByte()) & 0xFF;
+ // Within a run, compare the prefix first; if it is outside the query, we can skip all runLen values of the run.
+ std::vector<uint8_t> prefix(scratchPackedValue.begin(),
scratchPackedValue.begin() + compressedByteOffset + 1);
+ if (visitor->compare_prefix(prefix) == relation::CELL_OUTSIDE_QUERY) {
+ size_t skip_bytes = 0;
+ for (int32_t dim = 0; dim < num_data_dims_; dim++) {
+ int32_t prefix = commonPrefixLengths[dim];
+ skip_bytes += bytes_per_dim_ - prefix;
+ }
+ in->seek(in->getFilePointer() + skip_bytes * (runLen));
+ i += runLen;
+ continue;
+ }
for (int32_t j = 0; j < runLen; ++j) {
for (int32_t dim = 0; dim < num_data_dims_; dim++) {
int32_t prefix = commonPrefixLengths[dim];
in->readBytes(scratchPackedValue.data(), bytes_per_dim_ -
prefix, dim * bytes_per_dim_ + prefix);
}
- visitor->visit(iter->docid_set->docids[i + j], scratchPackedValue);
+ // If scratchPackedValue is larger than the matched value, we can skip matching the remaining values, because values are sorted from low to high.
+ auto res = visitor->visit(iter->docid_set->docids[i + j],
scratchPackedValue);
+ if ( res > 0) {
+ return;
+ }
}
i += runLen;
}
@@ -416,7 +461,6 @@ void bkd_reader::visit_doc_values(std::vector<int32_t>
&commonPrefixLengths,
bkd_docid_set_iterator *iter,
int32_t count,
bkd_reader::intersect_visitor *visitor) {
- auto start = UnixMillis();
read_common_prefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (num_index_dims_ != 1 && version_ >=
bkd_writer::VERSION_LEAF_STORES_BOUNDS) {
@@ -432,9 +476,7 @@ void bkd_reader::visit_doc_values(std::vector<int32_t>
&commonPrefixLengths,
read_min_max(commonPrefixLengths, minPackedValue, maxPackedValue, in);
- auto start_time = UnixMillis();
relation r = visitor->compare(minPackedValue, maxPackedValue);
- stats.add_visit_compare_time_duration(UnixMillis() - start_time);
if (r == relation::CELL_OUTSIDE_QUERY) {
return;
}
@@ -455,23 +497,17 @@ void bkd_reader::visit_doc_values(std::vector<int32_t>
&commonPrefixLengths,
if (compressedDim == -1) {
assert(iter->bitmap_set!= nullptr);
assert(iter->bitmap_set->docids.size() == 1);
- auto uniq_start = UnixMillis();
visit_unique_raw_doc_values(scratchDataPackedValue, iter, count,
visitor);
- stats.add_uniq_doc_value_visit_time_duration(UnixMillis() -
uniq_start);
} else {
if (compressedDim == -2) {
- auto sparse_start = UnixMillis();
- // low cardinality values
visit_sparse_raw_doc_values(commonPrefixLengths,
scratchDataPackedValue,
in,
iter,
count,
visitor);
- stats.add_sparse_doc_value_visit_time_duration(UnixMillis() -
sparse_start);
} else {
// high cardinality
- auto compress_start = UnixMillis();
visit_compressed_doc_values(commonPrefixLengths,
scratchDataPackedValue,
in,
@@ -479,25 +515,19 @@ void bkd_reader::visit_doc_values(std::vector<int32_t>
&commonPrefixLengths,
count,
visitor,
compressedDim);
- stats.add_compress_doc_value_visit_time_duration(UnixMillis() -
compress_start);
}
}
- stats.add_doc_value_visit_time_duration(UnixMillis() - start);
}
void bkd_reader::intersect(const std::shared_ptr<bkd_reader::intersect_state>
&s, std::vector<uint8_t> &cellMinPacked, std::vector<uint8_t> &cellMaxPacked) {
- auto start_time = UnixMillis();
relation r = s->visitor_->compare(cellMinPacked, cellMaxPacked);
- stats.add_visit_compare_time_duration(UnixMillis() - start_time);
if (r == relation::CELL_OUTSIDE_QUERY) {
} else if (r == relation::CELL_INSIDE_QUERY) {
add_all(s, false);
} else if (s->index_->is_leaf_node()) {
if (s->index_->node_exists()) {
- auto start = UnixMillis();
int32_t count = read_doc_ids(s->in_.get(),
s->index_->get_leaf_blockFP(), s->docid_set_iterator.get());
- stats.add_read_doc_id_time_duration(UnixMillis() - start);
visit_doc_values(s->common_prefix_lengths_,
s->scratch_data_packed_value_,
@@ -512,46 +542,54 @@ void bkd_reader::intersect(const
std::shared_ptr<bkd_reader::intersect_state> &s
int32_t splitDim = s->index_->get_split_dim();
assert(splitDim >= 0);
assert(splitDim < num_index_dims_);
-
- std::vector<uint8_t> &splitPackedValue =
s->index_->get_split_packed_value();
- std::shared_ptr<BytesRef> splitDimValue =
s->index_->get_split_dim_value();
- assert(splitDimValue->length == bytes_per_dim_);
- assert(FutureArrays::CompareUnsigned(cellMinPacked,
- splitDim * bytes_per_dim_,
- splitDim * bytes_per_dim_ +
bytes_per_dim_,
- (splitDimValue->bytes),
- splitDimValue->offset,
- splitDimValue->offset +
bytes_per_dim_) <= 0);
- assert(FutureArrays::CompareUnsigned(cellMaxPacked,
- splitDim * bytes_per_dim_,
- splitDim * bytes_per_dim_ +
bytes_per_dim_,
- (splitDimValue->bytes),
- splitDimValue->offset,
- splitDimValue->offset +
bytes_per_dim_) >= 0);
-
- std::copy(cellMaxPacked.begin(),
- cellMaxPacked.begin() + packed_index_bytes_length_,
- splitPackedValue.begin());
- std::copy(splitDimValue->bytes.begin() + splitDimValue->offset,
- splitDimValue->bytes.begin() + splitDimValue->offset +
bytes_per_dim_,
- splitPackedValue.begin() + splitDim * bytes_per_dim_);
- s->index_->push_left();
- intersect(s, cellMinPacked, splitPackedValue);
- s->index_->pop();
-
- std::copy(splitPackedValue.begin() + splitDim * bytes_per_dim_,
- splitPackedValue.begin() + splitDim * bytes_per_dim_ +
bytes_per_dim_,
- splitDimValue->bytes.begin() + splitDimValue->offset);
-
- std::copy(cellMinPacked.begin(),
- cellMinPacked.begin() + packed_index_bytes_length_,
- splitPackedValue.begin());
- std::copy(splitDimValue->bytes.begin() + splitDimValue->offset,
- splitDimValue->bytes.begin() + splitDimValue->offset +
bytes_per_dim_,
- splitPackedValue.begin() + splitDim * bytes_per_dim_);
- s->index_->push_right();
- intersect(s, splitPackedValue, cellMaxPacked);
- s->index_->pop();
+ // fast path for 1-D BKD
+ if (splitDim == 0) {
+ auto &splitPackedValue = s->index_->get_split_packed_value();
+ auto& splitValue = s->index_->get_split_1dim_value();
+ std::copy(splitValue.begin(), splitValue.end(),
splitPackedValue.begin());
+
+ s->index_->push_left();
+ intersect(s, cellMinPacked, splitPackedValue);
+ s->index_->pop();
+
+ s->index_->push_right();
+ intersect(s, splitPackedValue, cellMaxPacked);
+ s->index_->pop();
+ } else {
+ std::vector<uint8_t>& splitPackedValue =
s->index_->get_split_packed_value();
+ std::shared_ptr<BytesRef> splitDimValue =
s->index_->get_split_dim_value();
+ assert(splitDimValue->length == bytes_per_dim_);
+ assert(FutureArrays::CompareUnsigned(cellMinPacked, splitDim *
bytes_per_dim_,
+ splitDim * bytes_per_dim_ +
bytes_per_dim_,
+ (splitDimValue->bytes),
splitDimValue->offset,
+ splitDimValue->offset +
bytes_per_dim_) <= 0);
+ assert(FutureArrays::CompareUnsigned(cellMaxPacked, splitDim *
bytes_per_dim_,
+ splitDim * bytes_per_dim_ +
bytes_per_dim_,
+ (splitDimValue->bytes),
splitDimValue->offset,
+ splitDimValue->offset +
bytes_per_dim_) >= 0);
+
+ std::copy(cellMaxPacked.begin(), cellMaxPacked.begin() +
packed_index_bytes_length_,
+ splitPackedValue.begin());
+ std::copy(splitDimValue->bytes.begin() + splitDimValue->offset,
+ splitDimValue->bytes.begin() + splitDimValue->offset +
bytes_per_dim_,
+ splitPackedValue.begin() + splitDim * bytes_per_dim_);
+ s->index_->push_left();
+ intersect(s, cellMinPacked, splitPackedValue);
+ s->index_->pop();
+
+ std::copy(splitPackedValue.begin() + splitDim * bytes_per_dim_,
+ splitPackedValue.begin() + splitDim * bytes_per_dim_ +
bytes_per_dim_,
+ splitDimValue->bytes.begin() + splitDimValue->offset);
+
+ std::copy(cellMinPacked.begin(), cellMinPacked.begin() +
packed_index_bytes_length_,
+ splitPackedValue.begin());
+ std::copy(splitDimValue->bytes.begin() + splitDimValue->offset,
+ splitDimValue->bytes.begin() + splitDimValue->offset +
bytes_per_dim_,
+ splitPackedValue.begin() + splitDim * bytes_per_dim_);
+ s->index_->push_right();
+ intersect(s, splitPackedValue, cellMaxPacked);
+ s->index_->pop();
+ }
}
}
@@ -560,8 +598,8 @@ int32_t bkd_reader::get_tree_depth() const {
}
int64_t bkd_reader::ram_bytes_used() {
- if (!packed_index_->empty()) {
- return packed_index_->capacity();
+ if (!packed_index_.empty()) {
+ return packed_index_.capacity();
} else {
return split_packed_values_.capacity() + leaf_block_fps_.capacity() *
sizeof(int64_t);
}
diff --git a/src/core/CLucene/util/bkd/bkd_reader.h
b/src/core/CLucene/util/bkd/bkd_reader.h
index adb7f3da..e6f46693 100644
--- a/src/core/CLucene/util/bkd/bkd_reader.h
+++ b/src/core/CLucene/util/bkd/bkd_reader.h
@@ -10,6 +10,7 @@
#include <memory>
#include <vector>
+CL_CLASS_DEF(store,Directory)
CL_NS_DEF2(util, bkd)
enum class relation {
@@ -37,8 +38,8 @@ public:
int64_t point_count_{};
int32_t doc_count_{};
int32_t version_{};
- std::shared_ptr<std::vector<uint8_t>> packed_index_;
- std::shared_ptr<store::IndexInput> clone_index_input;
+ std::vector<uint8_t> packed_index_;
+ //std::shared_ptr<store::IndexInput> clone_index_input;
int32_t bytes_per_index_entry_{};
std::vector<int64_t> leaf_block_fps_;
@@ -48,6 +49,7 @@ public:
int32_t type{};
int64_t metaOffset{};
int64_t indexFP{};
+ std::shared_ptr<index_tree> index_tree_{};
public:
class intersect_visitor {
@@ -60,7 +62,7 @@ public:
* it. In the 1D case, values are visited in increasing order, and
in the
* case of ties, in increasing docid order.
*/
- virtual void visit(int docid, std::vector<uint8_t> &packedValue) = 0;
+ virtual int visit(int docid, std::vector<uint8_t> &packedValue) = 0;
virtual void visit(roaring::Roaring &docid) = 0;
virtual void visit(roaring::Roaring &&docid) = 0;
virtual void visit(bkd_docid_set_iterator *iter, std::vector<uint8_t>
&packedValue) = 0;
@@ -71,6 +73,7 @@ public:
* determine how to further recurse down the tree. */
virtual relation compare(std::vector<uint8_t> &minPackedValue,
std::vector<uint8_t> &maxPackedValue) = 0;
+ virtual relation compare_prefix(std::vector<uint8_t> &prefix) = 0;
void grow(int count){};
virtual void inc_hits(int count) {}
@@ -85,19 +88,18 @@ public:
int32_t packedIndexBytesLength,
int32_t maxPointsInLeafNode,
bkd_reader::intersect_visitor *visitor,
- const std::shared_ptr<index_tree> &indexVisitor);
+ index_tree* indexVisitor);
public:
- std::shared_ptr<store::IndexInput> in_;
+ std::unique_ptr<store::IndexInput> in_;
std::unique_ptr<bkd_docid_set_iterator> docid_set_iterator;
- std::vector<int32_t> scratch_doc_ids_;
std::vector<uint8_t> scratch_data_packed_value_;
std::vector<uint8_t> scratch_min_index_packed_value_;
std::vector<uint8_t> scratch_max_index_packed_value_;
std::vector<int32_t> common_prefix_lengths_;
bkd_reader::intersect_visitor *visitor_;
- std::shared_ptr<index_tree> index_;
+ std::unique_ptr<index_tree> index_;
};
public:
@@ -142,10 +144,13 @@ public:
bkd_reader::intersect_visitor *visitor)
const;
public:
+ ~bkd_reader();
bkd_reader() = default;
void read_index(store::IndexInput* index_in);
int read_meta(store::IndexInput* meta_in);
explicit bkd_reader(store::IndexInput *in);
+ bkd_reader(store::Directory* directory, bool close_directory = true);
+ bool open();
int64_t estimate_point_count(bkd_reader::intersect_visitor *visitor);
int64_t estimate_point_count(const std::shared_ptr<intersect_state> &s,
std::vector<uint8_t> &cellMinPacked,
@@ -158,58 +163,7 @@ public:
private:
int64_t ram_bytes_used();
-
-public:
- struct reader_stats {
- private:
- uint64_t visit_doc_values_time_duration_ms{0};
- uint64_t visit_uniq_doc_values_time_duration_ms{0};
- uint64_t visit_sparse_doc_values_time_duration_ms{0};
- uint64_t visit_compress_doc_values_time_duration_ms{0};
- uint64_t visit_doc_id_time_duration_ms{0};
- uint64_t read_doc_id_time_duration_ms{0};
- uint64_t visit_compare_time_duration_ms{0};
-
- public:
- void set_doc_value_visit_time_duration(uint64_t time_duration) {
visit_doc_values_time_duration_ms = time_duration; }
- void add_doc_value_visit_time_duration(uint64_t time_duration) {
visit_doc_values_time_duration_ms += time_duration; }
- uint64_t get_doc_value_visit_time_duration() const { return
visit_doc_values_time_duration_ms; }
-
- void set_uniq_doc_value_visit_time_duration(uint64_t time_duration) {
visit_uniq_doc_values_time_duration_ms = time_duration; }
- void add_uniq_doc_value_visit_time_duration(uint64_t time_duration) {
visit_uniq_doc_values_time_duration_ms += time_duration; }
- uint64_t get_uniq_doc_value_visit_time_duration() const { return
visit_uniq_doc_values_time_duration_ms; }
-
- void set_sparse_doc_value_visit_time_duration(uint64_t time_duration)
{ visit_sparse_doc_values_time_duration_ms = time_duration; }
- void add_sparse_doc_value_visit_time_duration(uint64_t time_duration)
{ visit_sparse_doc_values_time_duration_ms += time_duration; }
- uint64_t get_sparse_doc_value_visit_time_duration() const { return
visit_sparse_doc_values_time_duration_ms; }
-
- void set_compress_doc_value_visit_time_duration(uint64_t
time_duration) { visit_compress_doc_values_time_duration_ms = time_duration; }
- void add_compress_doc_value_visit_time_duration(uint64_t
time_duration) { visit_compress_doc_values_time_duration_ms += time_duration; }
- uint64_t get_compress_doc_value_visit_time_duration() const { return
visit_compress_doc_values_time_duration_ms; }
-
- void set_doc_id_visit_time_duration(uint64_t time_duration) {
visit_doc_id_time_duration_ms = time_duration; }
- void add_doc_id_visit_time_duration(uint64_t time_duration) {
visit_doc_id_time_duration_ms += time_duration; }
- uint64_t get_doc_id_visit_time_duration() const { return
visit_doc_id_time_duration_ms; }
-
- void set_read_doc_id_time_duration(uint64_t time_duration) {
read_doc_id_time_duration_ms = time_duration; }
- void add_read_doc_id_time_duration(uint64_t time_duration) {
read_doc_id_time_duration_ms += time_duration; }
- uint64_t get_read_doc_id_time_duration() const { return
read_doc_id_time_duration_ms; }
-
- void set_visit_compare_time_duration(uint64_t time_duration) {
visit_compare_time_duration_ms = time_duration; }
- void add_visit_compare_time_duration(uint64_t time_duration) {
visit_compare_time_duration_ms += time_duration; }
- uint64_t get_visit_compare_time_duration() const { return
visit_compare_time_duration_ms; }
-
- std::string to_string() const {
- return "| visit compare time: " +
std::to_string(visit_compare_time_duration_ms) + "ms " +
- "| read doc id time: " +
std::to_string(read_doc_id_time_duration_ms) + "ms " +
- "| visit doc id time: " +
std::to_string(visit_doc_id_time_duration_ms) + "ms " +
- "| visit doc value time: " +
std::to_string(visit_doc_values_time_duration_ms) + "ms " +
- "| visit unique doc value time: " +
std::to_string(visit_uniq_doc_values_time_duration_ms) + "ms " +
- "| visit sparse doc value time: " +
std::to_string(visit_sparse_doc_values_time_duration_ms) + "ms " +
- "| visit compress doc value time: " +
std::to_string(visit_compress_doc_values_time_duration_ms) + "ms "
- ;
- }
- };
- reader_stats stats;
+ store::Directory* _dir;
+ bool _close_directory;
};
CL_NS_END2
diff --git a/src/core/CLucene/util/bkd/docids_writer.cpp
b/src/core/CLucene/util/bkd/docids_writer.cpp
index cd0a3050..7dd76893 100644
--- a/src/core/CLucene/util/bkd/docids_writer.cpp
+++ b/src/core/CLucene/util/bkd/docids_writer.cpp
@@ -2,6 +2,7 @@
#include "docids_writer.h"
#include "CLucene/debug/error.h"
+#include "CLucene/store/ByteArrayDataInput.h"
CL_NS_DEF2(util, bkd)
@@ -164,12 +165,26 @@ void docids_writer::read_delta_vints(store::IndexInput
*in, int32_t count, std::
}
}
+int32_t calculateTotalBytesForDocIds(int32_t count) {
+ int32_t bytesForGroups = (count / 8) * 24; // Bytes for full groups of 8
docids
+
+ int32_t remainingDocIds = count % 8; // Number of remaining docids
+ int32_t bytesForRemaining = remainingDocIds * 3; // Bytes for remaining
docids, 3 bytes each
+
+ return bytesForGroups + bytesForRemaining;
+}
+
void docids_writer::read_ints24(store::IndexInput *in, int32_t count,
std::vector<int32_t> &docids) {
int32_t i = 0;
+ auto data_size = calculateTotalBytesForDocIds(count);
+ std::vector<uint8_t> packed_docids(data_size);
+ in->readBytes(packed_docids.data(), data_size);
+ auto in2= std::make_unique<store::ByteArrayDataInput>(packed_docids);
+
for (i = 0; i < count - 7; i += 8) {
- int64_t l1 = in->readLong();
- int64_t l2 = in->readLong();
- int64_t l3 = in->readLong();
+ int64_t l1 = in2->readLong();
+ int64_t l2 = in2->readLong();
+ int64_t l3 = in2->readLong();
docids[i] = (int) (static_cast<uint64_t>(l1) >> 40);
docids[i + 1] = (int) (static_cast<uint64_t>(l1) >> 16) & 0xffffff;
docids[i + 2] = (int) (((static_cast<uint64_t>(l1) & 0xffff) << 8) |
(static_cast<uint64_t>(l2) >> 56));
@@ -180,7 +195,7 @@ void docids_writer::read_ints24(store::IndexInput *in,
int32_t count, std::vecto
docids[i + 7] = (int) l3 & 0xffffff;
}
for (; i < count; ++i) {
- docids[i] = ((static_cast<int32_t>(in->readShort()) & 0xffff) << 8) |
static_cast<uint8_t>(in->readByte());
+ docids[i] = ((static_cast<int32_t>(in2->readShort()) & 0xffff) << 8) |
static_cast<uint8_t>(in2->readByte());
}
}
diff --git a/src/core/CLucene/util/bkd/index_tree.cpp
b/src/core/CLucene/util/bkd/index_tree.cpp
index 201cc8d3..1fe6446f 100644
--- a/src/core/CLucene/util/bkd/index_tree.cpp
+++ b/src/core/CLucene/util/bkd/index_tree.cpp
@@ -6,17 +6,17 @@ CL_NS_DEF2(util,bkd)
index_tree::index_tree(std::shared_ptr<bkd_reader>& reader)
: reader(reader) {
int32_t treeDepth = reader->get_tree_depth();
- split_packed_value_stack_ = std::vector<std::vector<uint8_t>>(treeDepth +
1);
+ split_packed_value_stack_.resize(treeDepth + 1);
node_id_ = 1;
level_ = 1;
- split_packed_value_stack_[level_] =
std::vector<uint8_t>(reader->packed_index_bytes_length_);
+
split_packed_value_stack_[level_].resize(reader->packed_index_bytes_length_);
}
void index_tree::push_left() {
node_id_ *= 2;
level_++;
if (split_packed_value_stack_[level_].empty()) {
- split_packed_value_stack_[level_] = std::vector<uint8_t>(reader->packed_index_bytes_length_);
+ split_packed_value_stack_[level_].resize(reader->packed_index_bytes_length_);
}
}
@@ -24,7 +24,7 @@ void index_tree::push_right() {
node_id_ = node_id_ * 2 + 1;
level_++;
if (split_packed_value_stack_[level_].empty()) {
- split_packed_value_stack_[level_] = std::vector<uint8_t>(reader->packed_index_bytes_length_);
+ split_packed_value_stack_[level_].resize(reader->packed_index_bytes_length_);
}
}
diff --git a/src/core/CLucene/util/bkd/index_tree.h b/src/core/CLucene/util/bkd/index_tree.h
index a13a4e20..25cc0ab3 100644
--- a/src/core/CLucene/util/bkd/index_tree.h
+++ b/src/core/CLucene/util/bkd/index_tree.h
@@ -24,6 +24,7 @@ public:
virtual int32_t get_num_leaves();
virtual std::shared_ptr<index_tree> clone() = 0;
virtual std::shared_ptr<BytesRef> get_split_dim_value() = 0;
+ virtual std::vector<uint8_t>& get_split_1dim_value() = 0;
virtual int64_t get_leaf_blockFP() = 0;
private:
int32_t GetNumLeavesSlow(int32_t node);
diff --git a/src/core/CLucene/util/bkd/legacy_index_tree.cpp b/src/core/CLucene/util/bkd/legacy_index_tree.cpp
index dc3dd99b..c45155fd 100644
--- a/src/core/CLucene/util/bkd/legacy_index_tree.cpp
+++ b/src/core/CLucene/util/bkd/legacy_index_tree.cpp
@@ -43,6 +43,11 @@ std::shared_ptr<BytesRef> legacy_index_tree::get_split_dim_value() {
return scratch_;
}
+std::vector<uint8_t>& legacy_index_tree::get_split_1dim_value() {
+ assert(is_leaf_node() == false);
+ return scratch_->bytes;
+}
+
void legacy_index_tree::pop() {
index_tree::pop();
leaf_block_fp_ = -1;
diff --git a/src/core/CLucene/util/bkd/legacy_index_tree.h b/src/core/CLucene/util/bkd/legacy_index_tree.h
index c90ef9c7..aff5472d 100644
--- a/src/core/CLucene/util/bkd/legacy_index_tree.h
+++ b/src/core/CLucene/util/bkd/legacy_index_tree.h
@@ -16,6 +16,7 @@ public:
void pop() override;
int64_t get_leaf_blockFP() override;
std::shared_ptr<BytesRef> get_split_dim_value() override;
+ std::vector<uint8_t>& get_split_1dim_value() override;
private:
void set_node_data();
diff --git a/src/core/CLucene/util/bkd/packed_index_tree.cpp b/src/core/CLucene/util/bkd/packed_index_tree.cpp
index 7d84b5e1..4f65c413 100644
--- a/src/core/CLucene/util/bkd/packed_index_tree.cpp
+++ b/src/core/CLucene/util/bkd/packed_index_tree.cpp
@@ -1,20 +1,23 @@
-#include "bkd_reader.h"
#include "packed_index_tree.h"
+#include "CLucene/store/ByteArrayDataInput.h"
+#include "bkd_reader.h"
+#include <iostream>
+
CL_NS_DEF2(util,bkd)
packed_index_tree::packed_index_tree(std::shared_ptr<bkd_reader>&& reader)
: index_tree(reader) {
int32_t treeDepth = reader->get_tree_depth();
- leaf_block_fp_stack_ = std::vector<int64_t>(treeDepth + 1);
- left_node_positions_ = std::vector<int32_t>(treeDepth + 1);
- right_node_positions_ = std::vector<int32_t>(treeDepth + 1);
- split_values_stack_ = std::vector<std::shared_ptr<std::vector<uint8_t>>>(treeDepth + 1);
- split_dims_ = std::vector<int32_t>(treeDepth + 1);
- negative_deltas_ = std::vector<bool>(reader->num_index_dims_ * (treeDepth + 1));
-
- in_ = reader->clone_index_input;
- split_values_stack_[0] = std::make_shared<std::vector<uint8_t>>(reader->packed_index_bytes_length_);
+ leaf_block_fp_stack_.resize(treeDepth + 1);
+ left_node_positions_.resize(treeDepth + 1);
+ right_node_positions_.resize(treeDepth + 1);
+ split_values_stack_.resize(treeDepth + 1);
+ split_dims_.resize(treeDepth + 1);
+ negative_deltas_.resize(reader->num_index_dims_ * (treeDepth + 1));
+
+ in_ = std::make_unique<store::ByteArrayDataInput>(reader->packed_index_);
+ split_values_stack_[0].resize(reader->packed_index_bytes_length_);
read_node_data(false);
scratch_ = std::make_shared<BytesRef>();
scratch_->length = reader->bytes_per_dim_;
@@ -44,7 +47,7 @@ void packed_index_tree::push_left() {
negative_deltas_.begin() + level_ * reader->num_index_dims_);
assert(split_dim_ != -1);
negative_deltas_[level_ * reader->num_index_dims_ + split_dim_] = true;
- in_->seek(nodePosition);
+ in_->setPosition(nodePosition);
read_node_data(true);
}
@@ -56,7 +59,7 @@ void packed_index_tree::push_right() {
negative_deltas_.begin() + level_ * reader->num_index_dims_);
assert(split_dim_ != -1);
negative_deltas_[level_ * reader->num_index_dims_ + split_dim_] = false;
- in_->seek(nodePosition);
+ in_->setPosition(nodePosition);
read_node_data(false);
}
@@ -72,11 +75,16 @@ int64_t packed_index_tree::get_leaf_blockFP() {
std::shared_ptr<BytesRef> packed_index_tree::get_split_dim_value() {
assert(is_leaf_node() == false);
- scratch_->bytes = *split_values_stack_[level_];
+ scratch_->bytes = split_values_stack_[level_];
scratch_->offset = split_dim_ * reader->bytes_per_dim_;
return scratch_;
}
+std::vector<uint8_t>& packed_index_tree::get_split_1dim_value() {
+ assert(is_leaf_node() == false);
+ return split_values_stack_[level_];
+}
+
void packed_index_tree::read_node_data(bool isLeft) {
leaf_block_fp_stack_[level_] = leaf_block_fp_stack_[level_ - 1];
@@ -94,20 +102,20 @@ void packed_index_tree::read_node_data(bool isLeft) {
int32_t prefix = code % (1 + reader->bytes_per_dim_);
int32_t suffix = reader->bytes_per_dim_ - prefix;
- if (split_values_stack_[level_]==nullptr) {
- split_values_stack_[level_] = std::make_shared<std::vector<uint8_t>>(reader->packed_index_bytes_length_);
+ if (split_values_stack_[level_].empty()) {
+ split_values_stack_[level_].resize(reader->packed_index_bytes_length_);
}
- std::copy(split_values_stack_[level_ - 1]->begin(),
- split_values_stack_[level_ - 1]->begin() + reader->packed_index_bytes_length_,
- split_values_stack_[level_]->begin());
+ std::copy(split_values_stack_[level_ - 1].begin(),
+ split_values_stack_[level_ - 1].begin() + reader->packed_index_bytes_length_,
+ split_values_stack_[level_].begin());
if (suffix > 0) {
int32_t firstDiffByteDelta = code / (1 + reader->bytes_per_dim_);
if (negative_deltas_[level_ * reader->num_index_dims_ +
split_dim_]) {
firstDiffByteDelta = -firstDiffByteDelta;
}
- int32_t oldByte = (*split_values_stack_[level_])[split_dim_ * reader->bytes_per_dim_ + prefix] & 0xFF;
- (*split_values_stack_[level_])[split_dim_ * reader->bytes_per_dim_ + prefix] = static_cast<uint8_t>(oldByte + firstDiffByteDelta);
- in_->readBytes((split_values_stack_[level_])->data(),
+ int32_t oldByte = split_values_stack_[level_][split_dim_ * reader->bytes_per_dim_ + prefix] & 0xFF;
+ split_values_stack_[level_][split_dim_ * reader->bytes_per_dim_ + prefix] = static_cast<uint8_t>(oldByte + firstDiffByteDelta);
+ in_->readBytes(split_values_stack_[level_],
+ in_->readBytes(split_values_stack_[level_],
suffix - 1,
split_dim_ * reader->bytes_per_dim_ + prefix + 1);
} else {
@@ -121,7 +129,7 @@ void packed_index_tree::read_node_data(bool isLeft) {
leftNumBytes = 0;
}
- left_node_positions_[level_] = in_->getFilePointer();
+ left_node_positions_[level_] = in_->getPosition();
right_node_positions_[level_] = left_node_positions_[level_] +
leftNumBytes;
}
diff --git a/src/core/CLucene/util/bkd/packed_index_tree.h b/src/core/CLucene/util/bkd/packed_index_tree.h
index e84b27ee..67bc3702 100644
--- a/src/core/CLucene/util/bkd/packed_index_tree.h
+++ b/src/core/CLucene/util/bkd/packed_index_tree.h
@@ -7,6 +7,7 @@
#include <memory>
#include <vector>
+CL_CLASS_DEF(store, ByteArrayDataInput)
CL_NS_DEF2(util,bkd)
class packed_index_tree : public index_tree {
public:
@@ -18,18 +19,19 @@ public:
void pop() override;
int64_t get_leaf_blockFP() override;
std::shared_ptr<BytesRef> get_split_dim_value() override;
+ std::vector<uint8_t>& get_split_1dim_value() override;
private:
void read_node_data(bool isLeft);
private:
- std::shared_ptr<store::IndexInput> in_;
+ std::unique_ptr<store::ByteArrayDataInput> in_;
std::vector<int64_t> leaf_block_fp_stack_;
std::vector<int32_t> left_node_positions_;
std::vector<int32_t> right_node_positions_;
std::vector<int32_t> split_dims_;
std::vector<bool> negative_deltas_;
- std::vector<std::shared_ptr<std::vector<uint8_t>>> split_values_stack_;
+ std::vector<std::vector<uint8_t>> split_values_stack_;
std::shared_ptr<BytesRef> scratch_;
};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]