This is an automated email from the ASF dual-hosted git repository.
wangbo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 6651d3b SIMD instruction speed up the storage layer (#6089)
6651d3b is described below
commit 6651d3bf2a977692296c310e9a8aa399c07ab5e3
Author: HappenLee <[email protected]>
AuthorDate: Thu Jun 24 22:04:32 2021 -0500
SIMD instruction speed up the storage layer (#6089)
* SIMD instruction speed up the storage layer
* 1. add DCHECK in power of 2 int32
2. change vector to array to reduce the cost
---
be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 41 +++++++++++++++-------
be/src/olap/rowset/segment_v2/binary_plain_page.h | 34 +++++++++++++-----
be/src/runtime/mem_pool.h | 2 +-
be/src/util/bit_util.h | 6 ++++
4 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index c5aec41..a65cdf2 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -20,6 +20,7 @@
#include "common/logging.h"
#include "gutil/strings/substitute.h" // for Substitute
#include "olap/rowset/segment_v2/bitshuffle_page.h"
+#include "runtime/mem_pool.h"
#include "util/slice.h" // for Slice
namespace doris {
@@ -238,8 +239,8 @@ Status BinaryDictPageDecoder::next_batch(size_t* n,
ColumnBlockView* dst) {
// dictionary encoding
DCHECK(_parsed);
DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";
+
if (PREDICT_FALSE(*n == 0)) {
- *n = 0;
return Status::OK();
}
Slice* out = reinterpret_cast<Slice*>(dst->data());
@@ -248,21 +249,37 @@ Status BinaryDictPageDecoder::next_batch(size_t* n,
ColumnBlockView* dst) {
ColumnBlock column_block(_batch.get(), dst->column_block()->pool());
ColumnBlockView tmp_block_view(&column_block);
RETURN_IF_ERROR(_data_page_decoder->next_batch(n, &tmp_block_view));
- for (int i = 0; i < *n; ++i) {
+ const auto len = *n;
+
+ size_t mem_len[len];
+ for (int i = 0; i < len; ++i) {
int32_t codeword = *reinterpret_cast<const
int32_t*>(column_block.cell_ptr(i));
// get the string from the dict decoder
- Slice element = _dict_decoder->string_at_index(codeword);
- if (element.size > 0) {
- char* destination =
(char*)dst->column_block()->pool()->allocate(element.size);
- if (destination == nullptr) {
- return Status::MemoryAllocFailed(
- strings::Substitute("memory allocate failed, size:$0",
element.size));
- }
- element.relocate(destination);
- }
- *out = element;
+ *out = _dict_decoder->string_at_index(codeword);
+ mem_len[i] = out->size;
+ out++;
+ }
+
+ // use SIMD instruction to speed up call function `RoundUpToPowerOfTwo`
+ auto mem_size = 0;
+ for (int i = 0; i < len; ++i) {
+ mem_len[i] = BitUtil::RoundUpToPowerOf2Int32(mem_len[i],
MemPool::DEFAULT_ALIGNMENT);
+ mem_size += mem_len[i];
+ }
+
+ // allocate a batch of memory and do memcpy
+ out = reinterpret_cast<Slice*>(dst->data());
+ char* destination = (char*)dst->column_block()->pool()->allocate(mem_size);
+ if (destination == nullptr) {
+ return Status::MemoryAllocFailed(
+ strings::Substitute("memory allocate failed, size:$0",
mem_size));
+ }
+ for (int i = 0; i < len; ++i) {
+ out->relocate(destination);
+ destination += mem_len[i];
++out;
}
+
return Status::OK();
}
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h
b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index bde3ae0..97e7fa8 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -29,6 +29,7 @@
#pragma once
#include "common/logging.h"
+#include "gutil/strings/substitute.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/options.h"
#include "olap/rowset/segment_v2/page_builder.h"
@@ -193,18 +194,33 @@ public:
*n = 0;
return Status::OK();
}
- size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems -
_cur_idx));
+ const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems -
_cur_idx));
Slice* out = reinterpret_cast<Slice*>(dst->data());
-
+ size_t mem_len[max_fetch];
for (size_t i = 0; i < max_fetch; i++, out++, _cur_idx++) {
- Slice elem(string_at_index(_cur_idx));
- out->size = elem.size;
- if (elem.size != 0) {
- out->data =
-
reinterpret_cast<char*>(dst->pool()->allocate(elem.size * sizeof(uint8_t)));
- memcpy(out->data, elem.data, elem.size);
- }
+ *out = string_at_index(_cur_idx);
+ mem_len[i] = out->size;
+ }
+
+ // use SIMD instruction to speed up call function `RoundUpToPowerOfTwo`
+ auto mem_size = 0;
+ for (int i = 0; i < max_fetch; ++i) {
+ mem_len[i] = BitUtil::RoundUpToPowerOf2Int32(mem_len[i],
MemPool::DEFAULT_ALIGNMENT);
+ mem_size += mem_len[i];
+ }
+
+ // allocate a batch of memory and do memcpy
+ out = reinterpret_cast<Slice*>(dst->data());
+ char* destination =
(char*)dst->column_block()->pool()->allocate(mem_size);
+ if (destination == nullptr) {
+ return Status::MemoryAllocFailed(
+ strings::Substitute("memory allocate failed, size:$0",
mem_size));
+ }
+ for (int i = 0; i < max_fetch; ++i) {
+ out->relocate(destination);
+ destination += mem_len[i];
+ ++out;
}
*n = max_fetch;
diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h
index 0290361..3a3750e 100644
--- a/be/src/runtime/mem_pool.h
+++ b/be/src/runtime/mem_pool.h
@@ -161,7 +161,7 @@ public:
MemTracker* mem_tracker() { return mem_tracker_; }
- static const int DEFAULT_ALIGNMENT = 8;
+ static constexpr int DEFAULT_ALIGNMENT = 8;
private:
friend class MemPoolTest;
diff --git a/be/src/util/bit_util.h b/be/src/util/bit_util.h
index d49e483..a4bf2ef 100644
--- a/be/src/util/bit_util.h
+++ b/be/src/util/bit_util.h
@@ -300,6 +300,12 @@ public:
return (value + (factor - 1)) & ~(factor - 1);
}
+ // speed up function compute for SIMD
+ static inline size_t RoundUpToPowerOf2Int32(size_t value, size_t factor) {
+ DCHECK((factor > 0) && ((factor & (factor - 1)) == 0));
+ return (value + (factor - 1)) & ~(factor - 1);
+ }
+
// Returns the ceil of value/divisor
static inline int Ceil(int value, int divisor) {
return value / divisor + (value % divisor != 0);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]