[GitHub] [arrow] kou commented on a diff in pull request #14585: ARROW-17884: [C++] Add Intel®-IAA/QPL-based Parquet RLE Decode

GitBox Fri, 09 Dec 2022 13:10:46 -0800


kou commented on code in PR #14585:
URL: https://github.com/apache/arrow/pull/14585#discussion_r1044813741



##########
cpp/CMakeLists.txt:
##########
@@ -796,6 +796,15 @@ if(ARROW_USE_XSIMD)
   list(APPEND ARROW_STATIC_LINK_LIBS xsimd)
 endif()
 
+if(ARROW_WITH_QPL)
+  add_definitions(-DARROW_WITH_QPL)

Review Comment:
   Could you put this to `cpp/src/arrow/util/config.h.cmake` instead of here?



##########
cpp/cmake_modules/FindQPL.cmake:
##########
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# - Find Intel®QPL
+# This module defines
+#  QPL_ROOT - When set, this path is inspected instead of standard library
+#  QPL_INCLUDE_DIR, directory containing headers
+#  QPL_STATIC_LIB, path to libqpl.a
+#  QPL_FOUND, whether QPL is found or not

Review Comment:
   We should mark only CMake target (`qpl::qpl`) public API. Others should not 
mark public API.



##########
cpp/cmake_modules/FindQPL.cmake:
##########
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   Do we need this file?
   It seems that QPL provides its CMake package: 
https://github.com/intel/qpl/blob/develop/CMakeLists.txt#L66-L68



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -64,6 +64,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
     ORC
     re2
     Protobuf
+    QPL

Review Comment:
   It seems that QPL uses `Qpl` for its CMake package name: 
https://github.com/intel/qpl/blob/develop/CMakeLists.txt#L66-L68
   
   ```suggestion
       Qpl
   ```



##########
cpp/src/arrow/util/bit_stream_utils.h:
##########
@@ -200,6 +213,7 @@ class BitReader {
 
   int byte_offset_;  // Offset in buffer_
   int bit_offset_;   // Offset in buffered_values_
+  int val_offset_;   // Index of next value to read

Review Comment:
   Please don't abbreviate `value` as `val` for readability and consistency?



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -2203,6 +2214,45 @@ if(ARROW_WITH_RAPIDJSON)
   endif()
 endif()
 
+macro(build_qpl)
+  message(STATUS "Building QPL from source")
+  set(QPL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qpl_ep/src/qpl_ep-install")
+  set(QPL_STATIC_LIB_NAME 
${CMAKE_STATIC_LIBRARY_PREFIX}qpl${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(QPL_STATIC_LIB "${QPL_PREFIX}/lib/${QPL_STATIC_LIB_NAME}")
+  set(QPL_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib
+                     "-DCMAKE_INSTALL_PREFIX=${QPL_PREFIX}")
+  set(QPL_PATCH_COMMAND
+      "sed" "-i" "17,18d"
+      
"${CMAKE_CURRENT_BINARY_DIR}/qpl_ep-prefix/src/qpl_ep/tools/CMakeLists.txt")
+
+  externalproject_add(qpl_ep
+                      ${EP_LOG_OPTIONS}
+                      URL ${QPL_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_QPL_BUILD_SHA256_CHECKSUM}"
+                      PATCH_COMMAND ${QPL_PATCH_COMMAND}
+                      BUILD_BYPRODUCTS "${QPL_STATIC_LIB}"
+                      CMAKE_ARGS ${QPL_CMAKE_ARGS})
+
+  file(MAKE_DIRECTORY "${QPL_PREFIX}/include")
+
+  add_library(qpl::qpl STATIC IMPORTED)

Review Comment:
   Could you use `Qpl::qpl` because QPL's CMake package uses `Qpl::qpl`?



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -2203,6 +2214,45 @@ if(ARROW_WITH_RAPIDJSON)
   endif()
 endif()
 
+macro(build_qpl)
+  message(STATUS "Building QPL from source")
+  set(QPL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/qpl_ep/src/qpl_ep-install")
+  set(QPL_STATIC_LIB_NAME 
${CMAKE_STATIC_LIBRARY_PREFIX}qpl${CMAKE_STATIC_LIBRARY_SUFFIX})
+  set(QPL_STATIC_LIB "${QPL_PREFIX}/lib/${QPL_STATIC_LIB_NAME}")
+  set(QPL_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} -DCMAKE_INSTALL_LIBDIR=lib
+                     "-DCMAKE_INSTALL_PREFIX=${QPL_PREFIX}")
+  set(QPL_PATCH_COMMAND
+      "sed" "-i" "17,18d"

Review Comment:
   `sed -i` (no suffix) isn't portable. It works with GNU sed but not with BSD 
sed.
   
   BTW, this approach is too fragile. How about using `patch` command like a 
code in `build_google_cloud_cpp_storage()`?



##########
cpp/cmake_modules/ThirdpartyToolchain.cmake:
##########
@@ -633,6 +636,14 @@ else()
            
"${THIRDPARTY_MIRROR_URL}/protobuf-${ARROW_PROTOBUF_BUILD_VERSION}.tar.gz")
 endif()
 
+if(DEFINED ENV{ARROW_QPL_URL})
+  set(QPL_SOURCE_URL "$ENV{ARROW_QPL_URL}")
+else()
+  set_urls(QPL_SOURCE_URL
+           
"https://github.com/intel/qpl/archive/refs/tags/${ARROW_QPL_BUILD_VERSION}.tar.gz";
+           "${THIRDPARTY_MIRROR_URL}/qpl-${ARROW_QPL_BUILD_VERSION}.tar.gz")

Review Comment:
   We don't need a mirror URL here because we don't prepare it.
   
   ```suggestion
              
"https://github.com/intel/qpl/archive/refs/tags/${ARROW_QPL_BUILD_VERSION}.tar.gz";)
   ```



##########
cpp/src/arrow/util/rle_encoding.h:
##########
@@ -598,6 +605,83 @@ inline int RleDecoder::GetBatchWithDict(const T* 
dictionary, int32_t dictionary_
   return values_read;
 }
 
+#ifdef ARROW_WITH_QPL
+template <typename T, typename V>
+inline void CopyValues(const T* dictionary, std::vector<uint8_t>* destination, 
T* values,
+                       int batch_size) {
+  auto* out = values;
+  auto* indices = reinterpret_cast<V*>(destination->data());
+  for (int j = 0; j < batch_size; j++) {
+    auto idx = indices[j];
+    T val = dictionary[idx];
+    std::fill(out, out + 1, val);
+    out++;
+  }
+  return;
+}
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDictIAA(const T* dictionary, int32_t 
dictionary_length,
+                                           T* values, int batch_size) {
+  if (batch_size <= 0) {
+    return batch_size;
+  }
+  if (!::arrow::util::internal::QplJobHWPool::GetInstance().job_ready() ||
+      bit_width_ <= 1) {
+    return GetBatchWithDict(dictionary, dictionary_length, values, batch_size);
+  }
+  uint32_t job_id = 0;
+  qpl_job* job = 
::arrow::util::internal::QplJobHWPool::GetInstance().AcquireJob(job_id);
+  if (job == NULL) {
+    return -1;
+  }
+
+  std::vector<uint8_t>* destination;
+  if (dictionary_length < 0xFF) {
+    job->out_bit_width = qpl_ow_8;
+    destination = new std::vector<uint8_t>(batch_size, 0);

Review Comment:
   Who does free this? QPL?
   Can we use `std::vector<uint8_t> destination` and `std::vector::resize()` 
instead of `new std::vector<uint8_t>`?



##########
cpp/src/arrow/util/rle_encoding.h:
##########
@@ -598,6 +605,83 @@ inline int RleDecoder::GetBatchWithDict(const T* 
dictionary, int32_t dictionary_
   return values_read;
 }
 
+#ifdef ARROW_WITH_QPL
+template <typename T, typename V>
+inline void CopyValues(const T* dictionary, std::vector<uint8_t>* destination, 
T* values,
+                       int batch_size) {
+  auto* out = values;
+  auto* indices = reinterpret_cast<V*>(destination->data());
+  for (int j = 0; j < batch_size; j++) {
+    auto idx = indices[j];
+    T val = dictionary[idx];
+    std::fill(out, out + 1, val);
+    out++;
+  }
+  return;
+}
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDictIAA(const T* dictionary, int32_t 
dictionary_length,
+                                           T* values, int batch_size) {
+  if (batch_size <= 0) {
+    return batch_size;
+  }
+  if (!::arrow::util::internal::QplJobHWPool::GetInstance().job_ready() ||
+      bit_width_ <= 1) {
+    return GetBatchWithDict(dictionary, dictionary_length, values, batch_size);
+  }
+  uint32_t job_id = 0;
+  qpl_job* job = 
::arrow::util::internal::QplJobHWPool::GetInstance().AcquireJob(job_id);
+  if (job == NULL) {
+    return -1;

Review Comment:
   Should we fall back to `GetBatchWithDict()`?



##########
cpp/src/arrow/util/rle_encoding.h:
##########
@@ -598,6 +605,83 @@ inline int RleDecoder::GetBatchWithDict(const T* 
dictionary, int32_t dictionary_
   return values_read;
 }
 
+#ifdef ARROW_WITH_QPL
+template <typename T, typename V>
+inline void CopyValues(const T* dictionary, std::vector<uint8_t>* destination, 
T* values,
+                       int batch_size) {
+  auto* out = values;
+  auto* indices = reinterpret_cast<V*>(destination->data());
+  for (int j = 0; j < batch_size; j++) {
+    auto idx = indices[j];
+    T val = dictionary[idx];
+    std::fill(out, out + 1, val);
+    out++;
+  }
+  return;
+}
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDictIAA(const T* dictionary, int32_t 
dictionary_length,
+                                           T* values, int batch_size) {
+  if (batch_size <= 0) {
+    return batch_size;
+  }
+  if (!::arrow::util::internal::QplJobHWPool::GetInstance().job_ready() ||
+      bit_width_ <= 1) {
+    return GetBatchWithDict(dictionary, dictionary_length, values, batch_size);
+  }
+  uint32_t job_id = 0;
+  qpl_job* job = 
::arrow::util::internal::QplJobHWPool::GetInstance().AcquireJob(job_id);

Review Comment:
   ```suggestion
     auto job = 
::arrow::util::internal::QplJobHWPool::GetInstance().AcquireJob(job_id);
   ```



##########
cpp/src/parquet/CMakeLists.txt:
##########
@@ -127,7 +127,6 @@ set(PARQUET_SHARED_TEST_LINK_LIBS arrow_testing_shared 
${PARQUET_MIN_TEST_LIBS}
 
 set(PARQUET_STATIC_TEST_LINK_LIBS ${PARQUET_MIN_TEST_LIBS} parquet_static 
thrift::thrift
                                   ${ARROW_LIBRARIES_FOR_STATIC_TESTS})
-

Review Comment:
   Could you revert this to reduce needless diff?



##########
cpp/thirdparty/versions.txt:
##########
@@ -96,7 +98,6 @@ 
ARROW_ZLIB_BUILD_SHA256_CHECKSUM=b3a24de97a8fdbc835b9833169501030b8977031bcb54b3
 ARROW_ZSTD_BUILD_VERSION=v1.5.2
 
ARROW_ZSTD_BUILD_SHA256_CHECKSUM=f7de13462f7a82c29ab865820149e778cbfe01087b3a55b5332707abf9db4a6e
 
-

Review Comment:
   Could you revert this to reduce needless diff?



##########
cpp/src/arrow/util/bit_stream_utils.h:
##########
@@ -190,6 +195,14 @@ class BitReader {
   /// Maximum byte length of a vlq encoded int64
   static constexpr int kMaxVlqByteLengthForInt64 = 10;
 
+  const uint8_t* GetBuffer() { return buffer_ - 1; }
+
+  int GetBufferLen() { return max_bytes_ + 1; }
+
+  void updateValOffset(int batch_size) { val_offset_ += batch_size; }
+
+  int GetValOffset() { return val_offset_; }

Review Comment:
   I don't think that exporting internal buffer and changing offset from 
outside is a good API.
   This is a reader class. So this is responsible to manage them. This API just 
uses this as a data container.
   
   Can we choose another approach? For example, can we use QPL in this?



##########
cpp/src/arrow/util/rle_encoding.h:
##########
@@ -598,6 +605,83 @@ inline int RleDecoder::GetBatchWithDict(const T* 
dictionary, int32_t dictionary_
   return values_read;
 }
 
+#ifdef ARROW_WITH_QPL
+template <typename T, typename V>
+inline void CopyValues(const T* dictionary, std::vector<uint8_t>* destination, 
T* values,
+                       int batch_size) {
+  auto* out = values;
+  auto* indices = reinterpret_cast<V*>(destination->data());
+  for (int j = 0; j < batch_size; j++) {
+    auto idx = indices[j];
+    T val = dictionary[idx];
+    std::fill(out, out + 1, val);
+    out++;
+  }
+  return;
+}
+
+template <typename T>
+inline int RleDecoder::GetBatchWithDictIAA(const T* dictionary, int32_t 
dictionary_length,
+                                           T* values, int batch_size) {
+  if (batch_size <= 0) {
+    return batch_size;
+  }
+  if (!::arrow::util::internal::QplJobHWPool::GetInstance().job_ready() ||
+      bit_width_ <= 1) {
+    return GetBatchWithDict(dictionary, dictionary_length, values, batch_size);
+  }
+  uint32_t job_id = 0;
+  qpl_job* job = 
::arrow::util::internal::QplJobHWPool::GetInstance().AcquireJob(job_id);
+  if (job == NULL) {

Review Comment:
   ```suggestion
     if (!job) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [arrow] kou commented on a diff in pull request #14585: ARROW-17884: [C++] Add Intel®-IAA/QPL-based Parquet RLE Decode

Reply via email to