zhanglistar commented on code in PR #2048:
URL: https://github.com/apache/orc/pull/2048#discussion_r1796322424
##########
c++/src/MemoryPool.cc:
##########
@@ -242,9 +220,6 @@ namespace orc {
template <>
void DataBuffer<int16_t>::resize(uint64_t newSize) {
reserve(newSize);
- if (newSize > currentSize_) {
Review Comment:
No need for memset, yes.
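
(For context, after dropping the zero-fill the specialization presumably reduces to a sketch like the following; `currentSize_` comes from the existing DataBuffer code, and the exact remaining body is not visible in this hunk.)

```cpp
// Sketch only: once reserve() has guaranteed capacity, the int16_t
// specialization can simply record the new logical size without memset.
template <>
void DataBuffer<int16_t>::resize(uint64_t newSize) {
  reserve(newSize);
  currentSize_ = newSize;
}
```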
##########
c++/src/Reader.cc:
##########
@@ -1474,6 +1476,77 @@ namespace orc {
return ret;
}
+ void ReaderImpl::releaseBuffer(uint64_t boundary) {
+ if (cachedSource_) {
+ cachedSource_->evictEntriesBefore(boundary);
+ }
+ }
+
+ void ReaderImpl::preBuffer(const std::vector<int>& stripes,
+                             const std::list<uint64_t>& includeTypes, const CacheOptions& options) {
+ if (stripes.empty() || includeTypes.empty()) {
+ return;
+ }
+
+ orc::RowReaderOptions row_reader_options;
+ row_reader_options.includeTypes(includeTypes);
+ ColumnSelector column_selector(contents_.get());
+ std::vector<bool> selected_columns;
+ column_selector.updateSelected(selected_columns, row_reader_options);
+
+ std::vector<ReadRange> ranges;
Review Comment:
Maybe shorten this to `std::vector<ReadRange> ranges(includeTypes.size());`?
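
(For illustration, the two patterns presumably look like the sketch below; the offsets, lengths, and the fill loop are placeholders, not what Reader.cc actually computes in this PR.)

```cpp
// Sketch only: reserve + push_back vs. constructing the vector pre-sized.
std::list<uint64_t> includeTypes = {1, 2, 3};

// Pattern implied by the hunk: start empty and append.
std::vector<ReadRange> rangesA;
rangesA.reserve(includeTypes.size());
for (uint64_t type : includeTypes) {
  rangesA.push_back(ReadRange(type * 100, 50));  // placeholder values
}

// Suggested pattern: size it up front and assign by index, which relies on
// ReadRange being default-constructible (it is, per Cache.hh).
std::vector<ReadRange> rangesB(includeTypes.size());
size_t i = 0;
for (uint64_t type : includeTypes) {
  rangesB[i++] = ReadRange(type * 100, 50);  // placeholder values
}
```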
##########
c++/src/io/Cache.hh:
##########
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <orc/MemoryPool.hh>
+#include <orc/OrcFile.hh>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <future>
+#include <utility>
+#include <vector>
+
+namespace orc {
+ class InputStream;
+
+ struct CacheOptions {
+ /// The maximum distance in bytes between two consecutive
+ /// ranges; beyond this value, ranges are not combined
+ uint64_t holeSizeLimit = 8192;
+
+ /// The maximum size in bytes of a combined range; if
+ /// combining two consecutive ranges would produce a range of a
+ /// size greater than this, they are not combined
+ uint64_t rangeSizeLimit = 32 * 1024 * 1024;
+ };
+
+ struct ReadRange {
+ uint64_t offset;
+ uint64_t length;
+
+ ReadRange() = default;
+    ReadRange(uint64_t offset, uint64_t length) : offset(offset), length(length) {}
+
+ friend bool operator==(const ReadRange& left, const ReadRange& right) {
+ return (left.offset == right.offset && left.length == right.length);
+ }
+ friend bool operator!=(const ReadRange& left, const ReadRange& right) {
+ return !(left == right);
+ }
+
+ bool contains(const ReadRange& other) const {
+      return (offset <= other.offset && offset + length >= other.offset + other.length);
+ }
+ };
+
+ struct ReadRangeCombiner {
+ std::vector<ReadRange> coalesce(std::vector<ReadRange> ranges) const {
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+ // Remove zero-sized ranges
+ auto end = std::remove_if(ranges.begin(), ranges.end(),
+                                [](const ReadRange& range) { return range.length == 0; });
+ // Sort in position order
+      std::sort(ranges.begin(), end, [](const ReadRange& a, const ReadRange& b) {
+        return a.offset != b.offset ? a.offset < b.offset : a.length > b.length;
+      });
+
+ // Remove ranges that overlap 100%
+      end = std::unique(ranges.begin(), end, [](const ReadRange& left, const ReadRange& right) {
+ return left.contains(right);
+ });
+ ranges.resize(end - ranges.begin());
+
+      // Skip further processing if ranges is empty after removing zero-sized ranges.
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+#ifndef NDEBUG
+ for (size_t i = 0; i < ranges.size() - 1; ++i) {
+ const auto& left = ranges[i];
+ const auto& right = ranges[i + 1];
+ assert(left.offset < right.offset);
+ assert(!left.contains(right));
+ }
+#endif
+
+ std::vector<ReadRange> coalesced;
+
+ auto itr = ranges.begin();
+ // Ensure ranges is not empty.
+ assert(itr <= ranges.end());
+      // Start of the current coalesced range and end (exclusive) of previous range.
+      // Both are initialized with the start of first range which is a placeholder value.
+ uint64_t coalesced_start = itr->offset;
+ uint64_t prev_range_end = coalesced_start;
+
+ for (; itr < ranges.end(); ++itr) {
+ const uint64_t current_range_start = itr->offset;
+ const uint64_t current_range_end = current_range_start + itr->length;
+ // We don't expect to have 0 sized ranges.
+ assert(current_range_start < current_range_end);
+
+        // At this point, the coalesced range is [coalesced_start, prev_range_end).
+ // Stop coalescing if:
+ // - coalesced range is too large, or
+ // - distance (hole/gap) between consecutive ranges is too large.
+ if ((current_range_end - coalesced_start > rangeSizeLimit) ||
+ (current_range_start > prev_range_end + holeSizeLimit)) {
+ assert(coalesced_start <= prev_range_end);
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+            coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+ // Start a new coalesced range.
+ coalesced_start = current_range_start;
+ }
+
+ // Update the prev_range_end with the current range.
+ prev_range_end = current_range_end;
+ }
+
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+        coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+
+ assert(coalesced.front().offset == ranges.front().offset);
+ assert(coalesced.back().offset + coalesced.back().length ==
+ ranges.back().offset + ranges.back().length);
+ return coalesced;
+ }
+
+ const uint64_t holeSizeLimit;
+ const uint64_t rangeSizeLimit;
+ };
+
+  std::vector<ReadRange> coalesceReadRanges(std::vector<ReadRange> ranges, uint64_t holeSizeLimit,
+ uint64_t rangeSizeLimit);
+ struct RangeCacheEntry {
+ using BufferPtr = InputStream::BufferPtr;
+
+ ReadRange range;
+
+    // The result may be retrieved multiple times, so we use shared_future instead of std::future
+ std::shared_future<BufferPtr> future;
+
+ RangeCacheEntry() = default;
+ RangeCacheEntry(const ReadRange& range, std::future<BufferPtr> future)
+ : range(range), future(std::move(future).share()) {}
+
+    friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
+ return left.range.offset < right.range.offset;
+ }
+ };
+
+ /// A read cache designed to hide IO latencies when reading.
+ ///
+  /// This class takes multiple byte ranges that an application expects to read, and
+  /// coalesces them into fewer, larger read requests, which benefits performance on some
+  /// filesystems, particularly remote ones like Amazon S3. By default, it also issues
+  /// these read requests in parallel up front.
+  ///
+  /// To use:
+  /// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
+  ///    the exact offset and length that will later be read. The cache will combine those
+  ///    ranges according to parameters (see constructor).
+  ///
+  /// By default, the cache will also start fetching the combined ranges in parallel in
+  /// the background, unless CacheOptions.lazy is set.
+  ///
+  /// 2. Call WaitFor() to be notified when the given ranges have been read. If
+  ///    CacheOptions.lazy is set, I/O will be triggered in the background here instead.
+  ///    This can be done in parallel (e.g. if parsing a file, call WaitFor() for each
+  ///    chunk of the file that can be parsed in parallel).
+  ///
+  /// 3. Call Read() to retrieve the actual data for the given ranges.
+  ///    A synchronous application may skip WaitFor() and just call Read() - it will still
+  ///    benefit from coalescing and parallel fetching.
+ class ReadRangeCache {
+ public:
+ /// Construct a read cache with given options
+    explicit ReadRangeCache(InputStream* stream, CacheOptions options, MemoryPool* memoryPool)
+        : stream_(stream), options_(std::move(options)), memoryPool_(memoryPool) {}
+
+ ~ReadRangeCache() = default;
+
+ /// Cache the given ranges in the background.
+ ///
+ /// The caller must ensure that the ranges do not overlap with each other,
+    /// nor with previously cached ranges. Otherwise, behaviour will be undefined.
+ void cache(std::vector<ReadRange> ranges);
Review Comment:
Consider `const std::vector<ReadRange>& ranges`.
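
(A minimal sketch of the trade-off being raised; `RangeSink` is a made-up stand-in, not the PR's ReadRangeCache. Taking by value can still be the better choice if the implementation moves the vector into coalescing.)

```cpp
#include <cstdint>
#include <utility>
#include <vector>

struct ReadRange { uint64_t offset = 0; uint64_t length = 0; };

// Hypothetical stand-in class to illustrate the two signatures.
class RangeSink {
 public:
  // Review suggestion: borrow the caller's vector, no copy at the call site.
  void cacheByRef(const std::vector<ReadRange>& ranges) { stored_ = ranges; }

  // PR-style signature: take by value so the callee owns the data; callers
  // that no longer need the vector can std::move it in to avoid the copy.
  void cacheByValue(std::vector<ReadRange> ranges) { stored_ = std::move(ranges); }

 private:
  std::vector<ReadRange> stored_;
};
```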
##########
c++/src/io/Cache.hh:
##########
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <orc/MemoryPool.hh>
+#include <orc/OrcFile.hh>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <future>
+#include <utility>
+#include <vector>
+
+namespace orc {
+ class InputStream;
+
+ struct CacheOptions {
+ /// The maximum distance in bytes between two consecutive
+ /// ranges; beyond this value, ranges are not combined
+ uint64_t holeSizeLimit = 8192;
+
+ /// The maximum size in bytes of a combined range; if
+ /// combining two consecutive ranges would produce a range of a
+ /// size greater than this, they are not combined
+ uint64_t rangeSizeLimit = 32 * 1024 * 1024;
+ };
Review Comment:
Do we have a memory limit for the cache?
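
(As shown, the two options only bound each coalesced request and the holes it absorbs; nothing in this struct caps the total bytes the cache holds. A worked example of how the limits act on individual ranges, with made-up numbers:)

```cpp
// Worked example (illustrative numbers), holeSizeLimit = 8192,
// rangeSizeLimit = 32 MiB:
//   input ranges : {0, 100}, {4000, 100}, {20000, 100}
//   coalesced    : {0, 4100},             {20000, 100}
// The first two ranges sit 3900 bytes apart (< holeSizeLimit), so they merge
// into one request that also reads the 3900-byte hole; the third range starts
// 15900 bytes after the previous end (> holeSizeLimit), so it stays separate.
// Total cached bytes are the sum of the coalesced lengths; there is no
// explicit overall memory limit in the options shown here.
```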
##########
c++/src/io/Cache.hh:
##########
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <orc/MemoryPool.hh>
+#include <orc/OrcFile.hh>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <future>
+#include <utility>
+#include <vector>
+
+namespace orc {
+ class InputStream;
+
+ struct CacheOptions {
+ /// The maximum distance in bytes between two consecutive
+ /// ranges; beyond this value, ranges are not combined
+ uint64_t holeSizeLimit = 8192;
+
+ /// The maximum size in bytes of a combined range; if
+ /// combining two consecutive ranges would produce a range of a
+ /// size greater than this, they are not combined
+ uint64_t rangeSizeLimit = 32 * 1024 * 1024;
+ };
+
+ struct ReadRange {
+ uint64_t offset;
+ uint64_t length;
+
+ ReadRange() = default;
+    ReadRange(uint64_t offset, uint64_t length) : offset(offset), length(length) {}
+
+ friend bool operator==(const ReadRange& left, const ReadRange& right) {
+ return (left.offset == right.offset && left.length == right.length);
+ }
+ friend bool operator!=(const ReadRange& left, const ReadRange& right) {
+ return !(left == right);
+ }
+
+ bool contains(const ReadRange& other) const {
+      return (offset <= other.offset && offset + length >= other.offset + other.length);
+ }
+ };
+
+ struct ReadRangeCombiner {
+ std::vector<ReadRange> coalesce(std::vector<ReadRange> ranges) const {
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+ // Remove zero-sized ranges
+ auto end = std::remove_if(ranges.begin(), ranges.end(),
+                                [](const ReadRange& range) { return range.length == 0; });
+ // Sort in position order
+      std::sort(ranges.begin(), end, [](const ReadRange& a, const ReadRange& b) {
+        return a.offset != b.offset ? a.offset < b.offset : a.length > b.length;
+      });
+
+ // Remove ranges that overlap 100%
+      end = std::unique(ranges.begin(), end, [](const ReadRange& left, const ReadRange& right) {
+ return left.contains(right);
+ });
+ ranges.resize(end - ranges.begin());
+
+      // Skip further processing if ranges is empty after removing zero-sized ranges.
+ if (ranges.empty()) {
+ return ranges;
+ }
+
+#ifndef NDEBUG
+ for (size_t i = 0; i < ranges.size() - 1; ++i) {
+ const auto& left = ranges[i];
+ const auto& right = ranges[i + 1];
+ assert(left.offset < right.offset);
+ assert(!left.contains(right));
+ }
+#endif
+
+ std::vector<ReadRange> coalesced;
+
+ auto itr = ranges.begin();
+ // Ensure ranges is not empty.
+ assert(itr <= ranges.end());
+      // Start of the current coalesced range and end (exclusive) of previous range.
+      // Both are initialized with the start of first range which is a placeholder value.
+ uint64_t coalesced_start = itr->offset;
+ uint64_t prev_range_end = coalesced_start;
+
+ for (; itr < ranges.end(); ++itr) {
+ const uint64_t current_range_start = itr->offset;
+ const uint64_t current_range_end = current_range_start + itr->length;
+ // We don't expect to have 0 sized ranges.
+ assert(current_range_start < current_range_end);
+
+        // At this point, the coalesced range is [coalesced_start, prev_range_end).
+ // Stop coalescing if:
+ // - coalesced range is too large, or
+ // - distance (hole/gap) between consecutive ranges is too large.
+ if ((current_range_end - coalesced_start > rangeSizeLimit) ||
+ (current_range_start > prev_range_end + holeSizeLimit)) {
+ assert(coalesced_start <= prev_range_end);
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+            coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+ // Start a new coalesced range.
+ coalesced_start = current_range_start;
+ }
+
+ // Update the prev_range_end with the current range.
+ prev_range_end = current_range_end;
+ }
+
+ // Append the coalesced range only if coalesced range size > 0.
+ if (prev_range_end > coalesced_start) {
+        coalesced.push_back({coalesced_start, prev_range_end - coalesced_start});
+ }
+
+ assert(coalesced.front().offset == ranges.front().offset);
+ assert(coalesced.back().offset + coalesced.back().length ==
+ ranges.back().offset + ranges.back().length);
+ return coalesced;
+ }
+
+ const uint64_t holeSizeLimit;
+ const uint64_t rangeSizeLimit;
+ };
+
+  std::vector<ReadRange> coalesceReadRanges(std::vector<ReadRange> ranges, uint64_t holeSizeLimit,
+ uint64_t rangeSizeLimit);
+ struct RangeCacheEntry {
+ using BufferPtr = InputStream::BufferPtr;
+
+ ReadRange range;
+
+    // The result may be retrieved multiple times, so we use shared_future instead of std::future
+ std::shared_future<BufferPtr> future;
+
+ RangeCacheEntry() = default;
+ RangeCacheEntry(const ReadRange& range, std::future<BufferPtr> future)
+ : range(range), future(std::move(future).share()) {}
+
+    friend bool operator<(const RangeCacheEntry& left, const RangeCacheEntry& right) {
+ return left.range.offset < right.range.offset;
+ }
+ };
+
+ /// A read cache designed to hide IO latencies when reading.
+ ///
+  /// This class takes multiple byte ranges that an application expects to read, and
+  /// coalesces them into fewer, larger read requests, which benefits performance on some
+  /// filesystems, particularly remote ones like Amazon S3. By default, it also issues
+  /// these read requests in parallel up front.
+  ///
+  /// To use:
+  /// 1. Cache() the ranges you expect to read in the future. Ideally, these ranges have
+  ///    the exact offset and length that will later be read. The cache will combine those
+  ///    ranges according to parameters (see constructor).
+  ///
+  /// By default, the cache will also start fetching the combined ranges in parallel in
+  /// the background, unless CacheOptions.lazy is set.
+  ///
+  /// 2. Call WaitFor() to be notified when the given ranges have been read. If
Review Comment:
Where is `WaitFor`?
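
(The waiting the comment describes seems to be what the `shared_future` stored in `RangeCacheEntry` provides, even though no `WaitFor()` member appears in the header as quoted. A standard-library-only sketch of that mechanism, not the PR's API:)

```cpp
#include <future>
#include <iostream>
#include <string>

// Sketch: why RangeCacheEntry stores a std::shared_future. Unlike a
// std::future, a shared_future can be waited on and read multiple times,
// so several consumers of one cached range can block on a single fetch.
int main() {
  std::future<std::string> pending = std::async(std::launch::async, [] {
    return std::string("bytes of one coalesced range");
  });
  std::shared_future<std::string> shared = pending.share();

  shared.wait();                      // the "wait for the range" step
  std::cout << shared.get() << "\n";  // first retrieval
  std::cout << shared.get() << "\n";  // retrieving again is still valid
  return 0;
}
```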
##########
c++/src/io/Cache.hh:
##########
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <orc/MemoryPool.hh>
+#include <orc/OrcFile.hh>
+
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <future>
+#include <utility>
+#include <vector>
+
+namespace orc {
+ class InputStream;
+
+ struct CacheOptions {
Review Comment:
Should there be a `lazy` field here?
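
(If it were added, it would presumably sit alongside the two existing limits; a purely hypothetical sketch, not part of the PR as shown:)

```cpp
struct CacheOptions {
  /// The maximum distance in bytes between two consecutive
  /// ranges; beyond this value, ranges are not combined
  uint64_t holeSizeLimit = 8192;

  /// The maximum size in bytes of a combined range; if
  /// combining two consecutive ranges would produce a range of a
  /// size greater than this, they are not combined
  uint64_t rangeSizeLimit = 32 * 1024 * 1024;

  /// Hypothetical field implied by the class comment on ReadRangeCache:
  /// when true, cache() only records the coalesced ranges and the actual
  /// reads are triggered later instead of up front.
  bool lazy = false;
};
```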
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]