Re: [PR] ORC-262: [C++] Support async io prefetch for orc c++ lib [orc]

via GitHub Sun, 17 Nov 2024 20:31:33 -0800


taiyang-li commented on code in PR #2048:
URL: https://github.com/apache/orc/pull/2048#discussion_r1845855628



##########
c++/include/orc/OrcFile.hh:
##########
@@ -58,6 +73,17 @@ namespace orc {
      */
     virtual void read(void* buf, uint64_t length, uint64_t offset) = 0;
 
+    /**
+     * Read data asynchronously.
+     * @param offset the position in the stream to read from.
+     * @param length the number of bytes to read.
+     * @return a future that will be set to the buffer when the read is 
complete.
+     */
+    virtual std::future<BufferPtr> readAsync(uint64_t /*offset*/, uint64_t 
/*length*/,

Review Comment:
   I added a benchmark for it.
   ``` cpp
   
   // Number of worker threads launched by each benchmark helper below.
   static constexpr size_t numThreads = 16;
   // Number of char elements in every buffer the workers fill (1024 * 1024).
   static constexpr size_t bufferSize = 1024 * 1024;
   // Thread slots used by allocateAndWriteInBackground(); sized to numThreads.
   static std::vector<std::thread> threads1(numThreads);
   // Thread slots used by writeInBackground(); sized to numThreads.
   static std::vector<std::thread> threads2(numThreads);
   
   static void allocateAndWriteInBackground()
   {
       for (size_t i = 0; i < numThreads; ++i)
       {
           threads1[i] = std::thread(
               []()
               {
                   orc::DataBuffer<char> buffer(*orc::getDefaultPool(), 
bufferSize);
                   for (size_t j = 0; j < bufferSize; ++j)
                       buffer[j] = static_cast<char>(j);
               });
       }
   
       for (auto & thread : threads1)
       {
           thread.join();
       }
   }
   
   /// Benchmark body: calls allocateAndWriteInBackground() once per iteration
   /// of the benchmark state loop.
   static void BM_allocateAndWriteInBackground(benchmark::State & state)
   {
       for (auto _ : state)
       {
           allocateAndWriteInBackground();
       }
   }
   
   /// Allocates numThreads buffers on the calling thread, then starts one worker
   /// per buffer that only writes every element of it. All workers are joined
   /// before the function returns.
   static void writeInBackground()
   {
       // Owns every buffer until all workers have been joined.
       std::vector<std::shared_ptr<orc::DataBuffer<char>>> buffers(numThreads);
       for (size_t i = 0; i < numThreads; ++i)
       {
           auto buffer = std::make_shared<orc::DataBuffer<char>>(*orc::getDefaultPool(), bufferSize);
           buffers[i] = buffer;
           // Capture the shared_ptr BY VALUE. `buffer` is a loop-local that is
           // destroyed at the end of this iteration, so a by-reference capture
           // would leave the worker holding a dangling reference (undefined
           // behaviour) whenever it starts running after the iteration ends.
           threads2[i] = std::thread(
               [buffer]()
               {
                   for (size_t j = 0; j < bufferSize; ++j)
                       (*buffer)[j] = static_cast<char>(j);
               });
       }

       for (auto & thread : threads2)
       {
           thread.join();
       }
   }
   
   /// Benchmark body: calls writeInBackground() once per iteration of the
   /// benchmark state loop.
   static void BM_writeInBackground(benchmark::State & state)
   {
       for (auto _ : state)
       {
           writeInBackground();
       }
   }
   
   // Register the two benchmark functions defined above with Google Benchmark.
   BENCHMARK(BM_allocateAndWriteInBackground);
   BENCHMARK(BM_writeInBackground);
   // Google Benchmark macro that supplies the program's main().
   BENCHMARK_MAIN();
   ```
   
   ``` bash
   2024-11-18T12:19:32+08:00
   Running ./build_gcc/src/Columns/benchmarks/column_insert_many_from
   Run on (32 X 2100 MHz CPU s)
   CPU Caches:
     L1 Data 32 KiB (x16)
     L1 Instruction 32 KiB (x16)
     L2 Unified 1024 KiB (x16)
     L3 Unified 11264 KiB (x2)
   Load Average: 11.94, 36.84, 51.97
   --------------------------------------------------------------------------
   Benchmark                                Time             CPU   Iterations
   --------------------------------------------------------------------------
   BM_allocateAndWriteInBackground    1837287 ns       626416 ns         1119
   BM_writeInBackground               3215108 ns       535975 ns         1319
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] ORC-262: [C++] Support async io prefetch for orc c++ lib [orc]

Reply via email to