FelixYBW commented on code in PR #7861:
URL: https://github.com/apache/incubator-gluten/pull/7861#discussion_r1857543376


##########
cpp/core/shuffle/Utils.cc:
##########
@@ -212,6 +215,106 @@ arrow::Result<std::shared_ptr<arrow::RecordBatch>> 
makeUncompressedRecordBatch(
   }
   return arrow::RecordBatch::Make(writeSchema, 1, {arrays});
 }
+
+MmapFileStream::MmapFileStream(arrow::internal::FileDescriptor fd, uint8_t* 
data, int64_t size)
+    : fd_(std::move(fd)), data_(data), size_(size){};
+
+arrow::Result<std::shared_ptr<MmapFileStream>> MmapFileStream::open(const 
std::string& path) {
+  ARROW_ASSIGN_OR_RAISE(auto fileName, 
arrow::internal::PlatformFilename::FromString(path));
+
+  ARROW_ASSIGN_OR_RAISE(auto fd, arrow::internal::FileOpenReadable(fileName));
+  ARROW_ASSIGN_OR_RAISE(auto size, arrow::internal::FileGetSize(fd.fd()));
+
+  void* result = mmap(nullptr, size, PROT_READ, MAP_PRIVATE, fd.fd(), 0);
+  if (result == MAP_FAILED) {
+    return arrow::Status::IOError("Memory mapping file failed: ", 
::arrow::internal::ErrnoMessage(errno));
+  }
+
+  return std::make_shared<MmapFileStream>(std::move(fd), 
static_cast<uint8_t*>(result), size);
+}
+
+arrow::Result<int64_t> MmapFileStream::actualReadSize(int64_t nbytes) {
+  if (nbytes < 0 || pos_ > size_) {
+    return arrow::Status::IOError("Read out of range. Offset: ", pos_, " Size: 
", nbytes, " File Size: ", size_);
+  }
+  return std::min(size_ - pos_, nbytes);
+}
+
+bool MmapFileStream::closed() const {
+  return data_ == nullptr;
+};
+
+void MmapFileStream::advance(int64_t length) {
+  static auto pageSize = static_cast<size_t>(arrow::internal::GetPageSize());

Review Comment:
   The page size is likely too small a granularity here. Could you use the `spark.shuffle.file.buffer` configuration value instead?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to