This is an automated email from the ASF dual-hosted git repository.

felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new dcdf4e6953 GH-41460: [C++] Use ASAN to poison temp vector stack memory 
(#41695)
dcdf4e6953 is described below

commit dcdf4e6953b7fdab6078c444c8d07a606750fec1
Author: Rossi Sun <zanmato1...@gmail.com>
AuthorDate: Sat May 18 11:21:32 2024 +0800

    GH-41460: [C++] Use ASAN to poison temp vector stack memory (#41695)
    
    
    
    ### Rationale for this change
    
    See #41460. And reduce the overhead of current manual poisoning (filling 
the entire stack space with `0xFF`s) that happens even in release mode.
    
    ### What changes are included in this PR?
    
    Use ASAN API to replace the current manual poisoning of the temp vector 
stack memory.
    
    ### Are these changes tested?
    
    Wanted to add cases to assert that ASAN poison/unpoison is functioning. 
However I found it too tricky to catch an ASAN error because ASAN directly uses 
signals that are hard to interoperate in C++/gtest. So I just manually checked 
poisoning is working in my local, e.g. by intentionally not unpoisoning the 
allocated buffer and seeing ASAN unhappy.
    
    Just leveraging existing cases that use temp stack such as acero tests, 
which should cover this change well.
    
    ### Are there any user-facing changes?
    
    None.
    
    * GitHub Issue: #41460
    
    Authored-by: Ruoxi Sun <zanmato1...@gmail.com>
    Signed-off-by: Felipe Oliveira Carvalho <felipe...@gmail.com>
---
 cpp/src/arrow/compute/util_internal.cc | 27 ++++++++++++++++++++++++---
 cpp/src/arrow/compute/util_internal.h  | 10 +++++++++-
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compute/util_internal.cc 
b/cpp/src/arrow/compute/util_internal.cc
index cc26982fef..9780d1b2f3 100644
--- a/cpp/src/arrow/compute/util_internal.cc
+++ b/cpp/src/arrow/compute/util_internal.cc
@@ -20,16 +20,29 @@
 #include "arrow/compute/util.h"
 #include "arrow/memory_pool.h"
 
+#ifdef ADDRESS_SANITIZER
+#include <sanitizer/asan_interface.h>
+#endif
+
 namespace arrow {
 namespace util {
 
+TempVectorStack::~TempVectorStack() {
+#ifdef ADDRESS_SANITIZER
+  if (buffer_) {
+    ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data(), buffer_size_);
+  }
+#endif
+}
+
 Status TempVectorStack::Init(MemoryPool* pool, int64_t size) {
   num_vectors_ = 0;
   top_ = 0;
   buffer_size_ = EstimatedAllocationSize(size);
   ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool));
-  // Ensure later operations don't accidentally read uninitialized memory.
-  std::memset(buffer->mutable_data(), 0xFF, size);
+#ifdef ADDRESS_SANITIZER
+  ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), size);
+#endif
   buffer_ = std::move(buffer);
   return Status::OK();
 }
@@ -53,12 +66,17 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** 
data, int* id) {
   ARROW_CHECK_LE(new_top, buffer_size_)
       << "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size
       << " on top of " << top_ << " in stack of size " << buffer_size_;
-  *data = buffer_->mutable_data() + top_ + sizeof(uint64_t);
+#ifdef ADDRESS_SANITIZER
+  ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data() + top_, 
estimated_alloc_size);
+#endif
+  *data = buffer_->mutable_data() + top_ + /*one guard*/ sizeof(uint64_t);
+#ifndef NDEBUG
   // We set 8 bytes before the beginning of the allocated range and
   // 8 bytes after the end to check for stack overflow (which would
   // result in those known bytes being corrupted).
   reinterpret_cast<uint64_t*>(buffer_->mutable_data() + top_)[0] = kGuard1;
   reinterpret_cast<uint64_t*>(buffer_->mutable_data() + new_top)[-1] = kGuard2;
+#endif
   *id = num_vectors_++;
   top_ = new_top;
 }
@@ -72,6 +90,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) {
   top_ -= size;
   ARROW_DCHECK(reinterpret_cast<const uint64_t*>(buffer_->mutable_data() + 
top_)[0] ==
                kGuard1);
+#ifdef ADDRESS_SANITIZER
+  ASAN_POISON_MEMORY_REGION(buffer_->mutable_data() + top_, size);
+#endif
   --num_vectors_;
 }
 
diff --git a/cpp/src/arrow/compute/util_internal.h 
b/cpp/src/arrow/compute/util_internal.h
index 043ff11806..f6c5ac1f83 100644
--- a/cpp/src/arrow/compute/util_internal.h
+++ b/cpp/src/arrow/compute/util_internal.h
@@ -20,6 +20,7 @@
 #include "arrow/status.h"
 #include "arrow/type_fwd.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/macros.h"
 
 namespace arrow {
 namespace util {
@@ -38,13 +39,20 @@ class ARROW_EXPORT TempVectorStack {
   friend class TempVectorHolder;
 
  public:
+  TempVectorStack() = default;
+  ~TempVectorStack();
+
+  ARROW_DISALLOW_COPY_AND_ASSIGN(TempVectorStack);
+
+  ARROW_DEFAULT_MOVE_AND_ASSIGN(TempVectorStack);
+
   Status Init(MemoryPool* pool, int64_t size);
 
   int64_t AllocatedSize() const { return top_; }
 
  private:
   static int64_t EstimatedAllocationSize(int64_t size) {
-    return PaddedAllocationSize(size) + 2 * sizeof(uint64_t);
+    return PaddedAllocationSize(size) + /*two guards*/ 2 * sizeof(uint64_t);
   }
 
   static int64_t PaddedAllocationSize(int64_t num_bytes);

Reply via email to