This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 29e8ea0110 GH-44491: [C++] StatusConstant- cheaply copied const Status 
(#44493)
29e8ea0110 is described below

commit 29e8ea011045ba4318a552567a26b2bb0a7d3f05
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Thu Nov 14 10:44:06 2024 -0600

    GH-44491: [C++] StatusConstant- cheaply copied const Status (#44493)
    
    
    
    ### Rationale for this change
    
    It'd be convenient to construct placeholder error Status cheaply.
    
    ### What changes are included in this PR?
    
    A constant error Status can now be constructed wrapped in a function like
    ```c++
    Status UninitializedResult() {
      static StatusConstant uninitialized_result{StatusCode::UnknownError,
                                                 "Uninitialized Result<T>"};
      return uninitialized_result;
    }
    ```
    
    Copying the constant status is relatively cheap (no heap interaction), so 
it's suitable for use as a placeholder error status.
    
    Added `bool Status::State::is_constant` which causes copies to be shallow 
and skips destruction. Also `Status::state_` is a const pointer now; this helps 
to ensure that it is obvious when we mutate state_ (as in AddContextLine).
    
    ### Are these changes tested?
    
    Minimal unit test added. The main consideration is probably benchmarks to 
make sure hot paths don't get much slower.
    
    ### Are there any user-facing changes?
    
    This API is not currently public; no user-facing changes.
    
    * GitHub Issue: #44491
    
    Lead-authored-by: Benjamin Kietzman <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/buffer_test.cc                   |  9 ++------
 cpp/src/arrow/result.cc                        | 13 +++++++----
 cpp/src/arrow/result.h                         |  8 ++++---
 cpp/src/arrow/status.cc                        | 23 +++++++++++-------
 cpp/src/arrow/status.h                         | 26 ++++++++++++++-------
 cpp/src/arrow/{result.cc => status_internal.h} | 32 ++++++++++++++++----------
 cpp/src/arrow/status_test.cc                   | 22 ++++++++++++++++++
 7 files changed, 90 insertions(+), 43 deletions(-)

diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc
index 06ed0bfba0..4dd210076e 100644
--- a/cpp/src/arrow/buffer_test.cc
+++ b/cpp/src/arrow/buffer_test.cc
@@ -511,13 +511,8 @@ TEST(TestBuffer, CopySliceEmpty) {
 }
 
 TEST(TestBuffer, ToHexString) {
-  const uint8_t data_array[] = "\a0hex string\xa9";
-  std::basic_string<uint8_t> data_str = data_array;
-
-  auto data = reinterpret_cast<const uint8_t*>(data_str.c_str());
-
-  Buffer buf(data, data_str.size());
-
+  const std::string data_str = "\a0hex string\xa9";
+  Buffer buf(data_str);
   ASSERT_EQ(buf.ToHexString(), std::string("073068657820737472696E67A9"));
 }
 
diff --git a/cpp/src/arrow/result.cc b/cpp/src/arrow/result.cc
index 0bb65acb83..e8c5a57ee7 100644
--- a/cpp/src/arrow/result.cc
+++ b/cpp/src/arrow/result.cc
@@ -19,11 +19,10 @@
 
 #include <string>
 
+#include "arrow/status_internal.h"
 #include "arrow/util/logging.h"
 
-namespace arrow {
-
-namespace internal {
+namespace arrow::internal {
 
 void DieWithMessage(const std::string& msg) { ARROW_LOG(FATAL) << msg; }
 
@@ -31,6 +30,10 @@ void InvalidValueOrDie(const Status& st) {
   DieWithMessage(std::string("ValueOrDie called on an error: ") + 
st.ToString());
 }
 
-}  // namespace internal
+Status UninitializedResult() {
+  static StatusConstant uninitialized_result{StatusCode::UnknownError,
+                                             "Uninitialized Result<T>"};
+  return uninitialized_result;
+}
 
-}  // namespace arrow
+}  // namespace arrow::internal
diff --git a/cpp/src/arrow/result.h b/cpp/src/arrow/result.h
index 0913511542..f8ae5b15d5 100644
--- a/cpp/src/arrow/result.h
+++ b/cpp/src/arrow/result.h
@@ -39,6 +39,8 @@ ARROW_EXPORT void DieWithMessage(const std::string& msg);
 
 ARROW_EXPORT void InvalidValueOrDie(const Status& st);
 
+ARROW_EXPORT Status UninitializedResult();
+
 }  // namespace internal
 
 /// A class for representing either a usable value, or an error.
@@ -112,7 +114,7 @@ class [[nodiscard]] Result : public 
util::EqualityComparable<Result<T>> {
   /// an empty vector, it will actually invoke the default constructor of
   /// Result.
   explicit Result() noexcept  // NOLINT(runtime/explicit)
-      : status_(Status::UnknownError("Uninitialized Result<T>")) {}
+      : status_(internal::UninitializedResult()) {}
 
   ~Result() noexcept { Destroy(); }
 
@@ -301,8 +303,8 @@ class [[nodiscard]] Result : public 
util::EqualityComparable<Result<T>> {
   /// \return The stored non-OK status object, or an OK status if this object
   ///         has a value.
   Status status() && {
-    if (ok()) return Status::OK();
-    auto tmp = Status::UnknownError("Uninitialized Result<T>");
+    if (ARROW_PREDICT_TRUE(ok())) return Status::OK();
+    auto tmp = internal::UninitializedResult();
     std::swap(status_, tmp);
     return tmp;
   }
diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
index 368e03cac0..8cbc6842c4 100644
--- a/cpp/src/arrow/status.cc
+++ b/cpp/src/arrow/status.cc
@@ -15,7 +15,9 @@
 #include <cassert>
 #include <cstdlib>
 #include <iostream>
-#include <sstream>
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+#  include <sstream>
+#endif
 
 #include "arrow/util/logging.h"
 
@@ -26,18 +28,19 @@ Status::Status(StatusCode code, const std::string& msg)
 
 Status::Status(StatusCode code, std::string msg, std::shared_ptr<StatusDetail> 
detail) {
   ARROW_CHECK_NE(code, StatusCode::OK) << "Cannot construct ok status with 
message";
-  state_ = new State;
-  state_->code = code;
-  state_->msg = std::move(msg);
-  if (detail != nullptr) {
-    state_->detail = std::move(detail);
-  }
+  state_ = new State{code, /*is_constant=*/false, std::move(msg), 
std::move(detail)};
 }
 
 void Status::CopyFrom(const Status& s) {
-  delete state_;
+  if (ARROW_PREDICT_FALSE(state_ != NULL)) {
+    if (!state_->is_constant) {
+      DeleteState();
+    }
+  }
   if (s.state_ == nullptr) {
     state_ = nullptr;
+  } else if (s.state_->is_constant) {
+    state_ = s.state_;
   } else {
     state_ = new State(*s.state_);
   }
@@ -160,6 +163,10 @@ void Status::AddContextLine(const char* filename, int 
line, const char* expr) {
   ARROW_CHECK(!ok()) << "Cannot add context line to ok status";
   std::stringstream ss;
   ss << "\n" << filename << ":" << line << "  " << expr;
+  if (state_->is_constant) {
+    // We can't add context lines to a StatusConstant's state, so copy it now
+    state_ = new State{code(), /*is_constant=*/false, message(), detail()};
+  }
   state_->msg += ss.str();
 }
 #endif
diff --git a/cpp/src/arrow/status.h b/cpp/src/arrow/status.h
index fb75d963f3..853fc284ee 100644
--- a/cpp/src/arrow/status.h
+++ b/cpp/src/arrow/status.h
@@ -82,6 +82,9 @@
 #endif
 
 namespace arrow {
+namespace internal {
+class StatusConstant;
+}
 
 enum class StatusCode : char {
   OK = 0,
@@ -135,10 +138,10 @@ class ARROW_EXPORT [[nodiscard]] Status : public 
util::EqualityComparable<Status
   // Create a success status.
   constexpr Status() noexcept : state_(NULLPTR) {}
   ~Status() noexcept {
-    // ARROW-2400: On certain compilers, splitting off the slow path improves
-    // performance significantly.
     if (ARROW_PREDICT_FALSE(state_ != NULL)) {
-      DeleteState();
+      if (!state_->is_constant) {
+        DeleteState();
+      }
     }
   }
 
@@ -366,6 +369,7 @@ class ARROW_EXPORT [[nodiscard]] Status : public 
util::EqualityComparable<Status
  private:
   struct State {
     StatusCode code;
+    bool is_constant;
     std::string msg;
     std::shared_ptr<StatusDetail> detail;
   };
@@ -373,22 +377,28 @@ class ARROW_EXPORT [[nodiscard]] Status : public 
util::EqualityComparable<Status
   // a `State` structure containing the error code and message(s)
   State* state_;
 
-  void DeleteState() {
+  void DeleteState() noexcept {
+    // ARROW-2400: On certain compilers, splitting off the slow path improves
+    // performance significantly.
     delete state_;
-    state_ = NULLPTR;
   }
   void CopyFrom(const Status& s);
   inline void MoveFrom(Status& s);
+
+  friend class internal::StatusConstant;
 };
 
 void Status::MoveFrom(Status& s) {
-  delete state_;
+  if (ARROW_PREDICT_FALSE(state_ != NULL)) {
+    if (!state_->is_constant) {
+      DeleteState();
+    }
+  }
   state_ = s.state_;
   s.state_ = NULLPTR;
 }
 
-Status::Status(const Status& s)
-    : state_((s.state_ == NULLPTR) ? NULLPTR : new State(*s.state_)) {}
+Status::Status(const Status& s) : state_{NULLPTR} { CopyFrom(s); }
 
 Status& Status::operator=(const Status& s) {
   // The following condition catches both aliasing (when this == &s),
diff --git a/cpp/src/arrow/result.cc b/cpp/src/arrow/status_internal.h
similarity index 58%
copy from cpp/src/arrow/result.cc
copy to cpp/src/arrow/status_internal.h
index 0bb65acb83..f070bc208d 100644
--- a/cpp/src/arrow/result.cc
+++ b/cpp/src/arrow/status_internal.h
@@ -15,22 +15,30 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include "arrow/result.h"
-
-#include <string>
+#pragma once
 
+#include "arrow/status.h"
 #include "arrow/util/logging.h"
 
-namespace arrow {
-
-namespace internal {
+namespace arrow::internal {
 
-void DieWithMessage(const std::string& msg) { ARROW_LOG(FATAL) << msg; }
+class StatusConstant {
+ public:
+  StatusConstant(StatusCode code, std::string msg,
+                 std::shared_ptr<StatusDetail> detail = nullptr)
+      : state_{code, /*is_constant=*/true, std::move(msg), std::move(detail)} {
+    ARROW_CHECK_NE(code, StatusCode::OK)
+        << "StatusConstant is not intended for use with OK status codes";
+  }
 
-void InvalidValueOrDie(const Status& st) {
-  DieWithMessage(std::string("ValueOrDie called on an error: ") + 
st.ToString());
-}
+  operator Status() {  // NOLINT(runtime/explicit)
+    Status st;
+    st.state_ = &state_;
+    return st;
+  }
 
-}  // namespace internal
+ private:
+  Status::State state_;
+};
 
-}  // namespace arrow
+}  // namespace arrow::internal
diff --git a/cpp/src/arrow/status_test.cc b/cpp/src/arrow/status_test.cc
index b446705933..005bdf665f 100644
--- a/cpp/src/arrow/status_test.cc
+++ b/cpp/src/arrow/status_test.cc
@@ -21,6 +21,7 @@
 #include <gtest/gtest.h>
 
 #include "arrow/status.h"
+#include "arrow/status_internal.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/matchers.h"
 
@@ -76,6 +77,27 @@ TEST(StatusTest, TestCoverageWarnNotOK) {
   ARROW_WARN_NOT_OK(Status::Invalid("invalid"), "Expected warning");
 }
 
+TEST(StatusTest, StatusConstant) {
+  internal::StatusConstant constant{StatusCode::Invalid, "default error"};
+  Status st = constant;
+
+  ASSERT_EQ(st.code(), StatusCode::Invalid);
+  ASSERT_EQ(st.message(), "default error");
+  ASSERT_EQ(st.detail(), nullptr);
+
+  Status copy = st;
+  ASSERT_EQ(&st.message(), &copy.message());
+  Status moved = std::move(st);
+  ASSERT_EQ(&copy.message(), &moved.message());
+  ASSERT_OK(st);
+
+  Status other = constant;
+  ASSERT_EQ(other.code(), StatusCode::Invalid);
+  ASSERT_EQ(other.message(), "default error");
+  ASSERT_EQ(other.detail(), nullptr);
+  ASSERT_EQ(&other.message(), &moved.message());
+}
+
 TEST(StatusTest, AndStatus) {
   Status a = Status::OK();
   Status b = Status::OK();

Reply via email to