This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 338459608a GH-48673: [C++] Fix ToStringWithoutContextLines to check
for :\d+ pattern before removing lines (#48674)
338459608a is described below
commit 338459608a78742af81ae444bc8053f0a2e4cdb4
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Thu Jan 29 20:24:03 2026 +0900
GH-48673: [C++] Fix ToStringWithoutContextLines to check for :\d+ pattern
before removing lines (#48674)
### Rationale for this change
This PR proposes to fix the todo
https://github.com/apache/arrow/blob/7ebc88c8fae62ed97bc30865c845c8061132af7e/cpp/src/arrow/status.cc#L131-L134
which would allows a better parsing for line numbers.
I could not find the relevant example to demonstrate within this project
but assume that we have a test such as:
(Generated by ChatGPT)
```cpp
TEST(BlockParser, ErrorMessageWithColonsPreserved) {
Status st(StatusCode::Invalid,
"CSV parse error: Row #2: Expected 2 columns, got 3:
12:34:56,key:value,data\n"
"Error details: Time format: 12:34:56, Key: value\n"
"parser_test.cc:940 Parse(parser, csv, &out_size)");
std::string expected_msg =
"Invalid: CSV parse error: Row #2: Expected 2 columns, got 3:
12:34:56,key:value,data\n"
"Error details: Time format: 12:34:56, Key: value";
ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_msg, st);
}
// Test with URL-like data (another common case with colons)
TEST(BlockParser, ErrorMessageWithURLPreserved) {
Status st(StatusCode::Invalid,
"CSV parse error: Row #2: Expected 1 columns, got 2:
http://arrow.apache.org:8080/api,data\n"
"URL: http://arrow.apache.org:8080/api\n"
"parser_test.cc:974 Parse(parser, csv, &out_size)");
std::string expected_msg =
"Invalid: CSV parse error: Row #2: Expected 1 columns, got 2:
http://arrow.apache.org:8080/api,data\n"
"URL: http://arrow.apache.org:8080/api";
ASSERT_RAISES_WITH_MESSAGE(Invalid, expected_msg, st);
}
```
then it fails.
### What changes are included in this PR?
Fixed `Status::ToStringWithoutContextLines()` to only remove context lines
matching the `filename:line` pattern (`:\d+`), preventing legitimate error
messages containing colons from being incorrectly stripped.
### Are these changes tested?
Manually tested, and unittests were added, with `cmake .. --preset
ninja-debug -DARROW_EXTRA_ERROR_CONTEXT=ON`.
### Are there any user-facing changes?
No, test-only.
* GitHub Issue: #48673
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/arrow/status.cc | 22 ++++++++++++++++++++--
cpp/src/arrow/status_test.cc | 17 +++++++++++++++++
2 files changed, 37 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/status.cc b/cpp/src/arrow/status.cc
index 55ce3fb78d..4730bca8c6 100644
--- a/cpp/src/arrow/status.cc
+++ b/cpp/src/arrow/status.cc
@@ -13,6 +13,7 @@
#include "arrow/status.h"
#include <cassert>
+#include <cctype>
#include <cstdlib>
#include <iostream>
#ifdef ARROW_EXTRA_ERROR_CONTEXT
@@ -131,8 +132,25 @@ std::string Status::ToStringWithoutContextLines() const {
if (last_new_line_position == std::string::npos) {
break;
}
- // TODO: We may want to check /:\d+ /
- if (message.find(":", last_new_line_position) == std::string::npos) {
+ // Check for the pattern ":\d+ " (colon followed by one or more digits and
a space)
+ // to identify context lines in the format "filename:line expr"
+ auto colon_position = message.find(":", last_new_line_position);
+ if (colon_position == std::string::npos) {
+ break;
+ }
+ // Verify that the colon is followed by one or more digits and then a space
+ size_t pos = colon_position + 1;
+ if (pos >= message.size() ||
+ !std::isdigit(static_cast<unsigned char>(message[pos]))) {
+ break;
+ }
+ // Skip all digits
+ while (pos < message.size() &&
+ std::isdigit(static_cast<unsigned char>(message[pos]))) {
+ pos++;
+ }
+ // Check if followed by a space
+ if (pos >= message.size() || message[pos] != ' ') {
break;
}
message = message.substr(0, last_new_line_position);
diff --git a/cpp/src/arrow/status_test.cc b/cpp/src/arrow/status_test.cc
index 39a52bd2ba..72998cba78 100644
--- a/cpp/src/arrow/status_test.cc
+++ b/cpp/src/arrow/status_test.cc
@@ -342,4 +342,21 @@ TEST(StatusTest, ReturnIfNotOk) {
ASSERT_EQ(StripContext(st.message()), "StatusLike: 43");
}
+#ifdef ARROW_EXTRA_ERROR_CONTEXT
+TEST(StatusTest, ToStringWithoutContextLines) {
+ Status status = Status::IOError("base error");
+ status.AddContextLine("file1.cc", 42, "expr");
+ status.AddContextLine("file2.cc", 100, "expr");
+
+ ASSERT_EQ(status.ToStringWithoutContextLines(), "IOError: base error");
+
+ Status status2(StatusCode::Invalid,
+ "Error message\nThis line has: a colon but no digits");
+ status2.AddContextLine("file.cc", 20, "expr");
+
+ ASSERT_EQ(status2.ToStringWithoutContextLines(),
+ "Invalid: Error message\nThis line has: a colon but no digits");
+}
+#endif
+
} // namespace arrow