This is an automated email from the ASF dual-hosted git repository.
reshke pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/cloudberry.git
The following commit(s) were added to refs/heads/main by this push:
new c305e7ba961 Fix std::terminate() caused by uncaught exception in ORCA
(#1481)
c305e7ba961 is described below
commit c305e7ba961b9d8983b4f01462054bbaee828bbb
Author: NJrslv <[email protected]>
AuthorDate: Fri Dec 19 13:59:01 2025 +0300
Fix std::terminate() caused by uncaught exception in ORCA (#1481)
### What does this commit do?
This PR fixes a crash, `std::terminate()`, caused by an exception occurring
in the catch block of `GPOPTOptimizedPlan()` during
`gpopt_context.CloneErrorMsg()`.
When `CloneErrorMsg` fails (typically due to OOM), it throws a `gpdb`
error, causing `std::terminate()`.
Postgres has graceful error handling for errors that occur while processing
other errors - even during OOM, the `ErrorContext` has reserved space to
preserve error messages.
So rethrow the error to postgres handler.
### Test Plan
Added a new test to the regression `gporca_fault` test.
I added a query that errors out during optimization in orca and as the
result falls into `catch` block where another exception in `CloneErrorMsg` is
thrown via inject fault.
Previously it resulted in the `std::terminate()` call & process crashing.
Now catch the exception and let the postgres handle it with reserved buffer to
preserve error message.
### Impact
Instead of
```sql
server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
```
postgres reports an error (in case of OOM we will see it, not
std::terminate with exiting processes):
```sql
ERROR: ...
```
---
src/backend/gpopt/CGPOptimizer.cpp | 5 +++--
src/backend/gpopt/utils/COptTasks.cpp | 25 ++++++++++++++++++++--
src/include/gpopt/utils/COptTasks.h | 2 +-
src/test/regress/expected/gporca_faults.out | 25 ++++++++++++++++++++++
.../regress/expected/gporca_faults_optimizer.out | 21 ++++++++++++++++++
src/test/regress/sql/gporca_faults.sql | 12 +++++++++++
6 files changed, 85 insertions(+), 5 deletions(-)
diff --git a/src/backend/gpopt/CGPOptimizer.cpp
b/src/backend/gpopt/CGPOptimizer.cpp
index 11d08f8438a..a0253e44b41 100644
--- a/src/backend/gpopt/CGPOptimizer.cpp
+++ b/src/backend/gpopt/CGPOptimizer.cpp
@@ -63,8 +63,9 @@ CGPOptimizer::GPOPTOptimizedPlan(
GPOS_CATCH_EX(ex)
{
// clone the error message before context free.
+ BOOL clone_failed = false;
CHAR *serialized_error_msg =
- gpopt_context.CloneErrorMsg(MessageContext);
+ gpopt_context.CloneErrorMsg(MessageContext,
&clone_failed);
// clean up context
gpopt_context.Free(gpopt_context.epinQuery,
gpopt_context.epinPlStmt);
@@ -72,7 +73,7 @@ CGPOptimizer::GPOPTOptimizedPlan(
// we want to use the correct error code for these, in case an
application
// tries to do something smart with them.
- if (GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB, gpdxl::ExmiGPDBError))
+ if (clone_failed || GPOS_MATCH_EX(ex, gpdxl::ExmaGPDB,
gpdxl::ExmiGPDBError))
{
PG_RE_THROW();
}
diff --git a/src/backend/gpopt/utils/COptTasks.cpp
b/src/backend/gpopt/utils/COptTasks.cpp
index dd0e61116d8..6bd93379f0f 100644
--- a/src/backend/gpopt/utils/COptTasks.cpp
+++ b/src/backend/gpopt/utils/COptTasks.cpp
@@ -162,13 +162,34 @@ SOptContext::Free(SOptContext::EPin input,
SOptContext::EPin output) const
//
//---------------------------------------------------------------------------
CHAR *
-SOptContext::CloneErrorMsg(MemoryContext context) const
+SOptContext::CloneErrorMsg(MemoryContext context, BOOL *clone_failed) const
{
+ *clone_failed = false;
+
if (nullptr == context || nullptr == m_error_msg)
{
return nullptr;
}
- return gpdb::MemCtxtStrdup(context, m_error_msg);
+
+ CHAR *error_msg;
+ GPOS_TRY
+ {
+#ifdef FAULT_INJECTOR
+ if (gpdb::InjectFaultInOptTasks("opt_clone_error_msg") ==
FaultInjectorTypeSkip)
+ {
+ GpdbEreport(ERRCODE_INTERNAL_ERROR, ERROR, "Injected
error", nullptr);
+ }
+#endif
+ error_msg = gpdb::MemCtxtStrdup(context, m_error_msg);
+ }
+ GPOS_CATCH_EX(ex)
+ {
+ error_msg = nullptr;
+ *clone_failed = true;
+ }
+ GPOS_CATCH_END;
+
+ return error_msg;
}
diff --git a/src/include/gpopt/utils/COptTasks.h
b/src/include/gpopt/utils/COptTasks.h
index 3fa5f91216c..bed8f36d51a 100644
--- a/src/include/gpopt/utils/COptTasks.h
+++ b/src/include/gpopt/utils/COptTasks.h
@@ -107,7 +107,7 @@ struct SOptContext
void Free(EPin input, EPin epinOutput) const;
// Clone the error message in given context.
- CHAR *CloneErrorMsg(struct MemoryContextData *context) const;
+ CHAR *CloneErrorMsg(struct MemoryContextData *context, BOOL
*clone_failed) const;
// casting function
static SOptContext *Cast(void *ptr);
diff --git a/src/test/regress/expected/gporca_faults.out
b/src/test/regress/expected/gporca_faults.out
index c8e6f7ff557..e60f3b048b7 100644
--- a/src/test/regress/expected/gporca_faults.out
+++ b/src/test/regress/expected/gporca_faults.out
@@ -70,3 +70,28 @@ SELECT
gp_inject_fault('opt_relcache_translator_catalog_access', 'reset', 1);
Success:
(1 row)
+-- Test to check that GPOPTOptimizedPlan() does not cause std::terminate() by
throwing an uncaught exception.
+CREATE TABLE test_orca_uncaught_exc(a int, b int) DISTRIBUTED RANDOMLY;
+-- Since ORCA cannot optimize this query, an exception is generated. We then
inject a second exception when the
+-- first is caught and verify that it is not propagated further (no
std::terminate() is called and backend is alive).
+SELECT gp_inject_fault('opt_clone_error_msg', 'skip', 1);
+ gp_inject_fault
+-----------------
+ Success:
+(1 row)
+
+SELECT sum(distinct a), count(distinct b) FROM test_orca_uncaught_exc;
+ sum | count
+-----+-------
+ | 0
+(1 row)
+
+SELECT gp_inject_fault('opt_clone_error_msg', 'reset', 1);
+ gp_inject_fault
+-----------------
+ Success:
+(1 row)
+
+-- start_ignore
+DROP TABLE test_orca_uncaught_exc;
+-- end_ignore
diff --git a/src/test/regress/expected/gporca_faults_optimizer.out
b/src/test/regress/expected/gporca_faults_optimizer.out
index 46ca0efe7ad..32b5b317298 100644
--- a/src/test/regress/expected/gporca_faults_optimizer.out
+++ b/src/test/regress/expected/gporca_faults_optimizer.out
@@ -62,3 +62,24 @@ SELECT
gp_inject_fault('opt_relcache_translator_catalog_access', 'reset', 1);
Success:
(1 row)
+-- Test to check that GPOPTOptimizedPlan() does not cause std::terminate() by
throwing an uncaught exception.
+CREATE TABLE test_orca_uncaught_exc(a int, b int) DISTRIBUTED RANDOMLY;
+-- Since ORCA cannot optimize this query, an exception is generated. We then
inject a second exception when the
+-- first is caught and verify that it is not propagated further (no
std::terminate() is called and backend is alive).
+SELECT gp_inject_fault('opt_clone_error_msg', 'skip', 1);
+ gp_inject_fault
+-----------------
+ Success:
+(1 row)
+
+SELECT sum(distinct a), count(distinct b) FROM test_orca_uncaught_exc;
+ERROR: Injected error (COptTasks.cpp:180)
+SELECT gp_inject_fault('opt_clone_error_msg', 'reset', 1);
+ gp_inject_fault
+-----------------
+ Success:
+(1 row)
+
+-- start_ignore
+DROP TABLE test_orca_uncaught_exc;
+-- end_ignore
diff --git a/src/test/regress/sql/gporca_faults.sql
b/src/test/regress/sql/gporca_faults.sql
index c9d1634d724..a1d324ad1da 100644
--- a/src/test/regress/sql/gporca_faults.sql
+++ b/src/test/regress/sql/gporca_faults.sql
@@ -30,3 +30,15 @@ SELECT * FROM func1_nosql_vol(5), foo;
-- The fault should *not* be hit above when optimizer = off, to reset it now.
SELECT gp_inject_fault('opt_relcache_translator_catalog_access', 'reset', 1);
+
+-- Test to check that GPOPTOptimizedPlan() does not cause std::terminate() by
throwing an uncaught exception.
+CREATE TABLE test_orca_uncaught_exc(a int, b int) DISTRIBUTED RANDOMLY;
+-- Since ORCA cannot optimize this query, an exception is generated. We then
inject a second exception when the
+-- first is caught and verify that it is not propagated further (no
std::terminate() is called and backend is alive).
+SELECT gp_inject_fault('opt_clone_error_msg', 'skip', 1);
+SELECT sum(distinct a), count(distinct b) FROM test_orca_uncaught_exc;
+SELECT gp_inject_fault('opt_clone_error_msg', 'reset', 1);
+
+-- start_ignore
+DROP TABLE test_orca_uncaught_exc;
+-- end_ignore
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]