This is an automated email from the ASF dual-hosted git repository.
curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 46a63a926 fix(csharp/src/Drivers/Databricks): Fix HTTP handler chain
ordering to enable retry before exception (#3578)
46a63a926 is described below
commit 46a63a926e81284a37aac6e782174985fa7d3b1e
Author: eric-wang-1990 <[email protected]>
AuthorDate: Thu Oct 16 08:39:42 2025 -0700
fix(csharp/src/Drivers/Databricks): Fix HTTP handler chain ordering to
enable retry before exception (#3578)
## Summary
Reordered HTTP delegating handlers in DatabricksConnection to ensure
RetryHttpHandler processes responses before ThriftErrorMessageHandler
throws exceptions. This fixes a bug where 503 Service Unavailable
responses with Retry-After headers (e.g., during cluster auto-start)
were not being retried.
## Problem
Previously, the handler chain had ThriftErrorMessageHandler as the
innermost handler:
```
ThriftErrorMessageHandler (inner) → RetryHttpHandler (outer) → Network
```
This caused ThriftErrorMessageHandler to process error responses first
and throw exceptions immediately, preventing RetryHttpHandler from
retrying 503 responses during cluster auto-start scenarios.
## Solution
Reordered the chain so RetryHttpHandler is inside
ThriftErrorMessageHandler:
```
RetryHttpHandler (inner) → ThriftErrorMessageHandler (outer) → Network
```
Now responses flow: Network → RetryHttpHandler →
ThriftErrorMessageHandler
With this order:
1. RetryHttpHandler processes 503 responses first and retries them
according to Retry-After headers
2. Only after all retries are exhausted does ThriftErrorMessageHandler
throw exceptions with Thrift error messages
## Changes
- Reordered handlers in `DatabricksConnection.CreateHttpHandler()`
- Added comprehensive documentation explaining handler chain execution
order and why it matters
- Added cross-references in `RetryHttpHandlerTest` and
`ThriftErrorMessageHandlerTest` pointing to the production code
## Test Plan
- ✅ All existing unit tests pass:
- `ThriftErrorMessageHandlerTest`: 11/11 tests pass
- `RetryHttpHandlerTest`: 14/14 tests pass
- The fix will be validated in E2E tests when connecting to Databricks
clusters that need auto-start
## Related Issues
Fixes cluster auto-start retry issues where 503 responses with
Retry-After headers were not being retried.
Co-authored-by: Claude <[email protected]>
---
.../src/Drivers/Databricks/DatabricksConnection.cs | 40 ++++++++++++++++++----
.../Databricks/Unit/RetryHttpHandlerTest.cs | 5 +++
.../Unit/ThriftErrorMessageHandlerTest.cs | 6 ++++
3 files changed, 45 insertions(+), 6 deletions(-)
diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index 89603136b..d6480d214 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -554,11 +554,32 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
HttpMessageHandler baseHandler = base.CreateHttpHandler();
HttpMessageHandler baseAuthHandler =
HiveServer2TlsImpl.NewHttpClientHandler(TlsOptions, _proxyConfigurator);
- // Add Thrift error message handler first (innermost) to capture
x-thriftserver-error-message headers
- baseHandler = new ThriftErrorMessageHandler(baseHandler);
- baseAuthHandler = new ThriftErrorMessageHandler(baseAuthHandler);
-
- // Add tracing handler to propagate W3C trace context if enabled
+ // IMPORTANT: Handler Order Matters!
+ //
+ // HTTP delegating handlers form a chain where execution flows
from outermost to innermost
+ // on the request, and then innermost to outermost on the response.
+ //
+ // Request flow (outer → inner): Handler1 → Handler2 → Handler3 →
Network
+ // Response flow (inner → outer): Network → Handler3 → Handler2 →
Handler1
+ //
+ // Current chain order (outermost to innermost):
+ // 1. OAuth handlers (OAuthDelegatingHandler, etc.) - only on
baseHandler for API requests
+ // 2. ThriftErrorMessageHandler - extracts
x-thriftserver-error-message and throws descriptive exceptions
+ // 3. RetryHttpHandler - retries 408, 502, 503, 504 with
Retry-After support
+ // 4. TracingDelegatingHandler - propagates W3C trace context
+ // 5. Base HTTP handler - actual network communication
+ //
+ // Why this order:
+ // - TracingDelegatingHandler must be innermost (closest to
network) to capture full request timing
+ // - RetryHttpHandler must be INSIDE ThriftErrorMessageHandler so
it can retry 503 responses
+ // (e.g., during cluster auto-start) before
ThriftErrorMessageHandler throws an exception
+ // - ThriftErrorMessageHandler must be OUTSIDE RetryHttpHandler so
it only processes final
+ // error responses after all retry attempts are exhausted
+ // - OAuth handlers are outermost since they modify request
headers and don't need retry logic
+ //
+ // DO NOT change this order without understanding the implications!
+
+ // Add tracing handler to propagate W3C trace context if enabled
(INNERMOST - closest to network)
if (_tracePropagationEnabled)
{
baseHandler = new TracingDelegatingHandler(baseHandler, this,
_traceParentHeaderName, _traceStateEnabled);
@@ -567,11 +588,18 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
if (TemporarilyUnavailableRetry)
{
- // Add retry handler for 503 responses
+ // Add retry handler for 408, 502, 503, 504 responses with
Retry-After support
+ // This must be INSIDE ThriftErrorMessageHandler so retries
happen before exceptions are thrown
baseHandler = new RetryHttpHandler(baseHandler,
TemporarilyUnavailableRetryTimeout);
baseAuthHandler = new RetryHttpHandler(baseAuthHandler,
TemporarilyUnavailableRetryTimeout);
}
+ // Add Thrift error message handler AFTER retry handler (OUTSIDE
in the chain)
+ // This ensures retryable status codes (408, 502, 503, 504) are
retried by RetryHttpHandler
+ // before ThriftErrorMessageHandler throws exceptions with Thrift
error messages
+ baseHandler = new ThriftErrorMessageHandler(baseHandler);
+ baseAuthHandler = new ThriftErrorMessageHandler(baseAuthHandler);
+
if (Properties.TryGetValue(SparkParameters.AuthType, out string?
authType) &&
SparkAuthTypeParser.TryParse(authType, out SparkAuthType
authTypeValue) &&
authTypeValue == SparkAuthType.OAuth)
diff --git a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
index d21fe04e6..3a09fe877 100644
--- a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
+++ b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
@@ -26,6 +26,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit
{
/// <summary>
/// Tests for the RetryHttpHandler class.
+ ///
+ /// IMPORTANT: These tests verify retry behavior in isolation. In
production, RetryHttpHandler
+ /// must be positioned INSIDE (closer to network)
ThriftErrorMessageHandler in the handler chain
+ /// so that retries happen before exceptions are thrown. See
DatabricksConnection.CreateHttpHandler()
+ /// for the correct handler chain ordering and detailed explanation.
/// </summary>
public class RetryHttpHandlerTest
{
diff --git
a/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
b/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
index eba3aee2e..464bccd97 100644
--- a/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
+++ b/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
@@ -27,6 +27,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit
{
/// <summary>
/// Tests for the ThriftErrorMessageHandler class.
+ ///
+ /// IMPORTANT: These tests verify Thrift error message extraction in
isolation. In production,
+ /// ThriftErrorMessageHandler must be positioned OUTSIDE (farther from
network) RetryHttpHandler
+ /// in the handler chain so that retryable errors (408, 502, 503, 504) are
retried before
+ /// exceptions are thrown. See DatabricksConnection.CreateHttpHandler()
for the correct handler
+ /// chain ordering and detailed explanation.
/// </summary>
public class ThriftErrorMessageHandlerTest
{