This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new 46a63a926 fix(csharp/src/Drivers/Databricks): Fix HTTP handler chain 
ordering to enable retry before exception (#3578)
46a63a926 is described below

commit 46a63a926e81284a37aac6e782174985fa7d3b1e
Author: eric-wang-1990 <[email protected]>
AuthorDate: Thu Oct 16 08:39:42 2025 -0700

    fix(csharp/src/Drivers/Databricks): Fix HTTP handler chain ordering to 
enable retry before exception (#3578)
    
    ## Summary
    Reordered HTTP delegating handlers in DatabricksConnection to ensure
    RetryHttpHandler processes responses before ThriftErrorMessageHandler
    throws exceptions. This fixes a bug where 503 Service Unavailable
    responses with Retry-After headers (e.g., during cluster auto-start)
    were not being retried.
    
    ## Problem
    Previously, the handler chain had ThriftErrorMessageHandler as the
    innermost handler:
    ```
    ThriftErrorMessageHandler (inner) → RetryHttpHandler (outer) → Network
    ```
    
    This caused ThriftErrorMessageHandler to process error responses first
    and throw exceptions immediately, preventing RetryHttpHandler from
    retrying 503 responses during cluster auto-start scenarios.
    
    ## Solution
    Reordered the chain so RetryHttpHandler is inside
    ThriftErrorMessageHandler:
    ```
    RetryHttpHandler (inner) → ThriftErrorMessageHandler (outer) → Network
    ```
    
    Now responses flow: Network → RetryHttpHandler →
    ThriftErrorMessageHandler
    
    With this order:
    1. RetryHttpHandler processes 503 responses first and retries them
    according to Retry-After headers
    2. Only after all retries are exhausted does ThriftErrorMessageHandler
    throw exceptions with Thrift error messages
    
    ## Changes
    - Reordered handlers in `DatabricksConnection.CreateHttpHandler()`
    - Added comprehensive documentation explaining handler chain execution
    order and why it matters
    - Added cross-references in `RetryHttpHandlerTest` and
    `ThriftErrorMessageHandlerTest` pointing to the production code
    
    ## Test Plan
    - ✅ All existing unit tests pass:
      - `ThriftErrorMessageHandlerTest`: 11/11 tests pass
      - `RetryHttpHandlerTest`: 14/14 tests pass
    - The fix will be validated in E2E tests when connecting to Databricks
    clusters that need auto-start
    
    ## Related Issues
    Fixes cluster auto-start retry issues where 503 responses with
    Retry-After headers were not being retried.
    
    Co-authored-by: Claude <[email protected]>
---
 .../src/Drivers/Databricks/DatabricksConnection.cs | 40 ++++++++++++++++++----
 .../Databricks/Unit/RetryHttpHandlerTest.cs        |  5 +++
 .../Unit/ThriftErrorMessageHandlerTest.cs          |  6 ++++
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/csharp/src/Drivers/Databricks/DatabricksConnection.cs 
b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
index 89603136b..d6480d214 100644
--- a/csharp/src/Drivers/Databricks/DatabricksConnection.cs
+++ b/csharp/src/Drivers/Databricks/DatabricksConnection.cs
@@ -554,11 +554,32 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
             HttpMessageHandler baseHandler = base.CreateHttpHandler();
             HttpMessageHandler baseAuthHandler = 
HiveServer2TlsImpl.NewHttpClientHandler(TlsOptions, _proxyConfigurator);
 
-            // Add Thrift error message handler first (innermost) to capture 
x-thriftserver-error-message headers
-            baseHandler = new ThriftErrorMessageHandler(baseHandler);
-            baseAuthHandler = new ThriftErrorMessageHandler(baseAuthHandler);
-
-            // Add tracing handler to propagate W3C trace context if enabled
+            // IMPORTANT: Handler Order Matters!
+            //
+            // HTTP delegating handlers form a chain where execution flows 
from outermost to innermost
+            // on the request, and then innermost to outermost on the response.
+            //
+            // Request flow (outer → inner):  Handler1 → Handler2 → Handler3 → 
Network
+            // Response flow (inner → outer): Network → Handler3 → Handler2 → 
Handler1
+            //
+            // Current chain order (outermost to innermost):
+            // 1. OAuth handlers (OAuthDelegatingHandler, etc.) - only on 
baseHandler for API requests
+            // 2. ThriftErrorMessageHandler - extracts 
x-thriftserver-error-message and throws descriptive exceptions
+            // 3. RetryHttpHandler - retries 408, 502, 503, 504 with 
Retry-After support
+            // 4. TracingDelegatingHandler - propagates W3C trace context
+            // 5. Base HTTP handler - actual network communication
+            //
+            // Why this order:
+            // - TracingDelegatingHandler must be innermost (closest to 
network) to capture full request timing
+            // - RetryHttpHandler must be INSIDE ThriftErrorMessageHandler so 
it can retry 503 responses
+            //   (e.g., during cluster auto-start) before 
ThriftErrorMessageHandler throws an exception
+            // - ThriftErrorMessageHandler must be OUTSIDE RetryHttpHandler so 
it only processes final
+            //   error responses after all retry attempts are exhausted
+            // - OAuth handlers are outermost since they modify request 
headers and don't need retry logic
+            //
+            // DO NOT change this order without understanding the implications!
+
+            // Add tracing handler to propagate W3C trace context if enabled 
(INNERMOST - closest to network)
             if (_tracePropagationEnabled)
             {
                 baseHandler = new TracingDelegatingHandler(baseHandler, this, 
_traceParentHeaderName, _traceStateEnabled);
@@ -567,11 +588,18 @@ namespace Apache.Arrow.Adbc.Drivers.Databricks
 
             if (TemporarilyUnavailableRetry)
             {
-                // Add retry handler for 503 responses
+                // Add retry handler for 408, 502, 503, 504 responses with 
Retry-After support
+                // This must be INSIDE ThriftErrorMessageHandler so retries 
happen before exceptions are thrown
                 baseHandler = new RetryHttpHandler(baseHandler, 
TemporarilyUnavailableRetryTimeout);
                 baseAuthHandler = new RetryHttpHandler(baseAuthHandler, 
TemporarilyUnavailableRetryTimeout);
             }
 
+            // Add Thrift error message handler AFTER retry handler (OUTSIDE 
in the chain)
+            // This ensures retryable status codes (408, 502, 503, 504) are 
retried by RetryHttpHandler
+            // before ThriftErrorMessageHandler throws exceptions with Thrift 
error messages
+            baseHandler = new ThriftErrorMessageHandler(baseHandler);
+            baseAuthHandler = new ThriftErrorMessageHandler(baseAuthHandler);
+
             if (Properties.TryGetValue(SparkParameters.AuthType, out string? 
authType) &&
                 SparkAuthTypeParser.TryParse(authType, out SparkAuthType 
authTypeValue) &&
                 authTypeValue == SparkAuthType.OAuth)
diff --git a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs 
b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
index d21fe04e6..3a09fe877 100644
--- a/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
+++ b/csharp/test/Drivers/Databricks/Unit/RetryHttpHandlerTest.cs
@@ -26,6 +26,11 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit
 {
     /// <summary>
     /// Tests for the RetryHttpHandler class.
+    ///
+    /// IMPORTANT: These tests verify retry behavior in isolation. In 
production, RetryHttpHandler
+    /// must be positioned INSIDE (closer to network) 
ThriftErrorMessageHandler in the handler chain
+    /// so that retries happen before exceptions are thrown. See 
DatabricksConnection.CreateHttpHandler()
+    /// for the correct handler chain ordering and detailed explanation.
     /// </summary>
     public class RetryHttpHandlerTest
     {
diff --git 
a/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs 
b/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
index eba3aee2e..464bccd97 100644
--- a/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
+++ b/csharp/test/Drivers/Databricks/Unit/ThriftErrorMessageHandlerTest.cs
@@ -27,6 +27,12 @@ namespace Apache.Arrow.Adbc.Tests.Drivers.Databricks.Unit
 {
     /// <summary>
     /// Tests for the ThriftErrorMessageHandler class.
+    ///
+    /// IMPORTANT: These tests verify Thrift error message extraction in 
isolation. In production,
+    /// ThriftErrorMessageHandler must be positioned OUTSIDE (farther from 
network) RetryHttpHandler
+    /// in the handler chain so that retryable errors (408, 502, 503, 504) are 
retried before
+    /// exceptions are thrown. See DatabricksConnection.CreateHttpHandler() 
for the correct handler
+    /// chain ordering and detailed explanation.
     /// </summary>
     public class ThriftErrorMessageHandlerTest
     {

Reply via email to