This is an automated email from the ASF dual-hosted git repository. kenhuuu pushed a commit to branch server-coordinator in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit 8676ca9c86258dd277749e5e6d46d499c04289c8 Author: Ken Hu <[email protected]> AuthorDate: Thu Jun 11 15:15:59 2026 -0700 Ensure enqueued transactions return errors. Makes sure that if a traversal closes the transaction that all subsequent requests meant for that transaction receive a 404 and aren't left waiting for a response. Assisted-by: Claude Code:claude-opus-4-8 --- .../server/transaction/UnmanagedTransaction.java | 22 +++++++++++++++++----- .../GremlinServerHttpTransactionIntegrateTest.java | 8 +++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/gremlin-server/src/main/java/org/apache/tinkerpop/gremlin/server/transaction/UnmanagedTransaction.java b/gremlin-server/src/main/java/org/apache/tinkerpop/gremlin/server/transaction/UnmanagedTransaction.java index c3d3b8a274..3dfc394204 100644 --- a/gremlin-server/src/main/java/org/apache/tinkerpop/gremlin/server/transaction/UnmanagedTransaction.java +++ b/gremlin-server/src/main/java/org/apache/tinkerpop/gremlin/server/transaction/UnmanagedTransaction.java @@ -165,12 +165,24 @@ public class UnmanagedTransaction { } } - // prevent any additional requests from processing. if the kill was not "forced" then jobs were scheduled to - // try to rollback open transactions. those jobs either timed-out or completed successfully. either way, no - // additional jobs will be allowed, running jobs will be cancelled (if possible) and any scheduled jobs will - // be cancelled - executor.shutdownNow(); + // ORDERING IS LOAD-BEARING: manager.destroy(transactionId) MUST happen before executor.shutdown(). + // + // We use a graceful shutdown() rather than shutdownNow(). A sibling request for this transaction may already + // be queued behind the commit/rollback that triggered this close (single-threaded executor). shutdownNow() + // would drain and silently discard those queued tasks, leaving their HTTP clients with no response (a hang) - + // this was observed as an intermittent CI hang. shutdown() instead lets each queued task run to completion + // (so every request gets a response) while still terminating the worker thread once the queue drains, so the + // transaction thread is not leaked. shutdown() also avoids the self-interrupt shutdownNow() caused when close() + // runs on the tx thread itself (commit/rollback), which could corrupt the in-flight response write. + // + // Because those queued tasks now actually RUN, correctness depends on them NOT mutating a committed/rolled-back + // transaction. Removing the transaction from the manager FIRST guarantees that: when a queued sibling task + // runs, its pre-evaluation guard (transactionManager.get(txId)) finds nothing and fails fast with a 404 + // (transaction not found) before reaching evaluation. If the destroy were moved after shutdown(), a queued + // task (e.g. an addV submitted after a commit) could execute against the transaction and leak data. Do not + // reorder these two statements. manager.destroy(transactionId); + executor.shutdown(); Optional.ofNullable(timeoutFuture.get()).ifPresent(f -> f.cancel(true)); logger.debug("Transaction {} closed", transactionId); } diff --git a/gremlin-server/src/test/java/org/apache/tinkerpop/gremlin/server/GremlinServerHttpTransactionIntegrateTest.java b/gremlin-server/src/test/java/org/apache/tinkerpop/gremlin/server/GremlinServerHttpTransactionIntegrateTest.java index 156b2773dd..95895e66b4 100644 --- a/gremlin-server/src/test/java/org/apache/tinkerpop/gremlin/server/GremlinServerHttpTransactionIntegrateTest.java +++ b/gremlin-server/src/test/java/org/apache/tinkerpop/gremlin/server/GremlinServerHttpTransactionIntegrateTest.java @@ -467,13 +467,19 @@ public class GremlinServerHttpTransactionIntegrateTest extends AbstractGremlinSe public void shouldNotLeakDataWhenTraversalQueuedBehindCommit() throws Exception { final String txId = beginTx(client, GTX); - // add vertices and an edge in the transaction + // add two vertices and an edge between them in the transaction. the edge is required so that the + // "long traversal" below (repeat(both())) actually has something to traverse and keeps the single + // transaction executor occupied while the commit and short query queue behind it. without an edge, + // both() yields nothing and the traversal returns immediately, defeating the ordering this test relies on. try (final CloseableHttpResponse r = submitInTx(client, txId, "g.addV().property(T.id, 1)", GTX)) { assertEquals(200, r.getStatusLine().getStatusCode()); } try (final CloseableHttpResponse r = submitInTx(client, txId, "g.addV().property(T.id, 2)", GTX)) { assertEquals(200, r.getStatusLine().getStatusCode()); } + try (final CloseableHttpResponse r = submitInTx(client, txId, "g.V(1).addE('self').to(__.V(2))", GTX)) { + assertEquals(200, r.getStatusLine().getStatusCode()); + } // Fire three requests concurrently: a long traversal to occupy the server executor, then a commit that queues // behind it, then a short query that queues behind the commit. The short query should fail with 404 because the
