This is an automated email from the ASF dual-hosted git repository.

joaoreis pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/cassandra-gocql-driver.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 90bcc9c5 Fix deadlock in refresh debouncer stop
90bcc9c5 is described below

commit 90bcc9c5a1ede363a3f880376300c51a88ed4b5b
Author: João Reis <joaor...@apache.org>
AuthorDate: Thu Apr 24 16:01:41 2025 +0100

    Fix deadlock in refresh debouncer stop
    
    Deadlock could happen when closing the refresh debouncer (i.e. when closing 
the session).
    This would result in either a panic or the goroutine calling 
session.Close() to hang.
    
    patch by João Reis; reviewed by James Hartig, Kevin Kyyro for CASSGO-41
---
 CHANGELOG.md        | 32 +++++---------------------------
 host_source.go      |  5 ++++-
 host_source_test.go | 26 ++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7d0d5c7e..6c6c7cc2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,7 @@ All notable changes to this project will be documented in this 
file.
 The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [2.0.0] - Unreleased
 
 ### Removed
 
@@ -13,47 +13,28 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
-- Support vector type 
[CASSGO-11](https://issues.apache.org/jira/browse/CASSGO-11)
-
-- Allow SERIAL and LOCAL_SERIAL on SELECT statements 
[CASSGO-26](https://issues.apache.org/jira/browse/CASSGO-26)
-
+- Support vector type (CASSGO-11)
+- Allow SERIAL and LOCAL_SERIAL on SELECT statements (CASSGO-26)
 - Support of sending queries to the specific node with Query.SetHostID() 
(CASSGO-4)
-
-- Support for Native Protocol 5. Following protocol changes exposed new API 
-  Query.SetKeyspace(), Query.WithNowInSeconds(), Batch.SetKeyspace(), 
Batch.WithNowInSeconds() (CASSGO-1)
+- Support for Native Protocol 5 (CASSGO-1)
 
 ### Changed
 
 - Move lz4 compressor to lz4 package within the gocql module (CASSGO-32)
-
 - Don't restrict server authenticator unless 
PasswordAuthentictor.AllowedAuthenticators is provided (CASSGO-19)
-
 - Cleanup of deprecated elements (CASSGO-12)
-
 - Remove global NewBatch function (CASSGO-15)
-
 - Detailed description for NumConns (CASSGO-3)
-
 - Change Batch API to be consistent with Query() (CASSGO-7)
-
 - Added Cassandra 4.0 table options support(CASSGO-13)
-
 - Remove deprecated global logger (CASSGO-24)
-
 - Bumped actions/upload-artifact and actions/cache versions to v4 in CI 
workflow (CASSGO-48)
-
 - Keep nil slices in MapScan (CASSGO-44)
-
 - Improve error messages for marshalling (CASSGO-38)
-
 - Remove HostPoolHostPolicy from gocql package (CASSGO-21)
-
 - Standardized spelling of datacenter (CASSGO-35)
-
 - Refactor HostInfo creation and ConnectAddress() method (CASSGO-45)
-
 - gocql.Compressor interface changes to follow append-like design. Bumped Go 
version to 1.19 (CASSGO-1)
-
 - Refactoring hostpool package test and Expose HostInfo creation (CASSGO-59)
 
 - Move "execute batch" methods to Batch type (CASSGO-57)
@@ -62,16 +43,13 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 - Cassandra version unmarshal fix (CASSGO-49)
-
 - Retry policy now takes into account query idempotency (CASSGO-27)
-
 - Don't return error to caller with RetryType Ignore (CASSGO-28)
 - The marshalBigInt return 8 bytes slice in all cases except for big.Int,
   which returns a variable length slice, but should be 8 bytes slice as well 
(CASSGO-2)
-
 - Skip metadata only if the prepared result includes metadata (CASSGO-40)
-
 - Don't panic in MapExecuteBatchCAS if no `[applied]` column is returned 
(CASSGO-42)
+- Fix deadlock in refresh debouncer stop (CASSGO-41)
 
 - Endless query execution fix (CASSGO-50)
 
diff --git a/host_source.go b/host_source.go
index adcf1a72..396bcfe1 100644
--- a/host_source.go
+++ b/host_source.go
@@ -807,6 +807,7 @@ type refreshDebouncer struct {
        timer        *time.Timer
        refreshNowCh chan struct{}
        quit         chan struct{}
+       done         chan struct{}
        refreshFn    func() error
 }
 
@@ -816,6 +817,7 @@ func newRefreshDebouncer(interval time.Duration, refreshFn 
func() error) *refres
                broadcaster:  nil,
                refreshNowCh: make(chan struct{}, 1),
                quit:         make(chan struct{}),
+               done:         make(chan struct{}),
                interval:     interval,
                timer:        time.NewTimer(interval),
                refreshFn:    refreshFn,
@@ -851,6 +853,7 @@ func (d *refreshDebouncer) refreshNow() <-chan error {
 }
 
 func (d *refreshDebouncer) flusher() {
+       defer close(d.done)
        for {
                select {
                case <-d.refreshNowCh:
@@ -899,8 +902,8 @@ func (d *refreshDebouncer) stop() {
        }
        d.stopped = true
        d.mu.Unlock()
-       d.quit <- struct{}{} // sync with flusher
        close(d.quit)
+       <-d.done
 }
 
 // broadcasts an error to multiple channels (listeners)
diff --git a/host_source_test.go b/host_source_test.go
index cd4dfc02..c8f93c78 100644
--- a/host_source_test.go
+++ b/host_source_test.go
@@ -307,6 +307,32 @@ func TestRefreshDebouncer_EventsAfterRefreshNow(t 
*testing.T) {
        }
 }
 
+// https://github.com/gocql/gocql/issues/1752
+func TestRefreshDebouncer_DeadlockOnStop(t *testing.T) {
+       // there's no way to guarantee this bug manifests because it depends on 
which `case` is picked from the `select`
+       // with 4 iterations of this test the deadlock would be hit pretty 
consistently
+       const iterations = 4
+       for i := 0; i < iterations; i++ {
+               refreshCalledCh := make(chan int, 5)
+               refreshDuration := 500 * time.Millisecond
+               fn := func() error {
+                       refreshCalledCh <- 0
+                       time.Sleep(refreshDuration)
+                       return nil
+               }
+               d := newRefreshDebouncer(50*time.Millisecond, fn)
+               timeBeforeRefresh := time.Now()
+               _ = d.refreshNow()
+               <-refreshCalledCh
+               d.debounce()
+               d.stop()
+               timeAfterRefresh := time.Now()
+               if timeAfterRefresh.Sub(timeBeforeRefresh) < refreshDuration {
+                       t.Errorf("refresh debouncer stop() didn't wait until 
flusher stopped")
+               }
+       }
+}
+
 func TestErrorBroadcaster_MultipleListeners(t *testing.T) {
        b := newErrorBroadcaster()
        defer b.stop()


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@cassandra.apache.org
For additional commands, e-mail: commits-h...@cassandra.apache.org

Reply via email to