Skip to content

Commit 560d450

Browse files
vitess-bot[bot] and arthurschreiber
authored and committed
[release-22.0] connpool: fix connection leak during idle connection reopen (vitessio#18967) (vitessio#18970)
Signed-off-by: Arthur Schreiber <[email protected]> Co-authored-by: Arthur Schreiber <[email protected]>
1 parent 04da7bc commit 560d450

File tree

2 files changed

+105
-4
lines changed

2 files changed

+105
-4
lines changed

go/pools/smartconnpool/pool.go

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -435,9 +435,13 @@ func (pool *ConnPool[C]) pop(stack *connStack[C]) *Pooled[C] {
435435
// to expire this connection (even if it's still visible to them), so it's
436436
// safe to return it
437437
for conn, ok := stack.Pop(); ok; conn, ok = stack.Pop() {
438-
if conn.timeUsed.borrow() {
439-
return conn
438+
if !conn.timeUsed.borrow() {
439+
// Ignore the connection that couldn't be borrowed;
440+
// it's being closed by the idle worker and replaced by a new connection.
441+
continue
440442
}
443+
444+
return conn
441445
}
442446
return nil
443447
}
@@ -742,11 +746,23 @@ func (pool *ConnPool[C]) closeIdleResources(now time.Time) {
742746
for conn := s.Peek(); conn != nil; conn = conn.next.Load() {
743747
if conn.timeUsed.expired(mono, timeout) {
744748
pool.Metrics.idleClosed.Add(1)
749+
745750
conn.Close()
751+
pool.closedConn()
752+
746753
// Using context.Background() is fine since MySQL connection already enforces
747754
// a connect timeout via the `db_connect_timeout_ms` config param.
748-
if err := pool.connReopen(context.Background(), conn, mono); err != nil {
749-
pool.closedConn()
755+
c, err := pool.getNew(context.Background())
756+
if err != nil {
757+
// If we couldn't open a new connection, just continue
758+
continue
759+
}
760+
761+
// opening a new connection might have raced with other goroutines,
762+
// so it's possible that we got back `nil` here
763+
if c != nil {
764+
// Return the new connection to the pool
765+
pool.tryReturnConn(c)
750766
}
751767
}
752768
}

go/pools/smartconnpool/pool_test.go

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1280,3 +1280,88 @@ func TestCloseDuringWaitForConn(t *testing.T) {
12801280
require.EqualValues(t, 0, state.open.Load())
12811281
}
12821282
}
1283+
1284+
// TestIdleTimeoutConnectionLeak checks for leaked connections after idle timeout
1285+
func TestIdleTimeoutConnectionLeak(t *testing.T) {
1286+
var state TestState
1287+
1288+
// Slow connection creation to ensure idle timeout happens during reopening
1289+
state.chaos.delayConnect = 300 * time.Millisecond
1290+
1291+
p := NewPool(&Config[*TestConn]{
1292+
Capacity: 2,
1293+
IdleTimeout: 50 * time.Millisecond,
1294+
LogWait: state.LogWait,
1295+
}).Open(newConnector(&state), nil)
1296+
1297+
getCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
1298+
defer cancel()
1299+
1300+
// Get and return two connections
1301+
conn1, err := p.Get(getCtx, nil)
1302+
require.NoError(t, err)
1303+
1304+
conn2, err := p.Get(getCtx, nil)
1305+
require.NoError(t, err)
1306+
1307+
p.put(conn1)
1308+
p.put(conn2)
1309+
1310+
// At this point: Active=2, InUse=0, Available=2
1311+
require.EqualValues(t, 2, p.Active())
1312+
require.EqualValues(t, 0, p.InUse())
1313+
require.EqualValues(t, 2, p.Available())
1314+
1315+
// Wait for idle timeout to kick in and start expiring connections
1316+
require.EventuallyWithT(t, func(c *assert.CollectT) {
1317+
// Check the actual number of currently open connections
1318+
assert.Equal(c, int64(2), state.open.Load())
1319+
// Check the total number of closed connections
1320+
assert.Equal(c, int64(1), state.close.Load())
1321+
}, 100*time.Millisecond, 10*time.Millisecond)
1322+
1323+
// At this point, the idle timeout worker has expired the connections
1324+
// and is trying to reopen them (which takes 300ms due to delayConnect)
1325+
1326+
// Try to get connections while they're being reopened
1327+
// This should trigger the bug where connections get discarded
1328+
for i := 0; i < 2; i++ {
1329+
getCtx, cancel := context.WithTimeout(t.Context(), 50*time.Millisecond)
1330+
defer cancel()
1331+
1332+
conn, err := p.Get(getCtx, nil)
1333+
require.NoError(t, err)
1334+
1335+
p.put(conn)
1336+
}
1337+
1338+
// Wait a moment for all reopening to complete
1339+
require.EventuallyWithT(t, func(c *assert.CollectT) {
1340+
// Check the actual number of currently open connections
1341+
require.Equal(c, int64(2), state.open.Load())
1342+
// Check the total number of closed connections
1343+
require.Equal(c, int64(2), state.close.Load())
1344+
}, 400*time.Millisecond, 10*time.Millisecond)
1345+
1346+
// Check the pool state
1347+
assert.Equal(t, int64(2), p.Active())
1348+
assert.Equal(t, int64(0), p.InUse())
1349+
assert.Equal(t, int64(2), p.Available())
1350+
assert.Equal(t, int64(2), p.Metrics.IdleClosed())
1351+
1352+
// Try to close the pool - if there are leaked connections, this will timeout
1353+
closeCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
1354+
defer cancel()
1355+
1356+
err = p.CloseWithContext(closeCtx)
1357+
require.NoError(t, err)
1358+
1359+
// Pool should be completely closed now
1360+
assert.Equal(t, int64(0), p.Active())
1361+
assert.Equal(t, int64(0), p.InUse())
1362+
assert.Equal(t, int64(0), p.Available())
1363+
assert.Equal(t, int64(2), p.Metrics.IdleClosed())
1364+
1365+
assert.Equal(t, int64(0), state.open.Load())
1366+
assert.Equal(t, int64(4), state.close.Load())
1367+
}

0 commit comments

Comments (0)