Skip to content

Commit e419fd9

Browse files
austinywang and claude authored
Fix remote proxy notification spam with cooldown, backoff, and SSH keepalive (#2325) (#2330)
- Add 5-minute per-host cooldown for remote error notifications
- Add exponential backoff (capped at 60s) to proxy broker and session controller retries
- Add default SSH ConnectTimeout/ServerAliveInterval/ServerAliveCountMax to detect dead connections faster
- Fix error status clearing to only reset on actual .connected state

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9e75355 commit e419fd9

File tree

3 files changed

+100
-24
lines changed

3 files changed

+100
-24
lines changed

CLI/cmux.swift

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4163,6 +4163,15 @@ struct CMUXCLI {
41634163
remoteRelayPort: options.remoteRelayPort
41644164
)
41654165
var parts: [String] = ["ssh"]
4166+
if !hasSSHOptionKey(effectiveSSHOptions, key: "ConnectTimeout") {
4167+
parts += ["-o", "ConnectTimeout=6"]
4168+
}
4169+
if !hasSSHOptionKey(effectiveSSHOptions, key: "ServerAliveInterval") {
4170+
parts += ["-o", "ServerAliveInterval=20"]
4171+
}
4172+
if !hasSSHOptionKey(effectiveSSHOptions, key: "ServerAliveCountMax") {
4173+
parts += ["-o", "ServerAliveCountMax=2"]
4174+
}
41664175
if !hasSSHOptionKey(effectiveSSHOptions, key: "SetEnv") {
41674176
parts += ["-o", "SetEnv COLORTERM=truecolor"]
41684177
}

Sources/TerminalNotificationStore.swift

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -733,6 +733,7 @@ final class TerminalNotificationStore: ObservableObject {
733733
notification in
734734
store.playSuppressedNotificationFeedback(for: notification)
735735
}
736+
private var lastNotificationDateByCooldownKey: [String: Date] = [:]
736737
private var indexes = NotificationIndexes()
737738

738739
private init() {
@@ -890,7 +891,29 @@ final class TerminalNotificationStore: ObservableObject {
890891
focusedReadIndicatorByTabId[tabId]
891892
}
892893

893-
func addNotification(tabId: UUID, surfaceId: UUID?, title: String, subtitle: String, body: String) {
894+
func addNotification(
895+
tabId: UUID,
896+
surfaceId: UUID?,
897+
title: String,
898+
subtitle: String,
899+
body: String,
900+
cooldownKey: String? = nil,
901+
cooldownInterval: TimeInterval? = nil
902+
) {
903+
let now = Date()
904+
let resolvedCooldownInterval: TimeInterval?
905+
if let cooldownInterval, cooldownInterval.isFinite, cooldownInterval > 0 {
906+
resolvedCooldownInterval = cooldownInterval
907+
} else {
908+
resolvedCooldownInterval = nil
909+
}
910+
if let cooldownKey,
911+
let resolvedCooldownInterval,
912+
let lastNotificationDate = lastNotificationDateByCooldownKey[cooldownKey],
913+
now.timeIntervalSince(lastNotificationDate) < resolvedCooldownInterval {
914+
return
915+
}
916+
894917
var updated = notifications
895918
var idsToClear: [String] = []
896919
updated.removeAll { existing in
@@ -925,11 +948,14 @@ final class TerminalNotificationStore: ObservableObject {
925948
title: title,
926949
subtitle: subtitle,
927950
body: body,
928-
createdAt: Date(),
951+
createdAt: now,
929952
isRead: false
930953
)
931954
updated.insert(notification, at: 0)
932955
notifications = updated
956+
if let cooldownKey, resolvedCooldownInterval != nil {
957+
lastNotificationDateByCooldownKey[cooldownKey] = now
958+
}
933959
if !idsToClear.isEmpty {
934960
center.removeDeliveredNotificationsOffMain(withIdentifiers: idsToClear)
935961
center.removePendingNotificationRequestsOffMain(withIdentifiers: idsToClear)

Sources/Workspace.swift

Lines changed: 63 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2489,6 +2489,7 @@ private final class WorkspaceRemoteProxyBroker {
24892489
var tunnel: WorkspaceRemoteDaemonProxyTunnel?
24902490
var endpoint: BrowserProxyEndpoint?
24912491
var restartWorkItem: DispatchWorkItem?
2492+
var restartRetryCount = 0
24922493
var subscribers: [UUID: (Update) -> Void] = [:]
24932494

24942495
init(configuration: WorkspaceRemoteConfiguration, remotePath: String) {
@@ -2515,6 +2516,7 @@ private final class WorkspaceRemoteProxyBroker {
25152516
entry = existing
25162517
if existing.remotePath != remotePath {
25172518
existing.remotePath = remotePath
2519+
existing.restartRetryCount = 0
25182520
if existing.tunnel != nil {
25192521
stopEntryRuntimeLocked(existing)
25202522
notifyLocked(existing, update: .connecting)
@@ -2558,12 +2560,13 @@ private final class WorkspaceRemoteProxyBroker {
25582560
// Internal deterministic test hook used by docker regressions to force bind conflicts.
25592561
localPort = forcedLocalPort
25602562
} else {
2563+
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
25612564
guard let allocatedPort = Self.allocateLoopbackPort() else {
25622565
notifyLocked(
25632566
entry,
2564-
update: .error("Failed to allocate local proxy port\(Self.retrySuffix(delay: 3.0))")
2567+
update: .error("Failed to allocate local proxy port\(Self.retrySuffix(delay: retryDelay))")
25652568
)
2566-
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
2569+
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
25672570
return
25682571
}
25692572
localPort = allocatedPort
@@ -2583,28 +2586,33 @@ private final class WorkspaceRemoteProxyBroker {
25832586
entry.tunnel = tunnel
25842587
let endpoint = BrowserProxyEndpoint(host: "127.0.0.1", port: localPort)
25852588
entry.endpoint = endpoint
2589+
entry.restartRetryCount = 0
25862590
notifyLocked(entry, update: .ready(endpoint))
25872591
} catch {
25882592
stopEntryRuntimeLocked(entry)
25892593
let detail = "Failed to start local daemon proxy: \(error.localizedDescription)"
2590-
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: 3.0))"))
2591-
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
2594+
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
2595+
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: retryDelay))"))
2596+
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
25922597
}
25932598
}
25942599

25952600
private func handleTunnelFailureLocked(key: String, detail: String) {
25962601
guard let entry = entries[key], entry.tunnel != nil else { return }
25972602
stopEntryRuntimeLocked(entry)
2598-
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: 3.0))"))
2599-
scheduleRestartLocked(key: key, entry: entry, delay: 3.0)
2603+
let retryDelay = Self.retryDelay(baseDelay: 3.0, retry: entry.restartRetryCount + 1)
2604+
notifyLocked(entry, update: .error("\(detail)\(Self.retrySuffix(delay: retryDelay))"))
2605+
scheduleRestartLocked(key: key, entry: entry, baseDelay: 3.0)
26002606
}
26012607

2602-
private func scheduleRestartLocked(key: String, entry: Entry, delay: TimeInterval) {
2608+
private func scheduleRestartLocked(key: String, entry: Entry, baseDelay: TimeInterval) {
26032609
guard !entry.subscribers.isEmpty else {
26042610
teardownEntryLocked(key: key, entry: entry)
26052611
return
26062612
}
26072613
guard entry.restartWorkItem == nil else { return }
2614+
entry.restartRetryCount += 1
2615+
let retryDelay = Self.retryDelay(baseDelay: baseDelay, retry: entry.restartRetryCount)
26082616

26092617
let workItem = DispatchWorkItem { [weak self] in
26102618
guard let self, let currentEntry = self.entries[key] else { return }
@@ -2618,7 +2626,7 @@ private final class WorkspaceRemoteProxyBroker {
26182626
}
26192627

26202628
entry.restartWorkItem = workItem
2621-
queue.asyncAfter(deadline: .now() + delay, execute: workItem)
2629+
queue.asyncAfter(deadline: .now() + retryDelay, execute: workItem)
26222630
}
26232631

26242632
private func teardownEntryLocked(key: String, entry: Entry) {
@@ -2687,6 +2695,11 @@ private final class WorkspaceRemoteProxyBroker {
26872695
let seconds = max(1, Int(delay.rounded()))
26882696
return " (retry in \(seconds)s)"
26892697
}
2698+
2699+
private static func retryDelay(baseDelay: TimeInterval, retry: Int) -> TimeInterval {
2700+
let exponent = Double(max(0, retry - 1))
2701+
return min(baseDelay * pow(2.0, exponent), 60.0)
2702+
}
26902703
}
26912704

26922705
private final class WorkspaceRemoteCLIRelayServer {
@@ -3170,6 +3183,11 @@ private final class WorkspaceRemoteCLIRelayServer {
31703183
}
31713184

31723185
final class WorkspaceRemoteSessionController {
3186+
private struct RetrySchedule {
3187+
let retry: Int
3188+
let delay: TimeInterval
3189+
}
3190+
31733191
private struct CommandResult {
31743192
let status: Int32
31753193
let stdout: String
@@ -3357,8 +3375,8 @@ final class WorkspaceRemoteSessionController {
33573375
daemonReady = false
33583376
daemonBootstrapVersion = nil
33593377
daemonRemotePath = nil
3360-
let nextRetry = scheduleReconnectLocked(delay: 4.0)
3361-
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
3378+
let retrySchedule = scheduleReconnectLocked(baseDelay: 4.0)
3379+
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
33623380
let detail = "Remote daemon bootstrap failed: \(error.localizedDescription)\(retrySuffix)"
33633381
publishDaemonStatus(.error, detail: detail)
33643382
publishState(.error, detail: detail)
@@ -3371,8 +3389,8 @@ final class WorkspaceRemoteSessionController {
33713389
guard proxyLease == nil else { return }
33723390
guard let remotePath = daemonRemotePath,
33733391
!remotePath.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else {
3374-
let nextRetry = scheduleReconnectLocked(delay: 4.0)
3375-
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 4.0)
3392+
let retrySchedule = scheduleReconnectLocked(baseDelay: 4.0)
3393+
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
33763394
let detail = "Remote daemon did not provide a valid remote path\(retrySuffix)"
33773395
publishDaemonStatus(.error, detail: detail)
33783396
publishState(.error, detail: detail)
@@ -3588,8 +3606,8 @@ final class WorkspaceRemoteSessionController {
35883606
daemonBootstrapVersion = nil
35893607
daemonRemotePath = nil
35903608

3591-
let nextRetry = scheduleReconnectLocked(delay: 2.0)
3592-
let retrySuffix = Self.retrySuffix(retry: nextRetry, delay: 2.0)
3609+
let retrySchedule = scheduleReconnectLocked(baseDelay: 2.0)
3610+
let retrySuffix = Self.retrySuffix(retry: retrySchedule.retry, delay: retrySchedule.delay)
35933611
publishDaemonStatus(
35943612
.error,
35953613
detail: "Remote daemon transport needs re-bootstrap after proxy failure\(retrySuffix)"
@@ -3598,11 +3616,12 @@ final class WorkspaceRemoteSessionController {
35983616
}
35993617

36003618
@discardableResult
3601-
private func scheduleReconnectLocked(delay: TimeInterval) -> Int {
3602-
guard !isStopping else { return reconnectRetryCount }
3619+
private func scheduleReconnectLocked(baseDelay: TimeInterval) -> RetrySchedule {
3620+
let retryNumber = reconnectRetryCount + 1
3621+
let retryDelay = Self.retryDelay(baseDelay: baseDelay, retry: retryNumber)
3622+
guard !isStopping else { return RetrySchedule(retry: retryNumber, delay: retryDelay) }
36033623
reconnectWorkItem?.cancel()
3604-
reconnectRetryCount += 1
3605-
let retryNumber = reconnectRetryCount
3624+
reconnectRetryCount = retryNumber
36063625
let workItem = DispatchWorkItem { [weak self] in
36073626
guard let self else { return }
36083627
self.reconnectWorkItem = nil
@@ -3611,8 +3630,8 @@ final class WorkspaceRemoteSessionController {
36113630
self.beginConnectionAttemptLocked()
36123631
}
36133632
reconnectWorkItem = workItem
3614-
queue.asyncAfter(deadline: .now() + delay, execute: workItem)
3615-
return retryNumber
3633+
queue.asyncAfter(deadline: .now() + retryDelay, execute: workItem)
3634+
return RetrySchedule(retry: retryNumber, delay: retryDelay)
36163635
}
36173636

36183637
private func publishState(_ state: WorkspaceRemoteConnectionState, detail: String?) {
@@ -4853,6 +4872,11 @@ final class WorkspaceRemoteSessionController {
48534872
return " (retry \(retry) in \(seconds)s)"
48544873
}
48554874

4875+
private static func retryDelay(baseDelay: TimeInterval, retry: Int) -> TimeInterval {
4876+
let exponent = Double(max(0, retry - 1))
4877+
return min(baseDelay * pow(2.0, exponent), 60.0)
4878+
}
4879+
48564880
private static func shouldEscalateProxyErrorToBootstrap(_ detail: String) -> Bool {
48574881
let lowered = detail.lowercased()
48584882
return lowered.contains("remote daemon transport failed")
@@ -5542,6 +5566,7 @@ final class Workspace: Identifiable, ObservableObject {
55425566

55435567
private static let remoteErrorStatusKey = "remote.error"
55445568
private static let remotePortConflictStatusKey = "remote.port_conflicts"
5569+
private static let remoteNotificationCooldown: TimeInterval = 5 * 60
55455570
private static let sshControlMasterCleanupQueue = DispatchQueue(
55465571
label: "com.cmux.remote-ssh.control-master-cleanup",
55475572
qos: .utility
@@ -5577,6 +5602,20 @@ final class Workspace: Identifiable, ObservableObject {
55775602
return entry.lowercased().contains("remote proxy unavailable")
55785603
}
55795604

5605+
private func remoteNotificationCooldownKey(target: String) -> String? {
5606+
let rawTarget = (remoteConfiguration?.destination ?? target)
5607+
.trimmingCharacters(in: .whitespacesAndNewlines)
5608+
guard !rawTarget.isEmpty else { return nil }
5609+
let normalizedHost = rawTarget
5610+
.split(separator: "@", maxSplits: 1, omittingEmptySubsequences: false)
5611+
.last
5612+
.map(String.init)?
5613+
.trimmingCharacters(in: .whitespacesAndNewlines)
5614+
.lowercased()
5615+
guard let normalizedHost, !normalizedHost.isEmpty else { return nil }
5616+
return "remote-host:\(normalizedHost)"
5617+
}
5618+
55805619
var focusedSurfaceId: UUID? { focusedPanelId }
55815620
var surfaceDirectories: [UUID: String] {
55825621
get { panelDirectories }
@@ -7165,13 +7204,15 @@ final class Workspace: Identifiable, ObservableObject {
71657204
surfaceId: nil,
71667205
title: notificationTitle,
71677206
subtitle: target,
7168-
body: trimmedDetail
7207+
body: trimmedDetail,
7208+
cooldownKey: remoteNotificationCooldownKey(target: target),
7209+
cooldownInterval: Self.remoteNotificationCooldown
71697210
)
71707211
}
71717212
return
71727213
}
71737214

7174-
if !preserveConnectedStateForRetry && state != .error {
7215+
if state == .connected {
71757216
statusEntries.removeValue(forKey: Self.remoteErrorStatusKey)
71767217
remoteLastErrorFingerprint = nil
71777218
}

0 commit comments

Comments
 (0)