From 6afb49ca3b31ab82b8fdc016066b449f2b5cc854 Mon Sep 17 00:00:00 2001 From: Kaguya-19 Date: Fri, 5 Jun 2026 11:56:46 +0800 Subject: [PATCH 1/4] feat(errors): add user-friendly error classification with actionable hints - Extend CanonicalModelErrorCode with billing, model_not_found, context_overflow, image_too_large, payload_too_large - Add userHint + settingsFix fields to CanonicalModelError for actionable user-facing guidance on every classified error - Expand error pattern matching (20+ patterns) covering Ollama, llama.cpp, vLLM, Bedrock, Chinese error messages, etc. - Add 402 disambiguation: billing exhaustion vs transient rate limit - Add sanitizeErrorMessage: extract from HTML error pages, normalize whitespace, truncate overly long messages - Propagate userHint through AgentError and classifyModelError - Make billing/model_not_found/auth_error fallback-eligible - Teach ContextOverflowRecovery about context_overflow + image_too_large Co-authored-by: Cursor <cursoragent@cursor.com> --- src/agent/loop/AgentLoop.ts | 18 +- src/agent/protocol/errors.ts | 10 +- .../recovery/ContextOverflowRecovery.ts | 9 +- src/model/errors/normalizeModelError.ts | 165 +++++++++++++++++- src/model/protocol/errors.ts | 37 ++++ src/router/fallback/runFallbackChain.ts | 16 +- 6 files changed, 239 insertions(+), 16 deletions(-) diff --git a/src/agent/loop/AgentLoop.ts b/src/agent/loop/AgentLoop.ts index 0a1fb696..2df41937 100644 --- a/src/agent/loop/AgentLoop.ts +++ b/src/agent/loop/AgentLoop.ts @@ -828,6 +828,8 @@ export class AgentLoop { errors: [agentError( "agent_tool_error_loop", `Terminated: ${consecutiveAllInvalidTurns} consecutive turns with all tool calls failing input validation. The model appears stuck in a loop.`, + undefined, + "The model is repeatedly producing invalid tool calls. Consider switching to a more capable model via settings.", )], }); yield { type: "turn_failed", sessionId: input.sessionId, turnId: input.turnId, error: result.errors![0]! }; @@ -868,7 +870,12 @@ export class AgentLoop { startedAt, finalMessage, structuredOutput, - errors: [agentError("agent_max_turns_reached", `Reached maximum number of turns (${input.maxTurns}).`)], + errors: [agentError( + "agent_max_turns_reached", + `Reached maximum number of turns (${input.maxTurns}).`, + undefined, + "Max turn limit reached. Increase maxTurns in config or break the task into smaller steps.", + )], }); await captureTurn(result.type === "error"); yield { type: "turn_completed", sessionId: input.sessionId, turnId: input.turnId, result }; @@ -1568,12 +1575,17 @@ function classifyModelError(error: CanonicalModelError): { if (isPromptTooLong(error)) { return { stopReason: "prompt_too_long", - error: agentError("agent_prompt_too_long", error.message, error), + error: agentError( + "agent_prompt_too_long", + error.message, + error, + error.userHint ?? "Input exceeds the model context window. Try /compact to compress history or /new for a fresh session.", + ), }; } return { stopReason: "model_error", - error: agentError("agent_model_error", error.message, error), + error: agentError("agent_model_error", error.message, error, error.userHint), }; } diff --git a/src/agent/protocol/errors.ts b/src/agent/protocol/errors.ts index 674b630f..3c152a06 100644 --- a/src/agent/protocol/errors.ts +++ b/src/agent/protocol/errors.ts @@ -15,6 +15,8 @@ export type AgentError = { code: AgentErrorCode; message: string; details?: unknown; + /** User-facing actionable hint for resolving this error. */ + userHint?: string; }; export class AgentRuntimeError extends Error { @@ -29,8 +31,12 @@ export class AgentRuntimeError extends Error { } } -export function agentError(code: AgentErrorCode, message: string, details?: unknown): AgentError { - return { code, message, details }; +export function agentError(code: AgentErrorCode, message: string, details?: unknown, userHint?: string): AgentError { + const result: AgentError = { code, message, details }; + if (userHint) { + result.userHint = userHint; + } + return result; } export function normalizeAgentError(error: unknown): AgentError { diff --git a/src/context/recovery/ContextOverflowRecovery.ts b/src/context/recovery/ContextOverflowRecovery.ts index 318b4db7..ffef6e1a 100644 --- a/src/context/recovery/ContextOverflowRecovery.ts +++ b/src/context/recovery/ContextOverflowRecovery.ts @@ -27,7 +27,14 @@ export class ContextOverflowRecovery { if (input.error.recoverableViaImageStrip) { return { type: "strip_images_and_retry", reason: "multimodal-processor-error" }; } - if (input.error.code !== "prompt_too_long") { + if (input.error.code === "image_too_large") { + return { type: "strip_images_and_retry", reason: "image-too-large" }; + } + const isContextError = + input.error.code === "prompt_too_long" || + input.error.code === "context_overflow" || + input.error.recoverableViaCompact === true; + if (!isContextError) { return { type: "give_up", reason: `non_recoverable_model_error:${input.error.code}` }; } if (input.hasAttemptedCompact) { diff --git a/src/model/errors/normalizeModelError.ts b/src/model/errors/normalizeModelError.ts index a5ddb527..cda632f2 100644 --- a/src/model/errors/normalizeModelError.ts +++ b/src/model/errors/normalizeModelError.ts @@ -1,12 +1,20 @@ import type { ModelProtocol } from "../protocol/canonical.js"; import { + BILLING_PATTERN, + CONTEXT_OVERFLOW_PATTERN, + IMAGE_TOO_LARGE_PATTERN, MAX_OUTPUT_REACHED_PATTERN, + MODEL_NOT_FOUND_PATTERN, MULTIMODAL_PROCESSOR_PATTERN, PROMPT_TOO_LONG_ANTHROPIC_PATTERN, PROMPT_TOO_LONG_OPENAI_PATTERN, + RATE_LIMIT_MESSAGE_PATTERN, REQUEST_TOO_LARGE_PATTERN, + TRANSIENT_USAGE_SIGNAL_PATTERN, + USAGE_LIMIT_PATTERN, type CanonicalModelError, type CanonicalModelErrorCode, + type SettingsFix, } from "../protocol/errors.js"; export function normalizeModelError( @@ -20,14 +28,18 @@ export function normalizeModelError( const nestedError = record && isRecord(record.error) ? record.error : undefined; const source = nestedError ?? record; - const message = + const rawMessage = readString(source?.message) ?? (error instanceof Error ? error.message : undefined) ?? "Model provider request failed."; + const message = sanitizeErrorMessage(rawMessage); + const semanticCode = classifySemanticError(message, status, protocol); const code: CanonicalModelErrorCode | (string & {}) = - semanticCode ?? readString(source?.code) ?? readString(source?.type) ?? statusCodeToCode(status); + semanticCode ?? readString(source?.code) ?? readString(source?.type) ?? statusCodeToCode(status, message); + + const hint = resolveUserHint(code, message, status, provider); const result: CanonicalModelError = { provider, @@ -37,44 +49,70 @@ export function normalizeModelError( message, retryable: isRetryable(status, code), raw, + ...hint, }; - if (code === "prompt_too_long") { + if (code === "prompt_too_long" || code === "context_overflow") { result.recoverableViaCompact = true; } if (MULTIMODAL_PROCESSOR_PATTERN.test(message)) { result.recoverableViaImageStrip = true; } + if (code === "image_too_large") { + result.recoverableViaImageStrip = true; + } return result; } +/** + * Priority-ordered semantic classification pipeline. + * Matches provider-agnostic error message patterns to canonical codes. + */ function classifySemanticError( message: string, status: number | undefined, protocol: ModelProtocol, ): CanonicalModelErrorCode | undefined { - // Legacy upstream matches "prompt is too long" case-insensitively for Anthropic and Vertex. if (PROMPT_TOO_LONG_ANTHROPIC_PATTERN.test(message)) { return "prompt_too_long"; } - // Legacy OpenAI withRetry path matches the standard OpenAI 400 message. if (PROMPT_TOO_LONG_OPENAI_PATTERN.test(message)) { return "prompt_too_long"; } - // Legacy splits "request too large" (PDF / body size) from PTL token overruns. if (REQUEST_TOO_LARGE_PATTERN.test(message)) { return "request_too_large"; } if (MAX_OUTPUT_REACHED_PATTERN.test(message)) { return "max_output_reached"; } + + if (IMAGE_TOO_LARGE_PATTERN.test(message)) { + return "image_too_large"; + } + if (CONTEXT_OVERFLOW_PATTERN.test(message)) { + return "context_overflow"; + } + if (MODEL_NOT_FOUND_PATTERN.test(message)) { + return "model_not_found"; + } + if (BILLING_PATTERN.test(message)) { + return "billing"; + } + if (RATE_LIMIT_MESSAGE_PATTERN.test(message)) { + return "rate_limit_error"; + } + if (status === 413) { - // 413 with no PTL phrase is treated as request_too_large (Vertex pattern noted in legacy). - return protocol === "anthropic" ? "request_too_large" : "request_too_large"; + return "payload_too_large"; } return undefined; } function isRetryable(status: number | undefined, code: string): boolean { + const nonRetryable = ["auth_error", "billing", "model_not_found", "invalid_request"]; + if (nonRetryable.includes(code)) { + return false; + } + if (status === 408 || status === 409 || status === 429 || (status !== undefined && status >= 500)) { return true; } @@ -82,10 +120,30 @@ function isRetryable(status: number | undefined, code: string): boolean { return ["rate_limit_error", "overloaded_error", "timeout", "server_error"].includes(code); } -function statusCodeToCode(status: number | undefined): string { +/** + * Map HTTP status to canonical code when message-based classification + * didn't match. Includes 402 disambiguation: some providers return + * transient usage-limit errors as 402 instead of 429. + */ +function statusCodeToCode(status: number | undefined, message?: string): string { if (status === 401 || status === 403) { return "auth_error"; } + if (status === 402) { + const msg = message ?? ""; + const hasUsageLimit = USAGE_LIMIT_PATTERN.test(msg); + const hasTransient = TRANSIENT_USAGE_SIGNAL_PATTERN.test(msg); + if (hasUsageLimit && hasTransient) { + return "rate_limit_error"; + } + return "billing"; + } + if (status === 404) { + return "model_not_found"; + } + if (status === 413) { + return "payload_too_large"; + } if (status === 429) { return "rate_limit_error"; } @@ -95,6 +153,95 @@ function statusCodeToCode(status: number | undefined): string { return "provider_error"; } +/** + * Generate user-facing actionable hints based on classified error code. + */ +function resolveUserHint( + code: string, + message: string, + status?: number, + provider?: string, +): { userHint?: string; settingsFix?: SettingsFix } { + switch (code) { + case "billing": + return { + userHint: "API account balance exhausted or quota depleted.", + settingsFix: { + description: "Top up credits or switch to a different provider.", + configPath: "model.provider", + }, + }; + case "auth_error": + return { + userHint: "API key rejected by the provider. Verify the key is valid and not expired.", + settingsFix: { + description: "Reconfigure API key via setup.", + command: "pilotdeck setup", + }, + }; + case "model_not_found": + return { + userHint: "The requested model does not exist or your account lacks access.", + settingsFix: { + description: "Switch to a valid model.", + configPath: "model.default", + }, + }; + case "context_overflow": + case "prompt_too_long": + return { + userHint: "Input exceeds the model context window. Try /compact to compress history or /new for a fresh session.", + }; + case "image_too_large": + return { + userHint: "Image exceeds the provider per-image size limit (typically 5 MB). Resize and retry.", + }; + case "payload_too_large": + case "request_too_large": + return { + userHint: "Request payload too large. Try /compact to reduce context, or start a new session with /new.", + }; + case "rate_limit_error": + return { + userHint: "Rate limited by the provider. The request will be retried automatically after a short wait.", + }; + case "overloaded_error": + return { + userHint: "Provider is temporarily overloaded. Retrying with backoff.", + }; + case "max_output_reached": + return { + userHint: "Model output hit the token limit. The system will attempt to resume automatically.", + }; + case "timeout": + return { + userHint: "Request timed out. Check your network connection; the system will retry.", + }; + case "server_error": + return { + userHint: "Provider returned a server error. Retrying automatically.", + }; + default: + return {}; + } +} + +/** + * Clean raw error messages for user display: + * - Extract <title> from Cloudflare / proxy HTML error pages + * - Normalize whitespace + * - Truncate overly long messages + */ +function sanitizeErrorMessage(raw: string): string { + if (raw.includes("<!DOCTYPE") || raw.includes("<html")) { + const match = raw.match(/<title[^>]*>([^<]+)<\/title>/i); + return match?.[1]?.trim() ?? "Service temporarily unavailable (HTML error page returned)."; + } + + const cleaned = raw.replace(/\s+/g, " ").trim(); + return cleaned.length > 300 ? cleaned.slice(0, 297) + "..." : cleaned; +} + function readString(value: unknown): string | undefined { return typeof value === "string" && value.length > 0 ? value : undefined; } diff --git a/src/model/protocol/errors.ts b/src/model/protocol/errors.ts index 1fd8f34e..7ca222f8 100644 --- a/src/model/protocol/errors.ts +++ b/src/model/protocol/errors.ts @@ -9,8 +9,20 @@ export type CanonicalModelErrorCode = | "overloaded_error" | "invalid_request" | "provider_error" + | "billing" + | "model_not_found" + | "context_overflow" + | "image_too_large" + | "payload_too_large" | "unknown"; +export type SettingsFix = { + description: string; + configPath?: string; + command?: string; + url?: string; +}; + export type CanonicalModelError = { provider: string; protocol: "anthropic" | "openai"; @@ -23,6 +35,10 @@ export type CanonicalModelError = { recoverableViaCompact?: boolean; /** True for multimodal processor errors recoverable by stripping images from context. */ recoverableViaImageStrip?: boolean; + /** User-facing one-line actionable hint for resolving this error. */ + userHint?: string; + /** Structured settings fix info — config path, CLI command, or URL the user can act on. */ + settingsFix?: SettingsFix; }; export const PROMPT_TOO_LONG_ANTHROPIC_PATTERN = /prompt is too long/i; @@ -31,6 +47,27 @@ export const REQUEST_TOO_LARGE_PATTERN = /request too large/i; export const MAX_OUTPUT_REACHED_PATTERN = /max(?:imum)? (?:output|completion) tokens? (?:exceeded|reached)/i; export const MULTIMODAL_PROCESSOR_PATTERN = /failed to apply.*processor/i; +export const CONTEXT_OVERFLOW_PATTERN = + /context length|context size|maximum context|token limit|too many tokens|reduce the length|context window|prompt exceeds max length|max_tokens|maximum number of tokens|exceeds the max_model_len|max_model_len|input is too long|maximum model length|context length exceeded|slot context|n_ctx_slot|超过最大长度|上下文长度|input token|exceeds the maximum number of input tokens/i; + +export const BILLING_PATTERN = + /insufficient credits|insufficient_quota|insufficient balance|credit balance|credits have been exhausted|top up your credits|payment required|billing hard limit|exceeded your current quota|account is deactivated|plan does not include/i; + +export const MODEL_NOT_FOUND_PATTERN = + /is not a valid model|invalid model|model not found|model_not_found|does not exist|no such model|unknown model|unsupported model/i; + +export const IMAGE_TOO_LARGE_PATTERN = + /image exceeds|image too large|image_too_large|image size exceeds/i; + +export const RATE_LIMIT_MESSAGE_PATTERN = + /rate limit|rate_limit|too many requests|throttled|requests per minute|tokens per minute|try again in|please retry after|resource_exhausted/i; + +export const TRANSIENT_USAGE_SIGNAL_PATTERN = + /try again|retry|resets at|reset in|wait|requests remaining|window/i; + +export const USAGE_LIMIT_PATTERN = + /usage limit|quota|limit exceeded|key limit exceeded/i; + export class ModelConfigError extends Error { readonly name = "ModelConfigError"; diff --git a/src/router/fallback/runFallbackChain.ts b/src/router/fallback/runFallbackChain.ts index c779dc76..37f952bc 100644 --- a/src/router/fallback/runFallbackChain.ts +++ b/src/router/fallback/runFallbackChain.ts @@ -30,10 +30,24 @@ export function planFallback( */ const SELF_CORRECTABLE_CODES = new Set(["invalid_tool_arguments"]); +/** + * Non-retryable error codes that should still attempt provider fallback + * because a different provider may succeed (e.g. billing exhaustion on + * one provider, model not found on another). + */ +const FALLBACK_ELIGIBLE_NON_RETRYABLE = new Set([ + "billing", + "model_not_found", + "auth_error", +]); + export function isFallbackEligible(error: CanonicalModelError): boolean { if (SELF_CORRECTABLE_CODES.has(error.code)) { return true; } + if (FALLBACK_ELIGIBLE_NON_RETRYABLE.has(error.code)) { + return true; + } if (!error.retryable) { return false; } @@ -43,7 +57,7 @@ export function isFallbackEligible(error: CanonicalModelError): boolean { if (error.recoverableViaImageStrip) { return false; } - if (error.code === "prompt_too_long" || error.code === "request_too_large") { + if (error.code === "prompt_too_long" || error.code === "request_too_large" || error.code === "context_overflow") { return false; } return true; From e4f617223b1c1bca7a2b03ab92f89b6b8bd86cda Mon Sep 17 00:00:00 2001 From: Kaguya-19 <liyishanthu@gmail.com> Date: Fri, 12 Jun 2026 14:51:47 +0800 Subject: [PATCH 2/4] fix(errors): reorder pattern matching and harden edge cases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. classifySemanticError: move RATE_LIMIT and BILLING patterns before CONTEXT_OVERFLOW to prevent "input tokens per minute" being misclassified as context overflow. 2. statusCodeToCode 402: check BILLING_PATTERN first so explicit billing exhaustion messages are never mistaken for transient rate limits (avoids futile retries). 3. DefaultContextRuntime inline fallback: align with ContextOverflowRecovery — handle image_too_large and context_overflow codes, check recoverableViaCompact flag. Co-authored-by: Cursor <cursoragent@cursor.com> --- src/context/DefaultContextRuntime.ts | 12 +++++++++++- src/model/errors/normalizeModelError.ts | 23 ++++++++++++----------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/context/DefaultContextRuntime.ts b/src/context/DefaultContextRuntime.ts index 51f1eb1d..b9b88402 100644 --- a/src/context/DefaultContextRuntime.ts +++ b/src/context/DefaultContextRuntime.ts @@ -374,7 +374,17 @@ export class DefaultContextRuntime implements ContextRuntime { reason: "multimodal-processor-error", }; } - if (input.error.code !== "prompt_too_long") { + if (input.error.code === "image_too_large") { + return { + type: "strip_images_and_retry", + reason: "image-too-large", + }; + } + const isContextError = + input.error.code === "prompt_too_long" || + input.error.code === "context_overflow" || + input.error.recoverableViaCompact === true; + if (!isContextError) { return { type: "give_up", reason: `non_recoverable_model_error:${input.error.code}`, diff --git a/src/model/errors/normalizeModelError.ts b/src/model/errors/normalizeModelError.ts index cda632f2..db3d3e53 100644 --- a/src/model/errors/normalizeModelError.ts +++ b/src/model/errors/normalizeModelError.ts @@ -85,20 +85,20 @@ function classifySemanticError( return "max_output_reached"; } + if (RATE_LIMIT_MESSAGE_PATTERN.test(message)) { + return "rate_limit_error"; + } + if (BILLING_PATTERN.test(message)) { + return "billing"; + } if (IMAGE_TOO_LARGE_PATTERN.test(message)) { return "image_too_large"; } - if (CONTEXT_OVERFLOW_PATTERN.test(message)) { - return "context_overflow"; - } if (MODEL_NOT_FOUND_PATTERN.test(message)) { return "model_not_found"; } - if (BILLING_PATTERN.test(message)) { - return "billing"; - } - if (RATE_LIMIT_MESSAGE_PATTERN.test(message)) { - return "rate_limit_error"; + if (CONTEXT_OVERFLOW_PATTERN.test(message)) { + return "context_overflow"; } if (status === 413) { @@ -131,9 +131,10 @@ function statusCodeToCode(status: number | undefined, message?: string): string } if (status === 402) { const msg = message ?? ""; - const hasUsageLimit = USAGE_LIMIT_PATTERN.test(msg); - const hasTransient = TRANSIENT_USAGE_SIGNAL_PATTERN.test(msg); - if (hasUsageLimit && hasTransient) { + if (BILLING_PATTERN.test(msg)) { + return "billing"; + } + if (USAGE_LIMIT_PATTERN.test(msg) && TRANSIENT_USAGE_SIGNAL_PATTERN.test(msg)) { return "rate_limit_error"; } return "billing"; From 872373566a99268422000dacaab0527b0cd3c40a Mon Sep 17 00:00:00 2001 From: Kaguya-19 <liyishanthu@gmail.com> Date: Thu, 11 Jun 2026 16:41:51 +0800 Subject: [PATCH 3/4] =?UTF-8?q?feat(resilience):=20=E8=BF=9C=E7=AB=AF=20AP?= =?UTF-8?q?I=20=E9=B2=81=E6=A3=92=E6=80=A7=E6=94=B9=E8=BF=9B=EF=BC=88?= =?UTF-8?q?=E5=AF=B9=E6=A0=87=20Codex=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 解析 Retry-After HTTP 头和错误消息中的 retry hint 2. 流式空闲超时(默认 5 分钟),防止连接假活永久挂起 3. Provider 级别可配置重试策略(provider.retry 生效) 4. Mid-stream 429 重试:利用 checkpoint 续传而非直接终止 5. 重试进度对用户可见(Reconnecting... 2/5) 6. 提供商健康状态追踪(简易熔断 healthy/degraded/open) Co-authored-by: Cursor <cursoragent@cursor.com> --- src/agent/protocol/events.ts | 2 + src/cli/createLocalGateway.ts | 6 + src/gateway/client/InProcessGateway.ts | 37 ++++++ src/model/config/parseModelConfig.ts | 23 +++- src/model/errors/normalizeModelError.ts | 11 +- src/model/index.ts | 3 + src/model/protocol/canonical.ts | 15 ++- src/model/protocol/errors.ts | 45 ++++++- src/model/providers/anthropic/stream.ts | 7 +- src/model/streaming/streamModel.ts | 109 +++++++++++++--- src/router/RouterRuntime.ts | 130 +++++++++++++++++-- src/router/health/ProviderHealthTracker.ts | 139 +++++++++++++++++++++ src/router/index.ts | 5 + src/router/protocol/events.ts | 15 ++- ui/server/pilotdeck-bridge.js | 19 +++ 15 files changed, 529 insertions(+), 37 deletions(-) create mode 100644 src/router/health/ProviderHealthTracker.ts diff --git a/src/agent/protocol/events.ts b/src/agent/protocol/events.ts index b0bef81b..6fa76003 100644 --- a/src/agent/protocol/events.ts +++ b/src/agent/protocol/events.ts @@ -4,6 +4,7 @@ import type { AgentError } from "./errors.js"; import type { AgentTurnResult } from "./result.js"; import type { AgentLoopTransition } from "./state.js"; import type { TokenBudgetSnapshot } from "../../context/budget/TokenBudgetManager.js"; +import type { RouterRetryProgressEvent } from "../../router/protocol/events.js"; export type AgentEvent = | { type: "session_started"; sessionId: string } @@ -51,6 +52,7 @@ export type AgentEvent = | { type: "turn_continued"; sessionId: string; turnId: string; reason: AgentLoopTransition["reason"] } | { type: "turn_completed"; sessionId: string; turnId: string; result: AgentTurnResult } | { type: "turn_failed"; sessionId: string; turnId: string; error: AgentError } + | { type: "retry_progress"; sessionId: string; turnId: string; detail: RouterRetryProgressEvent } | { type: "session_aborted"; sessionId: string; reason?: string }; export type AgentEventEmitter = (event: AgentEvent) => void; diff --git a/src/cli/createLocalGateway.ts b/src/cli/createLocalGateway.ts index c1a38ae9..1ba8cdf2 100644 --- a/src/cli/createLocalGateway.ts +++ b/src/cli/createLocalGateway.ts @@ -431,11 +431,17 @@ class ProjectRuntimeRegistry { renameSync(oldPath, eventsPath); } } catch { /* best-effort migration */ } + const self = this; return { emit(event: RouterEvent) { try { appendFileSync(eventsPath, JSON.stringify(event) + "\n"); } catch { /* best-effort, never crash the agent loop */ } + if (event.type === "pilotdeck_router_retry_progress") { + try { + self.gateway?.broadcastRetryProgress(event); + } catch { /* best-effort */ } + } }, }; } diff --git a/src/gateway/client/InProcessGateway.ts b/src/gateway/client/InProcessGateway.ts index 352b9067..0dc05c7b 100644 --- a/src/gateway/client/InProcessGateway.ts +++ b/src/gateway/client/InProcessGateway.ts @@ -230,6 +230,30 @@ export class InProcessGateway implements Gateway { return true; } + broadcastRetryProgress(detail: { + sessionId: string; + attempt: number; + maxAttempts: number; + delayMs: number; + reason: string; + provider: string; + model: string; + }): void { + const event: GatewayEvent = { + type: "agent_status", + event: "retry_progress", + detail: { + attempt: detail.attempt, + maxAttempts: detail.maxAttempts, + delayMs: detail.delayMs, + reason: detail.reason, + provider: detail.provider, + model: detail.model, + }, + }; + this.emitForSession(detail.sessionId, event); + } + async *submitTurn(input: GatewaySubmitTurnInput): AsyncIterable<GatewayEvent> { // Per-turn config refresh (defensive). The fs watcher path already // catches most edits, but this guarantees a fresh apiKey/url is in @@ -1232,6 +1256,19 @@ export function mapAgentEvent(event: AgentEvent, runId: string): GatewayEvent[] durationMs: event.durationMs, }, }]; + case "retry_progress": + return [{ + type: "agent_status", + event: "retry_progress", + detail: { + attempt: event.detail.attempt, + maxAttempts: event.detail.maxAttempts, + delayMs: event.detail.delayMs, + reason: event.detail.reason, + provider: event.detail.provider, + model: event.detail.model, + }, + }]; case "session_ended": case "user_prompt_submitted": case "setup_completed": diff --git a/src/model/config/parseModelConfig.ts b/src/model/config/parseModelConfig.ts index 4d2f7e33..5a0092fd 100644 --- a/src/model/config/parseModelConfig.ts +++ b/src/model/config/parseModelConfig.ts @@ -11,6 +11,7 @@ import type { ModelDefinition, ModelProtocol, ProviderConfig, + ProviderRetryConfig, } from "../protocol/canonical.js"; import { mergeCapabilities, type ModelCapabilities } from "../protocol/capabilities.js"; import { ModelConfigError } from "../protocol/errors.js"; @@ -101,11 +102,31 @@ function parseProvider(providerId: string, rawProvider: unknown, env?: Credentia timeoutMs: readOptionalPositiveNumber(provider.timeoutMs, "timeoutMs"), headers: readStringRecord(provider.headers, "headers"), extraBody: isRecord(provider.extraBody) ? (provider.extraBody as Record<string, unknown>) : undefined, - retry: isRecord(provider.retry) ? provider.retry : undefined, + retry: parseRetryConfig(provider.retry), models, }; } +function parseRetryConfig(raw: unknown): ProviderRetryConfig | undefined { + if (raw === undefined) return undefined; + if (!isRecord(raw)) return undefined; + const result: ProviderRetryConfig = {}; + const numFields = [ + "requestMaxRetries", "streamMaxRetries", "streamIdleTimeoutMs", + "baseDelayMs", "maxDelayMs", + ] as const; + for (const key of numFields) { + const value = raw[key]; + if (value !== undefined) { + if (typeof value !== "number" || !Number.isFinite(value) || value < 0) { + throw new ModelConfigError("invalid_config_value", `retry.${key} must be a non-negative number.`); + } + result[key] = value; + } + } + return Object.keys(result).length > 0 ? result : undefined; +} + function parseModelDefinition( modelId: string, protocol: ModelProtocol, diff --git a/src/model/errors/normalizeModelError.ts b/src/model/errors/normalizeModelError.ts index db3d3e53..aff87ba1 100644 --- a/src/model/errors/normalizeModelError.ts +++ b/src/model/errors/normalizeModelError.ts @@ -12,6 +12,7 @@ import { REQUEST_TOO_LARGE_PATTERN, TRANSIENT_USAGE_SIGNAL_PATTERN, USAGE_LIMIT_PATTERN, + parseRetryAfterFromMessage, type CanonicalModelError, type CanonicalModelErrorCode, type SettingsFix, @@ -60,6 +61,10 @@ export function normalizeModelError( if (code === "image_too_large") { result.recoverableViaImageStrip = true; } + const retryAfterMs = parseRetryAfterFromMessage(rawMessage); + if (retryAfterMs !== undefined) { + result.retryAfterMs = retryAfterMs; + } return result; } @@ -85,12 +90,12 @@ function classifySemanticError( return "max_output_reached"; } - if (RATE_LIMIT_MESSAGE_PATTERN.test(message)) { - return "rate_limit_error"; - } if (BILLING_PATTERN.test(message)) { return "billing"; } + if (RATE_LIMIT_MESSAGE_PATTERN.test(message)) { + return "rate_limit_error"; + } if (IMAGE_TOO_LARGE_PATTERN.test(message)) { return "image_too_large"; } diff --git a/src/model/index.ts b/src/model/index.ts index 5f8c900d..9a2a81c1 100644 --- a/src/model/index.ts +++ b/src/model/index.ts @@ -50,6 +50,7 @@ export type { ModelDefinition, ModelProtocol, ProviderConfig, + ProviderRetryConfig, } from "./protocol/canonical.js"; export { flattenToolResultBlockText, @@ -77,6 +78,8 @@ export { PROMPT_TOO_LONG_OPENAI_PATTERN, REQUEST_TOO_LARGE_PATTERN, MAX_OUTPUT_REACHED_PATTERN, + parseRetryAfterFromMessage, + parseRetryAfterHeader, type CanonicalModelError, type CanonicalModelErrorCode, } from "./protocol/errors.js"; diff --git a/src/model/protocol/canonical.ts b/src/model/protocol/canonical.ts index f8f7c1e0..acc71c58 100644 --- a/src/model/protocol/canonical.ts +++ b/src/model/protocol/canonical.ts @@ -241,6 +241,19 @@ export type ModelDefinition = { aliases?: string[]; }; +export type ProviderRetryConfig = { + /** Max retries for non-streaming HTTP requests. Default 2. */ + requestMaxRetries?: number; + /** Max retries for dropped SSE streams. Default 2. */ + streamMaxRetries?: number; + /** Idle timeout (ms) for streaming responses before treating as lost. Default 300000 (5 min). */ + streamIdleTimeoutMs?: number; + /** Base delay (ms) for exponential backoff. Default 1000. */ + baseDelayMs?: number; + /** Max delay cap (ms) for backoff. Default 30000. */ + maxDelayMs?: number; +}; + export type ProviderConfig = { id: string; protocol: ModelProtocol; @@ -250,7 +263,7 @@ export type ProviderConfig = { headers: Record<string, string>; /** Arbitrary fields merged into every request body (e.g. OpenRouter provider preferences). */ extraBody?: Record<string, unknown>; - retry?: Record<string, unknown>; + retry?: ProviderRetryConfig; models: Record<string, ModelDefinition>; }; diff --git a/src/model/protocol/errors.ts b/src/model/protocol/errors.ts index 7ca222f8..3ca06de4 100644 --- a/src/model/protocol/errors.ts +++ b/src/model/protocol/errors.ts @@ -35,12 +35,53 @@ export type CanonicalModelError = { recoverableViaCompact?: boolean; /** True for multimodal processor errors recoverable by stripping images from context. */ recoverableViaImageStrip?: boolean; + /** Provider-suggested wait time before retrying (parsed from Retry-After header or error message). */ + retryAfterMs?: number; /** User-facing one-line actionable hint for resolving this error. */ userHint?: string; /** Structured settings fix info — config path, CLI command, or URL the user can act on. */ settingsFix?: SettingsFix; }; +/** + * Parse a provider-suggested retry delay from an error message. + * Covers common phrasings: "try again in 3s", "retry in 500ms", + * "Please try again in 1.898s", "Try again in 35 seconds", etc. + */ +export const RETRY_AFTER_MESSAGE_PATTERN = + /(?:try again|retry)\s+in\s+(\d+(?:\.\d+)?)\s*(ms|milliseconds?|s|seconds?|minutes?|m)\b/i; + +export function parseRetryAfterFromMessage(message: string): number | undefined { + const match = RETRY_AFTER_MESSAGE_PATTERN.exec(message); + if (!match) return undefined; + const value = parseFloat(match[1]); + if (!Number.isFinite(value) || value <= 0) return undefined; + const unit = match[2].toLowerCase(); + if (unit === "ms" || unit.startsWith("millisecond")) return Math.round(value); + if (unit === "s" || unit.startsWith("second")) return Math.round(value * 1000); + if (unit === "m" || unit.startsWith("minute")) return Math.round(value * 60_000); + return undefined; +} + +/** + * Parse the HTTP `Retry-After` header value. + * Supports both delta-seconds ("30") and HTTP-date formats. + */ +export function parseRetryAfterHeader(headerValue: string | null | undefined): number | undefined { + if (!headerValue) return undefined; + const trimmed = headerValue.trim(); + const seconds = Number(trimmed); + if (Number.isFinite(seconds) && seconds >= 0) { + return Math.round(seconds * 1000); + } + const date = Date.parse(trimmed); + if (!Number.isNaN(date)) { + const delta = date - Date.now(); + return delta > 0 ? delta : undefined; + } + return undefined; +} + export const PROMPT_TOO_LONG_ANTHROPIC_PATTERN = /prompt is too long/i; export const PROMPT_TOO_LONG_OPENAI_PATTERN = /input length and max_tokens exceed context limit/i; export const REQUEST_TOO_LARGE_PATTERN = /request too large/i; @@ -48,7 +89,7 @@ export const MAX_OUTPUT_REACHED_PATTERN = /max(?:imum)? (?:output|completion) to export const MULTIMODAL_PROCESSOR_PATTERN = /failed to apply.*processor/i; export const CONTEXT_OVERFLOW_PATTERN = - /context length|context size|maximum context|token limit|too many tokens|reduce the length|context window|prompt exceeds max length|max_tokens|maximum number of tokens|exceeds the max_model_len|max_model_len|input is too long|maximum model length|context length exceeded|slot context|n_ctx_slot|超过最大长度|上下文长度|input token|exceeds the maximum number of input tokens/i; + /context length|context size|maximum context|too many tokens|context window|prompt exceeds max length|maximum number of tokens|exceeds the max_model_len|max_model_len|input is too long|maximum model length|context length exceeded|slot context|n_ctx_slot|超过最大长度|上下文长度|input token|exceeds the maximum number of input tokens/i; export const BILLING_PATTERN = /insufficient credits|insufficient_quota|insufficient balance|credit balance|credits have been exhausted|top up your credits|payment required|billing hard limit|exceeded your current quota|account is deactivated|plan does not include/i; @@ -60,7 +101,7 @@ export const IMAGE_TOO_LARGE_PATTERN = /image exceeds|image too large|image_too_large|image size exceeds/i; export const RATE_LIMIT_MESSAGE_PATTERN = - /rate limit|rate_limit|too many requests|throttled|requests per minute|tokens per minute|try again in|please retry after|resource_exhausted/i; + /rate limit|rate_limit|too many requests|throttled|requests per minute|tokens per minute|resource_exhausted/i; export const TRANSIENT_USAGE_SIGNAL_PATTERN = /try again|retry|resets at|reset in|wait|requests remaining|window/i; diff --git a/src/model/providers/anthropic/stream.ts b/src/model/providers/anthropic/stream.ts index f0acd793..0d7865fc 100644 --- a/src/model/providers/anthropic/stream.ts +++ b/src/model/providers/anthropic/stream.ts @@ -1,6 +1,6 @@ import { jsonrepair } from "jsonrepair"; import type { CanonicalModelEvent, CanonicalToolCall } from "../../protocol/canonical.js"; -import { ModelProviderError } from "../../protocol/errors.js"; +import { ModelProviderError, parseRetryAfterFromMessage } from "../../protocol/errors.js"; import { normalizeAnthropicFinishReason } from "../../response/normalizeFinishReason.js"; import { normalizeAnthropicUsage } from "../../response/normalizeUsage.js"; @@ -63,6 +63,8 @@ export function normalizeAnthropicStreamEvent( const TRANSIENT_ERROR_TYPES = new Set([ "overloaded_error", "rate_limit_error", "api_error", "timeout_error", ]); + const errMessage = readString(errObj.message) ?? "Anthropic stream error."; + const retryAfterMs = parseRetryAfterFromMessage(errMessage); return [ { type: "error", @@ -70,8 +72,9 @@ export function normalizeAnthropicStreamEvent( provider: "anthropic", protocol: "anthropic", code: errType, - message: readString(errObj.message) ?? "Anthropic stream error.", + message: errMessage, retryable: TRANSIENT_ERROR_TYPES.has(errType), + ...(retryAfterMs !== undefined ? { retryAfterMs } : {}), raw, }, }, diff --git a/src/model/streaming/streamModel.ts b/src/model/streaming/streamModel.ts index 0a9a0a9c..8700a316 100644 --- a/src/model/streaming/streamModel.ts +++ b/src/model/streaming/streamModel.ts @@ -8,7 +8,7 @@ import type { ModelProtocol, ProviderConfig, } from "../protocol/canonical.js"; -import { ModelProviderError } from "../protocol/errors.js"; +import { ModelProviderError, parseRetryAfterHeader } from "../protocol/errors.js"; import { parseModelResponse } from "../response/parseModelResponse.js"; import { createStreamNormalizerState, normalizeStreamEvent } from "./normalizeStreamEvent.js"; import { normalizeProviderBaseUrl } from "../normalizeProviderBaseUrl.js"; @@ -42,7 +42,8 @@ export async function complete( return parseModelResponse(provider.protocol, raw, provider.id); } -const MAX_STREAM_RETRIES = 2; +const DEFAULT_STREAM_MAX_RETRIES = 2; +const DEFAULT_RETRY_BASE_DELAY_MS = 1000; export async function* streamModel( request: CanonicalModelRequest, @@ -51,6 +52,8 @@ export async function* streamModel( ): AsyncIterable<CanonicalModelEvent> { const streamingRequest = { ...request, stream: true }; const { provider } = validateModelRequest(streamingRequest, config); + const maxRetries = provider.retry?.streamMaxRetries ?? DEFAULT_STREAM_MAX_RETRIES; + const retryBaseDelay = provider.retry?.baseDelayMs ?? DEFAULT_RETRY_BASE_DELAY_MS; yield { type: "request_started", @@ -63,7 +66,7 @@ export async function* streamModel( let currentRequest = streamingRequest; const checkpoint = new StreamingCheckpointManager(); - for (let attempt = 0; attempt <= MAX_STREAM_RETRIES; attempt++) { + for (let attempt = 0; attempt <= maxRetries; attempt++) { throwIfAborted(options.signal); const body = buildModelRequest(currentRequest, config); if (process.env.PILOTDECK_DUMP_REQUEST === "1") { @@ -76,8 +79,8 @@ export async function* streamModel( try { response = await sendProviderRequest(provider, body, true, options.fetch ?? fetch, options.signal); } catch (error) { - if (attempt < MAX_STREAM_RETRIES && isRetryableStreamError(error)) { - await delay(1000 * (attempt + 1)); + if (attempt < maxRetries && isRetryableStreamError(error)) { + await delay(retryBaseDelay * (attempt + 1)); continue; } throw error; @@ -85,10 +88,14 @@ export async function* streamModel( if (!response.ok) { const raw = await safeReadJson(response); - yield { - type: "error", - error: normalizeModelError(provider.id, provider.protocol, raw, response.status), - }; + const error = normalizeModelError(provider.id, provider.protocol, raw, response.status); + if (error.retryAfterMs === undefined) { + const headerMs = parseRetryAfterHeader(response.headers.get("retry-after")); + if (headerMs !== undefined) { + error.retryAfterMs = headerMs; + } + } + yield { type: "error", error }; return; } @@ -103,8 +110,10 @@ export async function* streamModel( const state = createStreamNormalizerState(provider.protocol); let streamCompleted = false; + const streamIdleTimeoutMs = resolveStreamIdleTimeout(provider); + try { - for await (const rawEvent of readServerSentEvents(response.body, options.signal)) { + for await (const rawEvent of readServerSentEvents(response.body, options.signal, streamIdleTimeoutMs)) { for (const event of normalizeStreamEvent(provider.protocol, rawEvent, state)) { checkpoint.onEvent(event); yield event; @@ -113,18 +122,18 @@ export async function* streamModel( streamCompleted = true; } catch (error) { if ( - attempt < MAX_STREAM_RETRIES && + attempt < maxRetries && isRetryableStreamError(error) && checkpoint.hasSubstantialContent() ) { currentRequest = buildContinuationRequest(currentRequest, checkpoint.get().partialText); checkpoint.reset(); - await delay(1000 * (attempt + 1), options.signal); + await delay(retryBaseDelay * (attempt + 1), options.signal); continue; } - if (isRetryableStreamError(error) && attempt < MAX_STREAM_RETRIES) { - await delay(1000 * (attempt + 1), options.signal); + if (isRetryableStreamError(error) && attempt < maxRetries) { + await delay(retryBaseDelay * (attempt + 1), options.signal); continue; } @@ -144,6 +153,9 @@ function isRetryableStreamError(error: unknown): boolean { if (error instanceof ModelProviderError) { return false; } + if (error instanceof StreamIdleTimeoutError) { + return true; + } if (error instanceof Error) { const msg = error.message.toLowerCase(); return ( @@ -285,13 +297,24 @@ async function safeReadJson(response: Response): Promise<unknown> { } } +const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 300_000; // 5 minutes + +class StreamIdleTimeoutError extends Error { + constructor(idleMs: number) { + super(`Stream idle timeout: no data received for ${idleMs}ms`); + this.name = "StreamIdleTimeoutError"; + } +} + async function* readServerSentEvents( body: ReadableStream<Uint8Array>, signal?: AbortSignal, + idleTimeoutMs?: number, ): AsyncIterable<unknown> { const reader = body.getReader(); const decoder = new TextDecoder(); let buffer = ""; + const effectiveIdleMs = idleTimeoutMs ?? DEFAULT_STREAM_IDLE_TIMEOUT_MS; const cancelReader = () => { reader.cancel(signal?.reason).catch(() => undefined); }; @@ -305,8 +328,9 @@ async function* readServerSentEvents( try { while (true) { throwIfAborted(signal); - const { value, done } = await reader.read(); + const readResult = await readWithIdleTimeout(reader, effectiveIdleMs, signal); throwIfAborted(signal); + const { value, done } = readResult; if (done) { buffer += decoder.decode(); break; @@ -335,6 +359,61 @@ async function* readServerSentEvents( } } +function readWithIdleTimeout( + reader: ReadableStreamDefaultReader<Uint8Array>, + idleMs: number, + signal?: AbortSignal, +): Promise<ReadableStreamReadResult<Uint8Array>> { + return new Promise<ReadableStreamReadResult<Uint8Array>>((resolve, reject) => { + let settled = false; + const timer = setTimeout(() => { + if (!settled) { + settled = true; + reject(new StreamIdleTimeoutError(idleMs)); + } + }, idleMs); + if (typeof timer === "object" && "unref" in timer) { + (timer as NodeJS.Timeout).unref(); + } + const onAbort = () => { + if (!settled) { + settled = true; + clearTimeout(timer); + reject(createAbortError(signal?.reason)); + } + }; + if (signal) { + signal.addEventListener("abort", onAbort, { once: true }); + } + reader.read().then( + (result) => { + if (!settled) { + settled = true; + clearTimeout(timer); + if (signal) signal.removeEventListener("abort", onAbort); + resolve(result); + } + }, + (err) => { + if (!settled) { + settled = true; + clearTimeout(timer); + if (signal) signal.removeEventListener("abort", onAbort); + reject(err); + } + }, + ); + }); +} + +function resolveStreamIdleTimeout(provider: ProviderConfig): number { + const retry = provider.retry; + if (retry && typeof retry.streamIdleTimeoutMs === "number" && retry.streamIdleTimeoutMs > 0) { + return retry.streamIdleTimeoutMs; + } + return DEFAULT_STREAM_IDLE_TIMEOUT_MS; +} + function throwIfAborted(signal?: AbortSignal): void { if (signal?.aborted) { throw createAbortError(signal.reason); diff --git a/src/router/RouterRuntime.ts b/src/router/RouterRuntime.ts index aaf8c6d8..f961687d 100644 --- a/src/router/RouterRuntime.ts +++ b/src/router/RouterRuntime.ts @@ -28,6 +28,7 @@ import { decideScenario } from "./scenario/decideScenario.js"; import { stripSubagentTagFromMessages } from "./scenario/subagentDetector.js"; import { SessionRouterStore } from "./session/SessionRouterStore.js"; import { SessionUsageCache } from "./session/sessionUsageCache.js"; +import { ProviderHealthTracker } from "./health/ProviderHealthTracker.js"; import { createZeroUsageState, observeEventForZeroUsage, @@ -102,6 +103,7 @@ export function createRouterRuntime( const judgeRuntime = deps.judgeRuntime ?? deps.modelRuntime; const events = deps.events ?? { emit: () => undefined }; const telemetry = deps.telemetry; + const healthTracker = new ProviderHealthTracker(); async function resolveCustom( input: RouterDecisionInput, @@ -388,13 +390,20 @@ export function createRouterRuntime( return; } const attempt = attempts[attemptIndex]; + if ( + attemptIndex > 0 && + healthTracker.shouldSkip(attempt.provider) && + attemptIndex < attempts.length - 1 + ) { + continue; + } const attemptDecision: RouterDecision = { ...decision, provider: attempt.provider, model: attempt.model, resolvedFrom: attemptIndex === 0 ? decision.resolvedFrom : "fallback", }; - const attemptRequest = applyDecisionToRequest(attemptDecision, request); + let attemptRequest = applyDecisionToRequest(attemptDecision, request); lastAttempt = attempt; lastDecision = attemptDecision; @@ -458,8 +467,7 @@ export function createRouterRuntime( if (outcome.error) { lastError = outcome.error; - // Only retry/fallback if we haven't surfaced content yet — otherwise - // we'd produce duplicate text on the consumer side. + healthTracker.recordFailure(attempt.provider); if (!hasYieldedContent && isFallbackEligible(outcome.error)) { if (attemptIndex < attempts.length - 1) { const next = attempts[attemptIndex + 1]; @@ -502,10 +510,12 @@ export function createRouterRuntime( transientRetryEnabled && transientRetryCount < transientRetryMax ) { - const delay = Math.min( - transientBaseDelayMs * Math.pow(2, transientRetryCount) + Math.random() * 500, - transientMaxDelayMs, - ); + const delay = outcome.error.retryAfterMs != null + ? Math.min(outcome.error.retryAfterMs, transientMaxDelayMs) + : Math.min( + transientBaseDelayMs * Math.pow(2, transientRetryCount) + Math.random() * 500, + transientMaxDelayMs, + ); console.warn( `[PilotDeck] transientRetry: ${outcome.error.code} (attempt ${transientRetryCount + 1}/${transientRetryMax}, delay=${Math.round(delay)}ms)`, ); @@ -519,6 +529,17 @@ export function createRouterRuntime( model: attempt.model, errorCode: outcome.error.code, }); + events.emit({ + type: "pilotdeck_router_retry_progress", + sessionId: ctx.sessionId, + turnId: ctx.turnId, + attempt: transientRetryCount + 1, + maxAttempts: transientRetryMax, + delayMs: Math.round(delay), + reason: classifyRetryReason(outcome.error.code), + provider: attempt.provider, + model: attempt.model, + }); telemetry?.trackFeatureLoopStage({ module: "router", ownerModule: "router", @@ -539,9 +560,40 @@ export function createRouterRuntime( transientRetryCount++; continue; } - // Either we've already surfaced content, the error isn't eligible - // for fallback/retry, or we've exhausted all retry attempts. Replay - // any queued framing events then surface the error. + if ( + hasYieldedContent && + isMidStreamRateLimitError(outcome.error) && + transientRetryCount < transientRetryMax + ) { + const partialText = extractPartialText(outcome.buffered); + if (partialText.length > 100) { + const midDelay = outcome.error.retryAfterMs != null + ? Math.min(outcome.error.retryAfterMs, transientMaxDelayMs) + : Math.min( + transientBaseDelayMs * Math.pow(2, transientRetryCount) + Math.random() * 500, + transientMaxDelayMs, + ); + console.warn( + `[PilotDeck] midStreamRetry: ${outcome.error.code} after partial content ` + + `(attempt ${transientRetryCount + 1}/${transientRetryMax}, delay=${Math.round(midDelay)}ms)`, + ); + events.emit({ + type: "pilotdeck_router_retry_progress", + sessionId: ctx.sessionId, + turnId: ctx.turnId, + attempt: transientRetryCount + 1, + maxAttempts: transientRetryMax, + delayMs: Math.round(midDelay), + reason: classifyRetryReason(outcome.error.code), + provider: attempt.provider, + model: attempt.model, + }); + await abortableDelay(midDelay, ctx.abortSignal); + attemptRequest = buildMidStreamContinuationRequest(attemptRequest, partialText); + transientRetryCount++; + continue; + } + } for (const queued of pending) { yield queued; } @@ -567,6 +619,17 @@ export function createRouterRuntime( provider: attempt.provider, model: attempt.model, }); + events.emit({ + type: "pilotdeck_router_retry_progress", + sessionId: ctx.sessionId, + turnId: ctx.turnId, + attempt: zeroUsageAttempt, + maxAttempts: zeroUsageMax, + delayMs: 500 * zeroUsageAttempt, + reason: "zero_usage", + provider: attempt.provider, + model: attempt.model, + }); telemetry?.trackFeatureLoopStage({ module: "router", ownerModule: "router", @@ -585,8 +648,8 @@ export function createRouterRuntime( continue; } - // Success path: flush any pending framing events that didn't reach - // a content event (e.g. zero-content responses, tool-only turns). + healthTracker.recordSuccess(attempt.provider); + if (!hasYieldedContent) { for (const queued of pending) { yield queued; @@ -716,6 +779,7 @@ export function createRouterRuntime( disposeTokenizer(); if (!externalStore) sessionStore.clear(); usageCache.clear(); + healthTracker.resetAll(); }, }; } @@ -859,3 +923,45 @@ function classifyNetworkErrorCode(error: unknown): string { if (msg.includes("abort") || error.name === "AbortError") return "aborted"; return "network_error"; } + +function isMidStreamRateLimitError(error: import("../model/index.js").CanonicalModelError): boolean { + return error.code === "rate_limit_error" || error.code === "overloaded_error"; +} + +function classifyRetryReason(errorCode: string): "rate_limit" | "server_error" | "network_error" | "zero_usage" | "overloaded" { + if (errorCode === "rate_limit_error") return "rate_limit"; + if (errorCode === "overloaded_error") return "overloaded"; + if (errorCode === "server_error") return "server_error"; + if (errorCode === "network_error" || errorCode === "timeout") return "network_error"; + return "server_error"; +} + +function extractPartialText(buffered: CanonicalModelEvent[]): string { + let text = ""; + for (const ev of buffered) { + if (ev.type === "text_delta") { + text += ev.text; + } + } + return text; +} + +function buildMidStreamContinuationRequest( + original: CanonicalModelRequest, + partialText: string, +): CanonicalModelRequest { + return { + ...original, + messages: [ + ...original.messages, + { + role: "assistant" as const, + content: [{ type: "text" as const, text: partialText }], + }, + { + role: "user" as const, + content: [{ type: "text" as const, text: "Continue from where you left off." }], + }, + ], + }; +} diff --git a/src/router/health/ProviderHealthTracker.ts b/src/router/health/ProviderHealthTracker.ts new file mode 100644 index 00000000..92a0f314 --- /dev/null +++ b/src/router/health/ProviderHealthTracker.ts @@ -0,0 +1,139 @@ +export type ProviderHealthState = "healthy" | "degraded" | "open" | "half_open"; + +const DEFAULT_DEGRADE_THRESHOLD = 3; +const DEFAULT_OPEN_THRESHOLD = 5; +const DEFAULT_OPEN_DURATION_MS = 30_000; +const DEFAULT_WINDOW_SIZE = 20; + +type ProviderRecord = { + state: ProviderHealthState; + consecutiveFailures: number; + /** Timestamp (ms) when the circuit was opened. */ + openedAt: number; + /** Sliding window of recent results (true = success). */ + window: boolean[]; +}; + +/** + * Lightweight circuit-breaker that tracks per-provider health. + * + * Three+ states: + * healthy → degraded (after `degradeThreshold` consecutive failures) + * degraded → open (after `openThreshold` consecutive failures) + * open → half_open (after `openDurationMs` has elapsed) + * half_open → healthy (probe succeeds) or open (probe fails) + * + * The tracker never blocks requests for explicitly-chosen providers + * (the caller is responsible for that check). + */ +export class ProviderHealthTracker { + private readonly records = new Map<string, ProviderRecord>(); + private readonly degradeThreshold: number; + private readonly openThreshold: number; + private readonly openDurationMs: number; + private readonly windowSize: number; + + constructor(options?: { + degradeThreshold?: number; + openThreshold?: number; + openDurationMs?: number; + windowSize?: number; + }) { + this.degradeThreshold = options?.degradeThreshold ?? DEFAULT_DEGRADE_THRESHOLD; + this.openThreshold = options?.openThreshold ?? DEFAULT_OPEN_THRESHOLD; + this.openDurationMs = options?.openDurationMs ?? DEFAULT_OPEN_DURATION_MS; + this.windowSize = options?.windowSize ?? DEFAULT_WINDOW_SIZE; + } + + private getOrCreate(providerId: string): ProviderRecord { + let rec = this.records.get(providerId); + if (!rec) { + rec = { state: "healthy", consecutiveFailures: 0, openedAt: 0, window: [] }; + this.records.set(providerId, rec); + } + return rec; + } + + recordSuccess(providerId: string): void { + const rec = this.getOrCreate(providerId); + rec.consecutiveFailures = 0; + rec.window.push(true); + if (rec.window.length > this.windowSize) rec.window.shift(); + if (rec.state === "half_open" || rec.state === "degraded" || rec.state === "open") { + rec.state = "healthy"; + } + } + + recordFailure(providerId: string): void { + const rec = this.getOrCreate(providerId); + rec.consecutiveFailures++; + rec.window.push(false); + if (rec.window.length > this.windowSize) rec.window.shift(); + if (rec.consecutiveFailures >= this.openThreshold) { + if (rec.state !== "open") { + rec.state = "open"; + rec.openedAt = Date.now(); + } + } else if (rec.consecutiveFailures >= this.degradeThreshold) { + if (rec.state === "healthy") { + rec.state = "degraded"; + } + } + if (rec.state === "half_open") { + rec.state = "open"; + rec.openedAt = Date.now(); + } + } + + getState(providerId: string): ProviderHealthState { + const rec = this.records.get(providerId); + if (!rec) return "healthy"; + if (rec.state === "open" && Date.now() - rec.openedAt >= this.openDurationMs) { + rec.state = "half_open"; + } + return rec.state; + } + + /** + * Returns true when the provider should be skipped (circuit is open). + * `half_open` allows one probe request through. + */ + shouldSkip(providerId: string): boolean { + return this.getState(providerId) === "open"; + } + + /** + * Returns true when the provider is in a healthy or half_open (probing) state + * and can accept requests. + */ + isAvailable(providerId: string): boolean { + const state = this.getState(providerId); + return state !== "open"; + } + + getSuccessRate(providerId: string): number { + const rec = this.records.get(providerId); + if (!rec || rec.window.length === 0) return 1; + return rec.window.filter(Boolean).length / rec.window.length; + } + + reset(providerId: string): void { + this.records.delete(providerId); + } + + resetAll(): void { + this.records.clear(); + } + + snapshot(): Map<string, { state: ProviderHealthState; successRate: number; consecutiveFailures: number }> { + const result = new Map<string, { state: ProviderHealthState; successRate: number; consecutiveFailures: number }>(); + for (const [id, rec] of this.records) { + result.set(id, { + state: this.getState(id), + successRate: this.getSuccessRate(id), + consecutiveFailures: rec.consecutiveFailures, + }); + } + return result; + } +} diff --git a/src/router/index.ts b/src/router/index.ts index 5d47630b..4e7cfd6a 100644 --- a/src/router/index.ts +++ b/src/router/index.ts @@ -20,6 +20,7 @@ export type { RouterEventBus, RouterExecuteFailedEvent, RouterFallbackEvent, + RouterRetryProgressEvent, RouterTokenSaverFailedEvent, RouterZeroUsageRetryEvent, } from "./protocol/events.js"; @@ -71,3 +72,7 @@ export { type CustomRouterRegistry, type PilotDeckCustomRouter, } from "./customRouter/customRouter.js"; +export { + ProviderHealthTracker, + type ProviderHealthState, +} from "./health/ProviderHealthTracker.js"; diff --git a/src/router/protocol/events.ts b/src/router/protocol/events.ts index 53d2870f..64816147 100644 --- a/src/router/protocol/events.ts +++ b/src/router/protocol/events.ts @@ -67,6 +67,18 @@ export type RouterTransientRetryEvent = { errorCode: string; }; +export type RouterRetryProgressEvent = { + type: "pilotdeck_router_retry_progress"; + sessionId: string; + turnId?: string; + attempt: number; + maxAttempts: number; + delayMs: number; + reason: "rate_limit" | "server_error" | "network_error" | "zero_usage" | "overloaded"; + provider: string; + model: string; +}; + export type RouterEvent = | RouterDecisionEvent | RouterFallbackEvent @@ -74,7 +86,8 @@ export type RouterEvent = | RouterTokenSaverFailedEvent | RouterCustomFailedEvent | RouterExecuteFailedEvent - | RouterTransientRetryEvent; + | RouterTransientRetryEvent + | RouterRetryProgressEvent; export type RouterEventBus = { emit(event: RouterEvent): void; diff --git a/ui/server/pilotdeck-bridge.js b/ui/server/pilotdeck-bridge.js index 080a6836..6524934f 100644 --- a/ui/server/pilotdeck-bridge.js +++ b/ui/server/pilotdeck-bridge.js @@ -560,6 +560,25 @@ export function gatewayEventToFrames(event, sessionId, provider) { }), ]; } + if (event.event === 'retry_progress') { + return [ + createNormalizedMessage({ + ...base, + kind: 'status', + text: `Reconnecting... ${detail.attempt}/${detail.maxAttempts}`, + tokens: 0, + canInterrupt: true, + retryProgress: { + attempt: detail.attempt, + maxAttempts: detail.maxAttempts, + delayMs: detail.delayMs, + reason: detail.reason, + provider: detail.provider, + model: detail.model, + }, + }), + ]; + } return []; } default: From 4e77bb9ab15cf53bde69d7abda32e7b40eb2dafa Mon Sep 17 00:00:00 2001 From: Kaguya-19 <liyishanthu@gmail.com> Date: Fri, 12 Jun 2026 19:14:28 +0800 Subject: [PATCH 4/4] feat(ui): sync robust-api settings to frontend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add transientRetry panel in RouterSection Advanced area - Add per-provider retry config in ProviderCard Advanced area - Passthrough userHint from gateway → bridge → chat error render - Add retryProgress structured rendering in live status step - Add GatewayEvent userHint type field - Add i18n keys for transientRetry and provider retry (en + zh-CN) Co-authored-by: Cursor <cursoragent@cursor.com> --- src/gateway/client/InProcessGateway.ts | 1 + src/gateway/protocol/types.ts | 2 +- ui/server/pilotdeck-bridge.js | 1 + ui/src/components/chat-v2/MessagesPaneV2.tsx | 22 ++++ .../components/chat/hooks/useChatMessages.ts | 1 + .../chat/hooks/useChatRealtimeHandlers.ts | 1 + ui/src/components/chat/types/types.ts | 10 ++ .../view/subcomponents/MessageComponent.tsx | 9 ++ .../settings/view/tabs/PilotDeckConfigTab.tsx | 112 ++++++++++++++++++ ui/src/i18n/locales/en/chat.json | 3 +- ui/src/i18n/locales/en/settings.json | 17 ++- ui/src/i18n/locales/zh-CN/chat.json | 3 +- ui/src/i18n/locales/zh-CN/settings.json | 17 ++- 13 files changed, 194 insertions(+), 5 deletions(-) diff --git a/src/gateway/client/InProcessGateway.ts b/src/gateway/client/InProcessGateway.ts index 0dc05c7b..21a527a7 100644 --- a/src/gateway/client/InProcessGateway.ts +++ b/src/gateway/client/InProcessGateway.ts @@ -1140,6 +1140,7 @@ export function mapAgentEvent(event: AgentEvent, runId: string): GatewayEvent[] code: event.error.code, message: event.error.message, recoverable: false, + userHint: event.error.userHint, }, ]; case "session_aborted": diff --git a/src/gateway/protocol/types.ts b/src/gateway/protocol/types.ts index c1f62324..1c9bc531 100644 --- a/src/gateway/protocol/types.ts +++ b/src/gateway/protocol/types.ts @@ -162,7 +162,7 @@ export type GatewayEvent = } | { type: "turn_completed"; usage: TurnUsage; finishReason: AgentTurnResult["stopReason"] | string } | { type: "agent_status"; event: string; detail?: Record<string, unknown> } - | { type: "error"; message: string; code?: string; recoverable: boolean }; + | { type: "error"; message: string; code?: string; recoverable: boolean; userHint?: string }; export type GatewayActiveTurnSnapshotInput = { sessionKey: string; diff --git a/ui/server/pilotdeck-bridge.js b/ui/server/pilotdeck-bridge.js index 6524934f..98f5420a 100644 --- a/ui/server/pilotdeck-bridge.js +++ b/ui/server/pilotdeck-bridge.js @@ -519,6 +519,7 @@ export function gatewayEventToFrames(event, sessionId, provider) { content: event.message, code: event.code, recoverable: event.recoverable, + userHint: event.userHint, }), ]; case 'agent_status': { diff --git a/ui/src/components/chat-v2/MessagesPaneV2.tsx b/ui/src/components/chat-v2/MessagesPaneV2.tsx index 503ae6c2..59a833de 100644 --- a/ui/src/components/chat-v2/MessagesPaneV2.tsx +++ b/ui/src/components/chat-v2/MessagesPaneV2.tsx @@ -826,6 +826,28 @@ function getLiveStatusStep( return activityToLiveStep(latestActivity); } + const retryProgress = (workingStatus as any)?.retryProgress; + if (retryProgress) { + const parts: string[] = []; + if (retryProgress.reason) parts.push(retryProgress.reason); + if (retryProgress.provider) parts.push(retryProgress.provider); + if (retryProgress.model) parts.push(retryProgress.model); + const delayStr = retryProgress.delayMs ? ` (${Math.round(retryProgress.delayMs / 1000)}s)` : ''; + return { + id: 'live-retry', + title: t('working.retrying', { + defaultValue: 'Reconnecting {{attempt}}/{{max}}{{delay}}', + attempt: retryProgress.attempt, + max: retryProgress.maxAttempts, + delay: delayStr, + }), + detail: parts.join(' · '), + phase: 'retry', + state: 'running', + severity: 'warning', + }; + } + if (workingStatus?.compactProgress) { const progress = workingStatus.compactProgress; return { diff --git a/ui/src/components/chat/hooks/useChatMessages.ts b/ui/src/components/chat/hooks/useChatMessages.ts index 626bdf9f..8b49914d 100644 --- a/ui/src/components/chat/hooks/useChatMessages.ts +++ b/ui/src/components/chat/hooks/useChatMessages.ts @@ -152,6 +152,7 @@ function convertNormalizedMessages(messages: NormalizedMessage[]): ChatMessage[] type: 'error', content: msg.content || 'Unknown error', timestamp: msg.timestamp, + ...(msg.userHint ? { userHint: msg.userHint } : {}), }); break; diff --git a/ui/src/components/chat/hooks/useChatRealtimeHandlers.ts b/ui/src/components/chat/hooks/useChatRealtimeHandlers.ts index f7b390ea..c57d37dc 100644 --- a/ui/src/components/chat/hooks/useChatRealtimeHandlers.ts +++ b/ui/src/components/chat/hooks/useChatRealtimeHandlers.ts @@ -495,6 +495,7 @@ export function useChatRealtimeHandlers({ tokens: msg.tokens || 0, can_interrupt: msg.canInterrupt !== undefined ? msg.canInterrupt : true, compactProgress: msg.compactProgress || msg.compact_progress || null, + retryProgress: msg.retryProgress || null, }); setIsLoading(true); setCanAbortSession(msg.canInterrupt !== false); diff --git a/ui/src/components/chat/types/types.ts b/ui/src/components/chat/types/types.ts index f2b9340a..6e995d4c 100644 --- a/ui/src/components/chat/types/types.ts +++ b/ui/src/components/chat/types/types.ts @@ -130,11 +130,21 @@ export interface ClaudeWorkStatus { compactProgress?: CompactProgress | null; } +export interface RetryProgress { + attempt: number; + maxAttempts: number; + delayMs?: number; + reason?: string; + provider?: string; + model?: string; +} + export interface PilotDeckWorkStatus { text: string; tokens: number; can_interrupt: boolean; compactProgress?: CompactProgress | null; + retryProgress?: RetryProgress | null; } export interface PilotDeckSettings { diff --git a/ui/src/components/chat/view/subcomponents/MessageComponent.tsx b/ui/src/components/chat/view/subcomponents/MessageComponent.tsx index 4a546815..d644a203 100644 --- a/ui/src/components/chat/view/subcomponents/MessageComponent.tsx +++ b/ui/src/components/chat/view/subcomponents/MessageComponent.tsx @@ -710,6 +710,15 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o </div> ); })()} + + {message.type === 'error' && message.userHint && ( + <div className="mt-2 flex items-start gap-2 rounded-md border border-amber-200 bg-amber-50 px-3 py-2 dark:border-amber-800/50 dark:bg-amber-950/30"> + <svg className="mt-0.5 h-4 w-4 flex-shrink-0 text-amber-500 dark:text-amber-400" fill="none" stroke="currentColor" viewBox="0 0 24 24"> + <path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9.663 17h4.673M12 3v1m6.364 1.636l-.707.707M21 12h-1M4 12H3m3.343-5.657l-.707-.707m2.828 9.9a5 5 0 117.072 0l-.548.547A3.374 3.374 0 0014 18.469V19a2 2 0 11-4 0v-.531c0-.895-.356-1.754-.988-2.386l-.548-.547z" /> + </svg> + <span className="text-xs text-amber-700 dark:text-amber-300">{String(message.userHint)}</span> + </div> + )} </div> )} diff --git a/ui/src/components/settings/view/tabs/PilotDeckConfigTab.tsx b/ui/src/components/settings/view/tabs/PilotDeckConfigTab.tsx index f9377971..1d788052 100644 --- a/ui/src/components/settings/view/tabs/PilotDeckConfigTab.tsx +++ b/ui/src/components/settings/view/tabs/PilotDeckConfigTab.tsx @@ -63,6 +63,13 @@ type V2Provider = { timeoutMs?: number; headers?: Record<string, string>; models?: Record<string, Record<string, unknown> | null>; + retry?: { + requestMaxRetries?: number; + streamMaxRetries?: number; + streamIdleTimeoutMs?: number; + baseDelayMs?: number; + maxDelayMs?: number; + }; }; type PilotDeckConfig = { @@ -146,6 +153,12 @@ type PilotDeckConfig = { enabled?: boolean; maxAttempts?: number; }; + transientRetry?: { + enabled?: boolean; + maxAttempts?: number; + baseDelayMs?: number; + maxDelayMs?: number; + }; tokenSaver?: { enabled?: boolean; judge?: string; @@ -704,6 +717,7 @@ function ProviderCard({ const effectiveUrl = provider.url || catalogEntry?.defaultUrl || ''; const enabledModels = Object.keys(provider.models ?? {}); const [newModelId, setNewModelId] = useState(''); + const [showProviderAdvanced, setShowProviderAdvanced] = useState(false); const [providerIdDraft, setProviderIdDraft] = useState(providerId); const [providerIdError, setProviderIdError] = useState(''); const displayName = providerDisplayName( @@ -925,6 +939,58 @@ function ProviderCard({ </Button> </div> </div> + + {/* ── Advanced (per-provider retry) ─────────────────────── */} + <div className="px-4 pb-4"> + <button + type="button" + onClick={() => setShowProviderAdvanced((v) => !v)} + aria-expanded={showProviderAdvanced} + className="inline-flex items-center gap-1.5 rounded-md px-2 py-1 text-[12px] font-medium leading-5 text-muted-foreground transition-colors hover:bg-accent hover:text-foreground" + > + <ChevronDown className={cn('h-3.5 w-3.5 transition-transform', showProviderAdvanced && 'rotate-180')} /> + {t('pilotDeckConfig.panels.models.providerAdvancedToggle')} + </button> + {showProviderAdvanced && ( + <div className="mt-3 space-y-3 divide-y divide-border rounded-md border border-border p-3"> + <FormRow label={t('pilotDeckConfig.panels.models.providerRetry.requestMaxRetries.label')} description={t('pilotDeckConfig.panels.models.providerRetry.requestMaxRetries.description')}> + <NumberInput + value={provider.retry?.requestMaxRetries} + placeholder="2" + onChange={(v) => onChange({ ...provider, retry: { ...provider.retry, requestMaxRetries: v } })} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.models.providerRetry.streamMaxRetries.label')} description={t('pilotDeckConfig.panels.models.providerRetry.streamMaxRetries.description')}> + <NumberInput + value={provider.retry?.streamMaxRetries} + placeholder="3" + onChange={(v) => onChange({ ...provider, retry: { ...provider.retry, streamMaxRetries: v } })} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.models.providerRetry.streamIdleTimeoutMs.label')} description={t('pilotDeckConfig.panels.models.providerRetry.streamIdleTimeoutMs.description')}> + <NumberInput + value={provider.retry?.streamIdleTimeoutMs} + placeholder="30000" + onChange={(v) => onChange({ ...provider, retry: { ...provider.retry, streamIdleTimeoutMs: v } })} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.models.providerRetry.baseDelayMs.label')} description={t('pilotDeckConfig.panels.models.providerRetry.baseDelayMs.description')}> + <NumberInput + value={provider.retry?.baseDelayMs} + placeholder="1000" + onChange={(v) => onChange({ ...provider, retry: { ...provider.retry, baseDelayMs: v } })} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.models.providerRetry.maxDelayMs.label')} description={t('pilotDeckConfig.panels.models.providerRetry.maxDelayMs.description')}> + <NumberInput + value={provider.retry?.maxDelayMs} + placeholder="60000" + onChange={(v) => onChange({ ...provider, retry: { ...provider.retry, maxDelayMs: v } })} + /> + </FormRow> + </div> + )} + </div> </div> ); } @@ -2641,8 +2707,10 @@ function RouterSection({ config, onChange }: { config: PilotDeckConfig; onChange const ts = r.tokenSaver ?? {}; const ao = r.autoOrchestrate ?? {}; const zr = r.zeroUsageRetry ?? {}; + const tr = r.transientRetry ?? {}; const statsEnabled = r.stats?.enabled !== false; const zeroUsageEnabled = zr.enabled !== false; + const transientRetryEnabled = tr.enabled !== false; const tokenSaverEnabled = ts.enabled !== false; const autoOrchestrateEnabled = ao.enabled !== false; @@ -2671,6 +2739,12 @@ function RouterSection({ config, onChange }: { config: PilotDeckConfig; onChange if (next.router?.zeroUsageRetry?.maxAttempts == null) { next = patch(next, ['router', 'zeroUsageRetry', 'maxAttempts'], 2); } + if (next.router?.transientRetry?.enabled !== true) { + next = patch(next, ['router', 'transientRetry', 'enabled'], true); + } + if (next.router?.transientRetry?.maxAttempts == null) { + next = patch(next, ['router', 'transientRetry', 'maxAttempts'], 5); + } if (next.router?.tokenSaver?.enabled !== true) { next = patch(next, ['router', 'tokenSaver', 'enabled'], true); } @@ -2780,6 +2854,44 @@ function RouterSection({ config, onChange }: { config: PilotDeckConfig; onChange )} </SettingsCard> + {/* ── TransientRetry ───────────────────────────────────── */} + <SettingsCard className="space-y-4 p-4"> + <SettingsRow + label={t('pilotDeckConfig.panels.router.transientRetry.label')} + description={t('pilotDeckConfig.panels.router.transientRetry.description')} + > + <SettingsToggle + checked={transientRetryEnabled} + onChange={(v) => onChange(patch(config, ['router', 'transientRetry', 'enabled'], v))} + /> + </SettingsRow> + {transientRetryEnabled && ( + <> + <FormRow label={t('pilotDeckConfig.panels.router.transientRetry.maxAttempts.label')} description={t('pilotDeckConfig.panels.router.transientRetry.maxAttempts.description')}> + <NumberInput + value={tr.maxAttempts} + placeholder="5" + onChange={(v) => onChange(patch(config, ['router', 'transientRetry', 'maxAttempts'], v))} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.router.transientRetry.baseDelayMs.label')} description={t('pilotDeckConfig.panels.router.transientRetry.baseDelayMs.description')}> + <NumberInput + value={tr.baseDelayMs} + placeholder="1000" + onChange={(v) => onChange(patch(config, ['router', 'transientRetry', 'baseDelayMs'], v))} + /> + </FormRow> + <FormRow label={t('pilotDeckConfig.panels.router.transientRetry.maxDelayMs.label')} description={t('pilotDeckConfig.panels.router.transientRetry.maxDelayMs.description')}> + <NumberInput + value={tr.maxDelayMs} + placeholder="30000" + onChange={(v) => onChange(patch(config, ['router', 'transientRetry', 'maxDelayMs'], v))} + /> + </FormRow> + </> + )} + </SettingsCard> + {/* ── TokenSaver ─────────────────────────────────────────── */} <SettingsCard className="space-y-4 p-4"> <div className="flex items-center justify-between"> diff --git a/ui/src/i18n/locales/en/chat.json b/ui/src/i18n/locales/en/chat.json index 68e8a9eb..b85ac968 100644 --- a/ui/src/i18n/locales/en/chat.json +++ b/ui/src/i18n/locales/en/chat.json @@ -51,7 +51,8 @@ "generating": "Generating response", "compacting": "Compacting context...", "compactingLevel": "Triggering level {{level}} compaction: {{label}}", - "waitingForPermission": "Waiting for permission" + "waitingForPermission": "Waiting for permission", + "retrying": "Reconnecting {{attempt}}/{{max}}{{delay}}" }, "process": { "summary": { diff --git a/ui/src/i18n/locales/en/settings.json b/ui/src/i18n/locales/en/settings.json index d2ced327..3ac7006c 100644 --- a/ui/src/i18n/locales/en/settings.json +++ b/ui/src/i18n/locales/en/settings.json @@ -705,7 +705,15 @@ "supportsImageInput": "supports image input", "clickEnable": "Click to enable", "clickDisable": "Click to disable", - "customModelPlaceholder": "Custom model ID" + "customModelPlaceholder": "Custom model ID", + "providerAdvancedToggle": "Advanced retry settings", + "providerRetry": { + "requestMaxRetries": { "label": "Request max retries", "description": "Maximum retries for non-streaming requests." }, + "streamMaxRetries": { "label": "Stream max retries", "description": "Maximum retries for streaming requests." }, + "streamIdleTimeoutMs": { "label": "Stream idle timeout (ms)", "description": "Abort a stream if no data is received for this duration." }, + "baseDelayMs": { "label": "Base delay (ms)", "description": "Initial delay before the first retry." }, + "maxDelayMs": { "label": "Max delay (ms)", "description": "Maximum delay between retries." } + } }, "agents": { "title": "Agents", @@ -913,6 +921,13 @@ "description": "Automatically retry when the upstream returns a response with zero tokens used.", "maxAttempts": { "label": "Max attempts", "description": "Maximum retry attempts before giving up." } }, + "transientRetry": { + "label": "Transient error retry", + "description": "Automatically retry on transient errors (rate limits, server errors, overloaded) with exponential backoff.", + "maxAttempts": { "label": "Max attempts", "description": "Maximum retry attempts before giving up." }, + "baseDelayMs": { "label": "Base delay (ms)", "description": "Initial delay before the first retry, in milliseconds." }, + "maxDelayMs": { "label": "Max delay (ms)", "description": "Maximum delay between retries, in milliseconds." } + }, "tokenSaver": { "title": "Token Saver", "description": "A lightweight judge classifies each turn into a tier and routes it to a cost-appropriate model.", diff --git a/ui/src/i18n/locales/zh-CN/chat.json b/ui/src/i18n/locales/zh-CN/chat.json index 05f2824b..35e6b0d9 100644 --- a/ui/src/i18n/locales/zh-CN/chat.json +++ b/ui/src/i18n/locales/zh-CN/chat.json @@ -51,7 +51,8 @@ "generating": "正在生成回复", "compacting": "正在压缩上下文...", "compactingLevel": "正在触发 {{level}} 级压缩:{{label}}", - "waitingForPermission": "等待授权" + "waitingForPermission": "等待授权", + "retrying": "重连中 {{attempt}}/{{max}}{{delay}}" }, "process": { "summary": { diff --git a/ui/src/i18n/locales/zh-CN/settings.json b/ui/src/i18n/locales/zh-CN/settings.json index 87105380..087ab301 100644 --- a/ui/src/i18n/locales/zh-CN/settings.json +++ b/ui/src/i18n/locales/zh-CN/settings.json @@ -705,7 +705,15 @@ "supportsImageInput": "支持图片输入", "clickEnable": "点击启用", "clickDisable": "点击禁用", - "customModelPlaceholder": "自定义模型 ID" + "customModelPlaceholder": "自定义模型 ID", + "providerAdvancedToggle": "高级重试设置", + "providerRetry": { + "requestMaxRetries": { "label": "请求最大重试次数", "description": "非流式请求的最大重试次数。" }, + "streamMaxRetries": { "label": "流式最大重试次数", "description": "流式请求的最大重试次数。" }, + "streamIdleTimeoutMs": { "label": "流式空闲超时(毫秒)", "description": "在此时间内没有收到数据则中止流。" }, + "baseDelayMs": { "label": "基础延迟(毫秒)", "description": "首次重试前的初始等待时间。" }, + "maxDelayMs": { "label": "最大延迟(毫秒)", "description": "两次重试之间的最长等待时间。" } + } }, "agents": { "title": "智能体", @@ -913,6 +921,13 @@ "description": "上游返回零 Token 用量响应时自动重试。", "maxAttempts": { "label": "最大重试次数", "description": "放弃前的最大重试次数。" } }, + "transientRetry": { + "label": "瞬态错误重试", + "description": "遇到瞬态错误(限流、服务端错误、过载)时自动重试,采用指数退避策略。", + "maxAttempts": { "label": "最大重试次数", "description": "放弃前的最大重试次数。" }, + "baseDelayMs": { "label": "基础延迟(毫秒)", "description": "首次重试前的初始等待时间。" }, + "maxDelayMs": { "label": "最大延迟(毫秒)", "description": "两次重试之间的最长等待时间。" } + }, "tokenSaver": { "title": "Token 节省", "description": "轻量级判定器将每轮对话分类到对应层级,并路由到成本适合的模型。",