diff --git a/packages/mcp/src/server.ts b/packages/mcp/src/server.ts
index ef6fc38..f68407d 100644
--- a/packages/mcp/src/server.ts
+++ b/packages/mcp/src/server.ts
@@ -1209,6 +1209,11 @@ export function buildServer(
             endpoint: err._meta?.endpoint,
           });
         }
+        // Upstream HTTP status (set by client.ts mapErrorResponse at
+        // _meta.http_status). Forward it onto the product-analytics events
+        // so catch-all codes like API_ERROR can be disambiguated by status
+        // on the dashboard. Absent for codes that never hit the HTTP layer.
+        const httpStatus: number | undefined = err._meta?.http_status;
         telemetry.captureToolCall({
           tool: name,
           ok: false,
@@ -1216,6 +1221,7 @@ export function buildServer(
           format: "error-envelope",
           bytes: errText.length,
           error_code: code,
+          ...(typeof httpStatus === "number" ? { http_status: httpStatus } : {}),
           triggered_by,
         });
         if (COMPOSITE_FILE_TOOL_NAMES.has(name)) {
@@ -1225,6 +1231,7 @@ export function buildServer(
             ok: false,
             duration_ms: errDur,
             error_code: code,
+            ...(typeof httpStatus === "number" ? { http_status: httpStatus } : {}),
           });
         }
         telemetry.captureException(err, buildBusinessCtx(name, err, triggered_by));
diff --git a/packages/mcp/src/telemetry-events.ts b/packages/mcp/src/telemetry-events.ts
index 4757de9..7be0870 100644
--- a/packages/mcp/src/telemetry-events.ts
+++ b/packages/mcp/src/telemetry-events.ts
@@ -27,6 +27,12 @@ export interface ToolCallProps {
   format: ToolCallFormat;
   bytes: number;
   error_code?: string;
+  // Upstream HTTP status of the failing call, lifted from the error
+  // envelope's `_meta.http_status` (set by client.ts mapErrorResponse).
+  // Disambiguates catch-all codes like API_ERROR on the dashboard — e.g.
+  // is the enrich_titles API_ERROR floor 503s, 500s, or a 4xx edge?
+  // Absent on success and on errors that never hit the HTTP layer.
+  http_status?: number;
   // Verbatim user utterance (capped at 500 chars) that the agent reports as
   // the trigger for this call, via the `_triggered_by` meta-param injected
   // into every tool's input schema. Optional because legacy agents and
@@ -59,6 +65,12 @@ export interface CompositeCallProps {
   ok: boolean;
   duration_ms: number;
   error_code?: string;
+  // Upstream HTTP status from the error envelope's `_meta.http_status`
+  // (set by client.ts mapErrorResponse). Same purpose as on ToolCallProps:
+  // disambiguates catch-all codes like API_ERROR. Composites are where the
+  // enrich_titles floor lives, so the join surface needs it too. Absent on
+  // success and on errors that never hit the HTTP layer.
+  http_status?: number;
 }
 
 export type FrictionCategory =
diff --git a/packages/mcp/test/tool-call-http-status.test.ts b/packages/mcp/test/tool-call-http-status.test.ts
new file mode 100644
index 0000000..41247a8
--- /dev/null
+++ b/packages/mcp/test/tool-call-http-status.test.ts
@@ -0,0 +1,175 @@
+/**
+ * Regression: HTTP status disambiguation in tool-call telemetry.
+ *
+ * leadbay_enrich_titles shows a ~7% API_ERROR floor. API_ERROR is the
+ * catch-all in client.ts mapErrorResponse for any backend non-2xx that
+ * isn't 401/402/403/404/429. The error envelope carries the upstream
+ * status at `_meta.http_status` (client.ts makeError), but the
+ * high-volume `mcp tool called` / `mcp composite call` product-analytics
+ * events did NOT propagate it — so the dashboard can't tell whether the
+ * floor is 503s, 500s, or a 4xx edge.
+ *
+ * These tests drive tools that throw a LeadbayError-shaped business
+ * error and assert on the captured tool-call telemetry event:
+ *   - with `_meta.http_status` set, the event includes http_status;
+ *   - without it, the event has no http_status key at all.
+ *
+ * NOTE(review): the composite-call event is captured by the spy but NOT
+ * asserted here. The test tools are presumably not listed in
+ * COMPOSITE_FILE_TOOL_NAMES, so that branch of server.ts never fires for
+ * them; it still needs coverage driven by a real composite tool name.
+ *
+ * Before the fix: http_status is absent from the captured props -> FAIL.
+ * After the fix: http_status === 503 is present -> PASS.
+ */
+
+import { describe, it, expect, beforeEach, vi } from "vitest";
+import { mockHttp, resetHttpMock, httpsMockFactory } from "./harness.js";
+
+vi.mock("node:https", () => httpsMockFactory());
+
+import { LeadbayClient } from "@leadbay/core";
+import type { Tool } from "@leadbay/core";
+import { buildServer } from "../src/server.js";
+import {
+  NOOP_TELEMETRY,
+  type TelemetryHandle,
+} from "../src/telemetry.js";
+import type {
+  ToolCallProps,
+  CompositeCallProps,
+} from "../src/telemetry-events.js";
+import { Client } from "@modelcontextprotocol/sdk/client/index.js";
+import { InMemoryTransport } from "@modelcontextprotocol/sdk/inMemory.js";
+
+const BASE = "https://api-us.leadbay.app";
+
+beforeEach(() => {
+  resetHttpMock();
+});
+
+// A tool that throws a LeadbayError-shaped business error carrying the
+// upstream HTTP status at _meta.http_status — exactly the shape
+// client.ts mapErrorResponse produces for an API_ERROR (503 here).
+const apiErrorTool: Tool = {
+  name: "leadbay_test_api_error",
+  description: "Test tool: throws a LeadbayError with _meta.http_status.",
+  annotations: {
+    title: "API error",
+    readOnlyHint: true,
+    destructiveHint: false,
+    idempotentHint: true,
+    openWorldHint: false,
+  },
+  inputSchema: { type: "object", properties: {}, additionalProperties: false },
+  execute: async () => {
+    // Typed `unknown`, not `any`: the envelope is only ever thrown, never read.
+    const err: unknown = {
+      error: true,
+      code: "API_ERROR",
+      message: "API error (503)",
+      hint: "Try again or check the Leadbay API status",
+      _meta: {
+        region: "us",
+        endpoint: "/enrichment/bulk",
+        latency_ms: 12,
+        retry_after: null,
+        http_status: 503,
+      },
+    };
+    throw err;
+  },
+};
+
+// Same envelope minus `_meta.http_status`. Used to pin the
+// `typeof httpStatus === "number"` guard in server.ts: with no numeric
+// status on the error, the telemetry event must not gain the key.
+const noStatusErrorTool: Tool = {
+  ...apiErrorTool,
+  name: "leadbay_test_no_http_status",
+  description: "Test tool: throws a LeadbayError without _meta.http_status.",
+  execute: async () => {
+    const err: unknown = {
+      error: true,
+      code: "API_ERROR",
+      message: "API error",
+      hint: "Try again or check the Leadbay API status",
+      _meta: {
+        region: "us",
+        endpoint: "/enrichment/bulk",
+        latency_ms: 12,
+        retry_after: null,
+      },
+    };
+    throw err;
+  },
+};
+
+// Telemetry double: NOOP_TELEMETRY with captureToolCall and
+// captureCompositeCall replaced by recorders, so a test can inspect the
+// exact props the server handed to each.
+function captureSpy() {
+  const toolCalls: ToolCallProps[] = [];
+  const compositeCalls: CompositeCallProps[] = [];
+  const telemetry: TelemetryHandle = {
+    ...NOOP_TELEMETRY,
+    captureToolCall: (props) => toolCalls.push(props),
+    captureCompositeCall: (props) => compositeCalls.push(props),
+  };
+  return { telemetry, toolCalls, compositeCalls };
+}
+
+// Builds a server with `extraTools` and `telemetry`, links it to an MCP
+// client over an in-memory transport pair, and returns the connected client.
+async function connect(telemetry: TelemetryHandle, extraTools: Tool[]) {
+  const lbClient = new LeadbayClient(BASE, "u.test-token");
+  const server = buildServer(lbClient, { extraTools, telemetry });
+  const [clientTransport, serverTransport] = InMemoryTransport.createLinkedPair();
+  const mcpClient = new Client({ name: "test", version: "0.0.1" }, {});
+  await Promise.all([
+    server.connect(serverTransport),
+    mcpClient.connect(clientTransport),
+  ]);
+  return mcpClient;
+}
+
+describe("tool-call telemetry — upstream HTTP status", () => {
+  it("captures _meta.http_status on the tool-call event for an API_ERROR throw", async () => {
+    mockHttp([]);
+    const { telemetry, toolCalls } = captureSpy();
+    const mcpClient = await connect(telemetry, [apiErrorTool]);
+
+    await mcpClient.callTool({
+      name: "leadbay_test_api_error",
+      arguments: {},
+    });
+
+    expect(toolCalls).toHaveLength(1);
+    const ev = toolCalls[0];
+    expect(ev.tool).toBe("leadbay_test_api_error");
+    expect(ev.ok).toBe(false);
+    expect(ev.error_code).toBe("API_ERROR");
+    // The load-bearing assertion: the upstream status must ride along so
+    // the dashboard can disambiguate the API_ERROR floor (503 vs 500 vs 4xx).
+    expect(ev.http_status).toBe(503);
+  });
+
+  it("omits http_status when the error envelope carries none", async () => {
+    mockHttp([]);
+    const { telemetry, toolCalls } = captureSpy();
+    const mcpClient = await connect(telemetry, [noStatusErrorTool]);
+
+    await mcpClient.callTool({
+      name: "leadbay_test_no_http_status",
+      arguments: {},
+    });
+
+    expect(toolCalls).toHaveLength(1);
+    const ev = toolCalls[0];
+    expect(ev.ok).toBe(false);
+    expect(ev.error_code).toBe("API_ERROR");
+    // server.ts spreads `{}` when the status is not a number, so the key
+    // must be absent — not present with an `undefined` value.
+    expect(ev).not.toHaveProperty("http_status");
+  });
+});