Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions plugins/trace-codex/src/agents/codex/event-processor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1168,6 +1168,88 @@ describe("CodexEventProcessor: llm spans", () => {
);
});

// Verifies that a token_count whose usage is reported with prompt_* /
// completion_* key names and nested *_details objects still yields the
// normalized metric names on the llm span of the turn.
test("llm token metrics normalize alternate and nested usage shapes", async () => {
await assertProducesTrace(
[
sessionStart(),
sessionMeta({ cwd: "/work" }),
turnContext({ model: "gpt-5.5" }),
taskStarted({ turn_id: "t1" }),
userMessage({ message: "yo!" }),
assistantMessage("Hey."),
// Usage payload under test: flat prompt/completion counts, nested detail
// objects, and a single `cost` figure. No total token count is supplied.
tokenCount({
prompt_tokens: 100,
completion_tokens: 14,
prompt_tokens_details: { cached_tokens: 25, cache_creation_tokens: 7 },
completion_tokens_details: { reasoning_tokens: 4 },
cost: 0.1234,
}),
taskComplete({ turn_id: "t1", last_agent_message: "Hey." }),
stop({ turn_id: "t1" }),
],
// Expected trace: session task span -> turn task span -> one llm span.
{
span_attributes: { name: "codex: work", type: "task" },
ended: true,
children: [
{
span_attributes: { name: "turn: t1", type: "task" },
ended: true,
children: [
{
span_attributes: { name: "gpt-5.5", type: "llm" },
output: { role: "assistant", content: "Hey." },
metrics: {
prompt_tokens: 100,
completion_tokens: 14,
// Not present in the input usage: 114 = prompt_tokens + completion_tokens.
tokens: 114,
// The nested *_details values are expected under flat metric names.
prompt_cached_tokens: 25,
prompt_cache_creation_tokens: 7,
completion_reasoning_tokens: 4,
// The single input `cost` is expected under both cost keys.
cost: 0.1234,
estimated_cost: 0.1234,
},
ended: true,
},
],
},
],
},
);
});

// Verifies that when the transcript contains no tokenCount(...) event for the
// turn, the llm span is still ended and carries a metadata entry explaining
// why token usage is unavailable.
test("a dangling llm span records why token usage is unavailable", async () => {
await assertProducesTrace(
[
sessionStart(),
sessionMeta({ cwd: "/work" }),
turnContext({ model: "gpt-5.5" }),
taskStarted({ turn_id: "t1" }),
userMessage({ message: "yo!" }),
assistantMessage("Hey."),
// No tokenCount(...) entry here: the turn completes right after the
// assistant message.
taskComplete({ turn_id: "t1", last_agent_message: "Hey." }),
stop({ turn_id: "t1" }),
],
// Expected trace: session task span -> turn task span -> one llm span.
{
span_attributes: { name: "codex: work", type: "task" },
ended: true,
children: [
{
span_attributes: { name: "turn: t1", type: "task" },
ended: true,
children: [
{
span_attributes: { name: "gpt-5.5", type: "llm" },
output: { role: "assistant", content: "Hey." },
// No metrics are asserted for this span; only the reason string is.
metadata: { usage_unavailable_reason: "codex_transcript_missing_token_count" },
ended: true,
},
],
},
],
},
);
});

// A reasoning item with a readable summary is surfaced as a `reasoning` entry
// in the llm output (Braintrust's OpenAI Responses shape), interleaved before
// the assistant message.
Expand Down
56 changes: 53 additions & 3 deletions plugins/trace-codex/src/agents/codex/event-processor.ts
Original file line number Diff line number Diff line change
/**
 * Normalizes a token-usage record into a flat map of numeric metrics.
 *
 * Each metric is filled from the first source key (in table order) that holds
 * a finite number; later aliases for an already-filled metric are ignored.
 * Source keys may be dotted paths (e.g. `prompt_tokens_details.cached_tokens`)
 * which are resolved through nested objects.
 *
 * @param usage - Raw usage record; values of any type are tolerated and
 *   anything that is not a finite number is skipped.
 * @returns Metrics keyed by normalized name. `tokens` is derived as
 *   `prompt_tokens + completion_tokens` when no total was reported but both
 *   parts were. The result is empty when nothing was recognized.
 */
function tokenMetrics(usage: Record<string, unknown>): Record<string, number> {
  const metrics: Record<string, number> = {};

  // Accept only real, finite numbers (rejects NaN, Infinity, strings, null).
  const num = (v: unknown): number | undefined =>
    typeof v === "number" && Number.isFinite(v) ? v : undefined;

  // Resolve a dotted path against `usage`, stopping as soon as an intermediate
  // value is not an object.
  const at = (path: string): number | undefined => {
    let cur: unknown = usage;
    for (const part of path.split(".")) {
      if (cur === null || typeof cur !== "object") return undefined;
      cur = (cur as Record<string, unknown>)[part];
    }
    return num(cur);
  };

  // [source path, normalized metric name]. Order matters: for each metric the
  // earliest matching source wins.
  const map: Array<[string, string]> = [
    ["input_tokens", "prompt_tokens"],
    ["prompt_tokens", "prompt_tokens"],
    ["output_tokens", "completion_tokens"],
    ["completion_tokens", "completion_tokens"],
    ["total_tokens", "tokens"],
    ["tokens", "tokens"],
    ["cached_input_tokens", "prompt_cached_tokens"],
    ["prompt_cached_tokens", "prompt_cached_tokens"],
    ["input_tokens_details.cached_tokens", "prompt_cached_tokens"],
    ["prompt_tokens_details.cached_tokens", "prompt_cached_tokens"],
    ["prompt_cache_creation_tokens", "prompt_cache_creation_tokens"],
    ["input_tokens_details.cache_creation_tokens", "prompt_cache_creation_tokens"],
    ["input_tokens_details.cache_write_tokens", "prompt_cache_creation_tokens"],
    ["prompt_tokens_details.cache_creation_tokens", "prompt_cache_creation_tokens"],
    ["prompt_tokens_details.cache_write_tokens", "prompt_cache_creation_tokens"],
    ["reasoning_output_tokens", "completion_reasoning_tokens"],
    ["completion_reasoning_tokens", "completion_reasoning_tokens"],
    ["reasoning_tokens", "completion_reasoning_tokens"],
    ["output_tokens_details.reasoning_tokens", "completion_reasoning_tokens"],
    ["completion_tokens_details.reasoning_tokens", "completion_reasoning_tokens"],
    ["cost", "cost"],
    ["cost", "estimated_cost"],
    ["estimated_cost", "estimated_cost"],
    ["total_cost", "estimated_cost"],
    ["cost_usd", "estimated_cost"],
  ];

  for (const [from, to] of map) {
    // A higher-priority alias already supplied this metric.
    if (metrics[to] !== undefined) continue;
    const v = at(from);
    if (v !== undefined) metrics[to] = v;
  }

  // Derive the total only when it was not reported and both parts are known.
  if (
    metrics.tokens === undefined &&
    metrics.prompt_tokens !== undefined &&
    metrics.completion_tokens !== undefined
  ) {
    metrics.tokens = metrics.prompt_tokens + metrics.completion_tokens;
  }
  return metrics;
}

Expand Down Expand Up @@ -1467,6 +1503,15 @@ export class CodexEventProcessor implements EventProcessor {
const info = (payload.info ?? {}) as Record<string, unknown>;
const usage = (info.last_token_usage ?? {}) as Record<string, unknown>;
const metrics = tokenMetrics(usage);
const metadata =
Object.keys(metrics).length === 0
? {
usage_unavailable_reason:
Object.keys(usage).length === 0
? "codex_token_count_missing_usage"
: "codex_token_count_unrecognized_usage",
}
: undefined;
const { span, turnId, output, outputPreset, lastOutputTime } = scope.openLlm;
// End the span when the model last generated (its last output item), NOT at
// this token_count: Codex writes the token_count after the tool result (at
Expand All @@ -1476,7 +1521,11 @@ export class CodexEventProcessor implements EventProcessor {
// Fall back to the token_count time if we somehow have no output time.
const endTime = lastOutputTime ?? isoToUnixSeconds(record.timestamp);
try {
span.log(outputPreset ? { metrics } : { output: llmOutput(output), metrics });
span.log(
outputPreset
? { metrics, ...(metadata !== undefined ? { metadata } : {}) }
: { output: llmOutput(output), metrics, ...(metadata !== undefined ? { metadata } : {}) },
);
span.end(endTime !== undefined ? { endTime } : undefined);
// This LLM call is a child of its turn; advance the turn's boundary so the
// next child (LLM or tool) starts where this one ended (its last output).
Expand All @@ -1501,7 +1550,8 @@ export class CodexEventProcessor implements EventProcessor {
const { span, turnId, output, lastOutputTime } = scope.openLlm;
const effectiveEnd = lastOutputTime ?? endTime;
try {
if (output.length > 0) span.log({ output: llmOutput(output) });
const metadata = { usage_unavailable_reason: "codex_transcript_missing_token_count" };
span.log(output.length > 0 ? { output: llmOutput(output), metadata } : { metadata });
span.end(effectiveEnd !== undefined ? { endTime: effectiveEnd } : undefined);
this.noteChildEnded(scope, turnId, effectiveEnd);
} catch (err) {
Expand Down
2 changes: 1 addition & 1 deletion plugins/trace-codex/src/agents/codex/test-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ export function reasoning(summary: string[] = []): TranscriptEntry {
* token_count event: closes the current llm span. `usage` maps the Codex token
* keys (input_tokens, output_tokens, total_tokens, ...) onto last_token_usage.
*/
export function tokenCount(usage: Record<string, number> = {}): TranscriptEntry {
export function tokenCount(usage: Record<string, unknown> = {}): TranscriptEntry {
return transcript({
type: "event_msg",
payload: { type: "token_count", info: { last_token_usage: usage } },
Expand Down
Loading