From 2250c636360be128fd92a7f4e4d03f9c018b9b92 Mon Sep 17 00:00:00 2001 From: Aegis Date: Wed, 24 Jun 2026 13:28:07 -0500 Subject: [PATCH 1/2] feat(workers-ai): promote CF Workers AI to primary executor with full env-var config (closes #72) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add executor-router.contract.ts: Zod v4 contract with ExecutorTier, fallback DAG validation, single-default invariant, and placeholder isolation guards (OTDD semantic contract) - Add executor-router-semantic.test.ts: 19 tests across S1–S10 covering schema parse, acyclic DAG, cost-downgrade invariant, anthropic resilience, free-tier terminal, and env-override behavior - Extend ExecutorRoute with tier/isDefault/placeholder/fallback fields; mark workers_ai as isDefault:true with env-driven model override (OTDD refactor) - Wire WORKERS_AI_MODEL env var end-to-end: Env interface → buildEdgeEnv → EdgeEnv.workersAiModel → executeWorkersAi and executor-router model resolver - Add WORKERS_AI_MODEL to wrangler.toml.example [vars] section with default model documented Co-Authored-By: Claude Sonnet 4.6 --- web/src/edge-env.ts | 1 + web/src/kernel/dispatch.ts | 1 + web/src/kernel/executor-router.contract.ts | 112 ++++++++++++++ web/src/kernel/executor-router.ts | 31 +++- web/src/kernel/executors/workers-ai.ts | 3 +- web/src/types.ts | 3 + web/tests/executor-router-semantic.test.ts | 161 +++++++++++++++++++++ web/wrangler.toml.example | 8 + 8 files changed, 314 insertions(+), 6 deletions(-) create mode 100644 web/src/kernel/executor-router.contract.ts create mode 100644 web/tests/executor-router-semantic.test.ts diff --git a/web/src/edge-env.ts b/web/src/edge-env.ts index 3f8f97e..25a6dc4 100644 --- a/web/src/edge-env.ts +++ b/web/src/edge-env.ts @@ -21,6 +21,7 @@ export function buildEdgeEnv(env: Env, ctx?: ExecutionContext): EdgeEnv { claudeModel: env.CLAUDE_MODEL || 'claude-sonnet-4-6', opusModel: env.CLAUDE_OPUS_MODEL || 'claude-opus-4-6', gptOssModel: env.GPT_OSS_MODEL || '@cf/openai/gpt-oss-120b', + workersAiModel: env.WORKERS_AI_MODEL || undefined, groqApiKey: env.GROQ_API_KEY, groqModel: env.GROQ_MODEL || 'llama-3.3-70b-versatile', groqResponseModel: env.GROQ_RESPONSE_MODEL || 'llama-3.1-8b-instant', diff --git a/web/src/kernel/dispatch.ts b/web/src/kernel/dispatch.ts index e203656..04787e8 100755 --- a/web/src/kernel/dispatch.ts +++ b/web/src/kernel/dispatch.ts @@ -33,6 +33,7 @@ export interface EdgeEnv { claudeModel: string; opusModel: string; gptOssModel: string; + workersAiModel?: string; groqApiKey: string; groqModel: string; groqResponseModel: string; diff --git a/web/src/kernel/executor-router.contract.ts b/web/src/kernel/executor-router.contract.ts new file mode 100644 index 0000000..29120e4 --- /dev/null +++ b/web/src/kernel/executor-router.contract.ts @@ -0,0 +1,112 @@ +import { z } from 'zod'; + +// ─── Provider Contract ──────────────────────────────────────── +export const LLMProviderNameSchema = z.enum(['anthropic', 'cloudflare', 'groq', 'cerebras']); +export type LLMProviderName = z.infer; + +// ─── Cost Tier Contract ─────────────────────────────────────── +// Encodes the cost hierarchy: premium > standard > free. +// Invariant: a fallback MUST resolve to an equal or lower tier than its source. +export const ExecutorTierSchema = z.enum(['premium', 'standard', 'free']); +export type ExecutorTier = z.infer; + +export const TIER_ORDER: Record = { + premium: 2, + standard: 1, + free: 0, +}; + +// ─── Executor Contract ──────────────────────────────────────── +export const LLMExecutorSchema = z.enum([ + 'claude', + 'claude_opus', + 'gpt_oss', + 'workers_ai', + 'groq', + 'cerebras_mid', + 'cerebras_reasoning', +]); +export type LLMExecutor = z.infer; + +// ─── Route Shape Contract ───────────────────────────────────── +// Validates the static, serialisable portion of an ExecutorRoute. +// The `model` function is excluded — it's behavioural, not structural. +export const ExecutorRouteShapeSchema = z.object({ + provider: LLMProviderNameSchema, + tier: ExecutorTierSchema, + // isDefault=true: this is the nominal default executor for the dispatch + // layer. Exactly one route may carry this flag. + isDefault: z.literal(true).optional(), + // placeholder=true marks a forward-declared route whose executor is not + // yet wired. Consumers must check this flag before dispatching. + placeholder: z.literal(true).optional(), + // placeholder routes MUST NOT define a fallback (can't route through an + // executor that isn't implemented). + fallback: LLMExecutorSchema.optional(), +}); +export type ExecutorRouteShape = z.infer; + +// ─── Domain Invariants ──────────────────────────────────────── + +// I1: Anthropic-provider routes must define a fallback (resilience requirement — +// Anthropic is the most expensive tier; a provider outage must not dead-end). +export const PROVIDER_REQUIRES_FALLBACK: Partial> = { + anthropic: true, +}; + +// I2: Free-tier routes are terminal — no fallback allowed. +// (A downgrade chain can only terminate at free; going from free to free is redundant.) +export const TIER_IS_TERMINAL: Partial> = { + free: true, +}; + +// I3: Placeholder routes must not have a fallback. +// (Routing through an unimplemented executor is undefined behaviour.) +export function validatePlaceholderIsolation(shape: ExecutorRouteShape): void { + if (shape.placeholder && shape.fallback !== undefined) { + throw new Error( + `Placeholder route for provider '${shape.provider}' must not define a fallback.`, + ); + } +} + +// I4: Fallback cost must be ≤ source cost (never upgrade cost on failure). +export function validateFallbackTier( + source: ExecutorTier, + fallbackTier: ExecutorTier, + executorName: string, +): void { + if (TIER_ORDER[fallbackTier] > TIER_ORDER[source]) { + throw new Error( + `Executor '${executorName}' fallback tier '${fallbackTier}' is more expensive than source tier '${source}'. Fallbacks must be cost-neutral or cheaper.`, + ); + } +} + +// I6: Exactly one route must be marked isDefault. +export function validateSingleDefault(routes: Record): void { + const defaults = Object.entries(routes).filter(([, r]) => r.isDefault); + if (defaults.length !== 1) { + throw new Error( + `Expected exactly 1 default executor, found ${defaults.length}: ${defaults.map(([k]) => k).join(', ') || 'none'}`, + ); + } +} + +// I5: Fallback DAG must be acyclic (depth-limited DFS). +export function detectFallbackCycle( + routes: Record, +): string | null { + for (const start of Object.keys(routes)) { + const visited = new Set(); + let cursor: string | undefined = start; + while (cursor !== undefined) { + if (visited.has(cursor)) { + return `Cycle detected in fallback chain starting at '${start}': revisited '${cursor}'`; + } + visited.add(cursor); + cursor = routes[cursor]?.fallback; + } + } + return null; +} diff --git a/web/src/kernel/executor-router.ts b/web/src/kernel/executor-router.ts index fb12c7b..b3d851d 100644 --- a/web/src/kernel/executor-router.ts +++ b/web/src/kernel/executor-router.ts @@ -1,5 +1,6 @@ import type { EdgeEnv } from './dispatch.js'; import type { Executor } from './types.js'; +import type { ExecutorTier } from './executor-router.contract.js'; // ─── Provider Names ────────────────────────────────────────── // 'anthropic' and 'cloudflare' are wired in @stackbilt/llm-providers v1.6.0. @@ -23,6 +24,12 @@ export type LLMExecutor = Extract< export interface ExecutorRoute { provider: LLMProviderName; + // Cost classification: premium > standard > free. + // Fallback invariant: fallback.tier ≤ this.tier (never upgrade cost on failure). + tier: ExecutorTier; + // placeholder=true: executor is forward-declared but not yet wired. + // Consumers must skip dispatch for placeholder routes. + placeholder?: true; // Resolves the concrete model string at dispatch time — called with the live // EdgeEnv so per-deployment env-var overrides and AI Gateway config are respected. model: (env: EdgeEnv) => string; @@ -47,11 +54,13 @@ export interface ExecutorRoute { export const EXECUTOR_ROUTES: Record = { claude: { provider: 'anthropic', + tier: 'premium', model: (env) => env.claudeModel, fallback: 'gpt_oss', }, claude_opus: { provider: 'anthropic', + tier: 'premium', model: (env) => env.opusModel, // Falls back directly to gpt_oss — mirrors executeWithAnthropicFailover behavior. // A two-hop chain (opus → claude → gpt_oss) is a possible future refinement. @@ -59,30 +68,42 @@ export const EXECUTOR_ROUTES: Record = { }, gpt_oss: { provider: 'cloudflare', + tier: 'free', model: (env) => env.gptOssModel, - // Terminal fallback — no further fallback defined. + // Terminal — free tier, no further fallback. }, workers_ai: { provider: 'cloudflare', - // Hardcoded in executeWorkersAi today; no env override. - model: () => '@cf/meta/llama-3.3-70b-instruct-fp8-fast', + tier: 'free', + isDefault: true, + // env.workersAiModel overrides the model at deploy time. + // Default: llama-3.3-70b-fp8-fast — COST_EFFECTIVE + TOOL_CALLING in the CF model catalog. + model: (env) => (env as any).workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast', + // Terminal — free tier, no further fallback. }, groq: { provider: 'groq', + tier: 'standard', // groqResponseModel = 8B (llama-3.1-8b-instant) — fast/cheap for greetings. // Intentionally NOT groqModel (70B). See executors/groq.ts:12. model: (env) => env.groqResponseModel, + // Falls back to CF Workers AI (free tier) on Groq API failure. + fallback: 'workers_ai', }, cerebras_mid: { + provider: 'cerebras', + tier: 'standard', + placeholder: true, // TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasModel // when executors/cerebras.ts lands. Model name below is a placeholder. - provider: 'cerebras', model: () => 'llama3.1-8b', }, cerebras_reasoning: { + provider: 'cerebras', + tier: 'standard', + placeholder: true, // TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasReasoningModel // when executors/cerebras.ts lands. Model name below is a placeholder. - provider: 'cerebras', model: () => 'qwen-3-32b', }, }; diff --git a/web/src/kernel/executors/workers-ai.ts b/web/src/kernel/executors/workers-ai.ts index 59db97c..2707d75 100755 --- a/web/src/kernel/executors/workers-ai.ts +++ b/web/src/kernel/executors/workers-ai.ts @@ -16,9 +16,10 @@ export async function executeWorkersAi( ): Promise<{ text: string; cost: number }> { if (!env.ai) throw new Error('Workers AI binding not available'); const factory = buildLLMProviderFactory(env); + const model = env.workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast'; const result = await factory.generateResponse({ messages: [{ role: 'user', content: intent.raw }], - model: '@cf/meta/llama-3.3-70b-instruct-fp8-fast', + model, systemPrompt: buildGroqSystemPrompt(), }); return { text: result.message || '(no response)', cost: result.usage.cost }; diff --git a/web/src/types.ts b/web/src/types.ts index e22c0a2..54eb6de 100755 --- a/web/src/types.ts +++ b/web/src/types.ts @@ -21,6 +21,9 @@ export interface Env { // GPT-OSS (standard executor — tool-capable, cheap) GPT_OSS_MODEL: string; + // Workers AI (primary default executor — set to override the built-in default model) + WORKERS_AI_MODEL?: string; + // Groq (classification + greeting executor + composite orchestration) GROQ_API_KEY: string; GROQ_MODEL: string; diff --git a/web/tests/executor-router-semantic.test.ts b/web/tests/executor-router-semantic.test.ts new file mode 100644 index 0000000..798f1dd --- /dev/null +++ b/web/tests/executor-router-semantic.test.ts @@ -0,0 +1,161 @@ +import { describe, it, expect } from 'vitest'; +import { EXECUTOR_ROUTES, getExecutorRoute } from '../src/kernel/executor-router.js'; +import { + ExecutorRouteShapeSchema, + LLMExecutorSchema, + TIER_ORDER, + PROVIDER_REQUIRES_FALLBACK, + TIER_IS_TERMINAL, + validatePlaceholderIsolation, + validateFallbackTier, + validateSingleDefault, + detectFallbackCycle, +} from '../src/kernel/executor-router.contract.js'; + +// ─── Semantic Contract Suite ────────────────────────────────── + +describe('executor-router semantic contract', () => { + // ── S1: Structural parse ────────────────────────────────── + it('every route satisfies ExecutorRouteShapeSchema', () => { + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + const result = ExecutorRouteShapeSchema.safeParse(route); + expect(result.success, `Route '${name}' failed schema: ${JSON.stringify((result as any).error?.issues)}`).toBe(true); + } + }); + + // ── S2: Acyclic DAG ─────────────────────────────────────── + it('fallback chains contain no cycles', () => { + const cycle = detectFallbackCycle(EXECUTOR_ROUTES); + expect(cycle).toBeNull(); + }); + + // ── S3: Fallback cost downgrade ─────────────────────────── + it('fallback tier is always ≤ source tier (never upgrades cost)', () => { + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + if (!route.fallback) continue; + const fallbackRoute = EXECUTOR_ROUTES[route.fallback as keyof typeof EXECUTOR_ROUTES]; + expect(fallbackRoute, `Fallback '${route.fallback}' for '${name}' is not in EXECUTOR_ROUTES`).toBeDefined(); + expect( + TIER_ORDER[fallbackRoute.tier], + `Executor '${name}' (${route.tier}) fallback '${route.fallback}' (${fallbackRoute.tier}) is a cost upgrade`, + ).toBeLessThanOrEqual(TIER_ORDER[route.tier]); + } + }); + + // ── S4: Anthropic resilience ────────────────────────────── + it('all anthropic-provider routes define a fallback', () => { + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + if (!PROVIDER_REQUIRES_FALLBACK[route.provider]) continue; + expect( + route.fallback, + `Anthropic-provider executor '${name}' must define a fallback for provider outage resilience`, + ).toBeDefined(); + } + }); + + // ── S5: Placeholder isolation ───────────────────────────── + it('placeholder routes do not define a fallback', () => { + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + if (!route.placeholder) continue; + expect( + route.fallback, + `Placeholder route '${name}' must not define a fallback`, + ).toBeUndefined(); + } + }); + + // ── S6: Free-tier routes are terminal ───────────────────── + it('free-tier routes define no fallback', () => { + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + if (!TIER_IS_TERMINAL[route.tier]) continue; + expect( + route.fallback, + `Free-tier executor '${name}' is terminal and must not define a fallback`, + ).toBeUndefined(); + } + }); + + // ── S7: Fallback targets are valid LLM executors ────────── + it('all fallback targets are registered LLM executors', () => { + const validExecutors = new Set(LLMExecutorSchema.options); + for (const [name, route] of Object.entries(EXECUTOR_ROUTES)) { + if (!route.fallback) continue; + expect( + validExecutors.has(route.fallback), + `Executor '${name}' fallback '${route.fallback}' is not a valid LLM executor`, + ).toBe(true); + // Must also be in EXECUTOR_ROUTES (not just the type union) + expect( + getExecutorRoute(route.fallback as any), + `Fallback '${route.fallback}' for '${name}' has no route entry`, + ).not.toBeNull(); + } + }); + + // ── S8: Domain helpers enforce invariants ───────────────── + it('validatePlaceholderIsolation throws when placeholder has fallback', () => { + expect(() => + validatePlaceholderIsolation({ provider: 'cerebras', tier: 'standard', placeholder: true, fallback: 'workers_ai' }), + ).toThrow(); + }); + + it('validatePlaceholderIsolation is silent when no fallback', () => { + expect(() => + validatePlaceholderIsolation({ provider: 'cerebras', tier: 'standard', placeholder: true }), + ).not.toThrow(); + }); + + it('validateFallbackTier throws on cost upgrade', () => { + expect(() => validateFallbackTier('free', 'premium', 'test-executor')).toThrow(); + }); + + it('validateFallbackTier is silent on cost-neutral or downgrade', () => { + expect(() => validateFallbackTier('premium', 'standard', 'claude')).not.toThrow(); + expect(() => validateFallbackTier('premium', 'free', 'claude')).not.toThrow(); + expect(() => validateFallbackTier('standard', 'standard', 'groq')).not.toThrow(); + }); + + // ── S9: Single default ──────────────────────────────────── + it('exactly one executor is marked isDefault', () => { + const defaults = Object.entries(EXECUTOR_ROUTES).filter(([, r]) => r.isDefault); + expect(defaults.length).toBe(1); + expect(defaults[0][0]).toBe('workers_ai'); + }); + + it('validateSingleDefault passes on the live route table', () => { + expect(() => validateSingleDefault(EXECUTOR_ROUTES)).not.toThrow(); + }); + + it('validateSingleDefault throws when no default is set', () => { + expect(() => validateSingleDefault({ a: {}, b: {} })).toThrow(/Expected exactly 1/); + }); + + it('validateSingleDefault throws when multiple defaults are set', () => { + expect(() => validateSingleDefault({ a: { isDefault: true }, b: { isDefault: true } })).toThrow(/Expected exactly 1/); + }); + + // ── S10: Default executor model is env-configurable ─────── + it('workers_ai model reads from env.workersAiModel override', () => { + const overrideModel = '@cf/moonshotai/kimi-k2.6'; + const result = EXECUTOR_ROUTES.workers_ai.model({ workersAiModel: overrideModel } as any); + expect(result).toBe(overrideModel); + }); + + it('workers_ai model falls back to llama-3.3-70b when env has no override', () => { + const result = EXECUTOR_ROUTES.workers_ai.model({} as any); + expect(result).toBe('@cf/meta/llama-3.3-70b-instruct-fp8-fast'); + }); + + it('detectFallbackCycle returns null for the live route table', () => { + expect(detectFallbackCycle(EXECUTOR_ROUTES)).toBeNull(); + }); + + it('detectFallbackCycle detects an introduced cycle', () => { + const cyclic = { + a: { fallback: 'b' }, + b: { fallback: 'c' }, + c: { fallback: 'a' }, + }; + expect(detectFallbackCycle(cyclic)).not.toBeNull(); + }); +}); diff --git a/web/wrangler.toml.example b/web/wrangler.toml.example index 1da6dfb..169e414 100644 --- a/web/wrangler.toml.example +++ b/web/wrangler.toml.example @@ -48,6 +48,14 @@ new_sqlite_classes = ["ChatSession"] tag = "v2-aegis-voice-adapter" new_sqlite_classes = ["AegisVoiceAdapter"] +# ─── Model Configuration ───────────────────────────────────── +# Override default models at deploy time via vars. +# Secrets (ANTHROPIC_API_KEY, GROQ_API_KEY, etc.) are set via wrangler secret put. +[vars] +# Primary CF Workers AI model — used by the workers_ai executor. +# Must be an active model in the @stackbilt/llm-providers cloudflare catalog. +WORKERS_AI_MODEL = "@cf/meta/llama-3.3-70b-instruct-fp8-fast" + # Triggers — hourly cron for scheduled tasks [triggers] crons = ["0 * * * *"] From 8510e085a76d155c8236fe820af3981e1ba74def Mon Sep 17 00:00:00 2001 From: Aegis Date: Wed, 24 Jun 2026 13:35:13 -0500 Subject: [PATCH 2/2] =?UTF-8?q?fix(workers-ai):=20address=20review=20comme?= =?UTF-8?q?nts=20=E2=80=94=20remove=20stale=20as-any=20cast,=20align=20||?= =?UTF-8?q?=20vs=20=3F=3F,=20fix=20I5/I6=20ordering,=20add=20buildEdgeEnv?= =?UTF-8?q?=20integration=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- web/src/edge-env.ts | 2 +- web/src/kernel/executor-router.contract.ts | 4 ++-- web/src/kernel/executor-router.ts | 2 +- web/tests/edge-env.test.ts | 12 ++++++++++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/web/src/edge-env.ts b/web/src/edge-env.ts index 25a6dc4..81ce8ee 100644 --- a/web/src/edge-env.ts +++ b/web/src/edge-env.ts @@ -21,7 +21,7 @@ export function buildEdgeEnv(env: Env, ctx?: ExecutionContext): EdgeEnv { claudeModel: env.CLAUDE_MODEL || 'claude-sonnet-4-6', opusModel: env.CLAUDE_OPUS_MODEL || 'claude-opus-4-6', gptOssModel: env.GPT_OSS_MODEL || '@cf/openai/gpt-oss-120b', - workersAiModel: env.WORKERS_AI_MODEL || undefined, + workersAiModel: env.WORKERS_AI_MODEL, groqApiKey: env.GROQ_API_KEY, groqModel: env.GROQ_MODEL || 'llama-3.3-70b-versatile', groqResponseModel: env.GROQ_RESPONSE_MODEL || 'llama-3.1-8b-instant', diff --git a/web/src/kernel/executor-router.contract.ts b/web/src/kernel/executor-router.contract.ts index 29120e4..f4c68f9 100644 --- a/web/src/kernel/executor-router.contract.ts +++ b/web/src/kernel/executor-router.contract.ts @@ -83,7 +83,7 @@ export function validateFallbackTier( } } -// I6: Exactly one route must be marked isDefault. +// I5: Exactly one route must be marked isDefault. export function validateSingleDefault(routes: Record): void { const defaults = Object.entries(routes).filter(([, r]) => r.isDefault); if (defaults.length !== 1) { @@ -93,7 +93,7 @@ export function validateSingleDefault(routes: Record, ): string | null { diff --git a/web/src/kernel/executor-router.ts b/web/src/kernel/executor-router.ts index b3d851d..95e3314 100644 --- a/web/src/kernel/executor-router.ts +++ b/web/src/kernel/executor-router.ts @@ -78,7 +78,7 @@ export const EXECUTOR_ROUTES: Record = { isDefault: true, // env.workersAiModel overrides the model at deploy time. // Default: llama-3.3-70b-fp8-fast — COST_EFFECTIVE + TOOL_CALLING in the CF model catalog. - model: (env) => (env as any).workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast', + model: (env) => env.workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast', // Terminal — free tier, no further fallback. }, groq: { diff --git a/web/tests/edge-env.test.ts b/web/tests/edge-env.test.ts index 804a742..b024a15 100755 --- a/web/tests/edge-env.test.ts +++ b/web/tests/edge-env.test.ts @@ -220,4 +220,16 @@ describe('buildEdgeEnv', () => { expect(edge.groqResponseModel.length).toBeGreaterThan(0); expect(edge.groqGptOssModel.length).toBeGreaterThan(0); }); + + it('WORKERS_AI_MODEL is passed through to workersAiModel', () => { + const edge = buildEdgeEnv(makeEnv({ WORKERS_AI_MODEL: '@cf/meta/custom-model' })); + expect(edge.workersAiModel).toBe('@cf/meta/custom-model'); + }); + + it('workersAiModel is undefined when WORKERS_AI_MODEL is not set', () => { + const env = makeEnv(); + delete (env as any).WORKERS_AI_MODEL; + const edge = buildEdgeEnv(env); + expect(edge.workersAiModel).toBeUndefined(); + }); });