Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions web/src/edge-env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ export function buildEdgeEnv(env: Env, ctx?: ExecutionContext): EdgeEnv {
claudeModel: env.CLAUDE_MODEL || 'claude-sonnet-4-6',
opusModel: env.CLAUDE_OPUS_MODEL || 'claude-opus-4-6',
gptOssModel: env.GPT_OSS_MODEL || '@cf/openai/gpt-oss-120b',
workersAiModel: env.WORKERS_AI_MODEL,
groqApiKey: env.GROQ_API_KEY,
groqModel: env.GROQ_MODEL || 'llama-3.3-70b-versatile',
groqResponseModel: env.GROQ_RESPONSE_MODEL || 'llama-3.1-8b-instant',
Expand Down
1 change: 1 addition & 0 deletions web/src/kernel/dispatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export interface EdgeEnv {
claudeModel: string;
opusModel: string;
gptOssModel: string;
workersAiModel?: string;
groqApiKey: string;
groqModel: string;
groqResponseModel: string;
Expand Down
112 changes: 112 additions & 0 deletions web/src/kernel/executor-router.contract.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
import { z } from 'zod';

// ─── Provider Contract ────────────────────────────────────────
// Closed set of LLM provider identifiers accepted by the router contract.
// The schema performs the runtime check; the type alias below is inferred from
// the schema, so the compile-time union always matches the runtime enum.
export const LLMProviderNameSchema = z.enum(['anthropic', 'cloudflare', 'groq', 'cerebras']);
export type LLMProviderName = z.infer<typeof LLMProviderNameSchema>;

// ─── Cost Tier Contract ───────────────────────────────────────
// Encodes the cost hierarchy: premium > standard > free.
// Invariant: a fallback MUST resolve to an equal or lower tier than its source.
// The schema only validates that a value is one of the three tier names; the
// ordering invariant is checked separately by validateFallbackTier.
export const ExecutorTierSchema = z.enum(['premium', 'standard', 'free']);
// Compile-time union of the tier names, inferred from the schema above.
export type ExecutorTier = z.infer<typeof ExecutorTierSchema>;

// Numeric rank for each tier: a larger number means a more expensive tier.
// validateFallbackTier compares these ranks to decide whether a fallback would
// move to a costlier tier than its source.
export const TIER_ORDER: Record<ExecutorTier, number> = {
premium: 2,
standard: 1,
free: 0,
};

// ─── Executor Contract ────────────────────────────────────────
// Closed set of executor identifiers known to the contract. The same schema
// constrains the `fallback` field of ExecutorRouteShapeSchema, so a fallback can
// only name one of these executors.
export const LLMExecutorSchema = z.enum([
'claude',
'claude_opus',
'gpt_oss',
'workers_ai',
'groq',
'cerebras_mid',
'cerebras_reasoning',
]);
// Compile-time union of the executor identifiers, inferred from the schema above.
export type LLMExecutor = z.infer<typeof LLMExecutorSchema>;

// ─── Route Shape Contract ─────────────────────────────────────
// Validates the static, serialisable portion of an ExecutorRoute.
// The `model` function is excluded — it's behavioural, not structural.
// NOTE: this schema checks each field in isolation only. The cross-field and
// cross-route rules described in the comments below are NOT enforced here; they
// are checked by validatePlaceholderIsolation and validateSingleDefault.
export const ExecutorRouteShapeSchema = z.object({
provider: LLMProviderNameSchema,
tier: ExecutorTierSchema,
// isDefault=true: this is the nominal default executor for the dispatch
// layer. Exactly one route must carry this flag (see validateSingleDefault).
// Declared as literal `true`, so the field is either `true` or absent —
// an explicit `false` does not validate.
isDefault: z.literal(true).optional(),
// placeholder=true marks a forward-declared route whose executor is not
// yet wired. Consumers must check this flag before dispatching.
// Like isDefault, the only accepted value is `true`; omit the field otherwise.
placeholder: z.literal(true).optional(),
// placeholder routes MUST NOT define a fallback (can't route through an
// executor that isn't implemented).
// When present, the fallback must be one of the LLMExecutorSchema identifiers.
fallback: LLMExecutorSchema.optional(),
});
// Compile-time shape of a validated route, inferred from the schema above.
export type ExecutorRouteShape = z.infer<typeof ExecutorRouteShapeSchema>;

// ─── Domain Invariants ────────────────────────────────────────

// I1: Anthropic-provider routes must define a fallback (resilience requirement —
// Anthropic is the most expensive tier; a provider outage must not dead-end).
// Lookup table keyed by provider name. Only 'anthropic' is listed; a provider
// that is absent from the map reads as `undefined`, i.e. no fallback requirement
// is recorded for it here.
export const PROVIDER_REQUIRES_FALLBACK: Partial<Record<LLMProviderName, boolean>> = {
anthropic: true,
};

// I2: Free-tier routes are terminal — no fallback allowed.
// (A downgrade chain can only terminate at free; going from free to free is redundant.)
// Lookup table keyed by tier name. Only 'free' is listed; a tier that is absent
// from the map reads as `undefined`, i.e. it is not marked terminal here.
export const TIER_IS_TERMINAL: Partial<Record<ExecutorTier, boolean>> = {
free: true,
};

// I3: Placeholder routes must not have a fallback.
// (Routing through an unimplemented executor is undefined behaviour.)
//
// Returns normally when the route is not a placeholder, or when it is a
// placeholder without a fallback. Throws an Error naming the route's provider
// when a placeholder route also declares a fallback.
export function validatePlaceholderIsolation(shape: ExecutorRouteShape): void {
  // A route passes when it is either fully wired or has nothing to fall back to.
  const isIsolated = !shape.placeholder || shape.fallback === undefined;
  if (isIsolated) {
    return;
  }

  const message = `Placeholder route for provider '${shape.provider}' must not define a fallback.`;
  throw new Error(message);
}

// I4: Fallback cost must be ≤ source cost (never upgrade cost on failure).
//
// `source` is the tier of the route being validated, `fallbackTier` is the tier
// of the route it falls back to, and `executorName` is used only to label the
// error. Throws an Error when the fallback ranks strictly higher than the source
// in TIER_ORDER; equal or lower ranks pass silently.
export function validateFallbackTier(
  source: ExecutorTier,
  fallbackTier: ExecutorTier,
  executorName: string,
): void {
  const fallbackRank = TIER_ORDER[fallbackTier];
  const sourceRank = TIER_ORDER[source];

  // Written as a negated `>` so only a strictly higher fallback rank fails.
  const costIncreases = fallbackRank > sourceRank;
  if (!costIncreases) {
    return;
  }

  throw new Error(
    `Executor '${executorName}' fallback tier '${fallbackTier}' is more expensive than source tier '${source}'. Fallbacks must be cost-neutral or cheaper.`,
  );
}

// I5: Exactly one route must be marked isDefault.
//
// Scans every route in `routes` and collects the names whose `isDefault` flag is
// truthy. Returns normally when exactly one is found; otherwise throws an Error
// reporting how many were found and which ones ('none' when the list is empty).
export function validateSingleDefault(routes: Record<string, { isDefault?: boolean }>): void {
  const defaultNames: string[] = [];
  for (const [name, route] of Object.entries(routes)) {
    if (route.isDefault) {
      defaultNames.push(name);
    }
  }

  if (defaultNames.length === 1) {
    return;
  }

  const listing = defaultNames.join(', ') || 'none';
  throw new Error(
    `Expected exactly 1 default executor, found ${defaultNames.length}: ${listing}`,
  );
}

// I6: Fallback graph must be acyclic.
//
// Each route has at most one fallback, so every route starts a single chain.
// For each route name (in Object.keys order) the chain is followed hop by hop,
// remembering every name seen on that walk. Reaching a name twice means the
// chain loops. A fallback that names a route missing from `routes` simply ends
// the chain.
//
// Returns a description of the first cycle found, or null when no chain loops.
export function detectFallbackCycle(
  routes: Record<string, { fallback?: string }>,
): string | null {
  for (const origin of Object.keys(routes)) {
    const seen = new Set<string>();
    for (
      let hop: string | undefined = origin;
      hop !== undefined;
      hop = routes[hop]?.fallback
    ) {
      if (seen.has(hop)) {
        return `Cycle detected in fallback chain starting at '${origin}': revisited '${hop}'`;
      }
      seen.add(hop);
    }
  }
  return null;
}
31 changes: 26 additions & 5 deletions web/src/kernel/executor-router.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { EdgeEnv } from './dispatch.js';
import type { Executor } from './types.js';
import type { ExecutorTier } from './executor-router.contract.js';

// ─── Provider Names ──────────────────────────────────────────
// 'anthropic' and 'cloudflare' are wired in @stackbilt/llm-providers v1.6.0.
Expand All @@ -23,6 +24,12 @@ export type LLMExecutor = Extract<

export interface ExecutorRoute {
provider: LLMProviderName;
// Cost classification: premium > standard > free.
// Fallback invariant: fallback.tier ≤ this.tier (never upgrade cost on failure).
tier: ExecutorTier;
// placeholder=true: executor is forward-declared but not yet wired.
// Consumers must skip dispatch for placeholder routes.
placeholder?: true;
// Resolves the concrete model string at dispatch time — called with the live
// EdgeEnv so per-deployment env-var overrides and AI Gateway config are respected.
model: (env: EdgeEnv) => string;
Expand All @@ -47,42 +54,56 @@ export interface ExecutorRoute {
export const EXECUTOR_ROUTES: Record<LLMExecutor, ExecutorRoute> = {
claude: {
provider: 'anthropic',
tier: 'premium',
model: (env) => env.claudeModel,
fallback: 'gpt_oss',
},
claude_opus: {
provider: 'anthropic',
tier: 'premium',
model: (env) => env.opusModel,
// Falls back directly to gpt_oss — mirrors executeWithAnthropicFailover behavior.
// A two-hop chain (opus → claude → gpt_oss) is a possible future refinement.
fallback: 'gpt_oss',
},
gpt_oss: {
provider: 'cloudflare',
tier: 'free',
model: (env) => env.gptOssModel,
// Terminal fallback — no further fallback defined.
// Terminal — free tier, no further fallback.
},
workers_ai: {
provider: 'cloudflare',
// Hardcoded in executeWorkersAi today; no env override.
model: () => '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
tier: 'free',
isDefault: true,
// env.workersAiModel overrides the model at deploy time.
// Default: llama-3.3-70b-fp8-fast — COST_EFFECTIVE + TOOL_CALLING in the CF model catalog.
model: (env) => env.workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
// Terminal — free tier, no further fallback.
},
groq: {
provider: 'groq',
tier: 'standard',
// groqResponseModel = 8B (llama-3.1-8b-instant) — fast/cheap for greetings.
// Intentionally NOT groqModel (70B). See executors/groq.ts:12.
model: (env) => env.groqResponseModel,
// Falls back to CF Workers AI (free tier) on Groq API failure.
fallback: 'workers_ai',
},
cerebras_mid: {
provider: 'cerebras',
tier: 'standard',
placeholder: true,
// TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasModel
// when executors/cerebras.ts lands. Model name below is a placeholder.
provider: 'cerebras',
model: () => 'llama3.1-8b',
},
cerebras_reasoning: {
provider: 'cerebras',
tier: 'standard',
placeholder: true,
// TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasReasoningModel
// when executors/cerebras.ts lands. Model name below is a placeholder.
provider: 'cerebras',
model: () => 'qwen-3-32b',
},
};
Expand Down
3 changes: 2 additions & 1 deletion web/src/kernel/executors/workers-ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@ export async function executeWorkersAi(
): Promise<{ text: string; cost: number }> {
if (!env.ai) throw new Error('Workers AI binding not available');
const factory = buildLLMProviderFactory(env);
const model = env.workersAiModel ?? '@cf/meta/llama-3.3-70b-instruct-fp8-fast';
const result = await factory.generateResponse({
messages: [{ role: 'user', content: intent.raw }],
model: '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
model,
systemPrompt: buildGroqSystemPrompt(),
});
return { text: result.message || '(no response)', cost: result.usage.cost };
Expand Down
3 changes: 3 additions & 0 deletions web/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ export interface Env {
// GPT-OSS (standard executor — tool-capable, cheap)
GPT_OSS_MODEL: string;

// Workers AI (primary default executor — set to override the built-in default model)
WORKERS_AI_MODEL?: string;

// Groq (classification + greeting executor + composite orchestration)
GROQ_API_KEY: string;
GROQ_MODEL: string;
Expand Down
12 changes: 12 additions & 0 deletions web/tests/edge-env.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,16 @@ describe('buildEdgeEnv', () => {
expect(edge.groqResponseModel.length).toBeGreaterThan(0);
expect(edge.groqGptOssModel.length).toBeGreaterThan(0);
});

it('WORKERS_AI_MODEL is passed through to workersAiModel', () => {
const edge = buildEdgeEnv(makeEnv({ WORKERS_AI_MODEL: '@cf/meta/custom-model' }));
expect(edge.workersAiModel).toBe('@cf/meta/custom-model');
});

it('workersAiModel is undefined when WORKERS_AI_MODEL is not set', () => {
const env = makeEnv();
delete (env as any).WORKERS_AI_MODEL;
const edge = buildEdgeEnv(env);
expect(edge.workersAiModel).toBeUndefined();
});
});
Loading
Loading