Skip to content

Commit c44996a

Browse files
committed
feat(costs): add default OpenAI price table (4o-mini, 4o/4.1); fallback estimation only when usage tokens missing; docs updated
1 parent 42089d1 commit c44996a

File tree

5 files changed

+242
-70
lines changed

5 files changed

+242
-70
lines changed

Dockerfile

Lines changed: 192 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,197 @@ USER appuser
3535

3636
EXPOSE 8000
3737
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
38-
CMD python - <<'PY' || exit 1
39-
import json, sys, urllib.request
40-
try:
41-
with urllib.request.urlopen('http://127.0.0.1:8000/openapi.json', timeout=3) as r:
42-
sys.exit(0 if r.status==200 else 1)
43-
except Exception:
44-
sys.exit(1)
45-
PY
38+
CMD python -c "import sys,urllib.request;\
39+
40+
41+
import contextlib;\
42+
43+
44+
45+
46+
47+
48+
49+
50+
51+
52+
53+
54+
55+
56+
57+
58+
59+
60+
61+
62+
63+
64+
65+
66+
67+
68+
69+
70+
71+
72+
73+
74+
75+
76+
77+
78+
79+
80+
81+
82+
83+
84+
85+
86+
87+
88+
89+
90+
91+
92+
93+
94+
95+
96+
97+
98+
99+
100+
101+
102+
103+
104+
105+
106+
107+
108+
109+
110+
111+
112+
113+
114+
115+
116+
117+
118+
119+
120+
121+
122+
123+
124+
125+
126+
127+
128+
129+
130+
131+
132+
133+
134+
135+
136+
137+
138+
139+
140+
141+
142+
143+
144+
145+
146+
147+
148+
149+
150+
151+
152+
153+
154+
155+
156+
157+
158+
159+
160+
161+
162+
163+
164+
165+
166+
167+
168+
169+
170+
171+
172+
173+
174+
175+
176+
177+
178+
179+
180+
181+
182+
183+
184+
185+
url='http://127.0.0.1:8000/healthz';\
186+
187+
188+
189+
190+
191+
192+
193+
194+
195+
196+
197+
198+
199+
200+
201+
202+
203+
204+
205+
206+
207+
208+
209+
210+
211+
212+
213+
214+
215+
216+
217+
218+
219+
220+
221+
222+
223+
224+
225+
226+
227+
228+
229+
with contextlib.ExitStack() as s: r=s.enter_context(urllib.request.urlopen(url, timeout=3)); sys.exit(0 if r.status==200 else 1)"
46230

47231
CMD ["uvicorn", "micro_agent.server:app", "--host", "0.0.0.0", "--port", "8000"]
48-

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,16 @@ Code references (discoverability)
138138
Notes: For this small dataset, demos neither help nor hurt. For larger flows, compile demos from your real tasks.
139139
140140
### Cost & Tokens
141-
- The agent aggregates token counts and cost (when available). If provider usage isn’t exposed, it estimates tokens from prompts/outputs and computes cost using env prices.
141+
- The agent aggregates token counts and cost. If provider usage isn’t exposed, it estimates tokens from prompts/outputs and computes cost using configured or built‑in default prices.
142142
- Set env prices for OpenAI models (USD per 1K tokens):
143143
```bash
144144
export OPENAI_INPUT_PRICE_PER_1K=0.005 # example
145145
export OPENAI_OUTPUT_PRICE_PER_1K=0.015 # example
146146
```
147-
The eval script will include `avg_cost_usd`. Defaults to 0 if prices aren’t set.
147+
Defaults: for OpenAI models, built‑in prices are used when the env vars aren’t set (best‑effort; prices may drift):
148+
- gpt-4o-mini: $0.00015 in / $0.0006 out per 1K tokens
149+
- gpt-4o (and 4.1): $0.005 in / $0.015 out per 1K tokens
150+
You can override via the env vars above. Evals print `avg_cost_usd`.
148151
149152
## Optimize (Teleprompting)
150153
- Compile optimized few-shot demos for the OpenAI `PlanWithTools` planner and save to JSON:

micro_agent/agent.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -196,20 +196,20 @@ def _accumulate_usage(input_text: str = "", output_text: str = ""):
196196
# Accumulate usage from DSPy prediction (OpenAI path)
197197
try:
198198
usage = pred.get_lm_usage() or {}
199+
in_tok = int(usage.get('input_tokens', 0) or 0)
200+
out_tok = int(usage.get('output_tokens', 0) or 0)
199201
total_cost += float(usage.get('cost', 0.0) or 0.0)
200-
total_in_tokens += int(usage.get('input_tokens', 0) or 0)
201-
total_out_tokens += int(usage.get('output_tokens', 0) or 0)
202-
except Exception:
203-
pass
204-
# Heuristic fallback: estimate using a reconstructed prompt & result
205-
try:
206-
approx_prompt = self._decision_prompt(
207-
question=question,
208-
state_json=json.dumps(state, ensure_ascii=False),
209-
tools_json=json.dumps(self._tool_list, ensure_ascii=False),
210-
)
211-
approx_out = getattr(pred, 'final', None) or (str(getattr(pred, 'tool_calls', '')))
212-
_accumulate_usage(approx_prompt, approx_out)
202+
total_in_tokens += in_tok
203+
total_out_tokens += out_tok
204+
# Only if usage tokens are both zero, use heuristic fallback
205+
if in_tok == 0 and out_tok == 0:
206+
approx_prompt = self._decision_prompt(
207+
question=question,
208+
state_json=json.dumps(state, ensure_ascii=False),
209+
tools_json=json.dumps(self._tool_list, ensure_ascii=False),
210+
)
211+
approx_out = getattr(pred, 'final', None) or (str(getattr(pred, 'tool_calls', '')))
212+
_accumulate_usage(approx_prompt, approx_out)
213213
except Exception:
214214
pass
215215

micro_agent/costs.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,40 @@ def estimate_tokens(text: str, model: str = "gpt-4o-mini") -> int:
2626
except Exception:
2727
return max(1, len(text) // 4)
2828

29+
# Prices per 1K tokens (USD) — see https://platform.openai.com/docs/pricing
# These defaults are best-effort and may drift. Override via env vars to be exact.
_OPENAI_DEFAULTS = {
    "gpt-4o-mini": (0.00015, 0.0006),  # $0.15 / $0.60 per 1M
    "gpt-4o": (0.005, 0.015),          # $5 / $15 per 1M
    "gpt-4.1": (0.005, 0.015),         # typical parity with 4o
}


def _normalize(model: str) -> str:
    """Lowercase *model* and strip a provider prefix such as ``openai/``."""
    if not model:
        return ""
    m = model.lower()
    # Strip provider prefix like 'openai/' if present.
    if "/" in m:
        m = m.split("/", 1)[1]
    return m


def get_prices_per_1k(model: str, provider: str) -> Tuple[float, float]:
    """Return ``(input_price, output_price)`` in USD per 1K tokens.

    Resolution for OpenAI models:
      1. Built-in defaults matched by the LONGEST model-name prefix, so a
         dated variant like ``gpt-4o-mini-2024-07-18`` resolves to
         ``gpt-4o-mini`` rather than accidentally matching ``gpt-4o``
         (the previous dict-order match only worked by insertion order).
      2. ``OPENAI_INPUT_PRICE_PER_1K`` / ``OPENAI_OUTPUT_PRICE_PER_1K``
         env vars override the corresponding side independently; a
         malformed value is ignored and the default kept.
    Non-OpenAI providers and unknown models return ``(0.0, 0.0)``.
    """
    if provider != "openai":
        return 0.0, 0.0

    # Longest prefix first so more specific names win deterministically,
    # regardless of _OPENAI_DEFAULTS insertion order.
    key = _normalize(model)
    base_in, base_out = 0.0, 0.0
    for base in sorted(_OPENAI_DEFAULTS, key=len, reverse=True):
        if key.startswith(base):
            base_in, base_out = _OPENAI_DEFAULTS[base]
            break

    def _override(env_name: str, default: float) -> float:
        # Per-variable override: setting only one env var still takes effect.
        raw = os.getenv(env_name)
        if raw is None:
            return default
        try:
            return float(raw or 0)
        except ValueError:
            return default

    return (
        _override("OPENAI_INPUT_PRICE_PER_1K", base_in),
        _override("OPENAI_OUTPUT_PRICE_PER_1K", base_out),
    )


def estimate_cost_usd(input_tokens: int, output_tokens: int, model: str, provider: str) -> float:
    """Estimate USD cost from token counts using per-1K prices for *model*."""
    in_price_1k, out_price_1k = get_prices_per_1k(model, provider)
    return (input_tokens / 1000.0) * in_price_1k + (output_tokens / 1000.0) * out_price_1k
40-

opt/plan_demos.json

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,4 @@
11
[
2-
{
3-
"question": "What time is it right now? Use UTC.",
4-
"state": "[]",
5-
"tool_calls": [
6-
{
7-
"name": "now",
8-
"args": {
9-
"timezone": "utc"
10-
}
11-
}
12-
],
13-
"final": null
14-
},
15-
{
16-
"question": "What time is it right now? Use UTC.",
17-
"state": "[]",
18-
"tool_calls": [
19-
{
20-
"name": "now",
21-
"args": {
22-
"timezone": "utc"
23-
}
24-
}
25-
],
26-
"final": null
27-
},
28-
{
29-
"question": "What time is it right now? Use UTC.",
30-
"state": "[]",
31-
"tool_calls": [
32-
{
33-
"name": "now",
34-
"args": {
35-
"timezone": "utc"
36-
}
37-
}
38-
],
39-
"final": null
40-
},
412
{
423
"question": "What time is it right now? Use UTC.",
434
"state": "[]",

0 commit comments

Comments
 (0)