config.py
import os
from enum import Enum

from dotenv import load_dotenv
from google.genai.types import ThinkingLevel
from openai.types import ReasoningEffort

load_dotenv()

deployed_llm_base_url = os.getenv("AZURE_DEPLOYMENT_BASE_URL")
deployed_llm_key = os.getenv("AZURE_DEPLOYMENT_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
xai_api_key = os.getenv("XAI_API_KEY")
fireworks_api_key = os.getenv("FIREWORKS_API_KEY")
cerebras_api_key = os.getenv("CEREBRAS_API_KEY")
google_ai_api_key = os.getenv("GOOGLE_AI_STUDIO_API_KEY")
gcloud_project_id = os.getenv("GCLOUD_PROJECT_ID")
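
# Expected .env keys, matching the os.getenv calls above (the values shown are
# placeholders, not taken from this repo):
#   AZURE_DEPLOYMENT_BASE_URL=https://<resource>.openai.azure.com
#   AZURE_DEPLOYMENT_KEY=...
#   OPENAI_API_KEY=...
#   XAI_API_KEY=...
#   FIREWORKS_API_KEY=...
#   CEREBRAS_API_KEY=...
#   GOOGLE_AI_STUDIO_API_KEY=...
#   GCLOUD_PROJECT_ID=...
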
default_temperature = 0
attempts_count = 1

def get_azure_config(model, max_tokens=None):
    # Unlike the other provider helpers, this returns a zero-argument factory
    # rather than the config dict itself, so the request config is built lazily.
    def config():
        return {
            "max_tokens": max_tokens,
            "model_id": model,
            "api_key": deployed_llm_key,
            "url": f"{deployed_llm_base_url}/openai/deployments/{model}/chat/completions?api-version=2023-12-01-preview",
        }

    return config
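
# Usage sketch (the deployment name here is hypothetical, not from this module);
# note the trailing () to invoke the returned factory:
#   azure_cfg = get_azure_config("my-gpt-deployment", max_tokens=4096)()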

def get_open_ai_config(
    model,
    max_tokens=None,
    skip_system=False,
    system_role_name="system",
    base_url="https://api.openai.com/v1",
    reasoning_effort=None,
):
    config = {
        "model_id": model,
        "api_key": openai_api_key,
        "max_tokens": max_tokens,
        "skip_system": skip_system,
        "system_role_name": system_role_name,
        "url": base_url,
        "reasoning_effort": reasoning_effort,
    }
    # For reasoning models (o1, o3, o4), override temperature and reasoning effort.
    if model.startswith("o1") or model.startswith("o3") or model.startswith("o4"):
        config["temperature"] = 1
        config["reasoning_effort"] = "high"
    return config

def get_open_ai_responses_config(model, effort: ReasoningEffort = "high", verbosity=None, max_tokens=None, background=False):
    return {
        "api_key": openai_api_key,
        "max_tokens": max_tokens,
        "model_id": model,
        "temperature": 1,
        "reasoning_effort": effort,
        "verbosity": verbosity,
        "background": background,
    }

def get_xai_config(model, **kwargs):
    return {
        "model_id": model,
        "api_key": xai_api_key,
        "url": "https://api.x.ai/v1",
        **kwargs,
    }
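
# Any extra keyword arguments are merged straight into the returned config dict,
# e.g. (hypothetical kwarg, mirroring the other provider helpers):
#   get_xai_config("grok-4-0709", max_tokens=16000)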

def get_fireworks_config(model, max_tokens):
    return {
        "model_id": model,
        "max_tokens": max_tokens,
        "api_key": fireworks_api_key,
        "url": "https://api.fireworks.ai/inference/v1",
    }


def get_cerebras_config(model, max_tokens, reasoning_effort):
    return {
        "model_id": model,
        "max_tokens": max_tokens,
        "api_key": cerebras_api_key,
        "reasoning_effort": reasoning_effort,
        "url": "https://api.cerebras.ai/v1",
    }

# thinking_level is supported only for Gemini 3 and above.
def get_gemini_ai_studio_config(model, max_tokens=None, thinking_level: ThinkingLevel = None):
    return {"model_id": model, "max_tokens": max_tokens, "thinking_level": thinking_level}

# Docs: https://docs.anthropic.com/en/api/claude-on-vertex-ai#making-requests
def get_anthropic_vertexai_config(model, adaptive_thinking=False, max_tokens=None):
    """
    Configure an Anthropic Vertex AI model with thinking options.

    Args:
        model: Model ID
        adaptive_thinking: Use adaptive thinking mode (recommended for Opus 4.6)
        max_tokens: Max tokens for the response

    Notes:
        - adaptive_thinking=True enables the new adaptive thinking mode for Opus 4.6
        - Adaptive thinking uses "high" effort by default (can be modified via the
          output_config parameter if needed)
    """
    thinking = {"type": "disabled"}
    if adaptive_thinking:
        thinking = {"type": "adaptive"}
    return {
        "region": "global",
        "project_id": gcloud_project_id,
        "model_id": model,
        "thinking": thinking,
        "max_tokens": max_tokens or 64000,
        "temperature": 1 if adaptive_thinking else default_temperature,
    }
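
# Example mirroring the Opus 4.6 entry in the Model enum below (adaptive thinking on):
#   get_anthropic_vertexai_config("claude-opus-4-6", adaptive_thinking=True, max_tokens=128000)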

def get_amazon_nova_model_config(model):
    return {"model_id": model}

class ModelProvider(Enum):
    AISTUDIO = "aistudio"
    VERTEXAI = "vertexai"
    VERTEXAI_ANTHROPIC = "vertexai_anthropic"
    OPENAI = "openai"
    OPENAI_RESPONSES = "openai_responses"
    AZURE = "azure"
    FIREWORKS = "fireworks"
    XAI = "xai"
    AMAZON = "amazon"

class Model(Enum):
    # fmt: off
    # Gemini models
    Gemini_3_Pro_Preview = ("Gemini_3_Pro_Preview", ModelProvider.AISTUDIO, lambda: get_gemini_ai_studio_config("gemini-3-pro-preview", max_tokens=65536, thinking_level=ThinkingLevel.HIGH))
    Gemini_3_Flash_Preview = ("Gemini_3_Flash_Preview", ModelProvider.AISTUDIO, lambda: get_gemini_ai_studio_config("gemini-3-flash-preview", max_tokens=65536, thinking_level=ThinkingLevel.HIGH))
    # OpenAI models
    GPT_OSS_120B = ("GPT_OSS_120B", ModelProvider.OPENAI, lambda: get_cerebras_config("gpt-oss-120b", max_tokens=32000, reasoning_effort="low"))
    GPT_OSS_20B = ("GPT_OSS_20B", ModelProvider.OPENAI, lambda: get_open_ai_config("openai/gpt-oss-20b", max_tokens=-1, reasoning_effort="low", base_url="http://localhost:1234/v1"))
    GPT5_Nano_high = ("GPT5_Nano_high", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5-nano-2025-08-07", effort="low", verbosity="high", max_tokens=128000))
    GPT5_Mini_high = ("GPT5_Mini_high", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5-mini-2025-08-07", effort="high", verbosity="high", max_tokens=128000))
    GPT51_Codex = ("GPT51_Codex", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.1-codex", effort="high", max_tokens=128000))
    GPT52_Codex = ("GPT52_Codex", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.2-codex", effort="medium", max_tokens=128000))
    GPT52_Codex_high = ("GPT52_Codex_high", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.2-codex", effort="high", max_tokens=128000))
    GPT51_Codex_mini = ("GPT51_Codex_mini", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.1-codex-mini", effort="high", max_tokens=128000))
    GPT52_1211 = ("GPT52_1211", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.2-2025-12-11", effort="none", verbosity="high", max_tokens=128000))
    GPT52_1211_high = ("GPT52_1211_high", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.2-2025-12-11", effort="high", verbosity="high", max_tokens=128000))
    GPT53_Codex = ("GPT53_Codex", ModelProvider.OPENAI_RESPONSES, lambda: get_open_ai_responses_config("gpt-5.3-codex", effort="medium", max_tokens=128000))
    # Claude models
    Sonnet_45 = ("Claude_Sonnet_45", ModelProvider.VERTEXAI_ANTHROPIC, lambda: get_anthropic_vertexai_config("claude-sonnet-4-5@20250929"))
    Sonnet_46 = ("Claude_Sonnet_46", ModelProvider.VERTEXAI_ANTHROPIC, lambda: get_anthropic_vertexai_config("claude-sonnet-4-6", adaptive_thinking=True, max_tokens=64000))
    Opus_46 = ("Claude_Opus_46", ModelProvider.VERTEXAI_ANTHROPIC, lambda: get_anthropic_vertexai_config("claude-opus-4-6", adaptive_thinking=True, max_tokens=128000))
    # Haiku_45 = ("Claude_Haiku_45", ModelProvider.VERTEXAI_ANTHROPIC, lambda: get_anthropic_vertexai_config("claude-haiku-4-5@20251001"))
    # Other models
    Grok4_0709 = ("Grok4_0709", ModelProvider.XAI, lambda: get_xai_config("grok-4-0709"))  # reasoning effort is not supported for Grok4
    Grok_Code_0825 = ("Grok_Code_0825", ModelProvider.XAI, lambda: get_xai_config("grok-code-fast-1-0825"))
    Grok41_Fast = ("Grok41_Fast", ModelProvider.XAI, lambda: get_xai_config("grok-4-1-fast-non-reasoning"))
    Grok41_FastReasoning = ("Grok41_FastReasoning", ModelProvider.XAI, lambda: get_xai_config("grok-4-1-fast-reasoning"))
    AmazonNovaPremier = ("AmazonNovaPremier", ModelProvider.AMAZON, lambda: get_amazon_nova_model_config("us.amazon.nova-premier-v1:0"))
    MiniMax_M21 = ("MiniMax_M21", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/minimax-m2p1", max_tokens=16000))
    DeepSeek_v32 = ("DeepSeek_v32", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/deepseek-v3p2", max_tokens=60000))
    Kimi_K2 = ("Kimi_K2", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/kimi-k2-thinking", max_tokens=60000))
    GLM_5 = ("GLM_5", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/glm-5", max_tokens=25000))
    Kimi_K2p5 = ("Kimi_K2p5", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/kimi-k2p5", max_tokens=32000))
    MiniMax_M2p5 = ("MiniMax_M2p5", ModelProvider.FIREWORKS, lambda: get_fireworks_config("accounts/fireworks/models/minimax-m2p5", max_tokens=25000))
    # fmt: on

    def __init__(self, model_id: str, provider: ModelProvider, config_func: callable):
        """Initialize the model with its name, provider, and config factory."""
        self.model_id = model_id
        self.provider = provider
        self.config_func = config_func

    def __call__(self):
        """Build and return the provider-specific configuration for this model."""
        return self.config_func()

    def __str__(self):
        """Return the model ID."""
        return self.model_id
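

# A minimal usage sketch (illustrative; the code that consumes these configs lives
# outside this module). All names below exist in this file:
if __name__ == "__main__":
    chosen = Model.Gemini_3_Pro_Preview
    print(chosen)                 # "Gemini_3_Pro_Preview" via __str__
    print(chosen.provider.value)  # "aistudio"
    print(chosen())               # invokes the stored lambda -> AI Studio config dict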