It seems like the endpoint isn't fully OpenAI-compatible. At least it seems it doesn't properly accept configuration from opencode.
I didn't yet receive a reasonable answer from the model. Can you help me set it up properly? I even tried a different chat template from unsloth, but that didn't help with the issue.
{
"$schema": "https://opencode.ai/config.json",
"provider": {
"llama.cpp": {
"npm": "@ai-sdk/openai-compatible",
"name": "llama-server (local)",
"options": {
"baseURL": "http://127.0.0.1:8080/v1"
},
"models": {
"claude": {
"name": "local claude model",
"modalities": { "input": ["image", "text"], "output": ["text"] },
"limit": {
"context": 262144,
"output": 262144
}
},
"q-3.5-35b-a3b": {
"name": "Q-3.5-35B-A3B",
"modalities": { "input": ["image", "text"], "output": ["text"] },
"limit": {
"context": 262144,
"output": 81920
},
"cost": {
"input": 0.3125,
"output": 1.25,
"cache_read": 0.1563
},
"temperature": true,
"variants": {
"general": {
"options": {
"temperature": 1.0,
"top_p": 0.95,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 1.5,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
},
"coding": {
"options": {
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 0.0,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
},
"no-thinking": {
"options": {
"temperature": 0.7,
"top_p": 0.8,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 1.5,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": false
}
}
},
"reasoning": {
"options": {
"temperature": 1.0,
"top_p": 1.0,
"top_k": 40,
"min_p": 0.0,
"presence_penalty": 2.0,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
}
}
}
},
"krasis": {
"npm": "@ai-sdk/openai-compatible",
"name": "krasis-server (local)",
"options": {
"baseURL": "http://localhost:8012/v1"
},
"models": {
"q-3.5-35b-a3b": {
"name": "Q-3.5-35B-A3B",
"modalities": { "input": ["image", "text"], "output": ["text"] },
"limit": {
"context": 262144,
"output": 81920
},
"cost": {
"input": 0.3125,
"output": 1.25,
"cache_read": 0.1563
},
"temperature": true,
"variants": {
"general": {
"options": {
"temperature": 1.0,
"top_p": 0.95,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 1.5,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
},
"coding": {
"options": {
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 0.0,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
},
"no-thinking": {
"options": {
"temperature": 0.7,
"top_p": 0.8,
"top_k": 20,
"min_p": 0.0,
"presence_penalty": 1.5,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": false
}
}
},
"reasoning": {
"options": {
"temperature": 1.0,
"top_p": 1.0,
"top_k": 40,
"min_p": 0.0,
"presence_penalty": 2.0,
"repetition_penalty": 1.0,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
}
}
}
}
},
"mcp": {
"context7": {
"type": "local",
"command": ["npx", "-y", "@upstash/context7-mcp"]
}
}
}
I tried Qwen 3.5 35B A3B today with opencode and it seems something is off.
It seems like the endpoint isn't fully OpenAI-compatible. At least it seems it doesn't properly accept configuration from opencode.
I didn't yet receive a reasonable answer from the model. Can you help me set it up properly? I even tried a different chat template from unsloth, but that didn't help with the issue.