epam · astsiapanay · Feb 4, 2026 · Feb 4, 2026
@@ -2,6 +2,8 @@
 
 import com.epam.aidial.core.config.Application;
 import com.epam.aidial.core.config.Deployment;
+import com.epam.aidial.core.config.Model;
+import com.epam.aidial.core.config.Upstream;
 import com.epam.aidial.core.server.Proxy;
 import com.epam.aidial.core.server.ProxyContext;
 import com.epam.aidial.core.server.data.ApiKeyData;
@@ -191,6 +193,11 @@ void handleProxyRequest(HttpClientRequest proxyRequest) {
         ApiKeyData proxyApiKeyData = context.getProxyApiKeyData();
         proxyRequest.headers().add(Proxy.HEADER_API_KEY, proxyApiKeyData.getPerRequestKey());
 
+        if (deployment instanceof Model model && !model.getUpstreams().isEmpty()) {
+            Upstream upstream = model.getUpstreams().getFirst();
+            proxyRequest.putHeader(Proxy.HEADER_UPSTREAM_ENDPOINT, upstream.getEndpoint());
+        }
+
         Buffer requestBody = context.getRequestBody();
         proxyRequest.putHeader(HttpHeaders.CONTENT_LENGTH, Integer.toString(requestBody.length()));
         context.getRequestHeaders().forEach(proxyRequest::putHeader);

@@ -15,6 +15,8 @@
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 public class FeaturesApiTest extends ResourceBaseTest {
 
     private static String[] convertHeadersToFlatArray(Headers headers) {
@@ -50,6 +52,13 @@ void testRateEndpointModel() {
         testUpstreamEndpoint(inboundPath, upstream, HttpMethod.POST, body);
     }
 
+    @Test
+    void testConfigurationEndpointModel() {
+        String inboundPath = "/v1/deployments/chat-gpt-35-turbo/configuration";
+        String upstream = "http://localhost:7001/upstream/v1/deployments/gpt-35-turbo/model_config";
+        testUpstreamEndpoint(inboundPath, upstream, HttpMethod.GET);
+    }
+
     @Test
     void testRateEndpointApplication() {
         String inboundPath = "/v1/app/rate";
@@ -102,7 +111,11 @@ void testUpstreamEndpoint(String inboundPath, String upstream, HttpMethod method
         try (TestWebServer server = new TestWebServer(uri.getPort())) {
             server.map(method, uri.getPath(), request -> {
                 Headers responseHeaders = filterHeaders(request.getHeaders(), requestExtraHeaders);
-                if (request.getPath().endsWith("rate_response")) {
+                String path = request.getPath();
+                if (path.endsWith("model_config")) {
+                    assertEquals("http://localhost:7001", request.getHeader(Proxy.HEADER_UPSTREAM_ENDPOINT));
+                }
+                if (path.endsWith("rate_response")) {
                     return handleRateResponse(request, responseHeaders);
                 } else {
                     return TestWebServer.createResponse(200, "PONG", convertHeadersToFlatArray(responseHeaders));

@@ -65,7 +65,7 @@ void testFeaturesModel(Vertx vertx, VertxTestContext context) {
                     { "rate": true, "tokenize": true, "truncate_prompt": true
                     , "system_prompt": true, "tools": true, "seed": true
                     , "url_attachments": true, "folder_attachments": false
-                    , "configuration": false, "allow_resume": true, "accessible_by_per_request_key": true,
+                    , "configuration": true, "allow_resume": true, "accessible_by_per_request_key": true,
                     "content_parts": false, "temperature" : true, "cache" : false,
                     "auto_caching" : false, "parallel_tool_calls": true,
                     "assistant_attachments_in_request": false

@@ -138,6 +138,7 @@
         {"endpoint": "http://localhost:7003", "key": "modelKey3"}
       ],
       "features": {
+        "configurationEndpoint": "http://localhost:7001/upstream/v1/deployments/gpt-35-turbo/model_config",
         "rateEndpoint": "http://localhost:7001/upstream/v1/deployments/gpt-35-turbo/rate_response",
         "tokenizeEndpoint": "http://localhost:7001/upstream/v1/deployments/gpt-35-turbo/tokenizer",
         "truncatePromptEndpoint": "http://localhost:7001/upstream/v1/deployments/gpt-35-turbo/trim_history",