@@ -8,8 +8,10 @@ import (
88 "fmt"
99 "io"
1010 "net/http"
11+ "net/http/httptest"
1112 "strings"
1213 "sync"
14+ "time"
1315
1416 "github.com/docker/model-runner/pkg/distribution/distribution"
1517 "github.com/docker/model-runner/pkg/inference"
@@ -19,6 +21,10 @@ import (
1921 "github.com/docker/model-runner/pkg/middleware"
2022)
2123
// contextKey is the unexported type of the keys this file stores in a
// request context. Using a dedicated type keeps these keys from colliding
// with context keys defined by other packages.
type contextKey bool

// preloadOnlyKey is the context key that marks a request as preload-only.
// Only the presence of a value under this key matters: handleOpenAIInference
// returns before running inference whenever the lookup is non-nil. The
// constant's own value (false) carries no meaning beyond identifying the key.
const preloadOnlyKey = contextKey(false)
27+
2228// HTTPHandler handles HTTP requests for the scheduler.
2329// It wraps the Scheduler to provide HTTP endpoint functionality without
2430// coupling the core scheduling logic to HTTP concerns.
@@ -223,6 +229,12 @@ func (h *HTTPHandler) handleOpenAIInference(w http.ResponseWriter, r *http.Reque
223229 }
224230 defer h .scheduler .loader .release (runner )
225231
232+ // If this is a preload-only request, return here without running inference.
233+ // Can be triggered via context (internal) or X-Preload-Only header (external).
234+ if r .Context ().Value (preloadOnlyKey ) != nil || r .Header .Get ("X-Preload-Only" ) == "true" {
235+ return
236+ }
237+
226238 // Record the request in the OpenAI recorder.
227239 recordID := h .scheduler .openAIRecorder .RecordRequest (request .Model , r , body )
228240 w = h .scheduler .openAIRecorder .NewResponseRecorder (w )
@@ -357,7 +369,7 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
357369 return
358370 }
359371
360- _ , err = h .scheduler .ConfigureRunner (r .Context (), backend , configureRequest , r .UserAgent ())
372+ backend , err = h .scheduler .ConfigureRunner (r .Context (), backend , configureRequest , r .UserAgent ())
361373 if err != nil {
362374 if errors .Is (err , errRunnerAlreadyActive ) {
363375 http .Error (w , err .Error (), http .StatusConflict )
@@ -367,6 +379,37 @@ func (h *HTTPHandler) Configure(w http.ResponseWriter, r *http.Request) {
367379 return
368380 }
369381
382+ // Preload the model in the background by calling handleOpenAIInference with preload-only context.
383+ // Compose calls `configure` by default, so this also makes Compose preload the model.
384+ go func () {
385+ preloadBody , err := json .Marshal (OpenAIInferenceRequest {Model : configureRequest .Model })
386+ if err != nil {
387+ h .scheduler .log .Warnf ("failed to marshal preload request body: %v" , err )
388+ return
389+ }
390+ ctx , cancel := context .WithTimeout (context .Background (), time .Minute )
391+ defer cancel ()
392+ preloadReq , err := http .NewRequestWithContext (
393+ context .WithValue (ctx , preloadOnlyKey , true ),
394+ http .MethodPost ,
395+ inference .InferencePrefix + "/v1/chat/completions" ,
396+ bytes .NewReader (preloadBody ),
397+ )
398+ if err != nil {
399+ h .scheduler .log .Warnf ("failed to create preload request: %v" , err )
400+ return
401+ }
402+ preloadReq .Header .Set ("User-Agent" , r .UserAgent ())
403+ if backend != nil {
404+ preloadReq .SetPathValue ("backend" , backend .Name ())
405+ }
406+ recorder := httptest .NewRecorder ()
407+ h .handleOpenAIInference (recorder , preloadReq )
408+ if recorder .Code != http .StatusOK {
409+ h .scheduler .log .Warnf ("background model preload failed with status %d: %s" , recorder .Code , recorder .Body .String ())
410+ }
411+ }()
412+
370413 w .WriteHeader (http .StatusAccepted )
371414}
372415
0 commit comments