Skip to content

Commit 9e75f04

Browse files
authored
Merge pull request #700 from doringeman/unify-vllm
refactor: unify vllm and vllm-metal into a single "vllm" backend
2 parents dcfc7f3 + 94efd18 commit 9e75f04

16 files changed

+116
-116
lines changed

cmd/cli/commands/install-runner.go

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
"github.com/docker/model-runner/pkg/inference/backends/diffusers"
1717
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
1818
"github.com/docker/model-runner/pkg/inference/backends/vllm"
19-
"github.com/docker/model-runner/pkg/inference/backends/vllmmetal"
19+
"github.com/docker/model-runner/pkg/inference/platform"
2020
"github.com/moby/moby/api/types/container"
2121
"github.com/spf13/cobra"
2222
)
@@ -29,7 +29,7 @@ const (
2929
// installation will try to reach the model runner while waiting for it to
3030
// be ready.
3131
installWaitRetryInterval = 500 * time.Millisecond
32-
backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "|" + vllmmetal.Name + "). Default: " + llamacpp.Name
32+
backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
3333
)
3434

3535
// waitForStandaloneRunnerAfterInstall waits for a standalone model runner
@@ -242,14 +242,14 @@ type runnerOptions struct {
242242

243243
// runInstallOrStart is shared logic for install-runner and start-runner commands
244244
func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error {
245-
// vllm-metal is installed on-demand via the running model runner,
246-
// not as a standalone container. This applies to all engine kinds.
247-
if opts.backend == vllmmetal.Name {
248-
cmd.Println("Installing vllm-metal backend...")
249-
if err := desktopClient.InstallBackend(vllmmetal.Name); err != nil {
250-
return fmt.Errorf("failed to install vllm-metal backend: %w", err)
245+
// On macOS ARM64, the vllm backend requires deferred installation
246+
// (on-demand via the running model runner), not as a standalone container.
247+
if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
248+
cmd.Println("Installing vllm backend...")
249+
if err := desktopClient.InstallBackend(vllm.Name); err != nil {
250+
return fmt.Errorf("failed to install vllm backend: %w", err)
251251
}
252-
cmd.Println("vllm-metal backend installed successfully")
252+
cmd.Println("vllm backend installed successfully")
253253
return nil
254254
}
255255

@@ -340,7 +340,7 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
340340
}
341341

342342
// Validate backend selection
343-
validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name, vllmmetal.Name}
343+
validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
344344
if opts.backend != "" {
345345
isValid := false
346346
for _, valid := range validBackends {
@@ -354,8 +354,8 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
354354
}
355355
}
356356

357-
// Validate backend-GPU compatibility
358-
if opts.backend == vllm.Name && gpu != gpupkg.GPUSupportCUDA {
357+
// Validate backend-GPU compatibility (only on Linux; macOS ARM64 uses Metal)
358+
if opts.backend == vllm.Name && !platform.SupportsVLLMMetal() && gpu != gpupkg.GPUSupportCUDA {
359359
return fmt.Errorf("--backend vllm requires CUDA GPU support (--gpu=cuda or auto-detected CUDA)")
360360
}
361361

cmd/cli/docs/reference/docker_model_install-runner.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ plink: docker_model.yaml
88
options:
99
- option: backend
1010
value_type: string
11-
description: |
12-
Specify backend (llama.cpp|vllm|diffusers|vllm-metal). Default: llama.cpp
11+
description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
1312
deprecated: false
1413
hidden: false
1514
experimental: false

cmd/cli/docs/reference/docker_model_reinstall-runner.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@ plink: docker_model.yaml
88
options:
99
- option: backend
1010
value_type: string
11-
description: |
12-
Specify backend (llama.cpp|vllm|diffusers|vllm-metal). Default: llama.cpp
11+
description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
1312
deprecated: false
1413
hidden: false
1514
experimental: false

cmd/cli/docs/reference/docker_model_start-runner.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ plink: docker_model.yaml
1010
options:
1111
- option: backend
1212
value_type: string
13-
description: |
14-
Specify backend (llama.cpp|vllm|diffusers|vllm-metal). Default: llama.cpp
13+
description: 'Specify backend (llama.cpp|vllm|diffusers). Default: llama.cpp'
1514
deprecated: false
1615
hidden: false
1716
experimental: false

cmd/cli/docs/reference/model_install-runner.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Install Docker Model Runner (Docker Engine only)
77

88
| Name | Type | Default | Description |
99
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
10-
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers\|vllm-metal). Default: llama.cpp |
10+
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
1111
| `--debug` | `bool` | | Enable debug logging |
1212
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
1313
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |

cmd/cli/docs/reference/model_reinstall-runner.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Reinstall Docker Model Runner (Docker Engine only)
77

88
| Name | Type | Default | Description |
99
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
10-
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers\|vllm-metal). Default: llama.cpp |
10+
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
1111
| `--debug` | `bool` | | Enable debug logging |
1212
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
1313
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |

cmd/cli/docs/reference/model_start-runner.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Start Docker Model Runner (Docker Engine only)
77

88
| Name | Type | Default | Description |
99
|:-----------------|:---------|:------------|:-------------------------------------------------------------------------------------------------------|
10-
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers\|vllm-metal). Default: llama.cpp |
10+
| `--backend` | `string` | | Specify backend (llama.cpp\|vllm\|diffusers). Default: llama.cpp |
1111
| `--debug` | `bool` | | Enable debug logging |
1212
| `--do-not-track` | `bool` | | Do not track models usage in Docker Model Runner |
1313
| `--gpu` | `string` | `auto` | Specify GPU support (none\|auto\|cuda\|rocm\|musa\|cann) |

main.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,28 +122,30 @@ func main() {
122122
Logger: log.WithFields(logrus.Fields{"component": "model-manager"}),
123123
Transport: baseTransport,
124124
},
125-
Backends: append(append(
125+
Backends: append(
126126
routing.DefaultBackendDefs(routing.BackendsConfig{
127127
Log: log,
128128
LlamaCppVendoredPath: llamaServerPath,
129129
LlamaCppUpdatedPath: updatedServerPath,
130130
LlamaCppConfig: llamaCppConfig,
131131
IncludeMLX: true,
132132
MLXPath: mlxServerPath,
133+
IncludeVLLM: includeVLLM,
134+
VLLMPath: vllmServerPath,
135+
VLLMMetalPath: vllmMetalServerPath,
133136
}),
134137
routing.BackendDef{Name: sglang.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
135138
return sglang.New(log, mm, log.WithFields(logrus.Fields{"component": sglang.Name}), nil, sglangServerPath)
136139
}},
137140
routing.BackendDef{Name: diffusers.Name, Init: func(mm *models.Manager) (inference.Backend, error) {
138141
return diffusers.New(log, mm, log.WithFields(logrus.Fields{"component": diffusers.Name}), nil, diffusersServerPath)
139142
}},
140-
), vllmBackendDefs(log, vllmServerPath)...),
143+
),
141144
OnBackendError: func(name string, err error) {
142145
log.Fatalf("unable to initialize %s backend: %v", name, err)
143146
},
144-
DefaultBackendName: llamacpp.Name,
145-
VLLMMetalServerPath: vllmMetalServerPath,
146-
HTTPClient: http.DefaultClient,
147+
DefaultBackendName: llamacpp.Name,
148+
HTTPClient: http.DefaultClient,
147149
MetricsTracker: metrics.NewTracker(
148150
http.DefaultClient,
149151
log.WithField("component", "metrics"),

pkg/inference/backends/vllm/vllm.go

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,33 @@ type vLLM struct {
4444
customBinaryPath string
4545
}
4646

47-
// New creates a new vLLM-based backend.
47+
// Options holds the configuration for the unified vLLM backend constructor.
48+
type Options struct {
49+
Config *Config // Linux-only: extra vllm args (nil = defaults)
50+
LinuxBinaryPath string // Linux: custom vllm binary path
51+
MetalPythonPath string // macOS ARM64: custom python path
52+
}
53+
54+
// New creates the appropriate vLLM backend for the current platform.
55+
// On macOS ARM64, it returns the vllm-metal backend; on Linux, the standard
56+
// vLLM backend. On unsupported platforms, the returned backend's Install/Run
57+
// methods return errors.
58+
func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, opts Options) (inference.Backend, error) {
59+
if platform.SupportsVLLMMetal() {
60+
return newMetal(log, modelManager, serverLog, opts.MetalPythonPath)
61+
}
62+
return newLinux(log, modelManager, serverLog, opts.Config, opts.LinuxBinaryPath)
63+
}
64+
65+
// NeedsDeferredInstall reports whether vllm on the current platform
66+
// requires deferred (on-demand) installation.
67+
func NeedsDeferredInstall() bool {
68+
return platform.SupportsVLLMMetal()
69+
}
70+
71+
// newLinux creates a new Linux vLLM-based backend.
4872
// customBinaryPath is an optional path to a custom vllm binary; if empty, the default path is used.
49-
func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, conf *Config, customBinaryPath string) (inference.Backend, error) {
73+
func newLinux(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, conf *Config, customBinaryPath string) (inference.Backend, error) {
5074
// If no config is provided, use the default configuration
5175
if conf == nil {
5276
conf = NewDefaultVLLMConfig()

pkg/inference/backends/vllmmetal/vllmmetal.go renamed to pkg/inference/backends/vllm/vllm_metal.go

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
package vllmmetal
1+
package vllm
22

33
import (
44
"context"
@@ -19,12 +19,9 @@ import (
1919
"github.com/docker/model-runner/pkg/inference/platform"
2020
"github.com/docker/model-runner/pkg/internal/dockerhub"
2121
"github.com/docker/model-runner/pkg/logging"
22-
"github.com/sirupsen/logrus"
2322
)
2423

2524
const (
26-
// Name is the backend name.
27-
Name = "vllm-metal"
2825
defaultInstallDir = ".docker/model-runner/vllm-metal"
2926
// vllmMetalVersion is the vllm-metal release tag to download from Docker Hub.
3027
vllmMetalVersion = "v0.1.0-20260126-121650"
@@ -53,9 +50,9 @@ type vllmMetal struct {
5350
status string
5451
}
5552

56-
// New creates a new vllm-metal backend.
53+
// newMetal creates a new vllm-metal backend.
5754
// customPythonPath is an optional path to a custom python3 binary; if empty, the default installation is used.
58-
func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, customPythonPath string) (inference.Backend, error) {
55+
func newMetal(log logging.Logger, modelManager *models.Manager, serverLog logging.Logger, customPythonPath string) (inference.Backend, error) {
5956
homeDir, err := os.UserHomeDir()
6057
if err != nil {
6158
return nil, fmt.Errorf("failed to get user home directory: %w", err)
@@ -72,22 +69,6 @@ func New(log logging.Logger, modelManager *models.Manager, serverLog logging.Log
7269
}, nil
7370
}
7471

75-
// TryRegister initializes the vllm-metal backend if the platform supports it
76-
// and registers it in the provided backends map. It returns the backend names
77-
// whose installation should be deferred until explicitly requested.
78-
func TryRegister(log logging.Logger, modelManager *models.Manager, backends map[string]inference.Backend, serverPath string) []string {
79-
if !platform.SupportsVLLMMetal() {
80-
return nil
81-
}
82-
backend, err := New(log, modelManager, log.WithFields(logrus.Fields{"component": Name}), serverPath)
83-
if err != nil {
84-
log.Warnf("Failed to initialize vllm-metal backend: %v", err)
85-
return nil
86-
}
87-
backends[Name] = backend
88-
return []string{Name}
89-
}
90-
9172
// Name implements inference.Backend.Name.
9273
func (v *vllmMetal) Name() string {
9374
return Name

0 commit comments

Comments (0)