@@ -16,7 +16,7 @@ import (
1616 "github.com/docker/model-runner/pkg/inference/backends/diffusers"
1717 "github.com/docker/model-runner/pkg/inference/backends/llamacpp"
1818 "github.com/docker/model-runner/pkg/inference/backends/vllm"
19- "github.com/docker/model-runner/pkg/inference/backends/vllmmetal "
19+ "github.com/docker/model-runner/pkg/inference/platform"
2020 "github.com/moby/moby/api/types/container"
2121 "github.com/spf13/cobra"
2222)
@@ -29,7 +29,7 @@ const (
2929 // installation will try to reach the model runner while waiting for it to
3030 // be ready.
3131 installWaitRetryInterval = 500 * time .Millisecond
32- backendUsage = "Specify backend (" + llamacpp .Name + "|" + vllm .Name + "|" + diffusers .Name + "|" + vllmmetal . Name + " ). Default: " + llamacpp .Name
32+ backendUsage = "Specify backend (" + llamacpp.Name + "|" + vllm.Name + "|" + diffusers.Name + "). Default: " + llamacpp.Name
3333)
3434
3535// waitForStandaloneRunnerAfterInstall waits for a standalone model runner
@@ -242,14 +242,14 @@ type runnerOptions struct {
242242
243243// runInstallOrStart is shared logic for install-runner and start-runner commands
244244func runInstallOrStart (cmd * cobra.Command , opts runnerOptions , debug bool ) error {
245- // vllm-metal is installed on-demand via the running model runner,
246- // not as a standalone container. This applies to all engine kinds .
247- if opts .backend == vllmmetal .Name {
248- cmd .Println ("Installing vllm-metal backend..." )
249- if err := desktopClient .InstallBackend (vllmmetal .Name ); err != nil {
250- return fmt .Errorf ("failed to install vllm-metal backend: %w" , err )
245+ 	// On macOS ARM64, the vllm backend requires deferred installation
246+ 	// (on-demand via the running model runner), not as a standalone container.
247+ 	if opts.backend == vllm.Name && platform.SupportsVLLMMetal() {
248+ 		cmd.Println("Installing vllm backend...")
249+ 		if err := desktopClient.InstallBackend(vllm.Name); err != nil {
250+ 			return fmt.Errorf("failed to install vllm backend: %w", err)
251251 }
252- cmd .Println ("vllm-metal backend installed successfully" )
252+ 		cmd.Println("vllm backend installed successfully")
253253 return nil
254254 }
255255
@@ -340,7 +340,7 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
340340 }
341341
342342 // Validate backend selection
343- validBackends := []string {llamacpp .Name , vllm .Name , diffusers .Name , vllmmetal . Name }
343+ 	validBackends := []string{llamacpp.Name, vllm.Name, diffusers.Name}
344344 if opts .backend != "" {
345345 isValid := false
346346 for _ , valid := range validBackends {
@@ -354,8 +354,8 @@ func runInstallOrStart(cmd *cobra.Command, opts runnerOptions, debug bool) error
354354 }
355355 }
356356
357- // Validate backend-GPU compatibility
358- if opts .backend == vllm .Name && gpu != gpupkg .GPUSupportCUDA {
357+ 	// Validate backend-GPU compatibility (only on Linux; macOS ARM64 uses Metal)
358+ 	if opts.backend == vllm.Name && !platform.SupportsVLLMMetal() && gpu != gpupkg.GPUSupportCUDA {
359359 return fmt .Errorf ("--backend vllm requires CUDA GPU support (--gpu=cuda or auto-detected CUDA)" )
360360 }
361361
0 commit comments