feat(package): refine model packaging documentation and streamline command descriptions; remove vLLM backend wiring from main.go

ilopezluna · ilopezluna · commit cd3860fe1d88 · 2026-02-12T15:17:56.000+01:00
diff --git a/cmd/cli/commands/package.go b/cmd/cli/commands/package.go
@@ -39,13 +39,45 @@ func newPackagedCmd() *cobra.Command {
 
 	c := &cobra.Command{
 		Use:   "package (--gguf <path> | --safetensors-dir <path> | --dduf <path> | --from <model>) [--license <path>...] [--mmproj <path>] [--context-size <tokens>] [--push] MODEL",
-		Short: "Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact.",
-		Long: "Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.\n" +
-			"When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).\n" +
-			"When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.\n" +
-			"When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.\n" +
-			"When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.\n" +
-			"For multimodal models, use --mmproj to include a multimodal projector file.",
+		Short: "Package a model into a Docker Model OCI artifact",
+		Long: `Package a model into a Docker Model OCI artifact.
+
+The model source must be one of:
+  --gguf               A GGUF file (single file or first shard of a sharded model)
+  --safetensors-dir    A directory containing .safetensors and configuration files
+  --dduf               A .dduf (Diffusers Unified Format) archive
+  --from               An existing packaged model reference
+
+By default, the packaged artifact is loaded into the local Model Runner content store.
+Use --push to publish the model to a registry instead.
+
+MODEL specifies the target model reference (for example: myorg/llama3:8b).
+When using --push, MODEL must be a registry-qualified reference.
+
+Packaging behavior:
+
+  GGUF
+    --gguf must point to a .gguf file.
+    For sharded models, point to the first shard. All shards must:
+      • reside in the same directory
+      • follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
+    All shards are automatically discovered and packaged together.
+
+  Safetensors
+    --safetensors-dir must point to a directory containing .safetensors files
+    and required configuration files (e.g. model config, tokenizer files).
+    All files under the directory (including nested subdirectories) are
+    automatically discovered. Each file is packaged as a separate OCI layer.
+
+  DDUF
+    --dduf must point to a .dduf archive file.
+
+  Repackaging
+    --from repackages an existing model. You may override selected properties
+    such as --context-size to create a variant of the original model.
+
+  Multimodal models
+    Use --mmproj to include a multimodal projector file.`,
 		Args: func(cmd *cobra.Command, args []string) error {
 			if err := requireExactArgs(1, "package", "MODEL")(cmd, args); err != nil {
 				return err
diff --git a/cmd/cli/docs/reference/docker_model_package.yaml b/cmd/cli/docs/reference/docker_model_package.yaml
@@ -1,13 +1,44 @@
 command: docker model package
-short: |
-    Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact.
+short: Package a model into a Docker Model OCI artifact
 long: |-
-    Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.
-    When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).
-    When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.
-    When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.
-    When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.
-    For multimodal models, use --mmproj to include a multimodal projector file.
+    Package a model into a Docker Model OCI artifact.
+
+    The model source must be one of:
+      --gguf               A GGUF file (single file or first shard of a sharded model)
+      --safetensors-dir    A directory containing .safetensors and configuration files
+      --dduf               A .dduf (Diffusers Unified Format) archive
+      --from               An existing packaged model reference
+
+    By default, the packaged artifact is loaded into the local Model Runner content store.
+    Use --push to publish the model to a registry instead.
+
+    MODEL specifies the target model reference (for example: myorg/llama3:8b).
+    When using --push, MODEL must be a registry-qualified reference.
+
+    Packaging behavior:
+
+      GGUF
+        --gguf must point to a .gguf file.
+        For sharded models, point to the first shard. All shards must:
+          • reside in the same directory
+          • follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
+        All shards are automatically discovered and packaged together.
+
+      Safetensors
+        --safetensors-dir must point to a directory containing .safetensors files
+        and required configuration files (e.g. model config, tokenizer files).
+        All files under the directory (including nested subdirectories) are
+        automatically discovered. Each file is packaged as a separate OCI layer.
+
+      DDUF
+        --dduf must point to a .dduf archive file.
+
+      Repackaging
+        --from repackages an existing model. You may override selected properties
+        such as --context-size to create a variant of the original model.
+
+      Multimodal models
+        Use --mmproj to include a multimodal projector file.
 usage: docker model package (--gguf <path> | --safetensors-dir <path> | --dduf <path> | --from <model>) [--license <path>...] [--mmproj <path>] [--context-size <tokens>] [--push] MODEL
 pname: docker model
 plink: docker_model.yaml
diff --git a/cmd/cli/docs/reference/model.md b/cmd/cli/docs/reference/model.md
@@ -5,35 +5,35 @@ Docker Model Runner
 
 ### Subcommands
 
-| Name                                            | Description                                                                                                |
-|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------------|
-| [`bench`](model_bench.md)                       | Benchmark a model's performance at different concurrency levels                                            |
-| [`df`](model_df.md)                             | Show Docker Model Runner disk usage                                                                        |
-| [`inspect`](model_inspect.md)                   | Display detailed information on one model                                                                  |
-| [`install-runner`](model_install-runner.md)     | Install Docker Model Runner (Docker Engine only)                                                           |
-| [`launch`](model_launch.md)                     | Launch an app configured to use Docker Model Runner                                                        |
-| [`list`](model_list.md)                         | List the models pulled to your local environment                                                           |
-| [`logs`](model_logs.md)                         | Fetch the Docker Model Runner logs                                                                         |
-| [`package`](model_package.md)                   | Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact. |
-| [`ps`](model_ps.md)                             | List running models                                                                                        |
-| [`pull`](model_pull.md)                         | Pull a model from Docker Hub or HuggingFace to your local environment                                      |
-| [`purge`](model_purge.md)                       | Remove all models                                                                                          |
-| [`push`](model_push.md)                         | Push a model to Docker Hub                                                                                 |
-| [`reinstall-runner`](model_reinstall-runner.md) | Reinstall Docker Model Runner (Docker Engine only)                                                         |
-| [`requests`](model_requests.md)                 | Fetch requests+responses from Docker Model Runner                                                          |
-| [`restart-runner`](model_restart-runner.md)     | Restart Docker Model Runner (Docker Engine only)                                                           |
-| [`rm`](model_rm.md)                             | Remove local models downloaded from Docker Hub                                                             |
-| [`run`](model_run.md)                           | Run a model and interact with it using a submitted prompt or chat mode                                     |
-| [`search`](model_search.md)                     | Search for models on Docker Hub and HuggingFace                                                            |
-| [`show`](model_show.md)                         | Show information for a model                                                                               |
-| [`skills`](model_skills.md)                     | Install Docker Model Runner skills for AI coding assistants                                                |
-| [`start-runner`](model_start-runner.md)         | Start Docker Model Runner (Docker Engine only)                                                             |
-| [`status`](model_status.md)                     | Check if the Docker Model Runner is running                                                                |
-| [`stop-runner`](model_stop-runner.md)           | Stop Docker Model Runner (Docker Engine only)                                                              |
-| [`tag`](model_tag.md)                           | Tag a model                                                                                                |
-| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only)                                                         |
-| [`unload`](model_unload.md)                     | Unload running models                                                                                      |
-| [`version`](model_version.md)                   | Show the Docker Model Runner version                                                                       |
+| Name                                            | Description                                                            |
+|:------------------------------------------------|:-----------------------------------------------------------------------|
+| [`bench`](model_bench.md)                       | Benchmark a model's performance at different concurrency levels        |
+| [`df`](model_df.md)                             | Show Docker Model Runner disk usage                                    |
+| [`inspect`](model_inspect.md)                   | Display detailed information on one model                              |
+| [`install-runner`](model_install-runner.md)     | Install Docker Model Runner (Docker Engine only)                       |
+| [`launch`](model_launch.md)                     | Launch an app configured to use Docker Model Runner                    |
+| [`list`](model_list.md)                         | List the models pulled to your local environment                       |
+| [`logs`](model_logs.md)                         | Fetch the Docker Model Runner logs                                     |
+| [`package`](model_package.md)                   | Package a model into a Docker Model OCI artifact                       |
+| [`ps`](model_ps.md)                             | List running models                                                    |
+| [`pull`](model_pull.md)                         | Pull a model from Docker Hub or HuggingFace to your local environment  |
+| [`purge`](model_purge.md)                       | Remove all models                                                      |
+| [`push`](model_push.md)                         | Push a model to Docker Hub                                             |
+| [`reinstall-runner`](model_reinstall-runner.md) | Reinstall Docker Model Runner (Docker Engine only)                     |
+| [`requests`](model_requests.md)                 | Fetch requests+responses from Docker Model Runner                      |
+| [`restart-runner`](model_restart-runner.md)     | Restart Docker Model Runner (Docker Engine only)                       |
+| [`rm`](model_rm.md)                             | Remove local models downloaded from Docker Hub                         |
+| [`run`](model_run.md)                           | Run a model and interact with it using a submitted prompt or chat mode |
+| [`search`](model_search.md)                     | Search for models on Docker Hub and HuggingFace                        |
+| [`show`](model_show.md)                         | Show information for a model                                           |
+| [`skills`](model_skills.md)                     | Install Docker Model Runner skills for AI coding assistants            |
+| [`start-runner`](model_start-runner.md)         | Start Docker Model Runner (Docker Engine only)                         |
+| [`status`](model_status.md)                     | Check if the Docker Model Runner is running                            |
+| [`stop-runner`](model_stop-runner.md)           | Stop Docker Model Runner (Docker Engine only)                          |
+| [`tag`](model_tag.md)                           | Tag a model                                                            |
+| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only)                     |
+| [`unload`](model_unload.md)                     | Unload running models                                                  |
+| [`version`](model_version.md)                   | Show the Docker Model Runner version                                   |
 
 
 
diff --git a/cmd/cli/docs/reference/model_package.md b/cmd/cli/docs/reference/model_package.md
@@ -1,12 +1,44 @@
 # docker model package
 
 <!---MARKER_GEN_START-->
-Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.
-When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).
-When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.
-When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.
-When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.
-For multimodal models, use --mmproj to include a multimodal projector file.
+Package a model into a Docker Model OCI artifact.
+
+The model source must be one of:
+  --gguf               A GGUF file (single file or first shard of a sharded model)
+  --safetensors-dir    A directory containing .safetensors and configuration files
+  --dduf               A .dduf (Diffusers Unified Format) archive
+  --from               An existing packaged model reference
+
+By default, the packaged artifact is loaded into the local Model Runner content store.
+Use --push to publish the model to a registry instead.
+
+MODEL specifies the target model reference (for example: myorg/llama3:8b).
+When using --push, MODEL must be a registry-qualified reference.
+
+Packaging behavior:
+
+  GGUF
+    --gguf must point to a .gguf file.
+    For sharded models, point to the first shard. All shards must:
+      • reside in the same directory
+      • follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
+    All shards are automatically discovered and packaged together.
+
+  Safetensors
+    --safetensors-dir must point to a directory containing .safetensors files
+    and required configuration files (e.g. model config, tokenizer files).
+    All files under the directory (including nested subdirectories) are
+    automatically discovered. Each file is packaged as a separate OCI layer.
+
+  DDUF
+    --dduf must point to a .dduf archive file.
+
+  Repackaging
+    --from repackages an existing model. You may override selected properties
+    such as --context-size to create a variant of the original model.
+
+  Multimodal models
+    Use --mmproj to include a multimodal projector file.
 
 ### Options
 
diff --git a/main.go b/main.go
@@ -18,7 +18,6 @@ import (
 	"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
 	"github.com/docker/model-runner/pkg/inference/backends/mlx"
 	"github.com/docker/model-runner/pkg/inference/backends/sglang"
-	"github.com/docker/model-runner/pkg/inference/backends/vllm"
 	"github.com/docker/model-runner/pkg/inference/backends/vllmmetal"
 	"github.com/docker/model-runner/pkg/inference/config"
 	"github.com/docker/model-runner/pkg/inference/models"
@@ -144,11 +143,6 @@ func main() {
 		log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
 	}
 
-	vllmBackend, err := initVLLMBackend(log, modelManager, vllmServerPath)
-	if err != nil {
-		log.Fatalf("unable to initialize %s backend: %v", vllm.Name, err)
-	}
-
 	mlxBackend, err := mlx.New(
 		log,
 		modelManager,
@@ -202,7 +196,6 @@ func main() {
 		sglang.Name:    sglangBackend,
 		diffusers.Name: diffusersBackend,
 	}
-	registerVLLMBackend(backends, vllmBackend)
 
 	if vllmMetalBackend != nil {
 		backends[vllmmetal.Name] = vllmMetalBackend