Skip to content

Commit cd3860f

Browse files
committed
feat(unpack): refine model packaging documentation and streamline command descriptions
1 parent ff896b5 commit cd3860f

File tree

5 files changed: 145 additions and 57 deletions.

cmd/cli/commands/package.go

Lines changed: 39 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -39,13 +39,45 @@ func newPackagedCmd() *cobra.Command {
3939

4040
c := &cobra.Command{
4141
Use: "package (--gguf <path> | --safetensors-dir <path> | --dduf <path> | --from <model>) [--license <path>...] [--mmproj <path>] [--context-size <tokens>] [--push] MODEL",
42-
Short: "Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact.",
43-
Long: "Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.\n" +
44-
"When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).\n" +
45-
"When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.\n" +
46-
"When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.\n" +
47-
"When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.\n" +
48-
"For multimodal models, use --mmproj to include a multimodal projector file.",
42+
Short: "Package a model into a Docker Model OCI artifact",
43+
Long: `Package a model into a Docker Model OCI artifact.
44+
45+
The model source must be one of:
46+
--gguf A GGUF file (single file or first shard of a sharded model)
47+
--safetensors-dir A directory containing .safetensors and configuration files
48+
--dduf A .dduf (Diffusers Unified Format) archive
49+
--from An existing packaged model reference
50+
51+
By default, the packaged artifact is loaded into the local Model Runner content store.
52+
Use --push to publish the model to a registry instead.
53+
54+
MODEL specifies the target model reference (for example: myorg/llama3:8b).
55+
When using --push, MODEL must be a registry-qualified reference.
56+
57+
Packaging behavior:
58+
59+
GGUF
60+
--gguf must point to a .gguf file.
61+
For sharded models, point to the first shard. All shards must:
62+
• reside in the same directory
63+
• follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
64+
All shards are automatically discovered and packaged together.
65+
66+
Safetensors
67+
--safetensors-dir must point to a directory containing .safetensors files
68+
and required configuration files (e.g. model config, tokenizer files).
69+
All files under the directory (including nested subdirectories) are
70+
automatically discovered. Each file is packaged as a separate OCI layer.
71+
72+
DDUF
73+
--dduf must point to a .dduf archive file.
74+
75+
Repackaging
76+
--from repackages an existing model. You may override selected properties
77+
such as --context-size to create a variant of the original model.
78+
79+
Multimodal models
80+
Use --mmproj to include a multimodal projector file.`,
4981
Args: func(cmd *cobra.Command, args []string) error {
5082
if err := requireExactArgs(1, "package", "MODEL")(cmd, args); err != nil {
5183
return err

cmd/cli/docs/reference/docker_model_package.yaml

Lines changed: 39 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,44 @@
11
command: docker model package
2-
short: |
3-
Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact.
2+
short: Package a model into a Docker Model OCI artifact
43
long: |-
5-
Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.
6-
When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).
7-
When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.
8-
When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.
9-
When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.
10-
For multimodal models, use --mmproj to include a multimodal projector file.
4+
Package a model into a Docker Model OCI artifact.
5+
6+
The model source must be one of:
7+
--gguf A GGUF file (single file or first shard of a sharded model)
8+
--safetensors-dir A directory containing .safetensors and configuration files
9+
--dduf A .dduf (Diffusers Unified Format) archive
10+
--from An existing packaged model reference
11+
12+
By default, the packaged artifact is loaded into the local Model Runner content store.
13+
Use --push to publish the model to a registry instead.
14+
15+
MODEL specifies the target model reference (for example: myorg/llama3:8b).
16+
When using --push, MODEL must be a registry-qualified reference.
17+
18+
Packaging behavior:
19+
20+
GGUF
21+
--gguf must point to a .gguf file.
22+
For sharded models, point to the first shard. All shards must:
23+
• reside in the same directory
24+
• follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
25+
All shards are automatically discovered and packaged together.
26+
27+
Safetensors
28+
--safetensors-dir must point to a directory containing .safetensors files
29+
and required configuration files (e.g. model config, tokenizer files).
30+
All files under the directory (including nested subdirectories) are
31+
automatically discovered. Each file is packaged as a separate OCI layer.
32+
33+
DDUF
34+
--dduf must point to a .dduf archive file.
35+
36+
Repackaging
37+
--from repackages an existing model. You may override selected properties
38+
such as --context-size to create a variant of the original model.
39+
40+
Multimodal models
41+
Use --mmproj to include a multimodal projector file.
1142
usage: docker model package (--gguf <path> | --safetensors-dir <path> | --dduf <path> | --from <model>) [--license <path>...] [--mmproj <path>] [--context-size <tokens>] [--push] MODEL
1243
pname: docker model
1344
plink: docker_model.yaml

cmd/cli/docs/reference/model.md

Lines changed: 29 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -5,35 +5,35 @@ Docker Model Runner
55

66
### Subcommands
77

8-
| Name | Description |
9-
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------------|
10-
| [`bench`](model_bench.md) | Benchmark a model's performance at different concurrency levels |
11-
| [`df`](model_df.md) | Show Docker Model Runner disk usage |
12-
| [`inspect`](model_inspect.md) | Display detailed information on one model |
13-
| [`install-runner`](model_install-runner.md) | Install Docker Model Runner (Docker Engine only) |
14-
| [`launch`](model_launch.md) | Launch an app configured to use Docker Model Runner |
15-
| [`list`](model_list.md) | List the models pulled to your local environment |
16-
| [`logs`](model_logs.md) | Fetch the Docker Model Runner logs |
17-
| [`package`](model_package.md) | Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact. |
18-
| [`ps`](model_ps.md) | List running models |
19-
| [`pull`](model_pull.md) | Pull a model from Docker Hub or HuggingFace to your local environment |
20-
| [`purge`](model_purge.md) | Remove all models |
21-
| [`push`](model_push.md) | Push a model to Docker Hub |
22-
| [`reinstall-runner`](model_reinstall-runner.md) | Reinstall Docker Model Runner (Docker Engine only) |
23-
| [`requests`](model_requests.md) | Fetch requests+responses from Docker Model Runner |
24-
| [`restart-runner`](model_restart-runner.md) | Restart Docker Model Runner (Docker Engine only) |
25-
| [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub |
26-
| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
27-
| [`search`](model_search.md) | Search for models on Docker Hub and HuggingFace |
28-
| [`show`](model_show.md) | Show information for a model |
29-
| [`skills`](model_skills.md) | Install Docker Model Runner skills for AI coding assistants |
30-
| [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
31-
| [`status`](model_status.md) | Check if the Docker Model Runner is running |
32-
| [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
33-
| [`tag`](model_tag.md) | Tag a model |
34-
| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only) |
35-
| [`unload`](model_unload.md) | Unload running models |
36-
| [`version`](model_version.md) | Show the Docker Model Runner version |
8+
| Name | Description |
9+
|:------------------------------------------------|:-----------------------------------------------------------------------|
10+
| [`bench`](model_bench.md) | Benchmark a model's performance at different concurrency levels |
11+
| [`df`](model_df.md) | Show Docker Model Runner disk usage |
12+
| [`inspect`](model_inspect.md) | Display detailed information on one model |
13+
| [`install-runner`](model_install-runner.md) | Install Docker Model Runner (Docker Engine only) |
14+
| [`launch`](model_launch.md) | Launch an app configured to use Docker Model Runner |
15+
| [`list`](model_list.md) | List the models pulled to your local environment |
16+
| [`logs`](model_logs.md) | Fetch the Docker Model Runner logs |
17+
| [`package`](model_package.md) | Package a model into a Docker Model OCI artifact |
18+
| [`ps`](model_ps.md) | List running models |
19+
| [`pull`](model_pull.md) | Pull a model from Docker Hub or HuggingFace to your local environment |
20+
| [`purge`](model_purge.md) | Remove all models |
21+
| [`push`](model_push.md) | Push a model to Docker Hub |
22+
| [`reinstall-runner`](model_reinstall-runner.md) | Reinstall Docker Model Runner (Docker Engine only) |
23+
| [`requests`](model_requests.md) | Fetch requests+responses from Docker Model Runner |
24+
| [`restart-runner`](model_restart-runner.md) | Restart Docker Model Runner (Docker Engine only) |
25+
| [`rm`](model_rm.md) | Remove local models downloaded from Docker Hub |
26+
| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
27+
| [`search`](model_search.md) | Search for models on Docker Hub and HuggingFace |
28+
| [`show`](model_show.md) | Show information for a model |
29+
| [`skills`](model_skills.md) | Install Docker Model Runner skills for AI coding assistants |
30+
| [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
31+
| [`status`](model_status.md) | Check if the Docker Model Runner is running |
32+
| [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
33+
| [`tag`](model_tag.md) | Tag a model |
34+
| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only) |
35+
| [`unload`](model_unload.md) | Unload running models |
36+
| [`version`](model_version.md) | Show the Docker Model Runner version |
3737

3838

3939

cmd/cli/docs/reference/model_package.md

Lines changed: 38 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,44 @@
11
# docker model package
22

33
<!---MARKER_GEN_START-->
4-
Package a GGUF file, Safetensors directory, DDUF file, or existing model into a Docker model OCI artifact, with optional licenses and multimodal projector. The package is sent to the model-runner, unless --push is specified.
5-
When packaging a sharded GGUF model, --gguf should point to the first shard. All shard files should be siblings and should include the index in the file name (e.g. model-00001-of-00015.gguf).
6-
When packaging a Safetensors model, --safetensors-dir should point to a directory containing .safetensors files and config files. All files (including nested subdirectories) will be auto-discovered and each file is packaged as an individual layer.
7-
When packaging a DDUF file (Diffusers Unified Format), --dduf should point to a .dduf archive file.
8-
When packaging from an existing model using --from, you can modify properties like context size to create a variant of the original model.
9-
For multimodal models, use --mmproj to include a multimodal projector file.
4+
Package a model into a Docker Model OCI artifact.
5+
6+
The model source must be one of:
7+
--gguf A GGUF file (single file or first shard of a sharded model)
8+
--safetensors-dir A directory containing .safetensors and configuration files
9+
--dduf A .dduf (Diffusers Unified Format) archive
10+
--from An existing packaged model reference
11+
12+
By default, the packaged artifact is loaded into the local Model Runner content store.
13+
Use --push to publish the model to a registry instead.
14+
15+
MODEL specifies the target model reference (for example: myorg/llama3:8b).
16+
When using --push, MODEL must be a registry-qualified reference.
17+
18+
Packaging behavior:
19+
20+
GGUF
21+
--gguf must point to a .gguf file.
22+
For sharded models, point to the first shard. All shards must:
23+
• reside in the same directory
24+
• follow an indexed naming convention (e.g. model-00001-of-00015.gguf)
25+
All shards are automatically discovered and packaged together.
26+
27+
Safetensors
28+
--safetensors-dir must point to a directory containing .safetensors files
29+
and required configuration files (e.g. model config, tokenizer files).
30+
All files under the directory (including nested subdirectories) are
31+
automatically discovered. Each file is packaged as a separate OCI layer.
32+
33+
DDUF
34+
--dduf must point to a .dduf archive file.
35+
36+
Repackaging
37+
--from repackages an existing model. You may override selected properties
38+
such as --context-size to create a variant of the original model.
39+
40+
Multimodal models
41+
Use --mmproj to include a multimodal projector file.
1042

1143
### Options
1244

main.go

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/docker/model-runner/pkg/inference/backends/llamacpp"
1919
"github.com/docker/model-runner/pkg/inference/backends/mlx"
2020
"github.com/docker/model-runner/pkg/inference/backends/sglang"
21-
"github.com/docker/model-runner/pkg/inference/backends/vllm"
2221
"github.com/docker/model-runner/pkg/inference/backends/vllmmetal"
2322
"github.com/docker/model-runner/pkg/inference/config"
2423
"github.com/docker/model-runner/pkg/inference/models"
@@ -144,11 +143,6 @@ func main() {
144143
log.Fatalf("unable to initialize %s backend: %v", llamacpp.Name, err)
145144
}
146145

147-
vllmBackend, err := initVLLMBackend(log, modelManager, vllmServerPath)
148-
if err != nil {
149-
log.Fatalf("unable to initialize %s backend: %v", vllm.Name, err)
150-
}
151-
152146
mlxBackend, err := mlx.New(
153147
log,
154148
modelManager,
@@ -202,7 +196,6 @@ func main() {
202196
sglang.Name: sglangBackend,
203197
diffusers.Name: diffusersBackend,
204198
}
205-
registerVLLMBackend(backends, vllmBackend)
206199

207200
if vllmMetalBackend != nil {
208201
backends[vllmmetal.Name] = vllmMetalBackend

0 commit comments

Comments
 (0)