Skip to content

Commit 95d5fd8

Browse files
committed
add dmrlet - lightweight node agent for Docker Model Runner
dmrlet is a "Kubelet for AI" that runs inference containers directly with zero YAML overhead. It provides a simple CLI to serve models:

    dmrlet serve ai/smollm2   # Pulls model, starts inference container, exposes OpenAI API

Key features:
- Reuses existing pkg/distribution for model management
- containerd integration for container lifecycle
- GPU detection and passthrough (NVIDIA/AMD)
- Auto port allocation (30000-30999 range)
- Health checking with configurable timeout
- Backend auto-detection (llama-server for GGUF, vLLM for safetensors)

Commands: serve, stop, list, pull, version

Signed-off-by: Eric Curtin <eric.curtin@docker.com>
1 parent aa6b09e commit 95d5fd8

File tree

22 files changed

+4077
-275
lines changed

22 files changed

+4077
-275
lines changed

Makefile

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,24 @@ DOCKER_BUILD_ARGS := \
2626
BUILD_DMR ?= 1
2727

2828
# Main targets
29-
.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
29+
.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
3030
# Default target
3131
.DEFAULT_GOAL := build
3232

3333
# Build the Go application
3434
build:
3535
CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .
3636

37+
# Build dmrlet binary
# Computes VERSION (git describe, falling back to "dev"), GIT_COMMIT and
# BUILD_DATE (each falling back to "unknown") in a single shell invocation,
# injects them into main.Version / main.GitCommit / main.BuildDate via
# -ldflags -X, and builds with CGO disabled from inside cmd/dmrlet.
# The resulting binary is then moved from cmd/dmrlet into the current directory.
38+
build-dmrlet:
39+
@echo "Building dmrlet..."
40+
@VERSION=$$(git describe --tags --always --dirty 2>/dev/null || echo "dev"); \
41+
GIT_COMMIT=$$(git rev-parse HEAD 2>/dev/null || echo "unknown"); \
42+
BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || echo "unknown"); \
43+
cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w -X 'main.Version=$${VERSION}' -X 'main.GitCommit=$${GIT_COMMIT}' -X 'main.BuildDate=$${BUILD_DATE}'" -o dmrlet .
44+
mv cmd/dmrlet/dmrlet .
45+
@echo "Built: dmrlet"
46+
3747
# Run the application locally
3848
run: build
3949
@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +56,7 @@ run: build
4656
# Clean build artifacts
4757
clean:
4858
rm -f $(APP_NAME)
59+
rm -f dmrlet
4960
rm -f model-runner.sock
5061
rm -rf $(MODELS_PATH)
5162

@@ -219,6 +230,7 @@ vllm-metal-clean:
219230
help:
220231
@echo "Available targets:"
221232
@echo " build - Build the Go application"
233+
@echo " build-dmrlet - Build dmrlet binary (lightweight node agent)"
222234
@echo " run - Run the application locally"
223235
@echo " clean - Clean build artifacts"
224236
@echo " test - Run tests"

cmd/dmrlet/commands/list.go

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
"os"
6+
7+
"github.com/olekukonko/tablewriter"
8+
"github.com/olekukonko/tablewriter/renderer"
9+
"github.com/olekukonko/tablewriter/tw"
10+
"github.com/spf13/cobra"
11+
)
12+
13+
// newListCmd constructs the "list" subcommand (alias "ls").
//
// The command rejects positional arguments (cobra.NoArgs) and its RunE
// handler forwards the invoked command to runList.
func newListCmd() *cobra.Command {
14+
cmd := &cobra.Command{
15+
Use: "list",
16+
Aliases: []string{"ls"},
17+
Short: "List running models",
18+
Long: `List all running inference models managed by dmrlet.
19+
20+
Examples:
21+
dmrlet list
22+
dmrlet ls`,
23+
Args: cobra.NoArgs,
24+
RunE: func(cmd *cobra.Command, args []string) error {
25+
return runList(cmd)
26+
},
27+
}
28+
29+
return cmd
30+
}
31+
32+
func runList(cmd *cobra.Command) error {
33+
ctx := cmd.Context()
34+
35+
if err := initManager(ctx); err != nil {
36+
return fmt.Errorf("initializing manager: %w", err)
37+
}
38+
39+
running, err := manager.List(ctx)
40+
if err != nil {
41+
return fmt.Errorf("listing models: %w", err)
42+
}
43+
44+
if len(running) == 0 {
45+
cmd.Println("No running models")
46+
return nil
47+
}
48+
49+
table := tablewriter.NewTable(os.Stdout,
50+
tablewriter.WithRenderer(renderer.NewBlueprint(tw.Rendition{
51+
Borders: tw.BorderNone,
52+
Settings: tw.Settings{
53+
Separators: tw.Separators{
54+
BetweenColumns: tw.Off,
55+
},
56+
Lines: tw.Lines{
57+
ShowHeaderLine: tw.Off,
58+
},
59+
},
60+
})),
61+
tablewriter.WithConfig(tablewriter.Config{
62+
Header: tw.CellConfig{
63+
Formatting: tw.CellFormatting{
64+
AutoFormat: tw.Off,
65+
},
66+
Alignment: tw.CellAlignment{Global: tw.AlignLeft},
67+
Padding: tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
68+
},
69+
Row: tw.CellConfig{
70+
Alignment: tw.CellAlignment{Global: tw.AlignLeft},
71+
Padding: tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
72+
},
73+
}),
74+
)
75+
table.Header([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"})
76+
77+
for _, m := range running {
78+
table.Append([]string{
79+
m.ModelRef,
80+
string(m.Backend),
81+
fmt.Sprintf("%d", m.Port),
82+
m.Endpoint,
83+
})
84+
}
85+
86+
table.Render()
87+
return nil
88+
}

cmd/dmrlet/commands/pull.go

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
"os"
6+
7+
"github.com/spf13/cobra"
8+
)
9+
10+
// newPullCmd constructs the "pull MODEL" subcommand.
//
// The command requires exactly one positional argument (cobra.ExactArgs(1)),
// which its RunE handler passes to runPull as the model reference.
func newPullCmd() *cobra.Command {
11+
cmd := &cobra.Command{
12+
Use: "pull MODEL",
13+
Short: "Pull a model without serving",
14+
Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
15+
This is useful for pre-downloading models.
16+
17+
Examples:
18+
dmrlet pull ai/smollm2
19+
dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
20+
Args: cobra.ExactArgs(1),
21+
RunE: func(cmd *cobra.Command, args []string) error {
22+
return runPull(cmd, args[0])
23+
},
24+
}
25+
26+
return cmd
27+
}
28+
29+
func runPull(cmd *cobra.Command, modelRef string) error {
30+
ctx := cmd.Context()
31+
32+
if err := initStore(); err != nil {
33+
return fmt.Errorf("initializing store: %w", err)
34+
}
35+
36+
cmd.Printf("Pulling model: %s\n", modelRef)
37+
38+
if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
39+
return fmt.Errorf("pulling model: %w", err)
40+
}
41+
42+
cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
43+
return nil
44+
}

cmd/dmrlet/commands/root.go

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// Package commands implements the dmrlet CLI commands.
2+
package commands
3+
4+
import (
5+
"context"
6+
"os"
7+
"os/signal"
8+
goruntime "runtime"
9+
"syscall"
10+
11+
"github.com/docker/model-runner/pkg/dmrlet/inference"
12+
"github.com/docker/model-runner/pkg/dmrlet/models"
13+
"github.com/docker/model-runner/pkg/dmrlet/runtime"
14+
"github.com/sirupsen/logrus"
15+
"github.com/spf13/cobra"
16+
)
17+
18+
// Package-level state shared by every dmrlet subcommand.
var (
19+
// Global flags
// Bound to the persistent --verbose/-v and --log-json flags in init.
20+
verbose bool
21+
logJSON bool
22+
23+
// Shared state
// log is set by rootCmd's PersistentPreRunE (and stays nil for the help
// and version commands, which return before it is assigned). store, rt
// and manager are created on first use by initStore, initRuntime and
// initManager respectively.
24+
log *logrus.Entry
25+
store *models.Store
26+
rt runtime.Runner
27+
manager *inference.Manager
28+
)
29+
30+
// rootCmd is the root command for dmrlet.
//
// Its PersistentPreRunE runs before every subcommand and builds the shared
// logrus entry stored in the package-level log variable. Usage and error
// printing by cobra are both silenced (SilenceUsage / SilenceErrors), so
// reporting the returned error is presumably left to the caller of Execute.
31+
var rootCmd = &cobra.Command{
32+
Use: "dmrlet",
33+
Short: "Lightweight node agent for Docker Model Runner",
34+
Long: `dmrlet is a lightweight node agent for Docker Model Runner - a "Kubelet for AI"
35+
that runs inference containers directly with zero YAML overhead.
36+
37+
Example:
38+
dmrlet serve ai/smollm2
39+
# Pulls model, starts inference container, exposes OpenAI API at http://localhost:30000/v1`,
40+
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
41+
// Skip initialization for help and version commands
// (log is therefore left unset for those two commands).
42+
if cmd.Name() == "help" || cmd.Name() == "version" {
43+
return nil
44+
}
45+
46+
// Setup logging
// --verbose selects debug level, otherwise info; --log-json switches the
// formatter to JSON.
47+
logger := logrus.New()
48+
if verbose {
49+
logger.SetLevel(logrus.DebugLevel)
50+
} else {
51+
logger.SetLevel(logrus.InfoLevel)
52+
}
53+
if logJSON {
54+
logger.SetFormatter(&logrus.JSONFormatter{})
55+
}
56+
57+
// Check DMRLET_LOG_LEVEL environment variable
// NOTE(review): this runs after the --verbose handling, so a valid value
// in the environment overrides the flag; a value logrus cannot parse is
// silently ignored and the flag-derived level is kept.
58+
if level := os.Getenv("DMRLET_LOG_LEVEL"); level != "" {
59+
if lvl, err := logrus.ParseLevel(level); err == nil {
60+
logger.SetLevel(lvl)
61+
}
62+
}
63+
64+
log = logger.WithField("component", "dmrlet")
65+
66+
return nil
67+
},
68+
SilenceUsage: true,
69+
SilenceErrors: true,
70+
}
71+
72+
// Execute runs the root command.
73+
func Execute() error {
74+
// Setup context with signal handling
75+
ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
76+
defer cancel()
77+
78+
return rootCmd.ExecuteContext(ctx)
79+
}
80+
81+
func init() {
82+
rootCmd.PersistentFlags().BoolVarP(&verbose, "verbose", "v", false, "Enable verbose output")
83+
rootCmd.PersistentFlags().BoolVar(&logJSON, "log-json", false, "Output logs in JSON format")
84+
85+
rootCmd.AddCommand(
86+
newServeCmd(),
87+
newStopCmd(),
88+
newListCmd(),
89+
newPullCmd(),
90+
newVersionCmd(),
91+
)
92+
}
93+
94+
// initStore initializes the model store.
95+
func initStore() error {
96+
if store != nil {
97+
return nil
98+
}
99+
100+
var err error
101+
store, err = models.NewStore(
102+
models.WithLogger(log),
103+
)
104+
if err != nil {
105+
return err
106+
}
107+
return nil
108+
}
109+
110+
// initRuntime initializes the runtime (native process on macOS, containerd elsewhere).
111+
func initRuntime(ctx context.Context) error {
112+
if rt != nil {
113+
return nil
114+
}
115+
116+
var err error
117+
switch goruntime.GOOS {
118+
case "darwin":
119+
rt = runtime.NewProcessRuntime(log)
120+
default:
121+
rt, err = runtime.NewRuntime(ctx,
122+
runtime.WithRuntimeLogger(log),
123+
)
124+
if err != nil {
125+
return err
126+
}
127+
}
128+
return nil
129+
}
130+
131+
// initManager initializes the inference manager.
132+
func initManager(ctx context.Context) error {
133+
if err := initStore(); err != nil {
134+
return err
135+
}
136+
if err := initRuntime(ctx); err != nil {
137+
return err
138+
}
139+
140+
if manager == nil {
141+
manager = inference.NewManager(store, rt,
142+
inference.WithManagerLogger(log),
143+
)
144+
}
145+
return nil
146+
}

0 commit comments

Comments
 (0)