Commit 5ac840d

add dmrlet - lightweight node agent for Docker Model Runner

dmrlet is a lightweight node agent that runs inference containers directly with zero YAML overhead. It provides a simple CLI to serve models:

    dmrlet serve ai/smollm2  # Pulls model, starts inference container, exposes OpenAI API

Key features:

- Reuses existing pkg/distribution for model management
- containerd integration for container lifecycle
- GPU detection and passthrough (NVIDIA/AMD)
- Auto port allocation (30000-30999 range)
- Health checking with configurable timeout
- Backend auto-detection (llama-server for GGUF, vLLM for safetensors)

Commands: serve, stop, list, pull, version

Signed-off-by: Eric Curtin <eric.curtin@docker.com>

1 parent cd24a2a commit 5ac840d
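The backend auto-detection rule from the commit message (llama-server for GGUF, vLLM for safetensors) can be sketched as a file-extension check. This is a minimal illustration, not dmrlet's actual implementation; the function name, signature, and fallback choice are assumptions:

```go
package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// detectBackend mirrors the rule stated in the commit message:
// llama-server for GGUF model files, vLLM for safetensors.
// The name, signature, and default are illustrative only.
func detectBackend(modelFile string) string {
	switch strings.ToLower(filepath.Ext(modelFile)) {
	case ".gguf":
		return "llama-server"
	case ".safetensors":
		return "vllm"
	default:
		return "llama-server" // assumed conservative fallback for unknown formats
	}
}

func main() {
	fmt.Println(detectBackend("smollm2.gguf"))      // llama-server
	fmt.Println(detectBackend("model.safetensors")) // vllm
}
```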

File tree

36 files changed: +7913 −275 lines


Makefile

Lines changed: 13 additions & 1 deletion

```diff
@@ -26,14 +26,24 @@ DOCKER_BUILD_ARGS := \
 BUILD_DMR ?= 1
 
 # Main targets
-.PHONY: build run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
+.PHONY: build build-dmrlet run clean test integration-tests test-docker-ce-installation docker-build docker-build-multiplatform docker-run docker-build-vllm docker-run-vllm docker-build-sglang docker-run-sglang docker-run-impl help validate lint docker-build-diffusers docker-run-diffusers vllm-metal-build vllm-metal-install vllm-metal-dev vllm-metal-clean
 # Default target
 .DEFAULT_GOAL := build
 
 # Build the Go application
 build:
 	CGO_ENABLED=1 go build -ldflags="-s -w" -o $(APP_NAME) .
 
+# Build dmrlet binary
+build-dmrlet:
+	@echo "Building dmrlet..."
+	@VERSION=$$(git describe --tags --always --dirty 2>/dev/null || echo "dev"); \
+	GIT_COMMIT=$$(git rev-parse HEAD 2>/dev/null || echo "unknown"); \
+	BUILD_DATE=$$(date -u +"%Y-%m-%dT%H:%M:%SZ" 2>/dev/null || echo "unknown"); \
+	cd cmd/dmrlet && CGO_ENABLED=0 go build -ldflags="-s -w -X 'main.Version=$${VERSION}' -X 'main.GitCommit=$${GIT_COMMIT}' -X 'main.BuildDate=$${BUILD_DATE}'" -o dmrlet .
+	mv cmd/dmrlet/dmrlet .
+	@echo "Built: dmrlet"
+
 # Run the application locally
 run: build
 	@LLAMACPP_BIN="llamacpp/install/bin"; \
@@ -46,6 +56,7 @@ run: build
 # Clean build artifacts
 clean:
 	rm -f $(APP_NAME)
+	rm -f dmrlet
 	rm -f model-runner.sock
 	rm -rf $(MODELS_PATH)
 
@@ -219,6 +230,7 @@ vllm-metal-clean:
 help:
 	@echo "Available targets:"
 	@echo "  build                    - Build the Go application"
+	@echo "  build-dmrlet             - Build dmrlet binary (lightweight node agent)"
 	@echo "  run                      - Run the application locally"
 	@echo "  clean                    - Clean build artifacts"
 	@echo "  test                     - Run tests"
```

README.md

Lines changed: 109 additions & 0 deletions

The new section below is inserted after the existing Kubernetes note ("If you are interested in a specific Kubernetes use-case, please start a discussion on the issue tracker.") and before the "## Community" section:

## dmrlet: Container Orchestrator for AI Inference

dmrlet is a purpose-built container orchestrator for AI inference workloads. Unlike Kubernetes, it focuses exclusively on running stateless inference containers with zero configuration overhead. Multi-GPU mapping "just works" without YAML, device plugins, or node selectors.

### Key Features

| Feature | Kubernetes | dmrlet |
|---------|------------|--------|
| Multi-GPU setup | Device plugins + node selectors + resource-limits YAML | `dmrlet serve llama3 --gpus all` |
| Config overhead | 50+ lines of YAML minimum | Zero YAML, CLI-only |
| Time to first inference | Minutes (pod scheduling, image pull) | Seconds (model already local) |
| Model management | External (mount PVCs, manage yourself) | Integrated with Docker Model Runner store |
### Building dmrlet

```bash
# Build the dmrlet binary
go build -o dmrlet ./cmd/dmrlet

# Verify it works
./dmrlet --help
```
### Usage

**Start the daemon:**
```bash
# Start in foreground
dmrlet daemon

# With custom socket path
dmrlet daemon --socket /tmp/dmrlet.sock
```

**Serve a model:**
```bash
# Auto-detect backend and GPUs
dmrlet serve llama3.2

# Specify backend
dmrlet serve llama3.2 --backend vllm

# Specify GPU allocation
dmrlet serve llama3.2 --gpus 0,1
dmrlet serve llama3.2 --gpus all

# Multiple replicas
dmrlet serve llama3.2 --replicas 2

# Backend-specific options
dmrlet serve llama3.2 --ctx-size 4096    # llama.cpp context size
dmrlet serve llama3.2 --gpu-memory 0.8   # vLLM GPU memory utilization
```
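The `--gpus` flag above accepts either `all` or a comma-separated index list. A minimal sketch of how such a value might be parsed into device indices (hypothetical helper; dmrlet's real flag handling may differ):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseGPUs turns a --gpus flag value into GPU indices.
// "all" expands to every detected device; otherwise the value is a
// comma-separated index list. detected is the number of GPUs found
// on the node. Illustrative only, not dmrlet's actual code.
func parseGPUs(value string, detected int) ([]int, error) {
	if value == "all" {
		ids := make([]int, detected)
		for i := range ids {
			ids[i] = i
		}
		return ids, nil
	}
	var ids []int
	for _, part := range strings.Split(value, ",") {
		id, err := strconv.Atoi(strings.TrimSpace(part))
		if err != nil {
			return nil, fmt.Errorf("invalid GPU index %q: %w", part, err)
		}
		ids = append(ids, id)
	}
	return ids, nil
}

func main() {
	ids, _ := parseGPUs("0,1", 4)
	fmt.Println(ids) // [0 1]
	all, _ := parseGPUs("all", 4)
	fmt.Println(all) // [0 1 2 3]
}
```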
**List running models:**
```bash
dmrlet ps
# MODEL     BACKEND    REPLICAS  GPUS       ENDPOINTS        STATUS
# llama3.2  llama.cpp  1         [0,1,2,3]  localhost:30000  healthy
```
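The `localhost:30000` endpoint above comes from dmrlet's auto port allocation in the 30000-30999 range (per the commit message). A sketch of such a probe, assuming a simple listen-and-release check on localhost:

```go
package main

import (
	"fmt"
	"net"
)

// findFreePort scans [lo, hi] and returns the first port that can be
// bound on localhost, releasing it immediately so the inference
// container can claim it. Inherently racy, but adequate for a
// single node agent. Hypothetical helper, not dmrlet's actual code.
func findFreePort(lo, hi int) (int, error) {
	for port := lo; port <= hi; port++ {
		l, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", port))
		if err != nil {
			continue // port in use, try the next one
		}
		l.Close()
		return port, nil
	}
	return 0, fmt.Errorf("no free port in %d-%d", lo, hi)
}

func main() {
	port, err := findFreePort(30000, 30999)
	if err != nil {
		panic(err)
	}
	fmt.Println("allocated port:", port)
}
```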
**View logs:**
```bash
dmrlet logs llama3.2      # Last 100 lines
dmrlet logs llama3.2 -f   # Follow logs
```

**Scale replicas:**
```bash
dmrlet scale llama3.2 4   # Scale to 4 replicas
```
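With multiple replicas, the Service Registry's "endpoint discovery with load balancing" could be as simple as a round-robin picker over the replica endpoints. This is a sketch under that assumption, not dmrlet's actual registry:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// roundRobin hands out replica endpoints in rotation, spreading
// client requests evenly across the replicas started by `dmrlet scale`.
// The atomic counter makes pick safe for concurrent callers.
type roundRobin struct {
	endpoints []string
	next      atomic.Uint64
}

func (r *roundRobin) pick() string {
	n := r.next.Add(1) - 1
	return r.endpoints[n%uint64(len(r.endpoints))]
}

func main() {
	lb := &roundRobin{endpoints: []string{"localhost:30000", "localhost:30001"}}
	for i := 0; i < 4; i++ {
		fmt.Println(lb.pick())
	}
	// alternates: localhost:30000, localhost:30001, localhost:30000, localhost:30001
}
```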
**Stop a model:**
```bash
dmrlet stop llama3.2
dmrlet stop --all   # Stop all models
```

**Check status:**
```bash
dmrlet status
# DAEMON: running
# SOCKET: /var/run/dmrlet.sock
#
# GPUs:
#   GPU 0: NVIDIA A100 80GB  81920MB  (in use: llama3.2)
#   GPU 1: NVIDIA A100 80GB  81920MB  (available)
#
# MODELS: 1 running
```
### Supported Backends

- **llama.cpp** - Default backend for GGUF models
- **vLLM** - High-throughput serving for safetensors models
- **SGLang** - Fast serving with RadixAttention
### Architecture

```
dmrlet daemon
├── GPU Manager        - Auto-detect and allocate GPUs
├── Container Manager  - Docker-based container lifecycle
├── Service Registry   - Endpoint discovery with load balancing
├── Health Monitor     - Auto-restart unhealthy containers
├── Auto-scaler        - Scale based on QPS/latency/GPU utilization
└── Log Aggregator     - Centralized log collection
```
## Community

For general questions and discussion, please use [Docker Model Runner's Slack channel](https://dockercommunity.slack.com/archives/C09H9P5E57B).

cmd/dmrlet/commands/list.go

Lines changed: 88 additions & 0 deletions (new file)

```go
package commands

import (
	"fmt"
	"os"

	"github.com/olekukonko/tablewriter"
	"github.com/olekukonko/tablewriter/renderer"
	"github.com/olekukonko/tablewriter/tw"
	"github.com/spf13/cobra"
)

func newListCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:     "list",
		Aliases: []string{"ls"},
		Short:   "List running models",
		Long: `List all running inference models managed by dmrlet.

Examples:
  dmrlet list
  dmrlet ls`,
		Args: cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			return runList(cmd)
		},
	}

	return cmd
}

func runList(cmd *cobra.Command) error {
	ctx := cmd.Context()

	if err := initManager(ctx); err != nil {
		return fmt.Errorf("initializing manager: %w", err)
	}

	running, err := manager.List(ctx)
	if err != nil {
		return fmt.Errorf("listing models: %w", err)
	}

	if len(running) == 0 {
		cmd.Println("No running models")
		return nil
	}

	table := tablewriter.NewTable(os.Stdout,
		tablewriter.WithRenderer(renderer.NewBlueprint(tw.Rendition{
			Borders: tw.BorderNone,
			Settings: tw.Settings{
				Separators: tw.Separators{
					BetweenColumns: tw.Off,
				},
				Lines: tw.Lines{
					ShowHeaderLine: tw.Off,
				},
			},
		})),
		tablewriter.WithConfig(tablewriter.Config{
			Header: tw.CellConfig{
				Formatting: tw.CellFormatting{
					AutoFormat: tw.Off,
				},
				Alignment: tw.CellAlignment{Global: tw.AlignLeft},
				Padding:   tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
			},
			Row: tw.CellConfig{
				Alignment: tw.CellAlignment{Global: tw.AlignLeft},
				Padding:   tw.CellPadding{Global: tw.Padding{Left: "", Right: " "}},
			},
		}),
	)
	table.Header([]string{"MODEL", "BACKEND", "PORT", "ENDPOINT"})

	for _, m := range running {
		table.Append([]string{
			m.ModelRef,
			string(m.Backend),
			fmt.Sprintf("%d", m.Port),
			m.Endpoint,
		})
	}

	table.Render()
	return nil
}
```

cmd/dmrlet/commands/pull.go

Lines changed: 44 additions & 0 deletions (new file)

```go
package commands

import (
	"fmt"
	"os"

	"github.com/spf13/cobra"
)

func newPullCmd() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "pull MODEL",
		Short: "Pull a model without serving",
		Long: `Pull a model from Docker Hub or HuggingFace without starting an inference container.
This is useful for pre-downloading models.

Examples:
  dmrlet pull ai/smollm2
  dmrlet pull huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf`,
		Args: cobra.ExactArgs(1),
		RunE: func(cmd *cobra.Command, args []string) error {
			return runPull(cmd, args[0])
		},
	}

	return cmd
}

func runPull(cmd *cobra.Command, modelRef string) error {
	ctx := cmd.Context()

	if err := initStore(); err != nil {
		return fmt.Errorf("initializing store: %w", err)
	}

	cmd.Printf("Pulling model: %s\n", modelRef)

	if err := store.EnsureModel(ctx, modelRef, os.Stdout); err != nil {
		return fmt.Errorf("pulling model: %w", err)
	}

	cmd.Printf("\nModel pulled successfully: %s\n", modelRef)
	return nil
}
```
