Skip to content

Commit 2ee9e84

Browse files
committed
add kvevent online example for uds path
1 parent 836b709 commit 2ee9e84

File tree

9 files changed

+453
-40
lines changed

9 files changed

+453
-40
lines changed

.github/workflows/ci-release-uds-tokenizer.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@ jobs:
3434
image-name: llm-d-uds-tokenizer
3535
registry: ghcr.io/llm-d
3636
github-token: ${{ secrets.GHCR_TOKEN }}
37-
context: services/uds_tokenizer
38-
dockerfile: services/uds_tokenizer/Dockerfile
37+
dockerfile: Dockerfile.tokenizer
3938

4039
- name: Run Trivy scan
4140
uses: ./.github/actions/trivy-scan

Dockerfile.uds

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2025 The llm-d Authors.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Build stage: uses the Go 1.24 builder image
16+
FROM quay.io/projectquay/golang:1.24 AS builder
17+
ARG TARGETOS
18+
ARG TARGETARCH
19+
20+
WORKDIR /workspace
21+
22+
# Install system-level dependencies first. This layer is very stable.
23+
USER root
24+
# Install EPEL repository directly and then ZeroMQ, as epel-release is not in default repos.
25+
# Install the C/C++ toolchain, ZeroMQ development headers and pkgconfig needed for the build (no Python is installed in this image).
26+
# The builder is based on UBI8, so we need epel-release-8.
27+
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
28+
dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \
29+
dnf clean all
30+
31+
# Copy the Go Modules manifests
32+
COPY go.mod go.mod
33+
COPY go.sum go.sum
34+
# cache deps before building and copying source so that we don't need to re-download as much
35+
# and so that source changes don't invalidate our downloaded layer
36+
RUN go mod download
37+
38+
# Copy the source code.
39+
COPY . .
40+
41+
RUN make build-uds
42+
43+
# Use UBI9 as the runtime base image to package the kv-cache-manager binary.
44+
# NOTE: this stage is not distroless (see https://github.com/GoogleContainerTools/distroless); it uses UBI9, on which the runtime packages below are installed with dnf.
45+
FROM registry.access.redhat.com/ubi9/ubi:latest
46+
WORKDIR /
47+
# Install the runtime packages for the manager: the ZeroMQ runtime library and libxcrypt-compat.
48+
# The final image is UBI9, so we need epel-release-9.
49+
USER root
50+
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \
51+
dnf install -y zeromq libxcrypt-compat && \
52+
dnf clean all
53+
54+
# Copy the compiled Go application
55+
COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager
56+
USER 65532:65532
57+
58+
# Set the entrypoint to the kv-cache-manager binary
59+
ENTRYPOINT ["/app/kv-cache-manager"]

Makefile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,8 @@ build: build-uds build-embedded ## Build both UDS-only and embedded binaries
240240

241241
.PHONY: build-uds
242242
build-uds: check-go download-zmq ## Build without embedded tokenizers (no Python required)
243-
@printf "\033[33;1m==== Building (UDS-only, no embedded tokenizers) ====\033[0m\n"
244-
@go build ./pkg/...
243+
@printf "\033[33;1m==== Building application binary (with uds tokenizers) ====\033[0m\n"
244+
@go build -o bin/$(PROJECT_NAME) examples/kv_events/online_uds/main.go
245245
@echo "✅ UDS-only build succeeded"
246246

247247
.PHONY: build-embedded

examples/kv_events/online/main.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,6 @@ const (
6262
envExternalTokenization = "EXTERNAL_TOKENIZATION"
6363
)
6464

65-
// ChatCompletionsRequest holds the fields needed for chat-completions rendering.
66-
type ChatCompletionsRequest struct {
67-
Model string `json:"model"`
68-
*types.RenderChatRequest
69-
}
70-
7165
func main() {
7266
baseLogger := zap.New(zap.UseDevMode(true))
7367
log.SetLogger(baseLogger)
@@ -319,13 +313,20 @@ func setupUnifiedHTTPEndpoints(
319313
return
320314
}
321315

322-
var req ChatCompletionsRequest
316+
var req struct {
317+
Model string `json:"model"`
318+
Messages []types.Conversation `json:"messages"`
319+
}
323320
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
324321
http.Error(w, "Invalid request body", http.StatusBadRequest)
325322
return
326323
}
327324

328-
pods, err := kvCacheIndexer.GetPodScores(ctx, req.RenderChatRequest, "", req.Model, nil)
325+
renderChatReq := &types.RenderChatRequest{
326+
Conversation: req.Messages,
327+
}
328+
329+
pods, err := kvCacheIndexer.GetPodScores(ctx, renderChatReq, "", req.Model, nil)
329330
if err != nil {
330331
http.Error(w, fmt.Sprintf("Failed to get score request: %v", err), http.StatusInternalServerError)
331332
return

0 commit comments

Comments
 (0)