Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/ci-release-uds-tokenizer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ jobs:
image-name: llm-d-uds-tokenizer
registry: ghcr.io/${{ github.repository_owner }}
github-token: ${{ secrets.GHCR_TOKEN }}
context: services/uds_tokenizer
dockerfile: services/uds_tokenizer/Dockerfile
dockerfile: Dockerfile.tokenizer
platform: linux/amd64

- name: Run Trivy scan
Expand Down
69 changes: 69 additions & 0 deletions Dockerfile.tokenizer
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Copyright 2025 The llm-d Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build stage: runs `make install-python-deps` to produce the Python virtual
# environment that the runtime stage copies from /workspace/build/venv.
FROM python:3.12-slim AS python-builder

# Target platform handed to make below; the values here are the defaults.
ARG TARGETOS=linux
ARG TARGETARCH=amd64

WORKDIR /workspace

# Build toolchain for the dependency install step. The apt package lists are
# removed in the same layer so they are not stored in the layer cache.
RUN apt-get update && \
    apt-get install -y --no-install-recommends build-essential && \
    rm -rf /var/lib/apt/lists/*

# Copy only the inputs of `make install-python-deps`, so that unrelated source
# changes do not invalidate the dependency layer.
COPY Makefile Makefile
COPY pkg/preprocessing/chat_completions/ pkg/preprocessing/chat_completions/
COPY services/uds_tokenizer/pyproject.toml services/uds_tokenizer/pyproject.toml
RUN TARGETOS=${TARGETOS} TARGETARCH=${TARGETARCH} make install-python-deps

# Runtime stage: slim Python image carrying the prebuilt virtual environment
# and the UDS tokenizer service sources.
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Apply pending package updates, then remove the apt cache AND the package
# lists in the same layer so neither ends up in the final image.
RUN apt-get update && \
    apt-get upgrade -y && \
    rm -rf /var/cache/apt/ /var/lib/apt/lists/*

# Copy installed dependencies from python-builder stage
COPY --from=python-builder /workspace/build/venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
# Module search path: the shared preprocessing code plus the venv's site-packages.
ENV PYTHONPATH="/app/preprocessing/chat_completions:/app/venv/lib/python3.12/site-packages"

# Copy project files into the image
COPY services/uds_tokenizer/run_grpc_server.py /app/
COPY services/uds_tokenizer/pyproject.toml /app/pyproject.toml
COPY services/uds_tokenizer/tokenizer_grpc_service.py /app/tokenizer_grpc_service.py
COPY services/uds_tokenizer/utils/ /app/utils/
COPY services/uds_tokenizer/tokenizer_service/ /app/tokenizer_service/
COPY services/uds_tokenizer/tokenizerpb/ /app/tokenizerpb/

# Copy the shared Python code for chat completion preprocessing from the project structure.
# COPY creates the destination directory (trailing slash), so no prior mkdir is needed.
COPY pkg/preprocessing/chat_completions/tokenizer_wrapper.py /app/preprocessing/chat_completions/

# Create, in a single layer, the directories the non-root user must be able to write:
#   /tmp/tokenizer  - UDS socket directory
#   /app/models     - model cache
#   /.modelscope    - ModelScope directory
#   /.cache         - Hugging Face cache directory
RUN mkdir -p /tmp/tokenizer /app/models /.modelscope /.cache && \
    chown -R 65532:65532 /tmp/tokenizer /app/models /.modelscope /.cache

# Switch to non-root user
USER 65532:65532

# Startup command: run direct gRPC server
CMD ["python", "/app/run_grpc_server.py"]
59 changes: 59 additions & 0 deletions Dockerfile.uds
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright 2025 The llm-d Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build stage: Go toolchain image (tag 1.24). Produces the binary that the
# runtime stage copies from /workspace/bin/llm-d-kv-cache.
FROM quay.io/projectquay/golang:1.24 AS builder
# Target platform build arguments, declared so they are in scope for this stage.
ARG TARGETOS
ARG TARGETARCH

WORKDIR /workspace

# Install system-level dependencies first. This layer is very stable.
USER root
# Install the EPEL repository directly (epel-release is not in the default repos),
# then the C/C++ toolchain, clang, pkgconfig and the ZeroMQ development package.
# No Python is installed in this stage.
# The builder is based on UBI8, so we need epel-release-8.
# NOTE(review): re-check the EPEL major version if the base image tag changes.
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-8.noarch.rpm' && \
dnf install -y gcc-c++ libstdc++ libstdc++-devel clang zeromq-devel pkgconfig && \
dnf clean all

# Copy the Go Modules manifests
COPY go.mod go.mod
COPY go.sum go.sum
# cache deps before building and copying source so that we don't need to re-download as much
# and so that source changes don't invalidate our downloaded layer
RUN go mod download

# Copy the source code.
COPY . .

# Build via the Makefile's build-uds target.
RUN make build-uds

# Runtime stage: Red Hat UBI 9 base image used to package the kv-cache-manager binary.
# Note: this is a full UBI image, not distroless; dnf is used below to install
# the runtime libraries.
FROM registry.access.redhat.com/ubi9/ubi:latest
WORKDIR /
# Install the ZeroMQ runtime library and libxcrypt-compat needed by the manager.
# The final image is UBI9, so we need epel-release-9.
USER root
RUN dnf install -y 'https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm' && \
dnf install -y zeromq libxcrypt-compat && \
dnf clean all

# Copy the compiled Go application from the builder stage, installed as /app/kv-cache-manager.
COPY --from=builder /workspace/bin/llm-d-kv-cache /app/kv-cache-manager
# Drop root privileges: run as numeric UID:GID 65532.
USER 65532:65532

# Set the entrypoint to the kv-cache-manager binary
ENTRYPOINT ["/app/kv-cache-manager"]
32 changes: 16 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ PYTHON_VERSION := 3.12
VENV_DIR := $(shell pwd)/build/venv
VENV_BIN := $(VENV_DIR)/bin

UDS_TOKENIZER_DIR := services/uds_tokenizer
UDS_TOKENIZER_VENV_DIR := $(UDS_TOKENIZER_DIR)/.venv
UDS_TOKENIZER_VENV_BIN := $(UDS_TOKENIZER_VENV_DIR)/bin

# Attempt to find the python$(PYTHON_VERSION) executable, falling back to python3.
PYTHON_EXE := $(shell command -v python$(PYTHON_VERSION) || command -v python3)

Expand Down Expand Up @@ -117,6 +121,8 @@ install-python-deps: setup-venv ## installs dependencies.
echo "ERROR: Virtual environment not found. Run 'make setup-venv' first."; \
exit 1; \
fi
@echo "Installing UDS tokenizer Python dependencies..."; \
$(VENV_BIN)/pip install "${UDS_TOKENIZER_DIR}"
@if $(VENV_BIN)/python -c "import vllm" 2>/dev/null; then \
echo "vllm is already installed, skipping..."; \
exit 0; \
Expand Down Expand Up @@ -242,28 +248,22 @@ e2e-test-uds: check-go download-zmq image-build-uds ## Run UDS tokenizer e2e tes
go test -v -count=1 -timeout 10m ./tests/e2e/uds_tokenizer/...
##@ UDS Tokenizer Python Tests

UDS_TOKENIZER_DIR := services/uds_tokenizer
UDS_TOKENIZER_VENV_DIR := $(UDS_TOKENIZER_DIR)/.venv
UDS_TOKENIZER_VENV_BIN := $(UDS_TOKENIZER_VENV_DIR)/bin

.PHONY: uds-tokenizer-install-deps
uds-tokenizer-install-deps: detect-python ## Set up venv and install UDS tokenizer dependencies
@printf "\033[33;1m==== Setting up UDS tokenizer venv and dependencies ====\033[0m\n"
@if [ ! -f "$(UDS_TOKENIZER_VENV_BIN)/python" ]; then \
echo "Creating virtual environment in $(UDS_TOKENIZER_VENV_DIR)..."; \
$(PYTHON_EXE) -m venv $(UDS_TOKENIZER_VENV_DIR); \
echo "Upgrading pip..."; \
$(UDS_TOKENIZER_VENV_BIN)/pip install --upgrade pip; \
uds-tokenizer-install-deps: install-python-deps ## Set up venv and install UDS tokenizer dependencies
@printf "\033[33;1m==== Detecting UDS tokenizer venv and dependencies ====\033[0m\n"
@if [ ! -f "$(VENV_BIN)/python" ]; then \
echo "Virtual environment not exist"; \
exit 1; \
else \
echo "Virtual environment already exists"; \
fi
@echo "Installing dependencies..."
@$(UDS_TOKENIZER_VENV_BIN)/pip install "$(UDS_TOKENIZER_DIR)[test]"
@echo "Installing UDS tokenizer test dependencies..."
@$(VENV_BIN)/pip install "$(UDS_TOKENIZER_DIR)[test]"

.PHONY: uds-tokenizer-service-test
uds-tokenizer-service-test: uds-tokenizer-install-deps ## Run UDS tokenizer integration tests (starts server automatically)
@printf "\033[33;1m==== Running UDS tokenizer integration tests ====\033[0m\n"
@$(UDS_TOKENIZER_VENV_BIN)/python -m pytest \
@$(VENV_BIN)/python -m pytest \
$(UDS_TOKENIZER_DIR)/tests/test_integration.py \
-v --timeout=60

Expand All @@ -286,8 +286,8 @@ build: build-uds build-embedded ## Build both UDS-only and embedded binaries

.PHONY: build-uds
build-uds: check-go download-zmq ## Build without embedded tokenizers (no Python required)
@printf "\033[33;1m==== Building (UDS-only, no embedded tokenizers) ====\033[0m\n"
@go build ./pkg/...
@printf "\033[33;1m==== Building application binary (with uds tokenizers) ====\033[0m\n"
@go build -o bin/$(PROJECT_NAME) examples/kv_events/online_uds/main.go
@echo "✅ UDS-only build succeeded"

.PHONY: build-embedded
Expand Down
Loading
Loading