diff --git a/Dockerfile.cloud b/Dockerfile.cloud index b3f5664fa..ab2c943d3 100644 --- a/Dockerfile.cloud +++ b/Dockerfile.cloud @@ -1,57 +1,116 @@ -FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 +# --- Builder stage --------------------------------------------------------- +# Build-only deps (build-essential, git, python3-dev, etc.) are installed in +# this stage only, so the final runtime stage can omit them entirely. +FROM nvidia/cuda:12.8.0-runtime-ubuntu24.04 AS builder ENV DEBIAN_FRONTEND=noninteractive ENV PYTHONUNBUFFERED=1 -ENV CUDA_VISIBLE_DEVICES=0 -ENV NVIDIA_VISIBLE_DEVICES=all -ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility -ENV DAYDREAM_SCOPE_LOGS_DIR=/workspace/logs -ENV DAYDREAM_SCOPE_MODELS_DIR=/workspace/models WORKDIR /app RUN apt-get update && apt-get install -y --no-install-recommends \ - # System dependencies curl \ git \ build-essential \ software-properties-common \ - # Dependencies required for OpenCV - libgl1 \ - libglib2.0-0 \ - libsm6 \ - libxext6 \ - libxrender-dev \ - libgomp1 \ python3-dev \ - # Cleanup && rm -rf /var/lib/apt/lists/* -# Install uv (Python package manager) to system-wide location ENV UV_INSTALL_DIR="/usr/local/bin" RUN curl -LsSf https://astral.sh/uv/0.9.11/install.sh | sh && \ - # Create symlink for fal.ai deploy which hardcodes $HOME/.local/bin/uv mkdir -p /root/.local/bin && \ ln -s /usr/local/bin/uv /root/.local/bin/uv -# Python/uv configuration - use default location but make world-readable ENV UV_CACHE_DIR="/tmp/uv-cache" COPY pyproject.toml uv.lock README.md .python-version LICENSE.md patches.pth . -# Pre-install Python to uv's default location and make it world-readable RUN uv python install && \ chmod -R a+rX /root/.local/share/uv -# Copy project files COPY src/ /app/src/ -# Pre-install bundled plugins (cannot be removed by users) +# Pre-build the venv with the extras the cloud wrapper asks for at runtime +# (`uv run --extra livepeer --extra kafka livepeer-runner`). 
+RUN uv sync --extra livepeer --extra kafka --no-dev + +# Pre-install bundled plugins (cannot be removed by users). ENV DAYDREAM_SCOPE_BUNDLED_PLUGINS_FILE="/app/bundled-plugins.txt" RUN echo "git+https://github.com/daydreamlive/scope-ltx-2.git@d13b5f9d94130b975989cd820eedbef5b3a8f165" > /app/bundled-plugins.txt RUN uv run daydream-scope install git+https://github.com/daydreamlive/scope-ltx-2.git@d13b5f9d94130b975989cd820eedbef5b3a8f165 -# Expose port 8000 for RunPod HTTP proxy +# Strip never-at-runtime files from the venv, plus duplicated CUDA libs that +# the base image already ships under /usr/local/cuda/lib64. Keep cuDNN +# (pyproject `override-dependencies` pins it newer than the base for a Conv3D +# regression), cusparselt (not in base), nccl/cupti/nvtx/nvjitlink/nvshmem +# (small or version-sensitive). Leave the dist-info directories so uv's later +# `sync` (in the fal-side image extension) treats these packages as +# installed and skips reinstall. NOTE(review): the name-based match below also deletes any importable package directory named test/tests/doc/docs; confirm no runtime dependency imports one. +RUN find /app/.venv -depth -type d \ + \( -name 'tests' -o -name 'test' -o -name 'doc' -o -name 'docs' \) \ + -exec rm -rf {} + 2>/dev/null || true && \ + find /app/.venv -type f \ + \( -name '*.h' -o -name '*.c' -o -name '*.cpp' -o -name '*.hpp' -o -name '*.pdb' \) \ + -delete 2>/dev/null || true && \ + rm -rf \ + /app/.venv/lib/python3.12/site-packages/nvidia/cublas \ + /app/.venv/lib/python3.12/site-packages/nvidia/cufft \ + /app/.venv/lib/python3.12/site-packages/nvidia/curand \ + /app/.venv/lib/python3.12/site-packages/nvidia/cusolver \ + /app/.venv/lib/python3.12/site-packages/nvidia/cusparse \ + /app/.venv/lib/python3.12/site-packages/nvidia/cuda_runtime \ + /app/.venv/lib/python3.12/site-packages/nvidia/cuda_nvrtc && \ + rm -rf /tmp/uv-cache /root/.cache /var/cache/apt/archives/*.deb + + +# --- Runtime stage --------------------------------------------------------- +# Minimal: copies the venv + uv-managed Python from the builder, no build deps. 
+FROM nvidia/cuda:12.8.0-runtime-ubuntu24.04 + +ENV DEBIAN_FRONTEND=noninteractive +ENV PYTHONUNBUFFERED=1 +ENV CUDA_VISIBLE_DEVICES=0 +ENV NVIDIA_VISIBLE_DEVICES=all +ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility +ENV DAYDREAM_SCOPE_LOGS_DIR=/workspace/logs +ENV DAYDREAM_SCOPE_MODELS_DIR=/workspace/models + +WORKDIR /app + +# Runtime-only system libs — shared objects only, no -dev/header packages +# (build-essential / git / python3-dev stayed in the builder stage). +# python3.12: fal's image check (`check_python.sh 3.12 python3.12`) looks +# for it on PATH. curl + ca-certificates: needed by fal's `install_uv.sh`, +# which tries to (re)install uv even though the image already ships one. +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3.12 \ + curl \ + ca-certificates \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +# uv binary + uv-managed Python interpreter (the venv's bin/python is a +# symlink into /root/.local/share/uv/python/...). +COPY --from=builder /usr/local/bin/uv /usr/local/bin/uv +COPY --from=builder /root/.local/share/uv /root/.local/share/uv +RUN mkdir -p /root/.local/bin && ln -s /usr/local/bin/uv /root/.local/bin/uv +ENV UV_INSTALL_DIR="/usr/local/bin" +ENV UV_CACHE_DIR="/tmp/uv-cache" + +# App + venv from the builder. .venv has stripped nvidia-* lib dirs whose +# .so files now come from the base image's /usr/local/cuda/lib64. +COPY --from=builder /app /app + +ENV DAYDREAM_SCOPE_BUNDLED_PLUGINS_FILE="/app/bundled-plugins.txt" + +# Make torch find the base image's CUDA libs for the stripped deps. Keep +# venv's cuDNN first so the override-pinned newer version wins. 
+ENV LD_LIBRARY_PATH="/app/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" + EXPOSE 8000 -# Default command to run the application CMD ["uv", "run", "daydream-scope", "--host", "0.0.0.0", "--port", "8000"] diff --git a/src/scope/cloud/livepeer_app.py b/src/scope/cloud/livepeer_app.py index 7f310dc8f..f52a54e4e 100644 --- a/src/scope/cloud/livepeer_app.py +++ b/src/scope/cloud/livepeer_app.py @@ -70,6 +70,12 @@ async def lifespan(_app: FastAPI): """Initialize embedded Scope app lifespan and ASGI client.""" global scope_client async with scope_lifespan(scope_app): + # Pre-warm the pipeline registry so the import cascade + # (torch / diffusers / transformers / torchao / per-pipeline modules) + # runs at runner startup instead of on the first cloud-proxy request. + # Shaves the registry-init delay out of the user-perceived connect path. + import scope.core.pipelines.registry # noqa: F401 + scope_client = httpx.AsyncClient( transport=httpx.ASGITransport(app=scope_app), base_url="http://runner", @@ -1546,6 +1552,7 @@ async def websocket_endpoint(ws: WebSocket) -> None: if control_task is not None: await _shutdown_task(control_task, task_name="control_channel") _connection_active = False + logger.info("WebSocket client disconnected") set_connection_id(None) diff --git a/src/scope/cloud/livepeer_fal_app.py b/src/scope/cloud/livepeer_fal_app.py index 1d234a0b6..24a1162f8 100644 --- a/src/scope/cloud/livepeer_fal_app.py +++ b/src/scope/cloud/livepeer_fal_app.py @@ -262,11 +262,17 @@ def _get_git_sha() -> str: GIT_SHA = _get_git_sha() DOCKER_IMAGE = f"daydreamlive/scope:{GIT_SHA}" +# Re-sync after COPY so the daydream-scope editable install picks up the +# freshly-copied src/ at image build time. Without this, the first `uv run` +# at cold start sees a stale path source and rebuilds + reinstalls the +# project (the ~10s "Built daydream-scope @ file:///app" + Uninstalled/Installed +# block visible in cold-start logs). 
dockerfile_str = f""" FROM {DOCKER_IMAGE} WORKDIR /app COPY pyproject.toml uv.lock README.md patches.pth /app/ COPY src/ /app/src/ +RUN uv sync --extra livepeer --extra kafka --no-dev """ custom_image = ContainerImage.from_dockerfile_str( dockerfile_str, @@ -386,6 +392,13 @@ class LivepeerScopeApp(fal.App, keep_alive=300): image = custom_image machine_type = "GPU-H100" + # These are required in fal isolate's runtime context (separate from + # the image's /app/.venv), where the App's setup() and websocket + # handler actually run. They're cheap — the real cold-start savings + # come from the image-level `uv sync --extra livepeer --extra kafka` + # in Dockerfile.cloud and the fal-side dockerfile_str re-sync, which + # together skip the ~10s `uv run --extra` resync that previously fired + # when the wrapper invoked `livepeer-runner`. requirements = [ "websockets", "httpx",