AstrBotDevs
diff --git a/deploy/docker/config.yaml b/deploy/docker/config.yaml
Lines changed: 15 additions & 0 deletions
diff --git a/pkgs/bay/app/api/v1/sandboxes.py b/pkgs/bay/app/api/v1/sandboxes.py
Lines changed: 74 additions & 14 deletions
diff --git a/pkgs/bay/app/config.py b/pkgs/bay/app/config.py
Lines changed: 24 additions & 0 deletions
diff --git a/pkgs/bay/app/main.py b/pkgs/bay/app/main.py
Lines changed: 7 additions & 0 deletions
@@ -50,6 +50,18 @@ security:
   api_key: "CHANGE-ME"
   allow_anonymous: false
 
+# Warm Pool — pre-start standby sandbox instances to reduce cold-start latency.
+# When a user creates a sandbox, Bay will first try to claim an available warm instance,
+# delivering near-instant startup instead of waiting for container boot.
+warm_pool:
+  enabled: true
+  warmup_queue_workers: 2          # Concurrent warmup workers (>= 1)
+  warmup_queue_max_size: 256       # Maximum queue depth (>= 1)
+  warmup_queue_drop_policy: "drop_newest"  # Accepted values: "drop_newest" | "drop_oldest"
+  warmup_queue_drop_alert_threshold: 50    # Alert when drops exceed this count
+  interval_seconds: 30             # Pool maintenance scan interval
+  run_on_startup: true             # Whether to run pool maintenance on startup
+
 profiles:
   # ── Standard Python sandbox ────────────────────────
   - id: python-default
@@ -65,6 +77,7 @@ profiles:
       - shell
       - python
     idle_timeout: 1800  # 30 minutes
+    warm_pool_size: 1   # Keep 1 pre-warmed instance ready
     env: {}
 
   # ── Data Science sandbox (more resources) ──────────
@@ -81,6 +94,7 @@ profiles:
       - shell
       - python
     idle_timeout: 1800
+    warm_pool_size: 1
     env: {}
 
   # ── Browser + Python multi-container sandbox ───────
@@ -114,6 +128,7 @@ profiles:
           - browser
         env: {}
     idle_timeout: 1800
+    warm_pool_size: 1
 
 gc:
   # Enable automatic GC for production
 
@@ -9,7 +9,7 @@
 from datetime import datetime
 
 import structlog
-from fastapi import APIRouter, BackgroundTasks, Header, Query
+from fastapi import APIRouter, BackgroundTasks, Header, Query, Request
 from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 
@@ -332,12 +332,13 @@ async def create_sandbox(
     - Lazy session creation: status may be 'idle' initially
     - ttl=null or ttl=0 means no expiry
     - Supports Idempotency-Key header for safe retries
+    - Prioritizes claiming a warm pool sandbox if available (§6.1)
     """
     # Serialize request body for fingerprinting
     request_body = request.model_dump_json()
     request_path = "/v1/sandboxes"
 
-    # 1. Check idempotency key if provided
+    # 1. Check idempotency key if provided (must be BEFORE claim, §6.1 step 2)
     if idempotency_key:
         cached = await idempotency_svc.check(
             owner=owner,
@@ -348,12 +349,50 @@ async def create_sandbox(
         )
         if cached:
             # Return cached response with original status code
+            # Do NOT trigger claim/warmup side effects (§11.1)
             return JSONResponse(
                 content=cached.response,
                 status_code=cached.status_code,
             )
 
-    # 2. Create sandbox
+    # 2. Try to claim a warm sandbox (§6.1 step 3)
+    #    Skip claim when user specifies a cargo_id (warm sandbox has its own cargo)
+    sandbox = None
+    if request.cargo_id is None:
+        sandbox = await sandbox_mgr.claim_warm_sandbox(
+            owner=owner,
+            profile_id=request.profile,
+            ttl=request.ttl,
+        )
+
+    if sandbox is not None:
+        # Claim succeeded - return immediately (already warm/running)
+        _log.info(
+            "sandbox.create.warm_claim_hit",
+            sandbox_id=sandbox.id,
+            profile=request.profile,
+        )
+        response = _sandbox_to_response(sandbox)
+
+        # Save idempotency key if provided
+        if idempotency_key:
+            await idempotency_svc.save(
+                owner=owner,
+                key=idempotency_key,
+                path=request_path,
+                method="POST",
+                body=request_body,
+                response=response,
+                status_code=201,
+            )
+
+        return response
+
+    # 3. Claim miss - fall back to normal create
+    _log.debug(
+        "sandbox.create.warm_claim_miss",
+        profile=request.profile,
+    )
     sandbox = await sandbox_mgr.create(
         owner=owner,
         profile_id=request.profile,
@@ -362,7 +401,7 @@ async def create_sandbox(
     )
     response = _sandbox_to_response(sandbox)
 
-    # 3. Save idempotency key if provided
+    # 4. Save idempotency key if provided
     if idempotency_key:
         await idempotency_svc.save(
             owner=owner,
@@ -374,13 +413,19 @@ async def create_sandbox(
             status_code=201,
         )
 
-    # 4. Enqueue warmup hook. The hook itself detaches actual warmup work,
-    # so this does not block request completion on keep-alive connections.
-    background_tasks.add_task(
-        _warmup_sandbox_runtime,
-        sandbox_id=sandbox.id,
-        owner=owner,
-    )
+    # 5. Enqueue warmup via queue (§2.5.1: only enqueue, never execute directly)
+    from app.services.warm_pool.lifecycle import get_warmup_queue
+
+    warmup_queue = get_warmup_queue()
+    if warmup_queue is not None and warmup_queue.is_running:
+        warmup_queue.enqueue(sandbox_id=sandbox.id, owner=owner)
+    else:
+        # Fallback: if queue not available, use background task
+        background_tasks.add_task(
+            _warmup_sandbox_runtime,
+            sandbox_id=sandbox.id,
+            owner=owner,
+        )
 
     return response
 
@@ -537,14 +582,29 @@ async def stop_sandbox(
 @router.delete("/{sandbox_id}", status_code=204)
 async def delete_sandbox(
     sandbox_id: str,
+    request: Request,
     sandbox_mgr: SandboxManagerDep,
     owner: AuthDep,
 ) -> None:
-    """Delete sandbox permanently.
+    """Delete a sandbox permanently (idempotent); the request is logged before deletion.
 
     - Destroys all running sessions
     - Cascade deletes managed cargo
     - Does NOT cascade delete external cargo
+    - If the sandbox is already soft-deleted, still returns 204 (idempotent)
     """
-    sandbox = await sandbox_mgr.get(sandbox_id, owner)
-    await sandbox_mgr.delete(sandbox)
+    request_id = getattr(request.state, "request_id", None)  # None if request.state has no request_id
+    _log.info(
+        "sandbox.delete.request",
+        sandbox_id=sandbox_id,
+        owner=owner,
+        request_id=request_id,
+        delete_source="api.v1.sandboxes.delete",
+    )
+    await sandbox_mgr.delete_by_id(
+        sandbox_id=sandbox_id,
+        owner=owner,
+        idempotent=True,  # NOTE(review): presumably makes an already-deleted sandbox a no-op — confirm in delete_by_id
+        delete_source="api.v1.sandboxes.delete",  # same tag as the log entry above
+        request_id=request_id,
+    )
@@ -180,6 +180,15 @@ class ProfileConfig(BaseModel):
     # ========== Shared configuration ==========
     idle_timeout: int = 1800  # 30 minutes
 
+    # ========== Warm pool configuration ==========
+    warm_pool_size: int = 0  # Number of pre-warmed sandbox instances (0 = disabled)
+    warm_rotate_ttl: int = 1800  # Seconds before a warm instance is rotated (>= 60)
+    warm_claim_timeout_ms: int = 200  # Max time to attempt claim before fallback (100-3000)
+    warmup_retry_max_attempts: int = 3  # Max retry on warmup failure (>= 0)
+    warmup_retry_backoff_base_ms: int = 200  # Base backoff for warmup retry
+    warmup_retry_backoff_max_ms: int = 5000  # Max backoff for warmup retry
+    warmup_circuit_breaker_threshold: int = 10  # Consecutive failures before circuit break
+
     def model_post_init(self, __context: Any) -> None:
         """Normalize single-container format to multi-container format.
 
@@ -346,6 +355,20 @@ class BrowserLearningConfig(BaseModel):
     error_rate_multiplier_threshold: float = 2.0
 
 
+class WarmPoolConfig(BaseModel):
+    """Global warm pool settings: in-process warmup queue plus maintenance scheduler."""
+
+    enabled: bool = True
+    # Warmup queue settings (in-process bounded queue); documented minimums are enforced via Field
+    warmup_queue_workers: int = Field(default=2, ge=1)  # Concurrent warmup workers (>= 1)
+    warmup_queue_max_size: int = Field(default=256, ge=1)  # Maximum queue depth (>= 1)
+    warmup_queue_drop_policy: Literal["drop_newest", "drop_oldest"] = "drop_newest"
+    warmup_queue_drop_alert_threshold: int = 50  # Alert when drops exceed this count
+    # Scheduler settings
+    interval_seconds: int = 30  # Pool maintenance interval
+    run_on_startup: bool = True  # Whether to run pool maintenance on startup
+
+
 class SecurityConfig(BaseModel):
     """Security configuration."""
 
@@ -389,6 +412,7 @@ class Settings(BaseSettings):
     security: SecurityConfig = Field(default_factory=SecurityConfig)
     idempotency: IdempotencyConfig = Field(default_factory=IdempotencyConfig)
     gc: GCConfig = Field(default_factory=GCConfig)
+    warm_pool: WarmPoolConfig = Field(default_factory=WarmPoolConfig)
     browser_learning: BrowserLearningConfig = Field(default_factory=BrowserLearningConfig)
     browser_auto_release_enabled: bool = True
 
 
@@ -19,6 +19,7 @@
     init_browser_learning_scheduler,
     shutdown_browser_learning_scheduler,
 )
+from app.services.warm_pool.lifecycle import init_warm_pool, shutdown_warm_pool
 
 logger = structlog.get_logger()
 
@@ -46,6 +47,9 @@ async def lifespan(app: FastAPI):
     await init_gc_scheduler()
     await init_browser_learning_scheduler()
 
+    # Initialize warm pool (queue + scheduler)
+    await init_warm_pool()
+
     yield
 
     # Shutdown
@@ -55,6 +59,9 @@ async def lifespan(app: FastAPI):
     await shutdown_gc_scheduler()
     await shutdown_browser_learning_scheduler()
 
+    # Stop warm pool
+    await shutdown_warm_pool()
+
     # Close HTTP client
     await http_client_manager.shutdown()