Skip to content

Commit 4d8d179

Browse files
authored
Merge pull request #130 from PoCInnovation/80-distribox-slave
feat: Distribox Slave #80
2 parents 57fed02 + 059457b commit 4d8d179

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2475
-170
lines changed

.env.example

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,13 @@ GUACD_PORT=4822
3535
# VNC_HOST: address guacd uses to reach VNC. 127.0.0.1 when guacd uses host
3636
# networking (default); change to host.docker.internal for bridged guacd.
3737
VNC_HOST=127.0.0.1
38+
39+
# Master/Slave mode
40+
# Set to "master" (default) or "slave"
41+
DISTRIBOX_MODE=master
42+
# Slave-only: URL of the master node (e.g. http://192.168.1.10:8080)
43+
MASTER_URL=
44+
# Slave-only: API key generated when registering this slave on the master
45+
SLAVE_API_KEY=
46+
# Slave-only: port for the slave API (default 8081)
47+
SLAVE_PORT=8081

backend/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
1313

1414
FROM builder as dev-stage
1515
COPY ./app /code/app
16-
CMD uvicorn app.main:app --host 0.0.0.0 --port ${BACKEND_PORT} --reload
16+
CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port ${BACKEND_PORT} --reload"]
1717

1818
FROM builder as production-stage
1919
COPY ./app /code/app
20-
CMD fastapi run app/main.py --port ${BACKEND_PORT}
20+
CMD ["sh", "-c", "exec fastapi run app/main.py --port ${BACKEND_PORT}"]

backend/app/core/config.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ def init_db():
7272
"ALTER TABLE vms ADD COLUMN keyboard_layout VARCHAR"
7373
)
7474
)
75+
if "slave_id" not in vm_columns:
76+
conn.execute(
77+
text(
78+
"ALTER TABLE vms ADD COLUMN slave_id UUID "
79+
"REFERENCES slaves(id)"
80+
)
81+
)
7582

7683
if "events" in inspector.get_table_names():
7784
event_columns = {
@@ -116,3 +123,14 @@ def get_connection(cls):
116123
GUACD_HOST = get_env_or_default("GUACD_HOST", "host.docker.internal")
117124
GUACD_PORT = int(get_env_or_default("GUACD_PORT", "4822"))
118125
VNC_HOST = get_env_or_default("VNC_HOST", "127.0.0.1")
126+
VNC_LISTEN = get_env_or_default("VNC_LISTEN", "127.0.0.1")
127+
128+
# Master/Slave mode: "master" (default) or "slave"
129+
DISTRIBOX_MODE = get_env_or_default("DISTRIBOX_MODE", "master")
130+
131+
# Slave-specific config (only used when DISTRIBOX_MODE=slave)
132+
MASTER_URL = get_env_or_default("MASTER_URL", "")
133+
SLAVE_API_KEY = get_env_or_default("SLAVE_API_KEY", "")
134+
135+
# Virtualization type: "kvm" (default, hardware accel) or "qemu" (software emulation)
136+
VIRT_TYPE = get_env_or_default("VIRT_TYPE", "kvm")

backend/app/core/policies.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,22 @@
137137
"policy": "events:delete",
138138
"description": "Allows the user to delete events and their VMs.",
139139
},
140+
{
141+
"policy": "slaves:get",
142+
"description": "Allows the user to list slave nodes.",
143+
},
144+
{
145+
"policy": "slaves:getById",
146+
"description": "Allows the user to fetch a slave node by id.",
147+
},
148+
{
149+
"policy": "slaves:create",
150+
"description": "Allows the user to register a new slave node.",
151+
},
152+
{
153+
"policy": "slaves:delete",
154+
"description": "Allows the user to unregister a slave node.",
155+
},
140156
]
141157

142158
VALID_POLICIES = {entry["policy"] for entry in POLICIES}

backend/app/core/xml_builder.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from lxml import etree
22
from app.models.vm import VmCreateXML
33
from app.core.constants import VMS_DIR
4+
from app.core.config import VIRT_TYPE
45

56
LAYOUT_TO_KEYMAP = {
67
"en-us-qwerty": "en-us",
@@ -33,7 +34,7 @@
3334

3435
def build_xml(vm_read: VmCreateXML):
3536

36-
domain = etree.Element("domain", type="kvm")
37+
domain = etree.Element("domain", type=VIRT_TYPE)
3738

3839
etree.SubElement(domain, "name").text = str(vm_read.id)
3940
# Frontend sends memory in GiB; libvirt XML expects MiB here.
@@ -50,7 +51,8 @@ def build_xml(vm_read: VmCreateXML):
5051
for feature in ["acpi", "apic", "pae"]:
5152
etree.SubElement(features, feature)
5253

53-
etree.SubElement(domain, "cpu", mode="host-passthrough")
54+
if VIRT_TYPE == "kvm":
55+
etree.SubElement(domain, "cpu", mode="host-passthrough")
5456

5557
etree.SubElement(domain, "clock", offset="utc")
5658

@@ -86,11 +88,12 @@ def build_xml(vm_read: VmCreateXML):
8688
etree.SubElement(iface, "source", network="default")
8789
etree.SubElement(iface, "model", type="virtio")
8890

91+
from app.core.config import VNC_LISTEN
8992
vnc_attrs = {
9093
"type": "vnc",
9194
"port": "-1",
9295
"autoport": "yes",
93-
"listen": "127.0.0.1",
96+
"listen": VNC_LISTEN,
9497
}
9598
if vm_read.keyboard_layout:
9699
keymap = LAYOUT_TO_KEYMAP.get(vm_read.keyboard_layout)

backend/app/main.py

Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
from fastapi.middleware.cors import CORSMiddleware
88
from sqlmodel import Session, select
99
from app.core.policies import DISTRIBOX_ADMIN_POLICY
10-
from app.routes import vm, image, host, auth, user_management, tunnel, event
10+
from app.routes import vm, image, host, auth, user_management, tunnel, event, slave
11+
from app.routes import slave_agent
1112
from app.orm.user import UserORM
1213
from app.orm.vm_credential import VmCredentialORM # noqa: F401
1314
from app.orm.event import EventORM, EventParticipantORM # noqa: F401
1415
from app.orm.user_settings import UserSettingsORM # noqa: F401
16+
from app.orm.slave import SlaveORM # noqa: F401
1517
from app.utils.auth import hash_password
16-
from app.core.config import engine, get_env_or_default, init_db
18+
from app.core.config import engine, get_env_or_default, init_db, DISTRIBOX_MODE, QEMUConfig
1719
from app.utils.crypto import encrypt_secret, is_encrypted_secret
1820
from app.services.vm_service import VmService
1921

@@ -23,7 +25,6 @@
2325

2426
frontend_url = get_env_or_default("FRONTEND_URL", "http://localhost:3000")
2527

26-
# CORS configuration
2728
app.add_middleware(
2829
CORSMiddleware,
2930
allow_origins=[frontend_url],
@@ -33,21 +34,61 @@
3334
)
3435

3536

37+
@app.on_event("shutdown")
async def shutdown_event():
    """Stop local VMs and notify the master when a slave node shuts down.

    Does nothing unless ``DISTRIBOX_MODE`` is ``"slave"``. In slave mode it
    first calls ``_stop_all_local_vms()``, then POSTs to
    ``{MASTER_URL}/slaves/shutdown`` with the ``X-Slave-Token`` header.

    The notification is best-effort: it is skipped when ``MASTER_URL`` or
    ``SLAVE_API_KEY`` is empty, and any failure (network error, timeout or
    an error status from the master) is logged and never re-raised.
    """
    if DISTRIBOX_MODE != "slave":
        return

    _stop_all_local_vms()

    import httpx
    from app.core.config import MASTER_URL, SLAVE_API_KEY

    if not MASTER_URL or not SLAVE_API_KEY:
        return
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.post(
                f"{MASTER_URL}/slaves/shutdown",
                headers={"X-Slave-Token": SLAVE_API_KEY},
            )
            # httpx does not raise on 4xx/5xx by itself; without this check a
            # rejected notification would be logged as a success.
            response.raise_for_status()
        logger.info("Shutdown notification sent to master")
    except Exception:
        # Keep the traceback so a failed notification can be diagnosed.
        logger.warning("Failed to notify master of shutdown", exc_info=True)
58+
59+
60+
def _stop_all_local_vms():
    """Destroy every active domain returned by the hypervisor connection.

    Best-effort helper used during shutdown: it never raises. If the
    connection or the domain listing fails, a warning is logged and nothing
    else is attempted. Each domain is handled independently, so a failure on
    one domain does not prevent the remaining ones from being destroyed.
    """
    try:
        conn = QEMUConfig.get_connection()
        domains = conn.listAllDomains(0)
    except Exception:
        logger.warning(
            "Failed to stop local VMs during shutdown", exc_info=True
        )
        return

    for domain in domains:
        # Resolve the name up front so that logging can never be the reason
        # the loop is aborted.
        try:
            name = domain.name()
        except Exception:
            name = "<unknown>"

        try:
            if not domain.isActive():
                continue
            domain.destroy()
        except Exception:
            logger.warning("Failed to destroy VM %s", name, exc_info=True)
        else:
            logger.info("Destroyed VM %s", name)
73+
74+
3675
@app.on_event("startup")
3776
async def startup_event():
38-
"""Initialize database and create default admin user if it doesn't exist."""
39-
init_db() # This might be temporary
77+
init_db()
78+
79+
if DISTRIBOX_MODE == "slave":
80+
print(f"✓ Starting in SLAVE mode")
81+
asyncio.create_task(_slave_heartbeat_loop())
82+
return
4083

4184
admin_username = get_env_or_default("ADMIN_USERNAME", "admin")
4285
admin_password = get_env_or_default("ADMIN_PASSWORD", "admin")
4386

4487
with Session(engine) as session:
45-
# Check if admin exists
4688
statement = select(UserORM).where(UserORM.username == admin_username)
4789
admin = session.exec(statement).first()
4890

4991
if not admin:
50-
# Create admin user
5192
admin = UserORM(
5293
username=admin_username,
5394
hashed_password=hash_password(admin_password),
@@ -93,6 +134,8 @@ async def startup_event():
93134
)
94135

95136
asyncio.create_task(_enforce_event_deadlines())
137+
asyncio.create_task(_check_stale_slaves())
138+
print(f"✓ Starting in MASTER mode")
96139

97140

98141
async def _enforce_event_deadlines():
@@ -133,6 +176,54 @@ async def _enforce_event_deadlines():
133176
await asyncio.sleep(30)
134177

135178

179+
async def _check_stale_slaves():
    """Background task: run the stale-slave check every 30 seconds, forever.

    Any exception raised while importing or running the check is logged with
    its traceback and the loop carries on with the next cycle.
    """
    pause_seconds = 30
    while True:
        try:
            # Imported inside the guarded section so that an import failure
            # is logged like any other error instead of killing the task.
            from app.services.slave_service import SlaveService

            SlaveService.check_stale_slaves()
        except Exception:
            logger.exception("Error in stale slave check")
        await asyncio.sleep(pause_seconds)
187+
188+
189+
async def _slave_heartbeat_loop():
    """Send this node's resource figures to the master every 30 seconds.

    Returns immediately, after logging a warning, when ``MASTER_URL`` or
    ``SLAVE_API_KEY`` is empty. Otherwise loops forever: each cycle builds a
    heartbeat payload and POSTs it to ``{MASTER_URL}/slaves/heartbeat`` with
    the ``X-Slave-Token`` header. Any failure in a cycle, including an error
    status returned by the master, is logged with its traceback and the loop
    continues.
    """
    import httpx
    from app.core.config import MASTER_URL, SLAVE_API_KEY
    from app.services.host_service import HostService
    import psutil

    if not MASTER_URL or not SLAVE_API_KEY:
        logger.warning(
            "MASTER_URL or SLAVE_API_KEY not set, skipping heartbeat"
        )
        return

    while True:
        try:
            host_info = HostService.get_host_info()
            mem = psutil.virtual_memory()
            heartbeat = {
                "total_cpu": psutil.cpu_count() or 0,
                # psutil reports bytes; dividing by 2**30 yields GiB.
                "total_mem": round(mem.total / 2**30),
                # NOTE(review): units of the HostService disk/mem figures are
                # not visible here - confirm they match what the master expects.
                "total_disk": round(host_info.disk.total),
                # Idle CPU share: 100 minus the used percentage.
                "available_cpu": round(
                    100.0 - host_info.cpu.percent_used_total, 2
                ),
                "available_mem": round(host_info.mem.available, 2),
                "available_disk": round(host_info.disk.available, 2),
            }
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.post(
                    f"{MASTER_URL}/slaves/heartbeat",
                    json=heartbeat,
                    headers={"X-Slave-Token": SLAVE_API_KEY},
                )
                # httpx does not raise on 4xx/5xx by itself; surface a
                # rejected heartbeat instead of logging it as sent.
                response.raise_for_status()
            logger.debug("Heartbeat sent to master")
        except Exception:
            logger.exception("Failed to send heartbeat to master")
        await asyncio.sleep(30)
225+
226+
136227
@app.exception_handler(HTTPException)
137228
async def http_exception_handler(_, exc: HTTPException):
138229
return JSONResponse(
@@ -148,10 +239,14 @@ async def general_exception_handler(_, exc: Exception):
148239
content={"detail": str(exc)}
149240
)
150241

151-
app.include_router(auth.router, prefix="/auth", tags=["auth"])
152-
app.include_router(user_management.router, tags=["users"])
153-
app.include_router(vm.router, prefix="/vms", tags=["vms"])
154-
app.include_router(image.router, prefix="/images", tags=["images"])
155-
app.include_router(host.router, prefix="/host", tags=["host"])
156-
app.include_router(tunnel.router, tags=["tunnel"])
157-
app.include_router(event.router, prefix="/events", tags=["events"])
242+
if DISTRIBOX_MODE == "slave":
243+
app.include_router(slave_agent.router, tags=["slave-agent"])
244+
else:
245+
app.include_router(auth.router, prefix="/auth", tags=["auth"])
246+
app.include_router(user_management.router, tags=["users"])
247+
app.include_router(vm.router, prefix="/vms", tags=["vms"])
248+
app.include_router(image.router, prefix="/images", tags=["images"])
249+
app.include_router(host.router, prefix="/host", tags=["host"])
250+
app.include_router(tunnel.router, tags=["tunnel"])
251+
app.include_router(event.router, prefix="/events", tags=["events"])
252+
app.include_router(slave.router, prefix="/slaves", tags=["slaves"])

backend/app/models/host.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Optional
2+
from uuid import UUID
13
from pydantic import BaseModel
24
from app.models.resources import MemoryInfoBase, DiskInfoBase, CPUInfoBase
35

@@ -18,3 +20,22 @@ class HostInfoBase(BaseModel):
1820
disk: DiskInfoHost
1921
mem: MemoryInfoHost
2022
cpu: CPUInfoHost
23+
24+
25+
# Host information for a single node.
class NodeHostInfo(BaseModel):
    # Node identifier; optional, defaults to None.
    # NOTE(review): presumably None denotes the local/master node - confirm.
    node_id: Optional[UUID] = None
    # Name of the node.
    node_name: str
    # Disk, memory and CPU figures for this node.
    host_info: HostInfoBase
29+
30+
31+
# Defined before ClusterHostInfo so the field annotation below can reference
# the class directly instead of through a string forward reference.
class ClusterTotals(BaseModel):
    """Aggregate CPU, memory and disk figures for a set of nodes."""

    cpu_count: int
    mem_total: float
    mem_available: float
    disk_total: float
    disk_available: float


class ClusterHostInfo(BaseModel):
    """Per-node host information together with aggregate totals."""

    nodes: list[NodeHostInfo]
    totals: ClusterTotals

backend/app/models/slave.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import Optional
2+
from pydantic import BaseModel, Field
3+
from uuid import UUID
4+
from datetime import datetime
5+
6+
7+
# Payload for registering a slave node.
class SlaveCreate(BaseModel):
    # Name of the slave; must be non-empty.
    name: str = Field(min_length=1)
    # Hostname of the slave; must be non-empty.
    hostname: str = Field(min_length=1)
    # TCP port, restricted to 1-65535.
    # NOTE(review): .env.example documents 8081 as the default slave API
    # port, while this default is 8080 - confirm which one is intended.
    port: int = Field(default=8080, ge=1, le=65535)
11+
12+
13+
# Representation of a registered slave node.
class SlaveRead(BaseModel):
    # Identity and address.
    id: UUID
    name: str
    hostname: str
    port: int
    # NOTE(review): the API key is part of this read model, so anything
    # serialized from it carries the key - confirm that is intended for
    # every endpoint that returns it.
    api_key: str
    status: str
    created_at: datetime
    # None when no heartbeat timestamp is set.
    last_heartbeat: Optional[datetime] = None
    # Capacity figures; same field names as SlaveHeartbeat.
    total_cpu: int
    total_mem: int
    total_disk: int
    available_cpu: float
    available_mem: float
    available_disk: float
28+
29+
30+
# Resource figures carried by a slave heartbeat.
class SlaveHeartbeat(BaseModel):
    # Totals are integers.
    total_cpu: int
    total_mem: int
    total_disk: int
    # Availability figures are floats.
    available_cpu: float
    available_mem: float
    available_disk: float
37+
38+
39+
class SlaveHostInfo(BaseModel):
    """Response from a slave's /host/info endpoint."""

    # Each section is kept as an untyped dict; its keys are not validated
    # by this model.
    disk: dict
    mem: dict
    cpu: dict

backend/app/models/vm.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@ class VmRead(VmBase):
1919
state: str
2020
ipv4: Optional[str]
2121
credentials_count: int = 0
22+
slave_id: Optional[UUID] = None
23+
slave_name: Optional[str] = None
2224

2325

2426
class VmCreate(VmBase):
2527
activate_at_start: bool
28+
slave_id: Optional[UUID] = None
29+
auto_place: bool = False
2630

2731

2832
class VmCreateXML(VmBase):

0 commit comments

Comments
 (0)