Skip to content

Commit 0db8ffe

Browse files
committed
Fix nixos-rebuild PATH and install context cancellation
Set PATH to NixosSystemPath in main so that all exec.Command calls can resolve bare command names when running as a systemd service (systemd strips PATH). Pass the same PATH via "sudo env" so that nixos-rebuild can find nix internally (it was failing with exit 127, "nix: command not found"). Use context.WithoutCancel in the operation queue so that a client disconnect cannot cancel nixos-rebuild mid-flight. Previously, on Go 1.21+, a command created with exec.CommandContext would fail immediately with context.Canceled if its context was already done, leaving apps permanently stuck in the "installing" state in the database.
1 parent 7c49551 commit 0db8ffe

File tree

3 files changed

+36
-17
lines changed

3 files changed

+36
-17
lines changed

services/host-agent/cmd/host-agent/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"codeberg.org/d-buckner/bloud-v3/services/host-agent/internal/appconfig"
1313
"codeberg.org/d-buckner/bloud-v3/services/host-agent/internal/config"
1414
"codeberg.org/d-buckner/bloud-v3/services/host-agent/internal/db"
15+
"codeberg.org/d-buckner/bloud-v3/services/host-agent/internal/nixgen"
1516
"codeberg.org/d-buckner/bloud-v3/services/host-agent/internal/system"
1617
"codeberg.org/d-buckner/bloud-v3/services/host-agent/pkg/configurator"
1718
)
@@ -32,6 +33,12 @@ func main() {
3233
}
3334

3435
func runServer() {
36+
// Ensure system binaries are resolvable by bare name. systemd strips PATH
37+
// to a minimal set that excludes /run/current-system/sw/bin and
38+
// /run/wrappers/bin. Setting it here covers all exec.Command calls
39+
// throughout the process (systemctl, journalctl, podman, nix, sudo, etc.).
40+
_ = os.Setenv("PATH", nixgen.NixosSystemPath)
41+
3542
// Setup structured logging
3643
logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{
3744
Level: slog.LevelInfo,

services/host-agent/internal/nixgen/rebuild.go

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@ import (
1212
"time"
1313
)
1414

15-
// Stable NixOS binary paths. These are fixed by the system profile and the
16-
// sudo wrapper dir — safe to hardcode rather than relying on PATH, which
17-
// systemd strips for services running as root.
18-
const (
19-
binSudo = "/run/wrappers/bin/sudo"
20-
binNixosRebuild = "/run/current-system/sw/bin/nixos-rebuild"
21-
binSystemctl = "/run/current-system/sw/bin/systemctl"
22-
)
15+
// NixosSystemPath is the PATH required for NixOS system commands.
16+
// Validated against the ISO: all binaries (nix, nixos-rebuild, systemctl,
17+
// journalctl, podman, machinectl, sudo) live in these two directories.
18+
// /usr/bin is empty on NixOS; /nix/var/nix/profiles/default/bin does not
19+
// exist on this system. /bin contains only sh.
20+
//
21+
// Set via os.Setenv in main so all exec.Command calls resolve bare names,
22+
// and passed explicitly via "sudo env" so child processes inherit it too
23+
// (sudo resets the environment by default).
24+
const NixosSystemPath = "/run/current-system/sw/bin:/run/wrappers/bin:/bin"
2325

2426
// Rebuilder handles nixos-rebuild operations
2527
type Rebuilder struct {
@@ -64,12 +66,14 @@ type RebuildResult struct {
6466
func (r *Rebuilder) nixosRebuildCmd(ctx context.Context, args []string) *exec.Cmd {
6567
if r.useSudo {
6668
sudoArgs := append([]string{
67-
"env", "_NIXOS_REBUILD_REEXEC=1",
68-
binNixosRebuild,
69+
"env",
70+
"_NIXOS_REBUILD_REEXEC=1",
71+
"PATH=" + NixosSystemPath,
72+
"nixos-rebuild",
6973
}, args...)
70-
return exec.CommandContext(ctx, binSudo, sudoArgs...)
74+
return exec.CommandContext(ctx, "sudo", sudoArgs...)
7175
}
72-
cmd := exec.CommandContext(ctx, binNixosRebuild, args...)
76+
cmd := exec.CommandContext(ctx, "nixos-rebuild", args...)
7377
cmd.Env = append(os.Environ(), "_NIXOS_REBUILD_REEXEC=1")
7478
return cmd
7579
}
@@ -81,11 +85,11 @@ func (r *Rebuilder) userSystemctlCmd(ctx context.Context, args []string) *exec.C
8185
if r.useSudo {
8286
machinectlArgs := append([]string{
8387
"shell", "bloud@",
84-
binSystemctl, "--user",
88+
"systemctl", "--user",
8589
}, args...)
86-
return exec.CommandContext(ctx, binSudo, append([]string{"machinectl"}, machinectlArgs...)...)
90+
return exec.CommandContext(ctx, "sudo", append([]string{"machinectl"}, machinectlArgs...)...)
8791
}
88-
return exec.CommandContext(ctx, binSystemctl, append([]string{"--user"}, args...)...)
92+
return exec.CommandContext(ctx, "systemctl", append([]string{"--user"}, args...)...)
8993
}
9094

9195
// Switch performs a nixos-rebuild switch

services/host-agent/internal/orchestrator/queue.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -356,17 +356,25 @@ func (q *OperationQueue) executeBatch(batch []QueuedOperation) {
356356
}
357357

358358
// executeInstall runs a single install operation.
359+
// context.WithoutCancel detaches execution from the HTTP request context so
360+
// that a client disconnect (browser navigation, timeout) cannot cancel
361+
// nixos-rebuild mid-flight. Without this, Go 1.21+ exec.CommandContext returns
362+
// context.Canceled immediately if the context is already done, leaving the app
363+
// stuck in "installing" status in the database permanently.
359364
func (q *OperationQueue) executeInstall(op QueuedOperation) {
360-
result, err := q.orchestrator.Install(op.Ctx, *op.Install)
365+
ctx := context.WithoutCancel(op.Ctx)
366+
result, err := q.orchestrator.Install(ctx, *op.Install)
361367
op.ResultCh <- OperationResult{
362368
InstallResult: result,
363369
Err: err,
364370
}
365371
}
366372

367373
// executeUninstall runs a single uninstall operation.
374+
// Uses a detached context for the same reason as executeInstall.
368375
func (q *OperationQueue) executeUninstall(op QueuedOperation) {
369-
result, err := q.orchestrator.Uninstall(op.Ctx, *op.Uninstall)
376+
ctx := context.WithoutCancel(op.Ctx)
377+
result, err := q.orchestrator.Uninstall(ctx, *op.Uninstall)
370378
op.ResultCh <- OperationResult{
371379
UninstallResult: result,
372380
Err: err,

0 commit comments

Comments
 (0)