From 7d4bcbe201a390c50f4fc40ad9137553140501d6 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 13:11:25 -0400 Subject: [PATCH 1/7] daemon: skip reconciliation after reboot is issued After the daemon issues a reboot, it now returns early from Reconcile() on subsequent invocations. This prevents the daemon from running bootc status on a node that is mid-shutdown, which could return garbage data. This also makes the Rebooting state durable (it persists until the node actually goes down), so the e2e test can now assert on it as an intermediate lifecycle step. This also matches what the MCO does. Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- internal/daemon/reconciler.go | 19 +++++++------------ internal/daemon/reconciler_test.go | 7 +++++++ internal/daemon/suite_test.go | 12 +++++++----- test/e2e/bootcnode_test.go | 17 ++++++++++++++--- 4 files changed, 35 insertions(+), 20 deletions(-) diff --git a/internal/daemon/reconciler.go b/internal/daemon/reconciler.go index cecd60b..44ff63f 100644 --- a/internal/daemon/reconciler.go +++ b/internal/daemon/reconciler.go @@ -56,8 +56,8 @@ type BootcNodeReconciler struct { inflight stageOp stageDone chan event.GenericEvent - // rebootIssued tracks whether a reboot has been issued so classifyAction - // can distinguish the Staged→Rebooting. + // rebootIssued tracks whether a reboot has been issued so Reconcile + // can skip reconciliation while the node is shutting down. rebootIssued bool } @@ -78,6 +78,11 @@ func (r *BootcNodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( return ctrl.Result{}, nil } + if r.rebootIssued { + log.Info("Reboot already issued, skipping reconcile") + return ctrl.Result{}, nil + } + var bn bootcv1alpha1.BootcNode if err := r.Get(ctx, req.NamespacedName, &bn); err != nil { if apierrors.IsNotFound(err) { @@ -211,9 +216,6 @@ func (r *BootcNodeReconciler) reconcileBootcNode(ctx context.Context, bn *bootcv reason = bootcv1alpha1.NodeReasonRebooting res.needsReboot = true - case actionAwaitReboot: - reason = bootcv1alpha1.NodeReasonRebooting - case actionAwaitBooted: reason = bootcv1alpha1.NodeReasonStaged log.Info("Image staged", "image", desiredImage) @@ -331,7 +333,6 @@ const ( actionAwaitStage // stage in-flight, waiting for completion actionAwaitBooted // staged, waiting for reboot approval actionReboot // staged + approved, issue reboot - actionAwaitReboot // reboot issued, waiting for completion ) func (r *BootcNodeReconciler) classifyAction(bn *bootcv1alpha1.BootcNode, digested reference.Digested, desiredImage string) updateAction { @@ -348,12 +349,6 @@ func (r *BootcNodeReconciler) classifyAction(bn *bootcv1alpha1.BootcNode, digest return actionAwaitBooted } - // rebootIssued is volatile: if the daemon restarts it resets to false. - // That is safe because either the reboot already landed (booted digest - // matches and we return idle earlier) or it hasn't and we re-issue it. - if r.rebootIssued { - return actionAwaitReboot - } return actionReboot } diff --git a/internal/daemon/reconciler_test.go b/internal/daemon/reconciler_test.go index b8f2985..5984bea 100644 --- a/internal/daemon/reconciler_test.go +++ b/internal/daemon/reconciler_test.go @@ -100,6 +100,7 @@ func TestReconcileBootcStatusError(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.setStatusErr(errors.New(bootcStatusErrMsg)) bn := testutil.NewNode(testNodeName, testutil.ImageDigestRefA) @@ -127,6 +128,7 @@ func TestStagingTriggered(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) bn := testutil.NewNode(testNodeName, testutil.ImageDigestRefB) @@ -164,6 +166,7 @@ func TestStagingError(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) fake.setStageErr(errors.New(stageErrMsg)) @@ -197,6 +200,7 @@ func TestAlreadyStaged(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -226,6 +230,7 @@ func TestRebootingSet(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -255,6 +260,7 @@ func TestRollback(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -286,6 +292,7 @@ func TestCancelInflightStage(t *testing.T) { ctx := context.Background() fake.reset() + reconciler.reset() fake.status = newBootcStatus(testutil.DigestA) firstBlock := make(chan struct{}) diff --git a/internal/daemon/suite_test.go b/internal/daemon/suite_test.go index 9f1a094..220cecb 100644 --- a/internal/daemon/suite_test.go +++ b/internal/daemon/suite_test.go @@ -22,9 +22,10 @@ import ( const testNodeName = "test-node" var ( - testEnv *envtest.Environment - k8sClient client.Client - fake *fakeExecutor + testEnv *envtest.Environment + k8sClient client.Client + fake *fakeExecutor + reconciler *BootcNodeReconciler ) func TestMain(m *testing.M) { @@ -65,12 +66,13 @@ func TestMain(m *testing.M) { os.Exit(1) } - if err := (&BootcNodeReconciler{ + reconciler = &BootcNodeReconciler{ Client: mgr.GetClient(), Scheme: mgr.GetScheme(), NodeName: testNodeName, Executor: fake, - }).SetupWithManager(mgr); err != nil { + } + if err := reconciler.SetupWithManager(mgr); err != nil { fmt.Fprintf(os.Stderr, "Failed to setup reconciler: %v\n", err) os.Exit(1) } diff --git a/test/e2e/bootcnode_test.go b/test/e2e/bootcnode_test.go index 1b82178..7f607cf 100644 --- a/test/e2e/bootcnode_test.go +++ b/test/e2e/bootcnode_test.go @@ -131,10 +131,21 @@ func TestUpdateReboot(t *testing.T) { t.Logf("Patched pool to update image %s", updateRef) - // Phase 3: Wait for Idle with the update digest — proves the full + // Phase 3: Wait for Rebooting — the daemon skips reconciliation after + // issuing a reboot, so this state is durable until the node goes down. + g.Eventually(func(g Gomega) { + g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &bn)).To(Succeed()) + g.Expect(bn.Status.Conditions).To(ContainElement(And( + HaveField("Type", bootcv1alpha1.NodeIdle), + HaveField("Status", metav1.ConditionFalse), + HaveField("Reason", bootcv1alpha1.NodeReasonRebooting), + ))) + }).WithTimeout(5*time.Minute).Should(Succeed(), "expected node to reach Rebooting state") + + t.Logf("Node %q is Rebooting", nodeName) + + // Phase 4: Wait for Idle with the update digest — proves the full // update lifecycle completed (staging, reboot, boot into new image). - // We don't assert on intermediate states (Staging, Rebooting) because - // they are too transient to catch reliably with polling. g.Eventually(func(g Gomega) { g.Expect(env.Client.Get(ctx, client.ObjectKey{Name: nodeName}, &bn)).To(Succeed()) g.Expect(bn.Status.Booted).NotTo(BeNil()) From 0e10e040e7ad20372e7a7470d3096042b75fad60 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 14:45:57 -0400 Subject: [PATCH 2/7] daemon: move reset() to stageOp, drop rebootIssued clearing The `reset()` method only needs to clear the backoff retry counter, so it belongs on `stageOp` rather than `BootcNodeReconciler`. Clearing `rebootIssued` is unnecessary in production; it doesn't make sense to "un-issue" a reboot. The way it gets "reset" is by the daemon naturally getting restarted as part of the reboot. That said, add a `resetDaemon()` test helper to make resetting everything easier. Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- internal/daemon/reconciler.go | 13 ++++++------- internal/daemon/reconciler_test.go | 21 +++++++-------------- internal/daemon/suite_test.go | 8 ++++++++ 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/internal/daemon/reconciler.go b/internal/daemon/reconciler.go index 44ff63f..4630c68 100644 --- a/internal/daemon/reconciler.go +++ b/internal/daemon/reconciler.go @@ -178,9 +178,9 @@ func (r *BootcNodeReconciler) reconcileBootcNode(ctx context.Context, bn *bootcv } // Nothing to do the desired image matches the booted ones. - // Rest the reconciler to start from a clean state + // Reset the stage backoff to start from a clean state. if digested.Digest().String() == bn.Status.Booted.ImageDigest { - r.reset() + r.inflight.reset() return reconcileResult{}, nil } @@ -268,11 +268,10 @@ func (s *stageOp) acquire(log logr.Logger, image string, cancel context.CancelFu s.err = nil } -func (r *BootcNodeReconciler) reset() { - r.rebootIssued = false - r.inflight.mu.Lock() - defer r.inflight.mu.Unlock() - r.inflight.retries = 0 +func (s *stageOp) reset() { + s.mu.Lock() + defer s.mu.Unlock() + s.retries = 0 } // run executes bootc stage in a goroutine. The results are delivered via the done channel. diff --git a/internal/daemon/reconciler_test.go b/internal/daemon/reconciler_test.go index 5984bea..2ba1123 100644 --- a/internal/daemon/reconciler_test.go +++ b/internal/daemon/reconciler_test.go @@ -99,8 +99,7 @@ func TestReconcileBootcStatusError(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.setStatusErr(errors.New(bootcStatusErrMsg)) bn := testutil.NewNode(testNodeName, testutil.ImageDigestRefA) @@ -127,8 +126,7 @@ func TestStagingTriggered(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) bn := testutil.NewNode(testNodeName, testutil.ImageDigestRefB) @@ -165,8 +163,7 @@ func TestStagingError(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) fake.setStageErr(errors.New(stageErrMsg)) @@ -199,8 +196,7 @@ func TestAlreadyStaged(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -229,8 +225,7 @@ func TestRebootingSet(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -259,8 +254,7 @@ func TestRollback(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) fake.status.Status.Staged = newBootEntry(testutil.ImageDigestRefB, testutil.DigestB) @@ -291,8 +285,7 @@ func TestCancelInflightStage(t *testing.T) { g.SetDefaultEventuallyPollingInterval(pollInterval) ctx := context.Background() - fake.reset() - reconciler.reset() + resetDaemon() fake.status = newBootcStatus(testutil.DigestA) firstBlock := make(chan struct{}) diff --git a/internal/daemon/suite_test.go b/internal/daemon/suite_test.go index 220cecb..7a94775 100644 --- a/internal/daemon/suite_test.go +++ b/internal/daemon/suite_test.go @@ -28,6 +28,14 @@ var ( reconciler *BootcNodeReconciler ) +// resetDaemon resets all volatile state on the shared reconciler and +// fake executor so each test starts from a clean slate. +func resetDaemon() { + fake.reset() + reconciler.rebootIssued = false + reconciler.inflight.reset() +} + func TestMain(m *testing.M) { ctrl.SetLogger(zap.New(zap.UseDevMode(true))) From c62a5ca921b9c44a1afe1e914161fbabc200d57e Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 15:34:06 -0400 Subject: [PATCH 3/7] daemon: skip stage when context is already cancelled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After acquiring runMu, check ctx.Err() before calling Stage(). This prevents stale goroutines from briefly starting a bootc switch process with the wrong image after rapid spec changes (e.g. B→C→D). Multiple goroutines may still pile up on the mutex, but each cancelled one exits immediately without spawning a process. Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- internal/daemon/reconciler.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/internal/daemon/reconciler.go b/internal/daemon/reconciler.go index 4630c68..6cb063e 100644 --- a/internal/daemon/reconciler.go +++ b/internal/daemon/reconciler.go @@ -275,12 +275,22 @@ func (s *stageOp) reset() { } // run executes bootc stage in a goroutine. The results are delivered via the done channel. +// +// Technically, multiple goroutines may pile up here waiting on runMu if +// there's multiple rapid image changes (e.g. B→C→D spawns three goroutines). +// This is harmless: each cancelled goroutine checks ctx.Err() after acquiring +// the lock and exits immediately without starting a process. func (s *stageOp) run(ctx context.Context, nodeName, image string, executor bootc.Executor, done chan<- event.GenericEvent) { s.runMu.Lock() defer s.runMu.Unlock() log := logf.FromContext(context.Background()).WithValues("node", nodeName, "image", image) + if ctx.Err() != nil { + log.Info("Stage skipped, context already cancelled") + return + } + // TODO: exec bootc switch async and select on the cancel channel to send SIGINT for graceful shutdown. err := executor.Stage(ctx, image) From b42d7cf664a54327c88221f1a57939092f999b40 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 15:42:24 -0400 Subject: [PATCH 4/7] daemon: remove stale TODO about SIGINT for bootc cancellation exec.CommandContext already sends SIGKILL on context cancellation, and bootc has no SIGINT handler, so graceful vs. hard termination is equivalent. Replace the TODO with a brief note about the existing behavior. Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- internal/daemon/reconciler.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/daemon/reconciler.go b/internal/daemon/reconciler.go index 6cb063e..8e05a0f 100644 --- a/internal/daemon/reconciler.go +++ b/internal/daemon/reconciler.go @@ -291,7 +291,8 @@ func (s *stageOp) run(ctx context.Context, nodeName, image string, executor boot return } - // TODO: exec bootc switch async and select on the cancel channel to send SIGINT for graceful shutdown. + // NB: this uses exec.CommandContext, which will SIGKILL bootc if the + // context is cancelled. err := executor.Stage(ctx, image) s.mu.Lock() From 9841ebca63b5220976cb57ee605d9e52ee1b8849 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 22:15:39 -0400 Subject: [PATCH 5/7] daemon: run bootc switch via systemd-run on the host Run bootc switch as a transient systemd unit on the host rather than directly inside the daemon pod's cgroup. It's just cleaner to structure it this way (e.g. we don't have to account for bootc memory requirements in our own, and also the operation transparently survives daemon pod restarts), and will also allow in the future using various systemd knobs, such as those around IO scheduling (see #61). Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- internal/bootc/executor.go | 87 +++++++++++++++++++++++++++++++++++--- 1 file changed, 81 insertions(+), 6 deletions(-) diff --git a/internal/bootc/executor.go b/internal/bootc/executor.go index 83ff77f..9a8711b 100644 --- a/internal/bootc/executor.go +++ b/internal/bootc/executor.go @@ -7,7 +7,9 @@ import ( "fmt" "os/exec" "strings" + "time" + "github.com/go-logr/logr" logf "sigs.k8s.io/controller-runtime/pkg/log" ) @@ -31,7 +33,7 @@ func NewHostExecutor() *HostExecutor { func (e *HostExecutor) nsenterCmd(ctx context.Context, args ...string) *exec.Cmd { base := []string{ "--target", "1", - "--mount", "--pid", + "--mount", "--pid", "--cgroup", "--setuid", "0", "--setgid", "0", "--env", "--", } @@ -47,19 +49,92 @@ func (e *HostExecutor) Status(ctx context.Context) ([]byte, error) { return out, nil } +// stageUnitName is the systemd transient unit name used for bootc switch. +const stageUnitName = "bootc-operator-switch.service" + func (e *HostExecutor) Stage(ctx context.Context, image string) error { log := logf.FromContext(ctx) - // TODO: use --download-only once available (https://github.com/bootc-dev/bootc/issues/2137) - cmd := e.nsenterCmd(ctx, "bootc", "switch", image) + // Stop any stale unit from a previous daemon incarnation. + e.stopStageUnit() + + // Ideally we'd use systemd-run's `--pipe` here, which would avoid + // having to fetch the unit journal down below, but SELinux blocks it + // (dbus-broker can't access container-labeled fds). + cmd := e.nsenterCmd(ctx, + "systemd-run", "--wait", "--collect", + "--unit", stageUnitName, + // TODO: use --download-only once available + // (https://github.com/bootc-dev/bootc/issues/2137) + "bootc", "switch", image, + ) + cmd.Cancel = func() error { + e.stopStageUnit() + return nil + } log.Info("Executing", "cmd", strings.Join(cmd.Args, " ")) - out, err := cmd.CombinedOutput() - if err != nil { - return fmt.Errorf("running bootc switch: %s: %w", out, err) + cursor := e.journalCursor() + if err := cmd.Run(); err != nil { + // Were we cancelled? + if ctx.Err() != nil { + return ctx.Err() + } + e.copyJournalUnitLogs(log, stageUnitName, cursor) + return fmt.Errorf("running bootc switch: %w", err) } return nil } +// stopStageUnit stops the bootc-operator-switch transient unit if it +// is running. Errors are ignored: the unit may not exist. +func (e *HostExecutor) stopStageUnit() { + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + _ = e.nsenterCmd(ctx, "systemctl", "stop", stageUnitName).Run() +} + +// journalCursor returns the current journal cursor position. Used to scope a +// future journal query to only entries after the cursor position. +func (e *HostExecutor) journalCursor() string { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + out, err := e.nsenterCmd(ctx, + "journalctl", "--show-cursor", "-n", "0", "-o", "cat", + ).Output() + if err != nil { + return "" + } + // Output format: "-- cursor: s=...;i=...;b=...;..." + if after, ok := strings.CutPrefix(strings.TrimSpace(string(out)), "-- cursor: "); ok { + return after + } + return "" +} + +// copyJournalUnitLogs logs recent journal output from the given systemd unit. +// If cursor is non-empty, only entries after that cursor are shown; otherwise +// shows all entries. +func (e *HostExecutor) copyJournalUnitLogs(log logr.Logger, unit string, cursor string) { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + args := []string{"journalctl", "-o", "cat", "--no-pager", + "-u", unit, + } + if cursor != "" { + args = append(args, "--after-cursor="+cursor) + } + out, err := e.nsenterCmd(ctx, args...).Output() + if err != nil { + log.Error(err, "Failed to read unit journal", "unit", unit) + return + } + for _, line := range strings.Split(strings.TrimSpace(string(out)), "\n") { + if line != "" { + log.Info(line, "unit", unit) + } + } +} + func (e *HostExecutor) Reboot(ctx context.Context) error { log := logf.FromContext(ctx) From 581337f3c5b3be3401faf89a82852d8bb682aa91 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 22:16:15 -0400 Subject: [PATCH 6/7] Revert "daemon: increase memory resource" This reverts commit 7c5e31c6aa15e12cd23def80464d9183169a643a. The memory bump to 512Mi is no longer needed: bootc switch now runs as a transient systemd unit on the host (outside the pod cgroup), so its memory usage does not count against the daemon pod limit. 128Mi is sufficient for the daemon itself (Go binary, controller-runtime, API watches). Assisted-by: Pi (Claude Opus 4.6) Signed-off-by: Jonathan Lebon --- config/daemon/daemon.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/daemon/daemon.yaml b/config/daemon/daemon.yaml index af5d7c8..fefaa14 100644 --- a/config/daemon/daemon.yaml +++ b/config/daemon/daemon.yaml @@ -38,7 +38,7 @@ spec: resources: limits: cpu: 500m - memory: 512Mi + memory: 128Mi requests: cpu: 10m memory: 64Mi From 53725cbaf0c68671ef74b68f8f8f7dd87678cd24 Mon Sep 17 00:00:00 2001 From: Jonathan Lebon Date: Fri, 19 Jun 2026 22:54:36 -0400 Subject: [PATCH 7/7] docs: mark milestones 4b and 4c as complete Signed-off-by: Jonathan Lebon --- docs/IMPLEMENTATION_PLAN.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/IMPLEMENTATION_PLAN.md b/docs/IMPLEMENTATION_PLAN.md index eda8bd0..71e9f54 100644 --- a/docs/IMPLEMENTATION_PLAN.md +++ b/docs/IMPLEMENTATION_PLAN.md @@ -207,7 +207,7 @@ the full controller+daemon loop can be tested end-to-end. DaemonSet, label a node with `bootc.dev/managed`, and verify the daemon pod starts on that node. -### 4b. Core loop +### 4b. Core loop ✅ - Binary identifies its node name (downward API env var) - Watches its single BootcNode via field selector on `metadata.name` @@ -221,7 +221,7 @@ pod starts on that node. **Validation (e2e):** enhance the existing e2e test to verify the daemon populates BootcNode status from `bootc status`. -### 4c. State machine +### 4c. State machine ✅ - Detect `spec.desiredImage != booted` → set `Idle=False reason=Staging`, run `bootc switch ` (no `--download-only` for now;