Skip to content

Commit aaaf4e0

Browse files
committed
Prevent PITR supervisor from restarting slicer during restore
The PITR supervisor loop could restart the oplog slicer during a restore if it read the config before the restore set pitr.enabled=false. Add a suspendPitr/resumePitr mechanism so the restore handler can prevent the supervisor from restarting the slicer for the duration of the restore.
1 parent 51e704a commit aaaf4e0

File tree

3 files changed

+31
-7
lines changed

3 files changed

+31
-7
lines changed

cmd/pbm-agent/agent.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@ import (
2525
)
2626

2727
type Agent struct {
28-
leadConn connect.Client
29-
nodeConn *mongo.Client
30-
bcp *currentBackup
31-
pitrjob *currentPitr
32-
slicerMx sync.Mutex
33-
bcpMx sync.Mutex
28+
leadConn connect.Client
29+
nodeConn *mongo.Client
30+
bcp *currentBackup
31+
pitrjob *currentPitr
32+
pitrSuspended atomic.Bool
33+
slicerMx sync.Mutex
34+
bcpMx sync.Mutex
3435

3536
brief topo.NodeBrief
3637

cmd/pbm-agent/pitr.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,19 @@ func (a *Agent) removePitr() {
5757
a.setPitr(nil)
5858
}
5959

60+
// suspendPitr marks PITR as suspended and then stops the current slicer via
61+
// removePitr. While the flag is set, pitr() returns early and a running slicing loop stops itself. Use resumePitr to allow restarts again.
62+
func (a *Agent) suspendPitr() {
63+
a.pitrSuspended.Store(true) // set the flag first, so a pitr() pass that runs after this point sees it and returns before starting a slicer
64+
a.removePitr() // then drop the current slicer; NOTE(review): the flag is a plain bool, not a counter — overlapping suspend/resume pairs are not tracked
65+
}
66+
67+
// resumePitr clears the suspended flag set by suspendPitr so that pitr()
68+
// no longer returns early. It only resets the flag and does not start a slicer itself.
69+
func (a *Agent) resumePitr() {
70+
a.pitrSuspended.Store(false) // unconditional reset; presumably the supervisor restarts slicing on a later pitr() pass if PITR is enabled — confirm against the caller loop
71+
}
72+
6073
func (a *Agent) getPitr() *currentPitr {
6174
a.slicerMx.Lock()
6275
defer a.slicerMx.Unlock()
@@ -185,6 +198,10 @@ func (a *Agent) pitr(ctx context.Context) error {
185198
l := log.FromContext(ctx).NewEvent(string(ctrl.CmdPITR), "", "", ep.TS())
186199
ctx = log.SetLogEventToContext(ctx, l)
187200

201+
if a.pitrSuspended.Load() {
202+
return nil
203+
}
204+
188205
if !cfg.PITR.Enabled {
189206
a.removePitr()
190207
a.stopMon()
@@ -358,6 +375,11 @@ func (a *Agent) pitr(ctx context.Context) error {
358375
stopSlicing()
359376
return
360377
}
378+
if a.pitrSuspended.Load() {
379+
l.Debug("stop slicing: pitr suspended")
380+
stopSlicing()
381+
return
382+
}
361383

362384
case <-stopSlicingCtx.Done():
363385
return

cmd/pbm-agent/restore.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ func (a *Agent) Restore(ctx context.Context, r *ctrl.RestoreCmd, opid ctrl.OPID,
8585
} else {
8686
l.Info("oplog slicer disabled")
8787
}
88-
a.removePitr()
88+
a.suspendPitr()
89+
defer a.resumePitr()
8990
}
9091

9192
// stop balancer during the restore

0 commit comments

Comments
 (0)