Skip to content

Commit cd59a62

Browse files
Merge pull request #1279 from percona/release-2.13.0
Release 2.13.0
2 parents 05ab6d7 + e9b57e1 commit cd59a62

File tree

1,499 files changed

+110337
-130443
lines changed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

1,499 files changed

+110337
-130443
lines changed

.github/pr-badge.yml

Lines changed: 0 additions & 5 deletions
This file was deleted.

.github/workflows/ci.yml

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ on:
3434
- "packaging/**"
3535
- "version/**"
3636

37+
concurrency:
38+
group: ${{ github.workflow }}-${{ github.ref }}
39+
cancel-in-progress: true
40+
3741
jobs:
3842
test:
3943
runs-on: ubuntu-latest
@@ -49,11 +53,45 @@ jobs:
4953
PR_NUMBER: ${{ github.event.number|| github.event.inputs.pr_ver }}
5054
MAKE_TARGET: 'build-cover'
5155
steps:
56+
- name: Cleanup unnecessary software
57+
run: |
58+
sudo rm -rf /usr/share/dotnet
59+
sudo rm -rf /usr/local/lib/android
60+
sudo rm -rf /opt/ghc
61+
sudo rm -rf /opt/hostedtoolcache/CodeQL
62+
df -h
63+
64+
- name: Determine test branch
65+
id: test_branch
66+
run: |
67+
if [ -n "${{ github.event.inputs.tests_ver }}" ]; then
68+
echo "branch=${{ github.event.inputs.tests_ver }}" >> $GITHUB_OUTPUT
69+
elif [ -n "${{ github.event.pull_request.title }}" ]; then
70+
PR_TITLE="${{ github.event.pull_request.title }}"
71+
echo "PR title: $PR_TITLE"
72+
BRANCH_TO_CHECK=$(echo "$PR_TITLE" | grep -oE 'PBM-[0-9]+' | head -1)
73+
if [ -n "$BRANCH_TO_CHECK" ]; then
74+
echo "Extracted branch pattern: $BRANCH_TO_CHECK"
75+
if curl -sf "https://api.github.com/repos/Percona-QA/psmdb-testing/branches/$BRANCH_TO_CHECK" > /dev/null; then
76+
echo "Branch $BRANCH_TO_CHECK found, using it"
77+
echo "branch=$BRANCH_TO_CHECK" >> $GITHUB_OUTPUT
78+
else
79+
echo "Branch $BRANCH_TO_CHECK not found, falling back to main"
80+
echo "branch=main" >> $GITHUB_OUTPUT
81+
fi
82+
else
83+
echo "No PBM-XXXXX pattern found in PR title, using main"
84+
echo "branch=main" >> $GITHUB_OUTPUT
85+
fi
86+
else
87+
echo "branch=main" >> $GITHUB_OUTPUT
88+
fi
89+
5290
- name: Checkout testing repo
5391
uses: actions/checkout@v4
5492
with:
5593
repository: Percona-QA/psmdb-testing
56-
ref: ${{ github.event.inputs.tests_ver || 'main'}}
94+
ref: ${{ steps.test_branch.outputs.branch }}
5795
path: psmdb-testing
5896

5997
- name: Setup environment with PSMDB ${{ matrix.psmdb }} for PBM PR/branch ${{ github.event.pull_request.title || env.PR_NUMBER || env.PBM_BRANCH }}

.github/workflows/trivy.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
tar -xzf ${{ github.workspace }}/trivy/trivy.tar.gz -C ${{ github.workspace }}/trivy
2525
2626
- name: Generate SBOM
27-
run: ${{ github.workspace }}/trivy/trivy fs --format cyclonedx --output ${{ github.workspace }}/sbom.json ${{ github.workspace }}
27+
run: ${{ github.workspace }}/trivy/trivy fs --format cyclonedx --output ${{ github.workspace }}/sbom.json --skip-dirs ${{ github.workspace }}/vendor ${{ github.workspace }}
2828

2929
- name: Run trivy scan on SBOM
3030
run: ${{ github.workspace }}/trivy/trivy sbom ${{ github.workspace }}/sbom.json --severity HIGH,CRITICAL --ignore-unfixed --exit-code=1

CODEOWNERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
* @boris-ilijic @inelpandzic
1+
* @boris-ilijic @jcechace

CODE_OF_CONDUCT.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Percona Backup for MongoDB Code of Conduct
2+
3+
All Percona Products follow the [Percona Community Code of Conduct](https://percona.community/contribute/coc/).
4+
5+
If you notice any unacceptable behavior, let us know as soon as possible by writing to <community-team@percona.com>. We will respond within 48 hours.

cmd/pbm-agent/backup.go

Lines changed: 48 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/percona/percona-backup-mongodb/pbm/prio"
1515
"github.com/percona/percona-backup-mongodb/pbm/storage"
1616
"github.com/percona/percona-backup-mongodb/pbm/topo"
17+
"github.com/percona/percona-backup-mongodb/pbm/util"
1718
"github.com/percona/percona-backup-mongodb/pbm/version"
1819
)
1920

@@ -92,8 +93,10 @@ func (a *Agent) Backup(ctx context.Context, cmd *ctrl.BackupCmd, opid ctrl.OPID,
9293
}
9394
}
9495

95-
if cmd.Type == defs.LogicalBackup {
96-
// wakeup the slicer to not wait for the tick
96+
if cmd.Type == defs.LogicalBackup && cmd.Profile == "" {
97+
// For backups to the main storage,
98+
// wake up the slicer to not wait for the tick.
99+
// This will slice and pause the main PITR
97100
go a.sliceNow(opid)
98101
}
99102

@@ -155,20 +158,25 @@ func (a *Agent) Backup(ctx context.Context, cmd *ctrl.BackupCmd, opid ctrl.OPID,
155158
// not replset. So an `incremental && not_base` backup should land on
156159
// the agent that made a previous (src) backup.
157160
const srcHostMultiplier = 3.0
158-
var c map[string]float64
161+
c := make(map[string]float64)
159162
if cmd.Type == defs.IncrementalBackup && !cmd.IncrBase {
160163
src, err := backup.LastIncrementalBackup(ctx, a.leadConn)
161164
if err != nil {
162165
// try backup anyway
163166
l.Warning("define source backup: %v", err)
164167
} else {
165-
c = make(map[string]float64)
166168
for _, rs := range src.Replsets {
167169
c[rs.Node] = srcHostMultiplier
168170
}
169171
}
170172
}
171173

174+
// When a logical backup targets an external profile (different storage),
175+
// PITR keeps running. Deprioritize nodes currently running PITR slicer
176+
if cmd.Type == defs.LogicalBackup && cmd.Profile != "" {
177+
c = a.deprioritizePITRNodes(ctx, c, l)
178+
}
179+
172180
agents, err := topo.ListSteadyAgents(ctx, a.leadConn)
173181
if err != nil {
174182
l.Error("get agents list: %v", err)
@@ -240,7 +248,7 @@ func (a *Agent) Backup(ctx context.Context, cmd *ctrl.BackupCmd, opid ctrl.OPID,
240248
a.setBcp(&currentBackup{cancel: cancel})
241249
defer a.setBcp(nil)
242250

243-
l.Info("backup started")
251+
l.Info("backup started %s", util.LogProfileArg(cmd.Profile))
244252
err = bcp.Run(bcpCtx, cmd, opid, l)
245253
if err != nil {
246254
if errors.Is(err, storage.ErrCancelled) || errors.Is(err, context.Canceled) {
@@ -266,6 +274,41 @@ func (a *Agent) getValidCandidates(agents []topo.AgentStat, backupType defs.Back
266274
return validCandidates
267275
}
268276

277+
// deprioritizePITRNodes adds low-priority coefficients for nodes currently running PITR slicing.
278+
// It only modifies the coefficient map for nodes not already present (e.g., incremental src host).
279+
// Returns the (possibly modified) coefficient map.
280+
func (a *Agent) deprioritizePITRNodes(
281+
ctx context.Context,
282+
coefficients map[string]float64,
283+
l log.LogEvent,
284+
) map[string]float64 {
285+
pitrLocks, err := lock.GetOpLocks(ctx, a.leadConn, &lock.LockHeader{Type: ctrl.CmdPITR})
286+
if err != nil {
287+
l.Warning("get pitr locks for deprioritization: %v", err)
288+
return coefficients
289+
}
290+
291+
ts, err := topo.GetClusterTime(ctx, a.leadConn)
292+
if err != nil {
293+
l.Warning("get cluster time for pitr deprioritization: %v", err)
294+
return coefficients
295+
}
296+
297+
for i := range pitrLocks {
298+
pl := &pitrLocks[i]
299+
if pl.Heartbeat.T+defs.StaleFrameSec < ts.T {
300+
continue // stale lock, ignore
301+
}
302+
303+
// Only set if not already present (preserve previous priorities)
304+
if _, exists := coefficients[pl.Node]; !exists {
305+
coefficients[pl.Node] = prio.DefaultScore - 0.1
306+
}
307+
}
308+
309+
return coefficients
310+
}
311+
269312
const renominationFrame = 5 * time.Second
270313

271314
func (a *Agent) nominateRS(ctx context.Context, bcp, rs string, nodes [][]string) error {

cmd/pbm-agent/delete.go

Lines changed: 48 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ import (
55
"runtime"
66
"time"
77

8-
"go.mongodb.org/mongo-driver/bson"
98
"go.mongodb.org/mongo-driver/bson/primitive"
109
"golang.org/x/sync/errgroup"
1110

@@ -16,7 +15,6 @@ import (
1615
"github.com/percona/percona-backup-mongodb/pbm/lock"
1716
"github.com/percona/percona-backup-mongodb/pbm/log"
1817
"github.com/percona/percona-backup-mongodb/pbm/oplog"
19-
"github.com/percona/percona-backup-mongodb/pbm/resync"
2018
"github.com/percona/percona-backup-mongodb/pbm/storage"
2119
"github.com/percona/percona-backup-mongodb/pbm/topo"
2220
"github.com/percona/percona-backup-mongodb/pbm/util"
@@ -95,17 +93,19 @@ func (a *Agent) Delete(ctx context.Context, d *ctrl.DeleteBackupCmd, opid ctrl.O
9593
return
9694
}
9795

98-
l.Info("deleting backups older than %v", t)
99-
err = backup.DeleteBackupBefore(ctx, a.leadConn, t, bcpType, nodeInfo.Me)
96+
stg, err := util.GetProfiledStorage(ctx, a.leadConn, d.Profile, nodeInfo.Me, l)
97+
if err != nil {
98+
l.Error("get storage: %v", err)
99+
return
100+
}
101+
l.Info("deleting backups older than %v %s", t, util.LogProfileArg(d.Profile))
102+
err = backup.DeleteBackupBefore(ctx, a.leadConn, stg, d.Profile, bcpType, t)
100103
if err != nil {
101104
l.Error("deleting: %v", err)
102105
return
103106
}
104107
case d.Backup != "":
105-
l = logger.NewEvent(string(ctrl.CmdDeleteBackup), d.Backup, opid.String(), ep.TS())
106-
ctx := log.SetLogEventToContext(ctx, l)
107-
108-
l.Info("deleting backup")
108+
l.Info("deleting backup %q", d.Backup)
109109
err := backup.DeleteBackup(ctx, a.leadConn, d.Backup, nodeInfo.Me)
110110
if err != nil {
111111
l.Error("deleting: %v", err)
@@ -254,59 +254,40 @@ func (a *Agent) Cleanup(ctx context.Context, d *ctrl.CleanupCmd, opid ctrl.OPID,
254254
return
255255
}
256256

257-
cfg, err := config.GetConfig(ctx, a.leadConn)
257+
cfg, err := config.GetProfiledConfig(ctx, a.leadConn, d.Profile)
258258
if err != nil {
259259
l.Error("get config: %v", err)
260+
return
260261
}
261262

262263
stg, err := util.StorageFromConfig(&cfg.Storage, a.brief.Me, l)
263264
if err != nil {
264265
l.Error("get storage: " + err.Error())
266+
return
265267
}
266268

267-
eg := errgroup.Group{}
268-
eg.SetLimit(runtime.NumCPU())
269-
270-
cr, err := backup.MakeCleanupInfo(ctx, a.leadConn, d.OlderThan)
269+
cr, err := backup.MakeCleanupInfo(ctx, a.leadConn, d.OlderThan, d.Profile)
271270
if err != nil {
272271
l.Error("make cleanup report: " + err.Error())
273272
return
274273
}
275274

276-
for i := range cr.Chunks {
277-
name := cr.Chunks[i].FName
275+
eg := &errgroup.Group{}
276+
eg.SetLimit(runtime.NumCPU())
278277

279-
eg.Go(func() error {
280-
err := stg.Delete(name)
281-
return errors.Wrapf(err, "delete chunk file %q", name)
282-
})
283-
}
284-
if err := eg.Wait(); err != nil {
278+
if err := a.deleteChunks(ctx, eg, stg, cr.Chunks); err != nil {
285279
l.Error(err.Error())
286280
}
287281

288-
for i := range cr.Backups {
289-
bcp := &cr.Backups[i]
290-
291-
eg.Go(func() error {
292-
err := backup.DeleteBackupFiles(stg, bcp.Name)
293-
return errors.Wrapf(err, "delete backup files %q", bcp.Name)
294-
})
295-
}
296-
if err := eg.Wait(); err != nil {
282+
if err := a.deleteBackups(ctx, eg, stg, cr.Backups); err != nil {
297283
l.Error(err.Error())
298284
}
299-
300-
err = resync.Resync(ctx, a.leadConn, &cfg.Storage, a.brief.Me, false)
301-
if err != nil {
302-
l.Error("storage resync: " + err.Error())
303-
}
304285
}
305286

306287
func (a *Agent) deletePITRImpl(ctx context.Context, ts primitive.Timestamp) error {
307288
l := log.LogEventFromContext(ctx)
308289

309-
r, err := backup.MakeCleanupInfo(ctx, a.leadConn, ts)
290+
r, err := backup.MakeCleanupInfo(ctx, a.leadConn, ts, "")
310291
if err != nil {
311292
return errors.Wrap(err, "get pitr chunks")
312293
}
@@ -320,32 +301,40 @@ func (a *Agent) deletePITRImpl(ctx context.Context, ts primitive.Timestamp) erro
320301
return errors.Wrap(err, "get storage")
321302
}
322303

323-
return a.deleteChunks(ctx, stg, r.Chunks)
304+
eg := &errgroup.Group{}
305+
eg.SetLimit(runtime.NumCPU())
306+
return a.deleteChunks(ctx, eg, stg, r.Chunks)
324307
}
325308

326-
func (a *Agent) deleteChunks(ctx context.Context, stg storage.Storage, chunks []oplog.OplogChunk) error {
327-
l := log.LogEventFromContext(ctx)
328-
329-
for _, chnk := range chunks {
330-
err := stg.Delete(chnk.FName)
331-
if err != nil && !errors.Is(err, storage.ErrNotExist) {
332-
return errors.Wrapf(err, "delete pitr chunk '%s' (%v) from storage", chnk.FName, chnk)
333-
}
309+
func (a *Agent) deleteChunks(
310+
ctx context.Context,
311+
eg *errgroup.Group,
312+
stg storage.Storage,
313+
chunks []oplog.OplogChunk,
314+
) error {
315+
for _, c := range chunks {
316+
eg.Go(func() error {
317+
err := oplog.DeleteChunkData(ctx, a.leadConn, stg, c)
318+
return errors.Wrapf(err, "delete chunk %q", c.FName)
319+
})
320+
}
321+
return eg.Wait()
322+
}
334323

335-
_, err = a.leadConn.PITRChunksCollection().DeleteOne(
336-
ctx,
337-
bson.D{
338-
{"rs", chnk.RS},
339-
{"start_ts", chnk.StartTS},
340-
{"end_ts", chnk.EndTS},
341-
},
342-
)
343-
if err != nil {
344-
return errors.Wrap(err, "delete pitr chunk metadata")
345-
}
324+
func (a *Agent) deleteBackups(
325+
ctx context.Context,
326+
eg *errgroup.Group,
327+
stg storage.Storage,
328+
backups []backup.BackupMeta,
329+
) error {
330+
l := log.LogEventFromContext(ctx)
346331

347-
l.Debug("deleted %s", chnk.FName)
332+
for _, b := range backups {
333+
eg.Go(func() error {
334+
l.Info("deleting backup %q %s", b.Name, util.LogProfileArg(b.Store.Name))
335+
err := backup.DeleteBackupData(ctx, a.leadConn, stg, b.Name)
336+
return errors.Wrapf(err, "delete backup %q", b.Name)
337+
})
348338
}
349-
350-
return nil
339+
return eg.Wait()
351340
}

0 commit comments

Comments
 (0)