Skip to content

Commit 53eddaf

Browse files
chris-campos authored and
copybara-github committed
Optimize SequenceGlob matching performance using a Set.
When `origin_files` is provided as a list of files, Copybara uses `SequenceGlob`. Previously, `SequenceGlob.relativeTo` returned a `PathMatcher` that iterated linearly through all files in the list for every file checked during checkout. This resulted in O(N*M) complexity, where N is the number of files in the list and M is the number of files checked.

This change optimizes `SequenceGlob` to pre-calculate the relative paths and store them in an `ImmutableSet`. This allows for O(1) lookup time during matching, reducing the complexity to O(N + M). Benchmarks show a ~73x speedup in matching time for 5000 files (1589ms -> 21ms).

BUG=467397947
GWSQ_IGNORE: chriscampos@google.com
PiperOrigin-RevId: 847778127
Change-Id: Idaf21e5773faa3d61ce3c9a07ac0d1524df1c0f5
1 parent 79c21be commit 53eddaf

File tree

1 file changed

+32
-5
lines changed

1 file changed

+32
-5
lines changed

java/com/google/copybara/util/SequenceGlob.java

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616

1717
package com.google.copybara.util;
1818

19-
import static com.google.common.collect.ImmutableList.toImmutableList;
20-
2119
import com.google.common.collect.ImmutableList;
20+
import com.google.common.collect.ImmutableSet;
2221
import com.google.copybara.util.GlobAtom.AtomType;
22+
import java.nio.file.FileSystem;
2323
import java.nio.file.Path;
2424
import java.nio.file.PathMatcher;
2525
import net.starlark.java.eval.EvalException;
@@ -38,13 +38,40 @@ String toStringWithParentheses(boolean isRootGlob) {
3838
}
3939

4040
/**
 * Builds the matcher for this glob's include list, evaluated against a root directory.
 *
 * <p>This is a diff rendering: a code line preceded by an {@code NN-} marker belongs to the
 * removed implementation, and one preceded by an {@code NN+} marker to the added one. The
 * added implementation resolves every include pattern against {@code root} once, stores the
 * results in an immutable set, and matches a path by looking up its normalized form.
 */
@Override
41-
public PathMatcher relativeTo(Path path) {
41+
public PathMatcher relativeTo(Path root) {
// Added: resolve each include pattern against root up front so a match is a set lookup.
42+
ImmutableSet.Builder<Path> paths = ImmutableSet.builder();
43+
for (GlobAtom atom : include) {
44+
paths.add(resolvePath(root, atom.pattern()));
45+
}
46+
final ImmutableSet<Path> matchPaths = paths.build();
4247
return new ReadablePathMatcher(
// Removed: one matcher per include atom, combined via FileUtil.anyPathMatcher.
43-
FileUtil.anyPathMatcher(
44-
include.stream().map(g -> g.matcher(path)).collect(toImmutableList())),
48+
new PathMatcher() {
49+
@Override
// The candidate is normalized before the lookup.
// NOTE(review): the set entries come from resolvePath, which normalizes root but not the
// appended pattern; confirm patterns never contain "." or ".." segments, otherwise such
// entries would presumably never match.
50+
public boolean matches(Path path) {
51+
return matchPaths.contains(path.normalize());
52+
}
53+
54+
@Override
// Delegate to the enclosing glob's toString() instead of the anonymous class's default.
55+
public String toString() {
56+
return SequenceGlob.this.toString();
57+
}
58+
},
4559
this.toString());
4660
}
4761

62+
/**
 * Returns the path formed by appending {@code pattern} to the normalized {@code root}, created
 * on the file system that {@code root} belongs to.
 *
 * <p>NOTE(review): the result is built by string concatenation and is not normalized after
 * {@code pattern} is appended; confirm callers only pass plain relative file paths.
 *
 * @param root directory the pattern is resolved against
 * @param pattern text appended after the root and a separator
 * @return the path for {@code root + separator + pattern}
 */
private Path resolvePath(Path root, String pattern) {
63+
// Logic from GlobAtom.SINGLE_FILE.matcher
64+
FileSystem fs = root.getFileSystem();
65+
String rootStr = root.normalize().toString();
66+
String separator = fs.getSeparator();
67+
// Make sure the root ends with a separator so the pattern starts a new path segment.
68+
if (!rootStr.endsWith(separator)) {
69+
rootStr += separator;
70+
}
71+
72+
return fs.getPath(rootStr + pattern);
73+
}
74+
4875
public static SequenceGlob ofStarlarkList(StarlarkList<?> patterns) throws EvalException {
4976
ImmutableList.Builder<GlobAtom> atoms = ImmutableList.builder();
5077
for (Object pattern : patterns) {

0 commit comments

Comments
 (0)