Skip to content

Commit 9613109

Browse files
author
Jonathan Ellis
committed
SimpleMappedReader no longer closes its ReaderSupplier
1 parent f9d0014 commit 9613109

File tree

12 files changed

+103
-109
lines changed

12 files changed

+103
-109
lines changed

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,12 @@ Compressing the vectors with product quantization is done as follows:
174174

175175
Then we can wire up the compressed vectors to a two-phase search by getting the fast ApproximateScoreFunction from PQVectors, and the Reranker from the index View:
176176
```java
177-
ReaderSupplier rs = ReaderSupplierFactor.open(indexPath);
177+
ReaderSupplier rs = ReaderSupplierFactory.open(indexPath);
178178
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
179179
// load the PQVectors that we just wrote to disk
180-
try (RandomAccessReader in = new SimpleMappedReader(pqPath)) {
180+
try (ReaderSupplier pqSupplier = ReaderSupplierFactory.open(pqPath);
181+
RandomAccessReader in = pqSupplier.get())
182+
{
181183
PQVectors pqv = PQVectors.load(in);
182184
// SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors,
183185
// then reranks with the exact vectors that are stored on disk in the index

UPGRADING.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@
77
performance.
88

99
## API changes
10-
- MemorySegmentReader.Supplier must now be explicitly closed, instead of being
11-
closed by the first Reader created from it.
10+
- MemorySegmentReader.Supplier and SimpleMappedReader.Supplier must now be explicitly closed, instead of being
11+
closed by the first Reader created from them.
1212
- OnDiskGraphIndex no longer closes its ReaderSupplier
1313

14-
# Upgrading from 3.0.x to 3.0.6
14+
### API changes in 3.0.6
1515

16-
## API changes
16+
These were released in 3.0.6 but are spiritually part of 4.0.
1717

1818
- `VectorCompressor.encodeAll()` now returns a `CompressedVectors` object instead of a `ByteSequence<?>[]`.
1919
This provides better encapsulation of the compression functionality while also allowing for more efficient

jvector-base/src/main/java/io/github/jbellis/jvector/disk/SimpleMappedReader.java

Lines changed: 21 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@
3434
public class SimpleMappedReader extends ByteBufferReader {
3535
private static final Logger LOG = Logger.getLogger(SimpleMappedReader.class.getName());
3636

37-
private static final Unsafe unsafe = getUnsafe();
38-
3937
private static Unsafe getUnsafe() {
4038
try {
4139
Field f = Unsafe.class.getDeclaredField("theUnsafe");
@@ -47,60 +45,45 @@ private static Unsafe getUnsafe() {
4745
}
4846
}
4947

50-
public SimpleMappedReader(Path path) throws IOException {
51-
this(path.toString());
52-
}
53-
54-
public SimpleMappedReader(String name) throws IOException {
55-
this(getMappedByteBuffer(name));
56-
}
5748

58-
private SimpleMappedReader(MappedByteBuffer mbb) {
49+
SimpleMappedReader(MappedByteBuffer mbb) {
5950
super(mbb);
6051
}
6152

62-
private static MappedByteBuffer getMappedByteBuffer(String name) throws IOException {
63-
var raf = new RandomAccessFile(name, "r");
64-
if (raf.length() > Integer.MAX_VALUE) {
65-
throw new RuntimeException("MappedRandomAccessReader doesn't support large files");
66-
}
67-
MappedByteBuffer mbb = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, raf.length());
68-
mbb.load();
69-
raf.close();
70-
return mbb;
71-
}
72-
7353
@Override
7454
public void close() {
75-
if (unsafe != null) {
76-
try {
77-
unsafe.invokeCleaner(bb);
78-
} catch (IllegalArgumentException e) {
79-
// empty catch, this was a duplicated/indirect buffer or
80-
// otherwise not cleanable
81-
}
82-
}
83-
}
84-
85-
public SimpleMappedReader duplicate() {
86-
return new SimpleMappedReader((MappedByteBuffer) bb.duplicate());
55+
// Individual readers don't close anything
8756
}
8857

8958
public static class Supplier implements ReaderSupplier {
90-
private final SimpleMappedReader smr;
59+
private final MappedByteBuffer buffer;
60+
private static final Unsafe unsafe = getUnsafe();
9161

9262
public Supplier(Path path) throws IOException {
93-
smr = new SimpleMappedReader(path);
63+
try (var raf = new RandomAccessFile(path.toString(), "r")) {
64+
if (raf.length() > Integer.MAX_VALUE) {
65+
throw new RuntimeException("SimpleMappedReader doesn't support files above 2GB");
66+
}
67+
this.buffer = raf.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, raf.length());
68+
this.buffer.load();
69+
}
9470
}
9571

9672
@Override
97-
public RandomAccessReader get() {
98-
return smr.duplicate();
73+
public SimpleMappedReader get() {
74+
return new SimpleMappedReader((MappedByteBuffer) buffer.duplicate());
9975
}
10076

10177
@Override
10278
public void close() {
103-
smr.close();
79+
if (unsafe != null) {
80+
try {
81+
unsafe.invokeCleaner(buffer);
82+
} catch (IllegalArgumentException e) {
83+
// empty catch, this was a duplicated/indirect buffer or
84+
// otherwise not cleanable
85+
}
86+
}
10487
}
10588
}
10689
}

jvector-base/src/main/java/io/github/jbellis/jvector/quantization/MutablePQVectors.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717
package io.github.jbellis.jvector.quantization;
1818

1919
import io.github.jbellis.jvector.vector.VectorizationProvider;
20-
import java.util.concurrent.atomic.AtomicInteger;
2120
import io.github.jbellis.jvector.vector.types.ByteSequence;
2221
import io.github.jbellis.jvector.vector.types.VectorFloat;
2322
import io.github.jbellis.jvector.vector.types.VectorTypeSupport;
2423

24+
import java.util.concurrent.atomic.AtomicInteger;
25+
2526
import static java.lang.Math.max;
2627

28+
/**
29+
* A threadsafe mutable PQVectors implementation that grows dynamically as needed.
30+
*/
2731
public class MutablePQVectors extends PQVectors implements MutableCompressedVectors<VectorFloat<?>> {
2832
private static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();
2933

jvector-examples/src/main/java/io/github/jbellis/jvector/example/SiftSmall.java

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import io.github.jbellis.jvector.disk.RandomAccessReader;
2020
import io.github.jbellis.jvector.disk.ReaderSupplier;
2121
import io.github.jbellis.jvector.disk.ReaderSupplierFactory;
22-
import io.github.jbellis.jvector.disk.SimpleMappedReader;
2322
import io.github.jbellis.jvector.example.util.SiftLoader;
2423
import io.github.jbellis.jvector.graph.GraphIndex;
2524
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
@@ -158,11 +157,12 @@ public static void siftPersisted(List<VectorFloat<?>> baseVectors, List<VectorFl
158157

159158
// on-disk indexes require a ReaderSupplier (not just a Reader) because we will want it to
160159
// open additional readers for searching
161-
ReaderSupplier rs = ReaderSupplierFactory.open(indexPath);
162-
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
163-
// measure our recall against the (exactly computed) ground truth
164-
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> SearchScoreProvider.exact(q, VectorSimilarityFunction.EUCLIDEAN, ravv);
165-
testRecall(index, queryVectors, groundTruth, sspFactory);
160+
try (ReaderSupplier rs = ReaderSupplierFactory.open(indexPath)) {
161+
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
162+
// measure our recall against the (exactly computed) ground truth
163+
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> SearchScoreProvider.exact(q, VectorSimilarityFunction.EUCLIDEAN, ravv);
164+
testRecall(index, queryVectors, groundTruth, sspFactory);
165+
}
166166
}
167167

168168
// diskann-style index with PQ
@@ -190,20 +190,23 @@ public static void siftDiskAnn(List<VectorFloat<?>> baseVectors, List<VectorFloa
190190
pqv.write(out);
191191
}
192192

193-
ReaderSupplier rs = ReaderSupplierFactory.open(indexPath);
194-
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
195-
// load the PQVectors that we just wrote to disk
196-
try (RandomAccessReader in = new SimpleMappedReader(pqPath)) {
197-
PQVectors pqv = PQVectors.load(in);
198-
// SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors,
199-
// then reranks with the exact vectors that are stored on disk in the index
200-
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> {
201-
ApproximateScoreFunction asf = pqv.precomputedScoreFunctionFor(q, VectorSimilarityFunction.EUCLIDEAN);
202-
ExactScoreFunction reranker = index.getView().rerankerFor(q, VectorSimilarityFunction.EUCLIDEAN);
203-
return new SearchScoreProvider(asf, reranker);
204-
};
205-
// measure our recall against the (exactly computed) ground truth
206-
testRecall(index, queryVectors, groundTruth, sspFactory);
193+
try (ReaderSupplier rs = ReaderSupplierFactory.open(indexPath)) {
194+
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
195+
// load the PQVectors that we just wrote to disk
196+
try (ReaderSupplier pqSupplier = ReaderSupplierFactory.open(pqPath);
197+
RandomAccessReader in = pqSupplier.get())
198+
{
199+
PQVectors pqv = PQVectors.load(in);
200+
// SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors,
201+
// then reranks with the exact vectors that are stored on disk in the index
202+
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> {
203+
ApproximateScoreFunction asf = pqv.precomputedScoreFunctionFor(q, VectorSimilarityFunction.EUCLIDEAN);
204+
ExactScoreFunction reranker = index.getView().rerankerFor(q, VectorSimilarityFunction.EUCLIDEAN);
205+
return new SearchScoreProvider(asf, reranker);
206+
};
207+
// measure our recall against the (exactly computed) ground truth
208+
testRecall(index, queryVectors, groundTruth, sspFactory);
209+
}
207210
}
208211
}
209212

@@ -255,7 +258,9 @@ public static void siftDiskAnnLTM(List<VectorFloat<?>> baseVectors, List<VectorF
255258
// searching the index does not change
256259
ReaderSupplier rs = ReaderSupplierFactory.open(indexPath);
257260
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
258-
try (RandomAccessReader in = new SimpleMappedReader(pqPath)) {
261+
try (ReaderSupplier pqSupplier = ReaderSupplierFactory.open(pqPath);
262+
RandomAccessReader in = pqSupplier.get())
263+
{
259264
var pqvSearch = PQVectors.load(in);
260265
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> {
261266
ApproximateScoreFunction asf = pqvSearch.precomputedScoreFunctionFor(q, VectorSimilarityFunction.EUCLIDEAN);
@@ -316,7 +321,9 @@ public static void siftDiskAnnLTMWithNVQ(List<VectorFloat<?>> baseVectors, List<
316321
// searching the index does not change
317322
ReaderSupplier rs = ReaderSupplierFactory.open(indexPath);
318323
OnDiskGraphIndex index = OnDiskGraphIndex.load(rs);
319-
try (RandomAccessReader in = new SimpleMappedReader(pqPath)) {
324+
try (ReaderSupplier pqSupplier = ReaderSupplierFactory.open(pqPath);
325+
RandomAccessReader in = pqSupplier.get())
326+
{
320327
var pqvSearch = PQVectors.load(in);
321328
Function<VectorFloat<?>, SearchScoreProvider> sspFactory = q -> {
322329
ApproximateScoreFunction asf = pqvSearch.precomputedScoreFunctionFor(q, VectorSimilarityFunction.EUCLIDEAN);

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/GraphIndexBuilderTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,8 @@ public void testSaveAndLoad() throws IOException {
120120
}
121121

122122
builder = newBuilder.get();
123-
try(var reader = new SimpleMappedReader(indexDataPath)) {
124-
builder.load(reader);
123+
try(var readerSupplier = new SimpleMappedReader.Supplier(indexDataPath)) {
124+
builder.load(readerSupplier.get());
125125
}
126126

127127
assertEquals(ravv.size(), builder.graph.size());

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/TestDeletions.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,8 @@ public void testCleanup() throws IOException {
115115
graph.save(out);
116116
}
117117
var b2 = new GraphIndexBuilder(ravv, VectorSimilarityFunction.COSINE, 2, 10, 1.0f, 1.0f);
118-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString())) {
119-
b2.load(marr);
118+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath)) {
119+
b2.load(readerSupplier.get());
120120
}
121121
var reloadedGraph = b2.getGraph();
122122
assertGraphEquals(graph, reloadedGraph);

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestGraphCache.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ public void tearDown() {
5656

5757
@Test
5858
public void testGraphCacheLoading() throws Exception {
59-
try (var marr = new SimpleMappedReader(onDiskGraphIndexPath.toAbsolutePath().toString());
60-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate))
59+
try (var readerSupplier = new SimpleMappedReader.Supplier(onDiskGraphIndexPath);
60+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier))
6161
{
6262
var none = GraphCache.load(onDiskGraph, -1);
6363
assertEquals(0, none.ramBytesUsed());

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ public void testSimpleGraphs() throws Exception {
7171
var outputPath = testDirectory.resolve("test_graph_" + graph.getClass().getSimpleName());
7272
var ravv = new TestVectorGraph.CircularFloatVectorValues(graph.size());
7373
TestUtil.writeGraph(graph, ravv, outputPath);
74-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
75-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate))
74+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
75+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier))
7676
{
7777
TestUtil.assertGraphEquals(graph, onDiskGraph);
7878
try (var onDiskView = onDiskGraph.getView()) {
@@ -114,8 +114,8 @@ public void testRenumberingOnDelete() throws IOException {
114114
var outputPath = testDirectory.resolve("renumbered_graph");
115115
OnDiskGraphIndex.write(original, ravv, oldToNewMap, outputPath);
116116
// check that written graph ordinals match the new ones
117-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
118-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate);
117+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
118+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);
119119
var onDiskView = onDiskGraph.getView())
120120
{
121121
// entry point renumbering
@@ -146,8 +146,8 @@ public void testReorderingRenumbering() throws IOException {
146146
var outputPath = testDirectory.resolve("renumbered_graph");
147147
OnDiskGraphIndex.write(original, ravv, oldToNewMap, outputPath);
148148
// check that written graph ordinals match the new ones
149-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
150-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate);
149+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath);
150+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);
151151
var onDiskView = onDiskGraph.getView())
152152
{
153153
assertEquals(onDiskView.getVector(0), ravv.getVector(2));
@@ -175,8 +175,8 @@ public void testReorderingWithHoles() throws IOException {
175175
var outputPath = testDirectory.resolve("renumbered_graph");
176176
OnDiskGraphIndex.write(original, ravv, oldToNewMap, outputPath);
177177
// check that written graph ordinals match the new ones
178-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
179-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate);
178+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
179+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);
180180
var onDiskView = onDiskGraph.getView())
181181
{
182182
assertEquals(onDiskView.getVector(0), ravv.getVector(2));
@@ -201,8 +201,8 @@ public void testLargeGraph() throws Exception
201201
var ravv = new TestVectorGraph.CircularFloatVectorValues(graph.size());
202202
TestUtil.writeGraph(graph, ravv, outputPath);
203203

204-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
205-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate);
204+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
205+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);
206206
var cachedOnDiskGraph = new CachingGraphIndex(onDiskGraph))
207207
{
208208
TestUtil.assertGraphEquals(graph, onDiskGraph);
@@ -220,8 +220,8 @@ public void testLargeGraph() throws Exception
220220
public void testV0Read() throws IOException {
221221
// using a random graph from testLargeGraph generated on old version
222222
var file = new File("resources/version0.odgi");
223-
try (var smr = new SimpleMappedReader(file.getAbsolutePath());
224-
var onDiskGraph = OnDiskGraphIndex.load(smr::duplicate);
223+
try (var readerSupplier = new SimpleMappedReader.Supplier(file.toPath());
224+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier);
225225
var onDiskView = onDiskGraph.getView())
226226
{
227227
assertEquals(32, onDiskGraph.maxDegree);
@@ -241,8 +241,8 @@ public void testV0Write() throws IOException {
241241
var fileIn = new File("resources/version0.odgi");
242242
var fileOut = File.createTempFile("version0", ".odgi");
243243

244-
try (var smr = new SimpleMappedReader(fileIn.getAbsolutePath());
245-
var graph = OnDiskGraphIndex.load(smr::duplicate);
244+
try (var readerSupplier = new SimpleMappedReader.Supplier(fileIn.toPath());
245+
var graph = OnDiskGraphIndex.load(readerSupplier);
246246
var view = graph.getView())
247247
{
248248
try (var writer = new OnDiskGraphIndexWriter.Builder(graph, fileOut.toPath())
@@ -265,8 +265,8 @@ public void testV0WriteIncremental() throws IOException {
265265
var fileIn = new File("resources/version0.odgi");
266266
var fileOut = File.createTempFile("version0", ".odgi");
267267

268-
try (var smr = new SimpleMappedReader(fileIn.getAbsolutePath());
269-
var graph = OnDiskGraphIndex.load(smr::duplicate);
268+
try (var readerSupplier = new SimpleMappedReader.Supplier(fileIn.toPath());
269+
var graph = OnDiskGraphIndex.load(readerSupplier);
270270
var view = graph.getView())
271271
{
272272
try (var writer = new OnDiskGraphIndexWriter.Builder(graph, fileOut.toPath())
@@ -340,10 +340,10 @@ public void testIncrementalWrites() throws IOException {
340340
}
341341

342342
// graph and vectors should be identical
343-
try (var bulkMarr = new SimpleMappedReader(bulkPath.toAbsolutePath().toString());
344-
var bulkGraph = OnDiskGraphIndex.load(bulkMarr::duplicate);
345-
var incrementalMarr = new SimpleMappedReader(incrementalFadcPath.toAbsolutePath().toString());
346-
var incrementalGraph = OnDiskGraphIndex.load(incrementalMarr::duplicate);
343+
try (var bulkReaderSupplier = new SimpleMappedReader.Supplier(bulkPath.toAbsolutePath());
344+
var bulkGraph = OnDiskGraphIndex.load(bulkReaderSupplier);
345+
var incrementalReaderSupplier = new SimpleMappedReader.Supplier(incrementalFadcPath.toAbsolutePath());
346+
var incrementalGraph = OnDiskGraphIndex.load(incrementalReaderSupplier);
347347
var incrementalView = incrementalGraph.getView())
348348
{
349349
assertTrue(OnDiskGraphIndex.areHeadersEqual(incrementalGraph, bulkGraph));

jvector-tests/src/test/java/io/github/jbellis/jvector/quantization/TestADCGraphIndex.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ public void testFusedGraph() throws Exception {
6262

6363
TestUtil.writeFusedGraph(graph, ravv, pqv, outputPath);
6464

65-
try (var marr = new SimpleMappedReader(outputPath.toAbsolutePath().toString());
66-
var onDiskGraph = OnDiskGraphIndex.load(marr::duplicate, 0);
65+
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath);
66+
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier, 0);
6767
var cachedOnDiskGraph = new CachingGraphIndex(onDiskGraph, 5))
6868
{
6969
TestUtil.assertGraphEquals(graph, onDiskGraph);

0 commit comments

Comments
 (0)