Skip to content

Commit 3642fc9

Browse files
Fix OnDiskGraphIndex when ordinal remapping leaves holes
Solves #399. Increases the OnDiskGraphIndex format to version 4 so that the header also serializes idUpperBound.
1 parent 9bef39b commit 3642fc9

File tree

4 files changed

+66
-15
lines changed

4 files changed

+66
-15
lines changed

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/CommonHeader.java

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -30,13 +30,15 @@ class CommonHeader {
3030
public final int dimension;
3131
public final int entryNode;
3232
public final int maxDegree;
33+
public final int idUpperBound;
3334

34-
CommonHeader(int version, int size, int dimension, int entryNode, int maxDegree) {
35+
/**
 * Creates a header; every argument is stored unchanged in the final field of the same name.
 *
 * @param version      format version; {@code write} emits {@code idUpperBound} only when this is >= 4
 * @param size         stored as-is (the writer in this commit passes {@code graph.size()})
 * @param dimension    stored as-is
 * @param entryNode    stored as-is (the writer passes the entry node after {@code ordinalMapper.oldToNew})
 * @param maxDegree    stored as-is (the writer passes {@code graph.maxDegree()})
 * @param idUpperBound stored as-is; the writer passes {@code ordinalMapper.maxOrdinal() + 1}, and
 *                     {@code load} falls back to {@code size} for files older than version 4
 */
CommonHeader(int version, int size, int dimension, int entryNode, int maxDegree, int idUpperBound) {
3536
this.version = version;
3637
this.size = size;
3738
this.dimension = dimension;
3839
this.entryNode = entryNode;
3940
this.maxDegree = maxDegree;
41+
this.idUpperBound = idUpperBound;
4042
}
4143

4244
void write(DataOutput out) throws IOException {
@@ -48,6 +50,10 @@ void write(DataOutput out) throws IOException {
4850
out.writeInt(dimension);
4951
out.writeInt(entryNode);
5052
out.writeInt(maxDegree);
53+
54+
if (version >= 4) {
55+
out.writeInt(idUpperBound);
56+
}
5157
}
5258

5359
static CommonHeader load(RandomAccessReader reader) throws IOException {
@@ -65,10 +71,22 @@ static CommonHeader load(RandomAccessReader reader) throws IOException {
6571
int entryNode = reader.readInt();
6672
int maxDegree = reader.readInt();
6773

68-
return new CommonHeader(version, size, dimension, entryNode, maxDegree);
74+
int idUpperBound = size;
75+
if (version >= 4) {
76+
idUpperBound = reader.readInt();
77+
}
78+
79+
return new CommonHeader(version, size, dimension, entryNode, maxDegree, idUpperBound);
6980
}
7081

7182
/**
 * Returns the serialized length of this header in bytes, computed as a count of
 * int-sized fields multiplied by {@code Integer.BYTES}.
 *
 * NOTE: this is the header's byte size, not the {@code size} field; the added
 * implementation declares a local named {@code size} that shadows that field.
 */
int size() {
72-
return ((version >= 3 ? 2 : 0) + 4) * Integer.BYTES;
83+
// base count of four ints, present for every version
int size = 4;
84+
// two more ints from version 3 onwards (presumably magic + version — TODO confirm against write())
if (version >= 3) {
85+
size += 2;
86+
}
87+
// one more int from version 4 onwards: idUpperBound, which write() emits under the same version check
if (version >= 4) {
88+
size += 1;
89+
}
90+
return size * Integer.BYTES;
7391
}
7492
}

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndex.java

Lines changed: 33 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,6 @@
3838
import java.util.Map;
3939
import java.util.Set;
4040
import java.util.stream.Collectors;
41-
import java.util.stream.IntStream;
4241

4342
/**
4443
* A class representing a graph index stored on disk. The base graph contains only graph structure.
@@ -49,7 +48,7 @@
4948
*/
5049
public class OnDiskGraphIndex implements GraphIndex, AutoCloseable, Accountable
5150
{
52-
public static final int CURRENT_VERSION = 3;
51+
public static final int CURRENT_VERSION = 4;
5352
static final int MAGIC = 0xFFFF0D61; // FFFF to distinguish from old graphs, which should never start with a negative size "ODGI"
5453
static final VectorTypeSupport vectorTypeSupport = VectorizationProvider.getInstance().getVectorTypeSupport();
5554
final ReaderSupplier readerSupplier;
@@ -58,6 +57,7 @@ public class OnDiskGraphIndex implements GraphIndex, AutoCloseable, Accountable
5857
final int maxDegree;
5958
final int dimension;
6059
final int entryNode;
60+
final int idUpperBound;
6161
final int inlineBlockSize; // total size of all inline elements contributed by features
6262
final EnumMap<FeatureId, ? extends Feature> features;
6363
final EnumMap<FeatureId, Integer> inlineOffsets;
@@ -71,6 +71,7 @@ public class OnDiskGraphIndex implements GraphIndex, AutoCloseable, Accountable
7171
this.dimension = header.common.dimension;
7272
this.entryNode = header.common.entryNode;
7373
this.maxDegree = header.common.maxDegree;
74+
this.idUpperBound = header.common.idUpperBound;
7475
this.features = header.features;
7576
this.neighborsOffset = neighborsOffset;
7677
var inlineBlockSize = 0;
@@ -120,9 +121,30 @@ public int maxDegree() {
120121
}
121122

122123
@Override
123-
public NodesIterator getNodes()
124-
{
125-
return NodesIterator.fromPrimitiveIterator(IntStream.range(0, size).iterator(), size);
124+
// Returns the idUpperBound field, which the constructor copies from header.common.idUpperBound.
// getNodes() uses this value as the exclusive upper limit of the ordinals it scans.
public int getIdUpperBound() {
125+
return idUpperBound;
126+
}
127+
128+
/**
 * Scans the on-disk node records for ordinals {@code 0 .. getIdUpperBound() - 1} and returns an
 * iterator over those whose first stored int is not {@code -1}.
 *
 * A fresh reader is obtained from {@code readerSupplier} for the scan and closed by the
 * try-with-resources when the method returns.
 *
 * NOTE(review): {@code valid_nodes} is allocated with {@code size} slots. If more than
 * {@code size} records pass the check, {@code valid_nodes[pos++]} throws
 * ArrayIndexOutOfBoundsException; if fewer pass, the trailing slots stay 0 while the iterator
 * is still constructed with {@code size} — confirm the header invariants guarantee exactly
 * {@code size} matches.
 *
 * @return an {@code ArrayNodesIterator} built from the collected ordinals and {@code size}
 * @throws UncheckedIOException wrapping any IOException raised while reading
 */
@Override
129+
public NodesIterator getNodes() {
130+
try (var reader = readerSupplier.get()) {
131+
int[] valid_nodes = new int[size];
132+
int pos = 0;
133+
for (int node = 0; node < getIdUpperBound(); node++) {
134+
// byte offset of this ordinal's record: records are fixed-width and start at neighborsOffset;
// the (long) casts keep the multiplication in 64-bit arithmetic
long node_offset = neighborsOffset +
135+
(node * ((long) Integer.BYTES // ids
136+
+ inlineBlockSize // inline elements
137+
+ (Integer.BYTES * (long) (maxDegree + 1)) // neighbor count + neighbors
138+
));
139+
reader.seek(node_offset);
140+
// the first int of the record is compared against -1; records that read -1 are skipped
// (presumably -1 marks a hole left by ordinal remapping — TODO confirm against the writer)
if (reader.readInt() != -1) {
141+
valid_nodes[pos++] = node;
142+
}
143+
}
144+
return new NodesIterator.ArrayNodesIterator(valid_nodes, size);
145+
} catch (IOException e) {
146+
throw new UncheckedIOException(e);
147+
}
126148
}
127149

128150
@Override
@@ -250,11 +272,17 @@ public int entryNode() {
250272
return entryNode;
251273
}
252274

275+
// Returns idUpperBound unchanged.
// NOTE(review): the enclosing class is not visible in this hunk; it sits between entryNode() and
// liveNodes()/close(), so presumably this is the view class exposing the index's idUpperBound — confirm.
@Override
276+
public int getIdUpperBound() {
277+
return idUpperBound;
278+
}
279+
253280
@Override
254281
public Bits liveNodes() {
255282
return Bits.ALL;
256283
}
257284

285+
258286
@Override
259287
public void close() throws IOException {
260288
reader.close();

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/OnDiskGraphIndexWriter.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ private OnDiskGraphIndexWriter(RandomAccessWriter out,
6969
this.startOffset = startOffset;
7070

7171
// create a mock Header to determine the correct size
72-
var ch = new CommonHeader(version, 0, dimension, view.entryNode(), graph.maxDegree());
72+
var ch = new CommonHeader(version, 0, dimension, view.entryNode(), graph.maxDegree(), 0);
7373
var placeholderHeader = new Header(ch, featureMap);
7474
this.headerSize = placeholderHeader.size();
7575
}
@@ -241,7 +241,8 @@ public synchronized void writeHeader() throws IOException {
241241
graph.size(),
242242
dimension,
243243
ordinalMapper.oldToNew(view.entryNode()),
244-
graph.maxDegree());
244+
graph.maxDegree(),
245+
ordinalMapper.maxOrdinal() + 1);
245246
var header = new Header(commonHeader, featureMap);
246247
header.write(out);
247248
out.flush();

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/disk/TestOnDiskGraphIndex.java

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
import io.github.jbellis.jvector.disk.SimpleMappedReader;
2323
import io.github.jbellis.jvector.graph.GraphIndexBuilder;
2424
import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues;
25+
import io.github.jbellis.jvector.graph.NodesIterator;
2526
import io.github.jbellis.jvector.graph.RandomAccessVectorValues;
2627
import io.github.jbellis.jvector.graph.TestVectorGraph;
2728
import io.github.jbellis.jvector.quantization.PQVectors;
@@ -35,11 +36,7 @@
3536
import java.io.IOException;
3637
import java.nio.file.Files;
3738
import java.nio.file.Path;
38-
import java.util.EnumSet;
39-
import java.util.HashMap;
40-
import java.util.List;
41-
import java.util.Map;
42-
import java.util.Set;
39+
import java.util.*;
4340

4441
import static io.github.jbellis.jvector.TestUtil.getNeighborNodes;
4542
import static org.junit.Assert.*;
@@ -180,6 +177,13 @@ public void testReorderingWithHoles() throws IOException {
180177
var onDiskView = onDiskGraph.getView())
181178
{
182179
assertEquals(11, onDiskGraph.getIdUpperBound());
180+
181+
Set<Integer> nodesInGraph = new HashSet<>();
182+
for (NodesIterator it = onDiskGraph.getNodes(); it.hasNext(); ) {
183+
nodesInGraph.add(it.next());
184+
}
185+
assertEquals(nodesInGraph, Set.of(0, 2, 10));
186+
183187
assertEquals(onDiskView.getVector(0), ravv.getVector(2));
184188
assertEquals(onDiskView.getVector(10), ravv.getVector(1));
185189
assertEquals(onDiskView.getVector(2), ravv.getVector(0));

0 commit comments

Comments
 (0)