Skip to content

Commit 431538e

Browse files
author
Jonathan Ellis
committed
ada2-1M
1 parent ced9068 commit 431538e

File tree

1 file changed

+20
-16
lines changed

1 file changed

+20
-16
lines changed

jvector-examples/src/main/java/io/github/jbellis/jvector/example/util/MultiFileDatasource.java

Lines changed: 20 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,7 @@ public DataSet load() throws IOException {
50 50
var baseVectors = SiftLoader.readFvecs("fvec/" + basePath);
51 51
var queryVectors = SiftLoader.readFvecs("fvec/" + queriesPath);
52 52
var gtVectors = SiftLoader.readIvecs("fvec/" + groundTruthPath);
53-
return DataSet.getScrubbedDataSet(name, VectorSimilarityFunction.COSINE, baseVectors, queryVectors, gtVectors);
53+
return DataSet.getScrubbedDataSet(name, VectorSimilarityFunction.DOT_PRODUCT, baseVectors, queryVectors, gtVectors);
54 54
}
55 55

56 56
public static Map<String, MultiFileDatasource> byName = new HashMap<>() {{
@@ -83,24 +83,28 @@ public DataSet load() throws IOException {
83 83
"wikipedia_squad/100k/text-embedding-3-small_1536_100000_query_vectors_10000.fvec",
84 84
"wikipedia_squad/100k/text-embedding-3-small_1536_100000_indices_query_10000.ivec"));
85 85
put("ada002-100k", new MultiFileDatasource("ada002-100k",
86-
"wikipedia_squad/100k/ada_002_100000_base_vectors.fvec",
87-
"wikipedia_squad/100k/ada_002_100000_query_vectors_10000.fvec",
88-
"wikipedia_squad/100k/ada_002_100000_indices_query_10000.ivec"));
86+
"wikipedia_squad/100k/ada_002_100000_base_vectors.fvec",
87+
"wikipedia_squad/100k/ada_002_100000_query_vectors_10000.fvec",
88+
"wikipedia_squad/100k/ada_002_100000_indices_query_10000.ivec"));
89+
put("ada002-1M", new MultiFileDatasource("ada002-1M",
90+
"wikipedia_squad/1M/ada_002_1000000_base_vectors.fvec",
91+
"wikipedia_squad/1M/ada_002_1000000_query_vectors_10000.fvec",
92+
"wikipedia_squad/1M/ada_002_1000000_indices_query_10000.ivec"));
89 93
put("e5-small-v2-100k", new MultiFileDatasource("e5-small-v2-100k",
90-
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_base_vectors.fvec",
91-
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_query_vectors_10000.fvec",
92-
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_indices_query_10000.ivec"));
94+
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_base_vectors.fvec",
95+
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_query_vectors_10000.fvec",
96+
"wikipedia_squad/100k/intfloat_e5-small-v2_100000_indices_query_10000.ivec"));
93 97
put("e5-base-v2-100k", new MultiFileDatasource("e5-base-v2-100k",
94-
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_base_vectors.fvec",
95-
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_query_vectors_10000.fvec",
96-
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_indices_query_10000.ivec"));
98+
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_base_vectors.fvec",
99+
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_query_vectors_10000.fvec",
100+
"wikipedia_squad/100k/intfloat_e5-base-v2_100000_indices_query_10000.ivec"));
97 101
put("e5-large-v2-100k", new MultiFileDatasource("e5-large-v2-100k",
98-
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_base_vectors.fvec",
99-
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_query_vectors_10000.fvec",
100-
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_indices_query_10000.ivec"));
102+
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_base_vectors.fvec",
103+
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_query_vectors_10000.fvec",
104+
"wikipedia_squad/100k/intfloat_e5-large-v2_100000_indices_query_10000.ivec"));
101 105
put("gecko-100k", new MultiFileDatasource("gecko-100k",
102-
"wikipedia_squad/100k/textembedding-gecko_100000_base_vectors.fvec",
103-
"wikipedia_squad/100k/textembedding-gecko_100000_query_vectors_10000.fvec",
104-
"wikipedia_squad/100k/textembedding-gecko_100000_indices_query_10000.ivec"));
106+
"wikipedia_squad/100k/textembedding-gecko_100000_base_vectors.fvec",
107+
"wikipedia_squad/100k/textembedding-gecko_100000_query_vectors_10000.fvec",
108+
"wikipedia_squad/100k/textembedding-gecko_100000_indices_query_10000.ivec"));
105 109
}};
106 110
}

0 commit comments

Comments
 (0)