Skip to content

Commit ddabdb8

Browse files
New MutableGraphIndex and ImmutableGraphIndex interfaces (#534)
* Add MutableGraphIndex and ImmutableGraphIndex to clarify intent and hide implementations * Fix docs * Use "level" when referring to integers instead of "layer" * Now OnHeapGraphIndex will return a ConcurrentGraphIndexView while it is being mutated by GraphIndexBuilder and a FrozenView once mutations are done. * Make allMutationsCompleted volatile * Clarifying comment in OnHeapGraphIndex.getView * Make sure that the view gets instantiated when actually writing the graph.
1 parent 9dcc179 commit ddabdb8

23 files changed

Lines changed: 651 additions & 324 deletions

benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/RecallWithRandomVectorsBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ public class RecallWithRandomVectorsBenchmark {
5252
private ArrayList<VectorFloat<?>> baseVectors;
5353
private ArrayList<VectorFloat<?>> queryVectors;
5454
private GraphIndexBuilder graphIndexBuilder;
55-
private GraphIndex graphIndex;
55+
private ImmutableGraphIndex graphIndex;
5656
private PQVectors pqVectors;
5757

5858
// Add ground truth storage

benchmarks-jmh/src/main/java/io/github/jbellis/jvector/bench/StaticSetVectorsBenchmark.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public class StaticSetVectorsBenchmark {
4545
private List<VectorFloat<?>> queryVectors;
4646
private List<List<Integer>> groundTruth;
4747
private GraphIndexBuilder graphIndexBuilder;
48-
private GraphIndex graphIndex;
48+
private ImmutableGraphIndex graphIndex;
4949
int originalDimension;
5050

5151
@Setup

jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java

Lines changed: 54 additions & 43 deletions
Large diffs are not rendered by default.

jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphSearcher.java

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
package io.github.jbellis.jvector.graph;
2626

2727
import io.github.jbellis.jvector.annotations.Experimental;
28-
import io.github.jbellis.jvector.graph.GraphIndex.NodeAtLevel;
28+
import io.github.jbellis.jvector.graph.ImmutableGraphIndex.NodeAtLevel;
2929
import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider;
3030
import io.github.jbellis.jvector.graph.similarity.ScoreFunction;
3131
import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider;
@@ -43,10 +43,10 @@
4343

4444
/**
4545
* Searches a graph to find nearest neighbors to a query vector. For more background on the
46-
* search algorithm, see {@link GraphIndex}.
46+
* search algorithm, see {@link ImmutableGraphIndex}.
4747
*/
4848
public class GraphSearcher implements Closeable {
49-
private GraphIndex.View view;
49+
private ImmutableGraphIndex.View view;
5050

5151
// Scratch data structures that are used in each {@link #searchInternal} call. These can be expensive
5252
// to allocate, so they're cleared and reused across calls.
@@ -71,14 +71,14 @@ public class GraphSearcher implements Closeable {
7171
/**
7272
* Creates a new graph searcher from the given ImmutableGraphIndex
7373
*/
74-
public GraphSearcher(GraphIndex graph) {
74+
public GraphSearcher(ImmutableGraphIndex graph) {
7575
this(graph.getView());
7676
}
7777

7878
/**
7979
* Creates a new graph searcher from the given ImmutableGraphIndex.View
8080
*/
81-
protected GraphSearcher(GraphIndex.View view) {
81+
protected GraphSearcher(ImmutableGraphIndex.View view) {
8282
this.view = view;
8383
this.candidates = new NodeQueue(new GrowableLongHeap(100), NodeQueue.Order.MAX_HEAP);
8484
this.evictedResults = new NodesUnsorted(100);
@@ -112,7 +112,7 @@ private void initializeScoreProvider(SearchScoreProvider scoreProvider) {
112112
cachingReranker = new CachingReranker(scoreProvider);
113113
}
114114

115-
public GraphIndex.View getView() {
115+
public ImmutableGraphIndex.View getView() {
116116
return view;
117117
}
118118

@@ -129,7 +129,7 @@ public void usePruning(boolean usage) {
129129
* Convenience function for simple one-off searches. It is caller's responsibility to make sure that it
130130
* is the unique owner of the vectors instance passed in here.
131131
*/
132-
public static SearchResult search(VectorFloat<?> queryVector, int topK, RandomAccessVectorValues vectors, VectorSimilarityFunction similarityFunction, GraphIndex graph, Bits acceptOrds) {
132+
public static SearchResult search(VectorFloat<?> queryVector, int topK, RandomAccessVectorValues vectors, VectorSimilarityFunction similarityFunction, ImmutableGraphIndex graph, Bits acceptOrds) {
133133
try (var searcher = new GraphSearcher(graph)) {
134134
var ssp = DefaultSearchScoreProvider.exact(queryVector, similarityFunction, vectors);
135135
return searcher.search(ssp, topK, acceptOrds);
@@ -142,7 +142,7 @@ public static SearchResult search(VectorFloat<?> queryVector, int topK, RandomAc
142142
* Convenience function for simple one-off searches. It is caller's responsibility to make sure that it
143143
* is the unique owner of the vectors instance passed in here.
144144
*/
145-
public static SearchResult search(VectorFloat<?> queryVector, int topK, int rerankK, RandomAccessVectorValues vectors, VectorSimilarityFunction similarityFunction, GraphIndex graph, Bits acceptOrds) {
145+
public static SearchResult search(VectorFloat<?> queryVector, int topK, int rerankK, RandomAccessVectorValues vectors, VectorSimilarityFunction similarityFunction, ImmutableGraphIndex graph, Bits acceptOrds) {
146146
try (var searcher = new GraphSearcher(graph)) {
147147
var ssp = DefaultSearchScoreProvider.exact(queryVector, similarityFunction, vectors);
148148
return searcher.search(ssp, topK, rerankK, 0.f, 0.f, acceptOrds);
@@ -160,7 +160,7 @@ public static SearchResult search(VectorFloat<?> queryVector, int topK, int rera
160160
*
161161
* @param view the new view
162162
*/
163-
public void setView(GraphIndex.View view) {
163+
public void setView(ImmutableGraphIndex.View view) {
164164
this.view = view;
165165
}
166166

@@ -169,9 +169,9 @@ public void setView(GraphIndex.View view) {
169169
*/
170170
@Deprecated
171171
public static class Builder {
172-
private final GraphIndex.View view;
172+
private final ImmutableGraphIndex.View view;
173173

174-
public Builder(GraphIndex.View view) {
174+
public Builder(ImmutableGraphIndex.View view) {
175175
this.view = view;
176176
}
177177

jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndex.java renamed to jvector-base/src/main/java/io/github/jbellis/jvector/graph/ImmutableGraphIndex.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
import io.github.jbellis.jvector.util.Bits;
3030
import io.github.jbellis.jvector.vector.VectorSimilarityFunction;
3131
import io.github.jbellis.jvector.vector.types.VectorFloat;
32+
33+
import java.util.List;
3234
import java.util.Objects;
3335

3436
import java.io.Closeable;
@@ -44,7 +46,7 @@
4446
* All methods are threadsafe. Operations that require persistent state are wrapped
4547
* in a View that should be created per accessing thread.
4648
*/
47-
public interface GraphIndex extends AutoCloseable, Accountable {
49+
public interface ImmutableGraphIndex extends AutoCloseable, Accountable {
4850
/** Returns the number of nodes in the graph */
4951
@Deprecated
5052
default int size() {
@@ -76,6 +78,8 @@ default int size() {
7678
*/
7779
int maxDegree();
7880

81+
List<Integer> maxDegrees();
82+
7983
/**
8084
* @return the first ordinal greater than all node ids in the graph. Equal to size() in simple cases;
8185
* May be different from size() if nodes are being added concurrently, or if nodes have been
@@ -107,6 +111,14 @@ default boolean containsNode(int nodeId) {
107111
*/
108112
int getDegree(int level);
109113

114+
/**
115+
* Returns the average degree computed over nodes in the specified level.
116+
*
117+
* @param level the level of interest.
118+
* @return the average degree or NaN if no nodes are present.
119+
*/
120+
double getAverageDegree(int level);
121+
110122
/**
111123
* Return the number of vectors/nodes in the given level.
112124
* @param level The level of interest
@@ -150,6 +162,11 @@ interface View extends Closeable {
150162
default int getIdUpperBound() {
151163
return size();
152164
}
165+
166+
/**
167+
* Whether the given node is present in the given level of the graph.
168+
*/
169+
boolean contains(int level, int node);
153170
}
154171

155172
/**
@@ -161,7 +178,7 @@ interface ScoringView extends View {
161178
ScoreFunction.ApproximateScoreFunction approximateScoreFunctionFor(VectorFloat<?> queryVector, VectorSimilarityFunction vsf);
162179
}
163180

164-
static String prettyPrint(GraphIndex graph) {
181+
static String prettyPrint(ImmutableGraphIndex graph) {
165182
StringBuilder sb = new StringBuilder();
166183
sb.append(graph);
167184
sb.append("\n");
Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
/*
2+
* All changes to the original code are Copyright DataStax, Inc.
3+
*
4+
* Please see the included license file for details.
5+
*/
6+
7+
/*
8+
* Original license:
9+
* Licensed to the Apache Software Foundation (ASF) under one or more
10+
* contributor license agreements. See the NOTICE file distributed with
11+
* this work for additional information regarding copyright ownership.
12+
* The ASF licenses this file to You under the Apache License, Version 2.0
13+
* (the "License"); you may not use this file except in compliance with
14+
* the License. You may obtain a copy of the License at
15+
*
16+
* http://www.apache.org/licenses/LICENSE-2.0
17+
*
18+
* Unless required by applicable law or agreed to in writing, software
19+
* distributed under the License is distributed on an "AS IS" BASIS,
20+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
* See the License for the specific language governing permissions and
22+
* limitations under the License.
23+
*/
24+
25+
package io.github.jbellis.jvector.graph;
26+
27+
import io.github.jbellis.jvector.util.BitSet;
28+
import io.github.jbellis.jvector.util.ThreadSafeGrowableBitSet;
29+
30+
import java.util.List;
31+
import java.util.stream.IntStream;
32+
33+
34+
/**
35+
* An {@link ImmutableGraphIndex} that additionally exposes mutation operations: nodes can be added,
* connected, marked deleted or removed, and edges can be added or replaced. It offers concurrent
* access; for typical graphs you will get significant
36+
* speedups in construction and searching as you add threads.
37+
*
38+
* <p>The base layer (layer 0) contains all nodes, while higher layers are stored in sparse maps.
* NOTE(review): the storage description above reads like an implementation detail; confirm that it
* holds for every implementation of this interface.
39+
* For searching, use a view obtained from {@link #getView()} which supports level-aware operations.
*
* <p>Once the graph will not be mutated any further, {@link #allMutationsCompleted()} should be called.
40+
*/
41+
interface MutableGraphIndex extends ImmutableGraphIndex {
42+
/**
43+
* Add the given node ordinal with an empty set of neighbors.
44+
*
45+
* <p>Nodes can be inserted out of order, but it requires that the nodes preceded by the node
46+
* inserted out of order are eventually added.
47+
*
48+
* <p>Actually populating the neighbors, and establishing bidirectional links, is the
49+
* responsibility of the caller.
50+
*
51+
* <p>It is also the responsibility of the caller to ensure that each node is only added once.
*
* @param nodeLevel the node to add, together with the level at which to add it
52+
*/
53+
void addNode(NodeAtLevel nodeLevel);
54+
55+
/**
56+
* Add the given node ordinal, at the given level, with an empty set of neighbors.
57+
*
58+
* <p>Nodes can be inserted out of order, but it requires that the nodes preceded by the node
59+
* inserted out of order are eventually added.
60+
*
61+
* <p>Actually populating the neighbors, and establishing bidirectional links, is the
62+
* responsibility of the caller.
63+
*
64+
* <p>It is also the responsibility of the caller to ensure that each node is only added once.
*
* @param level the level at which to add the node
* @param node the node ordinal to add
65+
*/
66+
void addNode(int level, int node);
67+
68+
/**
69+
* Whether the given node is present in the graph.
*
* @param nodeLevel the node to look up, together with the level of interest
70+
*/
71+
boolean contains(NodeAtLevel nodeLevel);
72+
73+
/**
74+
* Whether the given node is present in the given level of the graph.
*
* @param level the level of interest
* @param node the node ordinal to look up
75+
*/
76+
boolean contains(int level, int node);
77+
78+
/**
79+
* Connects the given node, at the given level, to the supplied nodes.
80+
*
81+
* <p>NOTE(review): the earlier description of this method was identical to the one for
82+
* {@code addNode} ("with an empty set of neighbors"), which does not fit a method that receives a
* {@link NodeArray}.
83+
*
84+
* <p>Presumably {@code nodes} become the neighbors of {@code node} at {@code level}, and the same
85+
* caution as for {@link #connectNode(NodeAtLevel, NodeArray)} applies; confirm against the
* implementation.
86+
*
87+
* @param level the level of interest
* @param node the node ordinal to connect
* @param nodes the nodes to connect {@code node} to
88+
*/
89+
void connectNode(int level, int node, NodeArray nodes);
90+
91+
/**
92+
* Use with extreme caution. Used by Builder to load a saved graph and for rescoring.
*
* @param nodeLevel the node to connect, together with the level of interest
* @param nodes the nodes to connect the given node to
93+
*/
94+
void connectNode(NodeAtLevel nodeLevel, NodeArray nodes);
95+
96+
/**
97+
* Mark the given node deleted. Does NOT remove the node from the graph.
*
* @param node the node ordinal to mark as deleted
98+
*/
99+
void markDeleted(int node);
100+
101+
/** Must be called after addNode once neighbors are linked in all levels. */
102+
void markComplete(NodeAtLevel nodeLevel);
103+
104+
/**
* Updates the entry node of the graph to {@code newEntry}; see {@link #entryNode()}.
*/
void updateEntryNode(NodeAtLevel newEntry);
105+
106+
/**
107+
* Returns an upper bound on the amount of memory used by a single node, in bytes.
*
* @param layer the level of interest (presumably the bound differs per level; confirm against the
* implementation)
108+
*/
109+
long ramBytesUsedOneNode(int layer);
110+
111+
/**
* Returns the bit set of deleted nodes, presumably those flagged through
* {@link #markDeleted(int)}; confirm against the implementation.
*/
ThreadSafeGrowableBitSet getDeletedNodes();
112+
113+
/**
* Sets the degrees of the graph, one entry per layer. NOTE(review): presumably these are the
* maximum degrees reported by {@code maxDegrees()}; confirm against the implementation.
*/
void setDegrees(List<Integer> layerDegrees);
114+
115+
/**
116+
* Enforce the degree of the given node in all layers.
*
* @param node the node ordinal whose degree is enforced
117+
*/
118+
void enforceDegree(int node);
119+
120+
/**
121+
* Returns an iterator over the neighbors for the given node at the specified level, which can be empty if the node does not belong to that layer.
*
* @param nodeLevel the node of interest, together with the level of interest
122+
*/
123+
NodesIterator getNeighborsIterator(NodeAtLevel nodeLevel);
124+
125+
/**
126+
* Returns an iterator over the neighbors for the given node at the specified level, which can be empty if the node does not belong to that layer.
*
* @param level the level of interest
* @param node the node ordinal of interest
127+
*/
128+
NodesIterator getNeighborsIterator(int level, int node);
129+
130+
/**
131+
* Removes the given node from all layers.
132+
*
133+
* @param node the node id to remove
134+
* @return the number of layers from which it was removed
135+
*/
136+
int removeNode(int node);
137+
138+
/**
139+
* Returns an Integer stream with the nodes contained in the specified level.
*
* @param level the level of interest
140+
*/
141+
IntStream nodeStream(int level);
142+
143+
/**
144+
* Returns the maximum (coarsest) level that contains the given node, or -1 if the node is not in the graph.
*
* @param node the node ordinal of interest
145+
*/
146+
int getMaxLevelForNode(int node);
147+
148+
/**
149+
* @return the node of the graph to start searches at
150+
*/
151+
NodeAtLevel entryNode();
152+
153+
/**
154+
* Add the given neighbors to the given node at the specified level, maintaining diversity.
155+
* It also adds backlinks from the neighbors to the given node.
156+
* The edges will only be added if the out-degree of the node is less than overflowRatio times the max degree.
*
* @param level the level of interest
* @param node the node ordinal receiving the new edges
* @param candidates the neighbors to add
* @param overflowRatio multiplier of the max degree that bounds the out-degree of the node
157+
*/
158+
void addEdges(int level, int node, NodeArray candidates, float overflowRatio);
159+
160+
/**
161+
* Remove edges to deleted nodes and add the new connections, maintaining diversity.
*
* @param level the level of interest
* @param node the node ordinal whose neighbors are updated
* @param toDelete the deleted nodes whose edges are removed (presumably indexed by node ordinal; confirm)
* @param candidates the new connections to add
162+
*/
163+
void replaceDeletedNeighbors(int level, int node, BitSet toDelete, NodeArray candidates);
164+
165+
/**
166+
* Signals that all mutations have been completed and the graph will not be mutated any further.
167+
* Should be called by the builder after all mutations are completed (during cleanup).
168+
*/
169+
void allMutationsCompleted();
170+
}

0 commit comments

Comments
 (0)