Skip to content

Commit f6d0b97

Browse files
committed
add ord mapping from RAVV to graph creation
Signed-off-by: Samuel Herman <sherman8915@gmail.com>
1 parent 3673866 commit f6d0b97

3 files changed

Lines changed: 24 additions & 6 deletions

File tree

jvector-base/src/main/java/io/github/jbellis/jvector/graph/GraphIndexBuilder.java

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -787,12 +787,27 @@ public synchronized long removeDeletedNodes() {
787787
/**
788788
* Convenience method to build a new graph from an existing one, with the addition of new nodes.
789789
* This is useful when we want to merge a new set of vectors into an existing graph that is already on disk.
790+
*
791+
* @param onDiskGraphIndex the on-disk representation of the graph index to be processed and converted.
792+
* @param perLevelNeighborsScoreCache the cache containing pre-computed neighbor scores.
793+
* @param newVectors a superset RAVV containing both the old vectors that are already in the graph and the new vectors to be added to it
794+
* @param buildScoreProvider the provider responsible for calculating build scores.
795+
* @param startingNodeOffset the first graph node id assigned to the new vectors; every node id from this value up to (but excluding) newVectors.size() is added to the graph
796+
* @param graphToRavvOrdMap a mapping from graph node ids (indexed by node id, including the newly added nodes) to the corresponding ordinals in the newVectors RAVV
797+
* @param beamWidth the width of the beam used during the graph building process.
798+
* @param overflowRatio the ratio of extra neighbors to allow temporarily when inserting a node.
799+
* @param alpha the weight factor for balancing score computations.
800+
* @param addHierarchy whether to add hierarchical structures while building the graph.
801+
*
802+
* @return the in-memory representation of the graph index.
803+
* @throws IOException if an I/O error occurs during the graph loading or conversion process.
790804
*/
791805
public static OnHeapGraphIndex buildAndMergeNewNodes(OnDiskGraphIndex onDiskGraphIndex,
792806
NeighborsScoreCache perLevelNeighborsScoreCache,
793807
RandomAccessVectorValues newVectors,
794808
BuildScoreProvider buildScoreProvider,
795-
int startingNodeId,
809+
int startingNodeOffset,
810+
int[] graphToRavvOrdMap,
796811
int beamWidth,
797812
float overflowRatio,
798813
float alpha,
@@ -813,9 +828,9 @@ public static OnHeapGraphIndex buildAndMergeNewNodes(OnDiskGraphIndex onDiskGrap
813828

814829
// Add each new vector incrementally
815830
final List<ForkJoinTask<?>> forkJoinTask = new ArrayList<>(newVectors.size());
816-
for (int i = 0; i < newVectors.size(); i++) {
817-
final int nodeId = startingNodeId + i;
818-
final VectorFloat<?> vector = newVectors.getVector(i);
831+
for (int i = startingNodeOffset; i < newVectors.size(); i++) {
832+
final int nodeId = i;
833+
final VectorFloat<?> vector = newVectors.getVector(graphToRavvOrdMap[nodeId]);
819834

820835
// The GraphIndexBuilder can add nodes to an existing index
821836
forkJoinTask.add(PhysicalCoreExecutor.pool().submit(() -> builder.addGraphNode(nodeId, vector)));

jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/NeighborsScoreCache.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
* @see OnHeapGraphIndex#convertToHeap(OnDiskGraphIndex, NeighborsScoreCache, BuildScoreProvider, float, float)
4141
*
4242
* This is particularly useful when merging new nodes into an existing graph.
43-
* @see GraphIndexBuilder#buildAndMergeNewNodes(OnDiskGraphIndex, NeighborsScoreCache, RandomAccessVectorValues, BuildScoreProvider, int, int, float, float, boolean)
43+
* @see GraphIndexBuilder#buildAndMergeNewNodes(OnDiskGraphIndex, NeighborsScoreCache, RandomAccessVectorValues, BuildScoreProvider, int, int[], int, float, float, boolean)
4444
*/
4545
public class NeighborsScoreCache {
4646
private final Map<Integer, Map<Integer, NodeArray>> perLevelNeighborsScoreCache;

jvector-tests/src/test/java/io/github/jbellis/jvector/graph/OnHeapGraphIndexTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import java.util.Arrays;
4242
import java.util.Set;
4343
import java.util.stream.Collectors;
44+
import java.util.stream.IntStream;
4445

4546
import static org.junit.Assert.assertEquals;
4647
import static org.junit.Assert.assertTrue;
@@ -175,7 +176,9 @@ public void testIncrementalInsertionFromOnDiskIndex() throws IOException {
175176
try (var readerSupplier = new SimpleMappedReader.Supplier(outputPath.toAbsolutePath());
176177
var onDiskGraph = OnDiskGraphIndex.load(readerSupplier)) {
177178
TestUtil.assertGraphEquals(baseGraphIndex, onDiskGraph);
178-
OnHeapGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(onDiskGraph, neighborsScoreCache, newVectorsRavv, allBuildScoreProvider, numBaseVectors, beamWidth, neighborOverflow, alpha, addHierarchy);
179+
// We will create a trivial 1:1 mapping between the new graph and the ravv
180+
final int[] graphToRavvOrdMap = IntStream.range(0, allVectorsRavv.size()).toArray();
181+
OnHeapGraphIndex reconstructedAllNodeOnHeapGraphIndex = GraphIndexBuilder.buildAndMergeNewNodes(onDiskGraph, neighborsScoreCache, allVectorsRavv, allBuildScoreProvider, numBaseVectors, graphToRavvOrdMap, beamWidth, neighborOverflow, alpha, addHierarchy);
179182

180183
try (GraphSearcher reconstructedAllGraphSearcher = new GraphSearcher(reconstructedAllNodeOnHeapGraphIndex);
181184
GraphSearcher allGraphSearcher = new GraphSearcher(allGraphIndex)) {

0 commit comments

Comments
 (0)