Skip to content

Commit a3c71f1

Browse files
committed
Added sparse search example [skip ci]
1 parent 1cc890b commit a3c71f1

3 files changed

Lines changed: 77 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Or check out some examples:
1616
- [Embeddings](examples/openai/example.php) with OpenAI
1717
- [Binary embeddings](examples/cohere/example.php) with Cohere
1818
- [Hybrid search](examples/hybrid/example.php) with Ollama (Reciprocal Rank Fusion)
19+
- [Sparse search](examples/sparse/example.php) with Text Embeddings Inference
1920
- [Morgan fingerprints](examples/rdkit/example.php) with RDKit
2021
- [Recommendations](examples/disco/example.php) with Disco
2122
- [Bulk loading](examples/loading/example.php) with `COPY`

examples/sparse/composer.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"repositories": [
3+
{
4+
"type": "path",
5+
"url": "../.."
6+
}
7+
],
8+
"require": {
9+
"pgvector/pgvector": "dev-master"
10+
}
11+
}

examples/sparse/example.php

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
// good resources
4+
// https://opensearch.org/blog/improving-document-retrieval-with-sparse-semantic-encoders/
5+
// https://huggingface.co/opensearch-project/opensearch-neural-sparse-encoding-v1
6+
//
7+
// run with
8+
// text-embeddings-router --model-id opensearch-project/opensearch-neural-sparse-encoding-v1 --pooling splade
9+
10+
require_once __DIR__ . '/vendor/autoload.php';
11+
12+
use Pgvector\SparseVector;
13+
14+
// Connect to the example database; pg_connect() returns false on failure,
// so stop here with a clear message instead of failing in a later pg_* call.
$db = pg_connect('postgres://localhost/pgvector_example');
if ($db === false) {
    throw new RuntimeException('Could not connect to postgres://localhost/pgvector_example');
}

// Start from a clean slate: make sure the vector extension is present and
// recreate the documents table with a 30522-dimensional sparsevec column.
pg_query($db, 'CREATE EXTENSION IF NOT EXISTS vector');
pg_query($db, 'DROP TABLE IF EXISTS documents');
pg_query($db, 'CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding sparsevec(30522))');
19+
20+
/**
 * Requests sparse embeddings for the given inputs and returns them as
 * index => value maps.
 *
 * The inputs are POSTed as JSON (`{"inputs": ...}`) to the endpoint. The
 * response is expected to be a JSON array with one row per input, where each
 * row is a list of `{"index": ..., "value": ...}` entries.
 *
 * @param mixed  $inputs Value sent as the "inputs" field of the request body
 *                       (the example script passes a list of strings).
 * @param string $url    Endpoint to POST to; defaults to the local
 *                       embed_sparse endpoint used by this example.
 *
 * @return array One array per response row, mapping each entry's "index" to
 *               its "value".
 *
 * @throws JsonException    If the request body cannot be encoded or the
 *                          response is not valid JSON.
 * @throws RuntimeException If the request fails or the decoded response is
 *                          not an array.
 */
function fetchEmbeddings($inputs, string $url = 'http://localhost:3000/embed_sparse'): array
{
    $opts = [
        'http' => [
            'method' => 'POST',
            'header' => "Content-Type: application/json\r\n",
            'content' => json_encode(['inputs' => $inputs], JSON_THROW_ON_ERROR),
        ],
    ];

    // file_get_contents() returns false (not an exception) when the request
    // fails, so turn that into an explicit error rather than decoding false.
    $response = file_get_contents($url, false, stream_context_create($opts));
    if ($response === false) {
        throw new RuntimeException("Request to {$url} failed");
    }

    $rows = json_decode($response, true, 512, JSON_THROW_ON_ERROR);
    if (!is_array($rows)) {
        throw new UnexpectedValueException("Unexpected response from {$url}: expected a JSON array");
    }

    $embeddings = [];
    foreach ($rows as $row) {
        // Re-key each row's entries: "index" becomes the key, "value" the value.
        $embeddings[] = array_column($row, 'value', 'index');
    }

    return $embeddings;
}
45+
46+
// Dimension passed to every SparseVector; matches the sparsevec(30522) column
// of the documents table.
$dimensions = 30522;

$input = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling',
];
$embeddings = fetchEmbeddings($input);

// Each document is paired with the embedding at the same position, so a
// short response would otherwise surface as an undefined-offset error below.
if (count($embeddings) !== count($input)) {
    throw new RuntimeException('Expected ' . count($input) . ' embeddings, got ' . count($embeddings));
}

// Store every document together with its sparse embedding.
foreach ($input as $i => $content) {
    $inserted = pg_query_params(
        $db,
        'INSERT INTO documents (content, embedding) VALUES ($1, $2)',
        [$content, new SparseVector($embeddings[$i], $dimensions)]
    );
    if ($inserted === false) {
        throw new RuntimeException('Insert failed: ' . pg_last_error($db));
    }
}

// Embed the query text and fetch the five closest documents.
// `<#>` is pgvector's negative inner product operator, so ascending order
// returns the documents with the largest inner product first.
$query = 'forest';
$queryEmbedding = fetchEmbeddings([$query])[0];
$result = pg_query_params(
    $db,
    'SELECT content FROM documents ORDER BY embedding <#> $1 LIMIT 5',
    [new SparseVector($queryEmbedding, $dimensions)]
);
if ($result === false) {
    throw new RuntimeException('Search query failed: ' . pg_last_error($db));
}

// Only the named column is read, so fetch associative rows only.
while ($row = pg_fetch_assoc($result)) {
    echo $row['content'] . "\n";
}

pg_free_result($result);
pg_close($db);

0 commit comments

Comments
 (0)