Skip to content

Commit 1cc890b

Browse files
committed
Added hybrid search example [skip ci]
1 parent f339ecd commit 1cc890b

3 files changed

Lines changed: 88 additions & 0 deletions

File tree

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Or check out some examples:
1515

1616
- [Embeddings](examples/openai/example.php) with OpenAI
1717
- [Binary embeddings](examples/cohere/example.php) with Cohere
18+
- [Hybrid search](examples/hybrid/example.php) with Ollama (Reciprocal Rank Fusion)
1819
- [Morgan fingerprints](examples/rdkit/example.php) with RDKit
1920
- [Recommendations](examples/disco/example.php) with Disco
2021
- [Bulk loading](examples/loading/example.php) with `COPY`

examples/hybrid/composer.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"repositories": [
3+
{
4+
"type": "path",
5+
"url": "../.."
6+
}
7+
],
8+
"require": {
9+
"pgvector/pgvector": "dev-master"
10+
}
11+
}

examples/hybrid/example.php

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
<?php
2+
3+
require_once __DIR__ . '/vendor/autoload.php';
4+
5+
use Pgvector\Vector;
6+
7+
// Connect to the example database. pg_connect() returns false on failure,
// so stop here with a clear message instead of failing on the first query.
$db = pg_connect('postgres://localhost/pgvector_example');
if ($db === false) {
    throw new RuntimeException('Unable to connect to the pgvector_example database');
}

// Rebuild the schema from scratch on every run so the example is repeatable.
// The GIN index covers the same to_tsvector('english', content) expression
// that the keyword half of the hybrid query matches against.
// NOTE(review): vector(768) presumably matches the embedding model's output
// dimension — confirm if the model is changed.
$setupStatements = [
    'CREATE EXTENSION IF NOT EXISTS vector',
    'DROP TABLE IF EXISTS documents',
    'CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(768))',
    "CREATE INDEX ON documents USING GIN (to_tsvector('english', content))",
];
foreach ($setupStatements as $statement) {
    if (pg_query($db, $statement) === false) {
        throw new RuntimeException('Setup statement failed: ' . pg_last_error($db));
    }
}
13+
14+
/**
 * Requests embeddings for the given text(s) from an embed HTTP endpoint.
 *
 * Sends a JSON POST body of the form {"input": ..., "model": ...} and returns
 * the "embeddings" entry of the decoded JSON response.
 *
 * @param string|string[] $input Text, or list of texts, to embed. Callers in
 *                               this script index the result by input position.
 * @param string          $url   Endpoint to POST to; defaults to the embed API
 *                               on localhost:11434.
 * @param string          $model Name of the embedding model to request.
 *
 * @return array The decoded "embeddings" array from the response.
 *
 * @throws RuntimeException If the request cannot be encoded or sent, or the
 *                          response is not JSON containing an "embeddings" array.
 */
function fetchEmbeddings($input, $url = 'http://localhost:11434/api/embed', $model = 'nomic-embed-text'): array
{
    $payload = json_encode([
        'input' => $input,
        'model' => $model,
    ]);
    if ($payload === false) {
        throw new RuntimeException('Unable to encode embedding request: ' . json_last_error_msg());
    }

    $context = stream_context_create([
        'http' => [
            'method' => 'POST',
            'header' => "Content-Type: application/json\r\n",
            'content' => $payload,
        ],
    ]);

    // file_get_contents() returns false (with a warning) on connection or
    // HTTP failure; surface that instead of decoding a non-response.
    $response = file_get_contents($url, false, $context);
    if ($response === false) {
        throw new RuntimeException('Embedding request to ' . $url . ' failed');
    }

    try {
        $decoded = json_decode($response, true, 512, JSON_THROW_ON_ERROR);
    } catch (JsonException $e) {
        throw new RuntimeException('Embedding response is not valid JSON: ' . $e->getMessage(), 0, $e);
    }

    // An error payload (or any other shape) has no usable "embeddings" list.
    if (!is_array($decoded) || !isset($decoded['embeddings']) || !is_array($decoded['embeddings'])) {
        throw new RuntimeException('Embedding response does not contain an "embeddings" array');
    }

    return $decoded['embeddings'];
}
32+
33+
// Sample documents to index; each one is embedded and stored with its text.
$input = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling',
];
$embeddings = fetchEmbeddings($input);

// Embeddings are matched to documents by position in $input.
foreach ($input as $i => $content) {
    $inserted = pg_query_params(
        $db,
        'INSERT INTO documents (content, embedding) VALUES ($1, $2)',
        [$content, new Vector($embeddings[$i])]
    );
    if ($inserted === false) {
        throw new RuntimeException('Insert failed: ' . pg_last_error($db));
    }
}

// Hybrid search: rank documents separately by vector distance (<=>) and by
// full-text relevance, then combine the two rankings per document as
// 1 / (k + rank), summed over the lists the document appears in. The FULL
// OUTER JOIN keeps documents found by only one of the two searches.
// A nowdoc is used so the $1/$2/$3 placeholders can never be interpolated by PHP.
$sql = <<<'SQL'
WITH semantic_search AS (
    SELECT id, RANK () OVER (ORDER BY embedding <=> $2) AS rank
    FROM documents
    ORDER BY embedding <=> $2
    LIMIT 20
),
keyword_search AS (
    SELECT id, RANK () OVER (ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC) AS rank
    FROM documents, plainto_tsquery('english', $1) query
    WHERE to_tsvector('english', content) @@ query
    ORDER BY ts_rank_cd(to_tsvector('english', content), query) DESC
    LIMIT 20
)
SELECT
    COALESCE(semantic_search.id, keyword_search.id) AS id,
    COALESCE(1.0 / ($3 + semantic_search.rank), 0.0) +
    COALESCE(1.0 / ($3 + keyword_search.rank), 0.0) AS score
FROM semantic_search
FULL OUTER JOIN keyword_search ON semantic_search.id = keyword_search.id
ORDER BY score DESC
LIMIT 5
SQL;

$query = 'growling bear';
// A single string input still yields a list of embeddings; take the first.
$queryEmbedding = fetchEmbeddings($query)[0];
// Constant added to every rank in the score formula ($3 in the SQL).
$k = 60;

$result = pg_query_params($db, $sql, [$query, new Vector($queryEmbedding), $k]);
if ($result === false) {
    throw new RuntimeException('Hybrid search query failed: ' . pg_last_error($db));
}

// Only named columns are read, so fetch associative rows.
while ($row = pg_fetch_assoc($result)) {
    echo 'document: ' . $row['id'] . ', RRF score: ' . $row['score'] . "\n";
}

pg_free_result($result);
pg_close($db);

0 commit comments

Comments
 (0)