-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrepository-read.ts
More file actions
356 lines (330 loc) · 10.7 KB
/
repository-read.ts
File metadata and controls
356 lines (330 loc) · 10.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
/**
* Read-side queries for active memory projections.
*/
import pg from 'pg';
import { config } from '../config.js';
import {
type EpisodeRow,
type MemoryRow,
type SearchResult,
normalizeMemoryRow,
normalizeSearchRow,
} from './repository-types.js';
import {
findDuplicateVectors,
findDuplicateVectorsInWorkspace,
searchHybrid,
searchKeyword,
searchVectors,
searchVectorsInWorkspace,
} from './repository-vector-search.js';
import type { AgentScope } from './repository-types.js';
/**
 * Looks up a single row in `episodes` by its `id` column.
 *
 * @param pool - Connection pool the query is issued on.
 * @param id - Value bound to `$1` and compared against `id`.
 * @returns The first row returned by the driver, or `null` when there is none.
 */
export async function getEpisode(pool: pg.Pool, id: string): Promise<EpisodeRow | null> {
  const { rows } = await pool.query('SELECT * FROM episodes WHERE id = $1', [id]);
  const [episode] = rows;
  return episode ?? null;
}
/**
 * Pool-based entry point for fetching one memory by id.
 *
 * Every argument is forwarded unchanged to `getMemoryWithClient`; the pool is
 * handed over in the position where that function declares a `pg.PoolClient`.
 *
 * @param pool - Connection pool to query through.
 * @param id - Memory id to look up.
 * @param userId - Optional owner id, forwarded as-is.
 * @param includeDeleted - Forwarded as-is; defaults to `false`.
 * @returns Whatever `getMemoryWithClient` resolves to for these arguments.
 */
export async function getMemory(
  pool: pg.Pool,
  id: string,
  userId?: string,
  includeDeleted: boolean = false,
): Promise<MemoryRow | null> {
  // A Pool is not declared as a PoolClient, hence the untyped cast.
  const queryable = pool as any;
  const memory = await getMemoryWithClient(queryable, id, userId, includeDeleted);
  return memory;
}
/**
 * Fetches one row from `memories` by id using the supplied client.
 *
 * The WHERE clause always contains `id = $1`. A truthy `userId` adds a
 * `user_id` equality filter (an empty string is treated like an omitted
 * value). Unless `includeDeleted` is `true`, rows with a non-null
 * `deleted_at` or `expired_at` are filtered out.
 *
 * @param client - Client the query is executed on.
 * @param id - Memory id to look up.
 * @param userId - Optional owner id used as an additional filter.
 * @param includeDeleted - When `true`, skips the `deleted_at` / `expired_at` filters.
 * @returns The first row passed through `normalizeMemoryRow`, or `null` when no row matches.
 */
export async function getMemoryWithClient(
  client: pg.PoolClient,
  id: string,
  userId?: string,
  includeDeleted: boolean = false,
): Promise<MemoryRow | null> {
  const values: string[] = [id];
  const conditions: string[] = ['id = $1'];

  if (userId) {
    values.push(userId);
    // Placeholder index equals the position the value was just pushed to.
    conditions.push(`user_id = $${values.length}`);
  }
  if (!includeDeleted) {
    conditions.push('deleted_at IS NULL', 'expired_at IS NULL');
  }

  const whereClause = conditions.join(' AND ');
  const { rows } = await client.query(`SELECT * FROM memories WHERE ${whereClause}`, values);
  const first = rows[0];
  if (!first) {
    return null;
  }
  return normalizeMemoryRow(first);
}
/**
 * Pages through a user's active memories that have no workspace.
 *
 * Rows must have `status = 'active'`, null `deleted_at` / `expired_at`, and a
 * null `workspace_id`. Results are ordered by `created_at` descending.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Owner whose memories are listed (`$1`).
 * @param limit - Bound to `LIMIT` (`$2`).
 * @param offset - Bound to `OFFSET` (`$3`).
 * @param sourceSite - When truthy, adds a `source_site` equality filter.
 * @param episodeId - When truthy, adds an `episode_id` equality filter.
 * @returns Matching rows, each passed through `normalizeMemoryRow`.
 */
export async function listMemories(pool: pg.Pool, userId: string, limit: number, offset: number, sourceSite?: string, episodeId?: string): Promise<MemoryRow[]> {
  const values: unknown[] = [userId, limit, offset];
  const optionalFilters: string[] = [];

  // Appends one equality filter and binds its value to the next placeholder.
  const addFilter = (column: string, value: string | undefined): void => {
    if (!value) return;
    values.push(value);
    optionalFilters.push(` AND ${column} = $${values.length}`);
  };
  addFilter('source_site', sourceSite);
  addFilter('episode_id', episodeId);

  const sql = [
    'SELECT * FROM memories',
    "WHERE user_id = $1 AND deleted_at IS NULL AND expired_at IS NULL AND status = 'active'",
    `AND workspace_id IS NULL${optionalFilters.join('')}`,
    'ORDER BY created_at DESC LIMIT $2 OFFSET $3',
  ].join('\n');

  const { rows } = await pool.query(sql, values);
  return rows.map(normalizeMemoryRow);
}
/**
 * Pages through the active memories of one workspace, newest first.
 *
 * Rows must have `status = 'active'` and null `deleted_at` / `expired_at`.
 *
 * @param pool - Connection pool the query is issued on.
 * @param workspaceId - Workspace whose memories are listed (`$1`).
 * @param limit - Bound to `LIMIT` (`$2`).
 * @param offset - Bound to `OFFSET` (`$3`).
 * @returns Matching rows, each passed through `normalizeMemoryRow`.
 */
export async function listMemoriesInWorkspace(
  pool: pg.Pool,
  workspaceId: string,
  limit: number,
  offset: number,
): Promise<MemoryRow[]> {
  const sql = [
    'SELECT * FROM memories',
    "WHERE workspace_id = $1 AND deleted_at IS NULL AND expired_at IS NULL AND status = 'active'",
    'ORDER BY created_at DESC LIMIT $2 OFFSET $3',
  ].join('\n');
  const { rows } = await pool.query(sql, [workspaceId, limit, offset]);
  return rows.map(normalizeMemoryRow);
}
/**
 * Fetches one memory by id, restricted to a given workspace.
 *
 * Only `deleted_at IS NULL` is required; this query has no `expired_at` or
 * `status` filter, so expired or non-active rows can be returned.
 *
 * @param pool - Connection pool the query is issued on.
 * @param id - Memory id (`$1`).
 * @param workspaceId - Workspace the memory must belong to (`$2`).
 * @returns The first row passed through `normalizeMemoryRow`, or `null` when no row matches.
 */
export async function getMemoryInWorkspace(
  pool: pg.Pool,
  id: string,
  workspaceId: string,
): Promise<MemoryRow | null> {
  const sql = 'SELECT * FROM memories WHERE id = $1 AND workspace_id = $2 AND deleted_at IS NULL';
  const { rows } = await pool.query(sql, [id, workspaceId]);
  const match = rows[0];
  if (!match) {
    return null;
  }
  return normalizeMemoryRow(match);
}
/**
 * Lists a user's active memories within one namespace, newest first.
 *
 * Rows must have `status = 'active'` and null `deleted_at` / `expired_at`.
 * This query does not filter on `workspace_id`.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Owner of the memories (`$1`).
 * @param namespace - Namespace to match exactly (`$2`).
 * @param limit - Maximum number of rows (`$3`); defaults to 20.
 * @returns Matching rows, each passed through `normalizeMemoryRow`.
 */
export async function listMemoriesByNamespace(
  pool: pg.Pool,
  userId: string,
  namespace: string,
  limit: number = 20,
): Promise<MemoryRow[]> {
  const sql = [
    'SELECT * FROM memories',
    'WHERE user_id = $1',
    'AND namespace = $2',
    'AND deleted_at IS NULL',
    'AND expired_at IS NULL',
    "AND status = 'active'",
    'ORDER BY created_at DESC',
    'LIMIT $3',
  ].join('\n');
  const { rows } = await pool.query(sql, [userId, namespace, limit]);
  return rows.map(normalizeMemoryRow);
}
/**
 * Computes aggregate statistics over a user's active memories.
 *
 * Two queries run one after the other over the same filter (`user_id = $1`,
 * null `deleted_at` / `expired_at`, `status = 'active'`): one for the total
 * count and average importance, one for the per-`source_site` counts.
 *
 * @param pool - Connection pool the queries are issued on.
 * @param userId - Owner whose memories are aggregated.
 * @returns `count` (0 when the driver reports null), `avgImportance` coerced
 *   with `Number` (0 when null), and `sourceDistribution` keyed by `source_site`.
 */
export async function getMemoryStats(
  pool: pg.Pool,
  userId: string,
): Promise<{ count: number; avgImportance: number; sourceDistribution: Record<string, number> }> {
  // Shared FROM/WHERE fragment so both aggregates look at the same row set.
  const activeRowsOfUser =
    "FROM memories WHERE user_id = $1 AND deleted_at IS NULL AND expired_at IS NULL AND status = 'active'";

  const totals = await pool.query(
    `SELECT COUNT(*)::int AS count, AVG(importance) AS avg_importance\n${activeRowsOfUser}`,
    [userId],
  );
  const perSite = await pool.query(
    `SELECT source_site, COUNT(*)::int AS count\n${activeRowsOfUser}\nGROUP BY source_site`,
    [userId],
  );

  const summary = totals.rows[0];
  const siteCounts = perSite.rows.map((row) => [row.source_site, row.count]);
  return {
    count: summary.count ?? 0,
    avgImportance: Number(summary.avg_importance ?? 0),
    sourceDistribution: Object.fromEntries(siteCounts),
  };
}
/**
 * User-scoped vector search; a pass-through to `searchVectors`.
 *
 * All arguments are forwarded unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `searchVectors`.
 * @param userId - User whose memories are searched.
 * @param queryEmbedding - Embedding of the query.
 * @param limit - Maximum number of results requested.
 * @param sourceSite - Optional source-site argument, forwarded as-is.
 * @param referenceTime - Optional reference time, forwarded as-is.
 * @returns The results produced by `searchVectors`.
 */
export async function searchSimilar(
  pool: pg.Pool,
  userId: string,
  queryEmbedding: number[],
  limit: number,
  sourceSite?: string,
  referenceTime?: Date,
): Promise<SearchResult[]> {
  const hits = await searchVectors(pool, userId, queryEmbedding, limit, sourceSite, referenceTime);
  return hits;
}
/**
 * User-scoped hybrid search; a pass-through to `searchHybrid`.
 *
 * Receives both the raw query text and its embedding and forwards every
 * argument unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `searchHybrid`.
 * @param userId - User whose memories are searched.
 * @param queryText - Query text.
 * @param queryEmbedding - Embedding of the query.
 * @param limit - Maximum number of results requested.
 * @param sourceSite - Optional source-site argument, forwarded as-is.
 * @param referenceTime - Optional reference time, forwarded as-is.
 * @returns The results produced by `searchHybrid`.
 */
export async function searchHybridSimilar(
  pool: pg.Pool,
  userId: string,
  queryText: string,
  queryEmbedding: number[],
  limit: number,
  sourceSite?: string,
  referenceTime?: Date,
): Promise<SearchResult[]> {
  const hits = await searchHybrid(pool, userId, queryText, queryEmbedding, limit, sourceSite, referenceTime);
  return hits;
}
/**
 * User-scoped keyword search; a pass-through to `searchKeyword`.
 *
 * All arguments are forwarded unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `searchKeyword`.
 * @param userId - User whose memories are searched.
 * @param queryText - Query text.
 * @param limit - Maximum number of results requested.
 * @param sourceSite - Optional source-site argument, forwarded as-is.
 * @returns The results produced by `searchKeyword`.
 */
export async function searchKeywordSimilar(
  pool: pg.Pool,
  userId: string,
  queryText: string,
  limit: number,
  sourceSite?: string,
): Promise<SearchResult[]> {
  const hits = await searchKeyword(pool, userId, queryText, limit, sourceSite);
  return hits;
}
/**
 * User-scoped near-duplicate lookup; a pass-through to `findDuplicateVectors`.
 *
 * All arguments are forwarded unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `findDuplicateVectors`.
 * @param userId - User whose memories are compared.
 * @param embedding - Embedding to compare against.
 * @param threshold - Threshold argument, forwarded as-is.
 * @param limit - Maximum number of results requested.
 * @returns Whatever `findDuplicateVectors` resolves to.
 */
export async function findNearDuplicates(
  pool: pg.Pool,
  userId: string,
  embedding: number[],
  threshold: number,
  limit: number,
) {
  const duplicates = await findDuplicateVectors(pool, userId, embedding, threshold, limit);
  return duplicates;
}
/**
 * Workspace-scoped vector search with agent filtering and visibility
 * enforcement, implemented by delegating to `searchVectorsInWorkspace`.
 *
 * All arguments are forwarded unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `searchVectorsInWorkspace`.
 * @param workspaceId - Workspace to search in.
 * @param queryEmbedding - Embedding of the query.
 * @param limit - Maximum number of results requested.
 * @param agentScope - Agent scope, forwarded as-is; defaults to `'all'`.
 * @param callerAgentId - Optional id of the calling agent, forwarded as-is.
 * @param referenceTime - Optional reference time, forwarded as-is.
 * @returns The results produced by `searchVectorsInWorkspace`.
 */
export async function searchSimilarInWorkspace(
  pool: pg.Pool,
  workspaceId: string,
  queryEmbedding: number[],
  limit: number,
  agentScope: AgentScope = 'all',
  callerAgentId?: string,
  referenceTime?: Date,
): Promise<SearchResult[]> {
  const hits = await searchVectorsInWorkspace(
    pool,
    workspaceId,
    queryEmbedding,
    limit,
    agentScope,
    callerAgentId,
    referenceTime,
  );
  return hits;
}
/**
 * Near-duplicate lookup inside a workspace scope, used for AUDN conflict
 * detection. Delegates to `findDuplicateVectorsInWorkspace`.
 *
 * All arguments are forwarded unchanged and in the same order.
 *
 * @param pool - Connection pool handed to `findDuplicateVectorsInWorkspace`.
 * @param workspaceId - Workspace whose memories are compared.
 * @param embedding - Embedding to compare against.
 * @param threshold - Threshold argument, forwarded as-is.
 * @param limit - Maximum number of results requested.
 * @param agentScope - Agent scope, forwarded as-is; defaults to `'all'`.
 * @param callerAgentId - Optional id of the calling agent, forwarded as-is.
 * @returns Whatever `findDuplicateVectorsInWorkspace` resolves to.
 */
export async function findNearDuplicatesInWorkspace(
  pool: pg.Pool,
  workspaceId: string,
  embedding: number[],
  threshold: number,
  limit: number,
  agentScope: AgentScope = 'all',
  callerAgentId?: string,
) {
  const duplicates = await findDuplicateVectorsInWorkspace(
    pool,
    workspaceId,
    embedding,
    threshold,
    limit,
    agentScope,
    callerAgentId,
  );
  return duplicates;
}
/**
 * Finds a user's workspace-less, active memories whose content contains at
 * least one of the given keywords, then ranks them in-process.
 *
 * The SQL stage over-fetches up to `4 * limit` rows (ordered by importance,
 * then recency) using a case-insensitive substring match. Each row is then
 * scored with `estimateKeywordSimilarity`, sorted by that score (ties broken
 * by importance, descending) and truncated to `limit`.
 *
 * Keywords are matched literally: `%`, `_` and `\` inside a keyword are
 * escaped with a backslash (the default LIKE escape character) before being
 * wrapped in `%…%`, so a keyword such as `snake_case` or `50%` does not act as
 * a wildcard pattern. This keeps the SQL pre-filter consistent with the
 * literal substring check used for scoring.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Owner of the memories (`$1`).
 * @param keywords - Keywords to look for; an empty list short-circuits to `[]`.
 * @param limit - Maximum number of candidates returned.
 * @param includeExpired - When `true`, rows with a non-null `expired_at` are kept.
 * @returns Rows with `id`, `content`, `importance` plus a computed `similarity`.
 */
export async function findKeywordCandidates(
  pool: pg.Pool,
  userId: string,
  keywords: string[],
  limit: number,
  includeExpired: boolean = false,
) {
  if (keywords.length === 0) return [];

  // Over-fetch so the in-process re-ranking has more than `limit` rows to choose from.
  const candidateLimit = Math.max(limit * 4, limit);
  const expiredClause = includeExpired ? '' : 'AND expired_at IS NULL';

  // Neutralise LIKE metacharacters so each keyword is a literal substring.
  const patterns = keywords.map((keyword) => `%${keyword.replace(/[\\%_]/g, '\\$&')}%`);

  const sql = [
    'SELECT id, content, importance',
    'FROM memories',
    'WHERE user_id = $1',
    'AND deleted_at IS NULL',
    expiredClause,
    "AND status = 'active'",
    'AND workspace_id IS NULL',
    'AND content ILIKE ANY($2::text[])',
    'ORDER BY importance DESC, created_at DESC',
    'LIMIT $3',
  ].join('\n');
  const result = await pool.query(sql, [userId, patterns, candidateLimit]);

  return result.rows
    .map((row) => ({
      ...row,
      similarity: estimateKeywordSimilarity(row.content, keywords),
    }))
    .sort((left, right) => {
      if (right.similarity !== left.similarity) {
        return right.similarity - left.similarity;
      }
      return Number(right.importance) - Number(left.importance);
    })
    .slice(0, limit);
}
/**
 * Counts active memories, optionally restricted to one user.
 *
 * Rows must have `status = 'active'` and null `deleted_at` / `expired_at`.
 * A truthy `userId` adds a `user_id = $1` filter; otherwise all users are counted.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Optional owner to restrict the count to.
 * @returns The `count` column of the first result row.
 */
export async function countMemories(pool: pg.Pool, userId?: string): Promise<number> {
  let ownerFilter = '';
  const values: string[] = [];
  if (userId) {
    ownerFilter = 'user_id = $1 AND ';
    values.push(userId);
  }
  const sql = `SELECT COUNT(*)::int AS count FROM memories WHERE ${ownerFilter}deleted_at IS NULL AND expired_at IS NULL AND status = 'active'`;
  const { rows } = await pool.query(sql, values);
  return rows[0].count;
}
/**
 * Counts a user's memories whose status is `needs_clarification`.
 *
 * Rows must also have null `deleted_at` / `expired_at`.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Owner of the memories (`$1`).
 * @returns The `count` column of the first result row.
 */
export async function countNeedsClarification(pool: pg.Pool, userId: string): Promise<number> {
  const sql = [
    'SELECT COUNT(*)::int AS count',
    "FROM memories WHERE user_id = $1 AND deleted_at IS NULL AND expired_at IS NULL AND status = 'needs_clarification'",
  ].join('\n');
  const { rows } = await pool.query(sql, [userId]);
  return rows[0].count;
}
/**
 * Finds memories created within a time window around any of the given anchor
 * timestamps. Used for 1-hop temporal-neighbor expansion: it surfaces facts
 * from the same conversation session that the initial top-K results came from.
 *
 * Candidates are the user's active, workspace-less memories that are neither
 * deleted nor expired, are not listed in `excludeIds`, and whose `created_at`
 * lies within `windowMinutes` of at least one anchor. Each candidate gets:
 * - `similarity`: `1 - (embedding <=> query)`;
 * - `score`: the weighted sum of similarity, importance and a recency term
 *   (exponential decay of the age since `last_accessed_at`, divided by
 *   2,592,000 seconds, i.e. 30 days), multiplied by `trust_score` (1.0 when null).
 * The three weights come from `config.scoringWeightSimilarity`,
 * `config.scoringWeightImportance` and `config.scoringWeightRecency`.
 *
 * @param pool - Connection pool the query is issued on.
 * @param userId - Owner of the memories.
 * @param anchorTimestamps - Anchor times; an empty list short-circuits to `[]`.
 * @param queryEmbedding - Query embedding, serialised with pgvector's `toSql`.
 * @param windowMinutes - Half-width of the window around each anchor, in minutes.
 * @param excludeIds - Memory ids to leave out of the result.
 * @param limit - Maximum number of rows; a non-positive value short-circuits to `[]`.
 * @param referenceTime - "Now" for the recency term; defaults to the current time.
 * @returns Rows ordered by `score` descending, each passed through `normalizeSearchRow`.
 */
export async function findTemporalNeighbors(
  pool: pg.Pool,
  userId: string,
  anchorTimestamps: Date[],
  queryEmbedding: number[],
  windowMinutes: number,
  excludeIds: Set<string>,
  limit: number,
  referenceTime?: Date,
): Promise<SearchResult[]> {
  if (anchorTimestamps.length === 0 || limit <= 0) {
    return [];
  }

  // Loaded lazily, only once there is an actual query to run.
  const { default: vectorCodec } = await import('pgvector/pg');

  const excludedIds = Array.from(excludeIds);
  const halfWindow = `${windowMinutes} minutes`;
  const similarityWeight = config.scoringWeightSimilarity;
  const importanceWeight = config.scoringWeightImportance;
  const recencyWeight = config.scoringWeightRecency;
  const nowIso = (referenceTime ?? new Date()).toISOString();

  const sql = [
    'SELECT *,',
    '1 - (embedding <=> $1) AS similarity,',
    '(',
    '$7 * (1 - (embedding <=> $1))',
    '+ $8 * importance',
    '+ $9 * EXP(-EXTRACT(EPOCH FROM ($10::timestamptz - last_accessed_at)) / 2592000.0)',
    ') * COALESCE(trust_score, 1.0) AS score',
    'FROM memories',
    'WHERE user_id = $2',
    'AND deleted_at IS NULL',
    'AND expired_at IS NULL',
    "AND status = 'active'",
    'AND workspace_id IS NULL',
    'AND id != ALL($3::uuid[])',
    'AND EXISTS (',
    'SELECT 1 FROM unnest($4::timestamptz[]) AS anchor',
    'WHERE memories.created_at BETWEEN anchor - $5::interval AND anchor + $5::interval',
    ')',
    'ORDER BY score DESC',
    'LIMIT $6',
  ].join('\n');

  // Positional parameters; the order must line up with $1..$10 above.
  const values = [
    vectorCodec.toSql(queryEmbedding),
    userId,
    excludedIds,
    anchorTimestamps,
    halfWindow,
    limit,
    similarityWeight,
    importanceWeight,
    recencyWeight,
    nowIso,
  ];

  const { rows } = await pool.query(sql, values);
  return rows.map(normalizeSearchRow);
}
/**
 * Heuristic similarity score for a keyword hit, in the range 0.45 to 0.89.
 *
 * Every keyword carries a weight from `keywordWeight`. The score is
 * `0.45 + 0.44 * (weight of keywords found in content / total weight)`,
 * capped at 0.89. Matching is a case-insensitive substring test.
 *
 * @param content - Text to search in.
 * @param keywords - Keywords to look for.
 * @returns 0.45 when the total weight is zero (e.g. no keywords), otherwise the weighted score.
 */
function estimateKeywordSimilarity(content: string, keywords: string[]): number {
  const floor = 0.45;
  const span = 0.44;
  const ceiling = 0.89;

  const haystack = content.toLowerCase();
  let totalWeight = 0;
  let matchedWeight = 0;
  for (const keyword of keywords) {
    const weight = keywordWeight(keyword);
    totalWeight += weight;
    if (haystack.includes(keyword.toLowerCase())) {
      matchedWeight += weight;
    }
  }

  if (totalWeight === 0) {
    return floor;
  }
  return Math.min(ceiling, floor + (span * matchedWeight) / totalWeight);
}
/**
 * Assigns a relative weight to a keyword based on its shape.
 *
 * - 2.5 when it contains a space (multi-word phrase);
 * - 2 when it is three or more upper-case ASCII letters, or has an upper-case
 *   ASCII letter anywhere after the first character;
 * - 1.5 when it is at least 8 characters long;
 * - 1 otherwise.
 * The first matching rule wins.
 *
 * @param keyword - Keyword to weigh.
 * @returns One of 2.5, 2, 1.5 or 1.
 */
function keywordWeight(keyword: string): number {
  const isPhrase = keyword.indexOf(' ') !== -1;
  if (isPhrase) {
    return 2.5;
  }

  const isAllCapsWord = /^[A-Z]{3,}$/.test(keyword);
  const hasInnerCapital = /[A-Z]/.test(keyword.slice(1));
  if (isAllCapsWord || hasInnerCapital) {
    return 2;
  }

  return keyword.length >= 8 ? 1.5 : 1;
}