-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcleanup.sh
More file actions
executable file
·156 lines (129 loc) · 5.43 KB
/
cleanup.sh
File metadata and controls
executable file
·156 lines (129 loc) · 5.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/bin/bash
# Cleanup: prunes stale memories from ChromaDB
# 1. agent_eval: older than 30 days, keep only latest per agent
# 2. compact_save.sh-sourced: older than 60 days with near-duplicates (cosine < 0.1)
# Called by SessionStart hook — daily cooldown prevents repeated runs

# Daily cooldown gate: one marker file per calendar day. If today's marker
# already exists, a cleanup has run today — bail out immediately.
COOLDOWN_DIR="$HOME/.claude/.cortex_cleanup_cooldown"
COOLDOWN_FILE="$COOLDOWN_DIR/cleanup_$(date +%Y-%m-%d)"
mkdir -p "$COOLDOWN_DIR" 2>/dev/null
[ -f "$COOLDOWN_FILE" ] && exit 0
touch "$COOLDOWN_FILE"

# Garbage-collect cooldown markers older than a week
find "$COOLDOWN_DIR" -name "cleanup_*" -mtime +7 -delete 2>/dev/null
/usr/bin/python3 -W ignore - 2>/dev/null <<'PYEOF'
# Prune stale memories from the ChromaDB collection in two phases:
#   Phase 1: agent_eval entries older than 30 days (keep newest per agent)
#   Phase 2: compact_save.sh-sourced entries older than 60 days that have
#            a newer near-duplicate (cosine distance < 0.1)
import os, sys
from datetime import datetime, timedelta
from collections import defaultdict

sys.path.insert(0, os.path.expanduser("~/.claude/skills/cortex/lib"))
from chroma_client import get_client, get_collection

EVAL_MAX_AGE_DAYS = 30
COMPACT_MAX_AGE_DAYS = 60
DUPLICATE_THRESHOLD = 0.1  # cosine distance below this = near-duplicate

try:
    col = get_collection()
    total_count = col.count()
    if total_count == 0:
        print("[cortex cleanup] Nothing to prune (empty collection)")
        sys.exit(0)

    data = col.get(include=["metadatas", "documents"])
    ids = data["ids"]
    metadatas = data["metadatas"]
    documents = data["documents"]

    # Cutoffs are compared as ISO-8601 strings, which sort chronologically.
    now = datetime.now()
    eval_cutoff = (now - timedelta(days=EVAL_MAX_AGE_DAYS)).strftime("%Y-%m-%dT%H:%M:%S")
    compact_cutoff = (now - timedelta(days=COMPACT_MAX_AGE_DAYS)).strftime("%Y-%m-%dT%H:%M:%S")

    to_delete = set()
    report = {"eval_old": 0, "compact_dup": 0}

    # ── Phase 1: Prune old agent_eval entries (keep latest per agent) ──
    # Group eval entries by agent_name
    eval_by_agent = defaultdict(list)  # agent_name -> [(index, timestamp)]
    for i in range(len(ids)):
        meta = metadatas[i]
        if meta.get("type") != "agent_eval":
            continue
        agent_name = meta.get("agent_name", "")
        ts = meta.get("timestamp", "")
        if agent_name:
            eval_by_agent[agent_name].append((i, ts))

    for agent_name, entries in eval_by_agent.items():
        if len(entries) <= 1:
            continue
        # Sort by timestamp descending — entries[0] is the newest, always kept.
        entries.sort(key=lambda x: x[1], reverse=True)
        for idx, ts in entries[1:]:
            # Require a non-empty timestamp: a missing one ("") would
            # always compare as "older" and get deleted unconditionally.
            if ts and ts < eval_cutoff:
                to_delete.add(ids[idx])
                report["eval_old"] += 1

    # ── Phase 2: Prune old compact_save.sh duplicates ──
    # Find compact-sourced memories older than 60 days
    compact_old_indices = []
    compact_all_indices = []
    for i in range(len(ids)):
        meta = metadatas[i]
        if meta.get("source") != "compact_save.sh":
            continue
        if meta.get("type") == "agent_eval":
            continue  # already handled above
        ts = meta.get("timestamp", "")
        compact_all_indices.append(i)
        if ts and ts < compact_cutoff:
            compact_old_indices.append(i)

    # For each old compact memory, delete it only if a NEWER near-duplicate
    # survives — i.e. the information is preserved elsewhere.
    if compact_old_indices and len(compact_all_indices) > 1:
        # Build ID→index lookup for O(1) access
        id_to_index = {ids[i]: i for i in range(len(ids))}
        # Hoisted: the collection isn't mutated until after this loop,
        # so the count is constant.
        n_results = min(5, total_count)
        for old_idx in compact_old_indices:
            if ids[old_idx] in to_delete:
                continue  # already marked
            old_doc = documents[old_idx]
            if not old_doc:
                continue
            # Query for nearest neighbors to this document
            try:
                results = col.query(
                    query_texts=[old_doc[:500]],
                    n_results=n_results
                )
                has_newer_duplicate = False
                for j in range(len(results["ids"][0])):
                    match_id = results["ids"][0][j]
                    match_dist = results["distances"][0][j] if results.get("distances") else 1.0
                    # Skip self, and skip matches already slated for deletion —
                    # a doomed record can't serve as the surviving copy.
                    if match_id == ids[old_idx] or match_id in to_delete:
                        continue
                    # Check if near-duplicate (cosine distance < threshold)
                    if match_dist < DUPLICATE_THRESHOLD:
                        # Confirm the match is newer
                        match_meta_idx = id_to_index.get(match_id)
                        if match_meta_idx is not None:
                            match_ts = metadatas[match_meta_idx].get("timestamp", "")
                            old_ts = metadatas[old_idx].get("timestamp", "")
                            if match_ts > old_ts:
                                has_newer_duplicate = True
                                break
                if has_newer_duplicate:
                    to_delete.add(ids[old_idx])
                    report["compact_dup"] += 1
            except Exception:
                # Best-effort per document: a failed similarity query just
                # means this record is kept until the next run.
                continue

    # ── Execute deletions ──
    if to_delete:
        col.delete(ids=list(to_delete))

    # ── Report ──
    total = len(to_delete)
    if total > 0:
        parts = []
        if report["eval_old"]:
            parts.append(f"{report['eval_old']} old agent_eval(s)")
        if report["compact_dup"]:
            parts.append(f"{report['compact_dup']} compact duplicate(s)")
        print(f"[cortex cleanup] Pruned {total}: {', '.join(parts)}")
    else:
        print("[cortex cleanup] Nothing to prune")
except Exception as e:
    print(f"[cortex cleanup] Error: {e}")
PYEOF