-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathadversarial-benchmark.sh
More file actions
executable file
·43 lines (34 loc) · 1.07 KB
/
adversarial-benchmark.sh
File metadata and controls
executable file
·43 lines (34 loc) · 1.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/bin/bash
# Adversarial Test Suite: 40 questions (20 planted errors + 10 counterfactuals + 10 dental)
# Run AFTER TruthfulQA benchmark completes
#
# Usage: adversarial-benchmark.sh [START_NUM]
#   START_NUM  1-based index of the first question to run (default: 1), so an
#              interrupted run can be resumed without redoing earlier questions.
#
# Reads adversarial-suite.json (a JSON array of objects with "question" and
# "type" keys) from the directory containing this script, runs
# `node dist/index.js ask` on each question from that directory, and saves the
# combined stdout/stderr of each run to adversarial-results/adv<N>.txt.
#
# `set -e` is deliberately NOT used: one failing question must not abort the
# rest of the suite. Critical setup steps are checked explicitly instead.
set -u

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# CDPATH is cleared so `cd` cannot echo a directory name into the captured path.
SCRIPT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)" \
  || die "error: cannot determine script directory"
QUESTIONS="$SCRIPT_DIR/adversarial-suite.json"
RESULTS_DIR="$SCRIPT_DIR/adversarial-results"

START_NUM=${1:-1}
if ! [[ "$START_NUM" =~ ^[0-9]+$ ]]; then
  die "usage: ${0##*/} [START_NUM]  (START_NUM must be a positive integer)"
fi
START_NUM=$((10#$START_NUM))  # force base 10 so e.g. "08" is not read as octal
# Index 0 would map to qs[-1] (the LAST question) instead of failing, so reject it.
(( START_NUM >= 1 )) || die "error: START_NUM must be >= 1"

# Fail fast on missing prerequisites instead of failing once per question.
command -v python3 >/dev/null || die "error: python3 not found in PATH"
command -v node >/dev/null || die "error: node not found in PATH"
[[ -f "$QUESTIONS" ]] || die "error: question file not found: $QUESTIONS"
cd -- "$SCRIPT_DIR" || die "error: cannot cd to $SCRIPT_DIR"
[[ -f dist/index.js ]] || die "error: dist/index.js not found in $SCRIPT_DIR"
mkdir -p -- "$RESULTS_DIR" || die "error: cannot create $RESULTS_DIR"

# Load the whole suite with a single Python process. The file path is passed
# via argv (never interpolated into Python source), and the output is a stream
# of NUL-terminated fields: <count> <type1> <question1> <type2> <question2> ...
# Everything is written in one go at the end, so any Python error (bad JSON,
# missing key) yields NO output at all rather than a truncated suite.
load_questions_py='
import json, sys

with open(sys.argv[1], "rb") as fh:
    entries = json.load(fh)

def field(value):
    # NUL is the field separator and cannot be stored in a bash string anyway.
    return str(value).replace("\0", "").rstrip("\n")

fields = [str(len(entries))]
for entry in entries:
    fields.append(field(entry["type"]))
    fields.append(field(entry["question"]))
sys.stdout.write("".join(f + "\0" for f in fields))
'

records=()
while IFS= read -r -d '' record; do
  records+=("$record")
done < <(python3 -c "$load_questions_py" "$QUESTIONS")

(( ${#records[@]} > 0 )) || die "error: failed to load questions from $QUESTIONS"
TOTAL=${records[0]}
[[ "$TOTAL" =~ ^[0-9]+$ ]] && (( ${#records[@]} == 2 * TOTAL + 1 )) \
  || die "error: malformed question data loaded from $QUESTIONS"

echo "⚔️ Adversarial Test Suite: $TOTAL questions"
echo "📁 Results: $RESULTS_DIR"
echo ""

failed=0
# Arithmetic loop instead of `seq`: BSD/macOS seq counts DOWN when the start
# exceeds the end, which would run questions with bogus indices.
for (( i = START_NUM; i <= TOTAL; i++ )); do
  TYPE=${records[2 * i - 1]}
  QUESTION=${records[2 * i]}

  printf '%s\n' "[$i/$TOTAL] [$TYPE] $QUESTION"
  node dist/index.js ask "$QUESTION" --lang en --verbose 2>&1 \
    | tee "$RESULTS_DIR/adv${i}.txt"
  # PIPESTATUS must be copied immediately; tee's status would otherwise hide
  # a failing node process.
  statuses=("${PIPESTATUS[@]}")
  if (( statuses[0] != 0 || statuses[1] != 0 )); then
    failed=$((failed + 1))
    printf 'warning: question %s failed (node exit %s, tee exit %s)\n' \
      "$i" "${statuses[0]}" "${statuses[1]}" >&2
  fi

  echo ""
  echo "---"
  echo ""
  sleep 2  # pause between questions
done

echo "✅ Adversarial Suite complete! $TOTAL questions processed."
echo "📁 Results in: $RESULTS_DIR"
# Best-effort run: failures are reported, but the exit status stays 0 as before.
if (( failed > 0 )); then
  printf 'warning: %s question(s) reported errors; see %s\n' \
    "$failed" "$RESULTS_DIR" >&2
fi