#!/usr/bin/env node
/**
 * Interactive DOCX output reviewer.
 *
 * Opens each benchmark output file one at a time in Word/Preview.
 * Shows an AppleScript dialog explaining what to look for.
 * Waits for you to close the dialog before opening the next file.
 *
 * Requires macOS (uses `open` and `osascript`).
 *
 * Usage: node evals/scripts/review-docx-outputs.mjs
 */

import { existsSync, readFileSync } from 'node:fs';
import { execFileSync, execSync } from 'node:child_process';
import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';

// Locate the latest benchmark results relative to this script's own directory.
const __dirname = dirname(fileURLToPath(import.meta.url));
const RESULTS = resolve(__dirname, '../results/benchmark/latest.json');

if (!existsSync(RESULTS)) {
  console.error('No results at', RESULTS);
  process.exit(1);
}

// Parsed benchmark results. A truncated or hand-edited file should end the
// script with a readable message rather than a raw JSON.parse stack trace.
let d;
try {
  d = JSON.parse(readFileSync(RESULTS, 'utf8'));
} catch (err) {
  console.error(`Could not read results at ${RESULTS}: ${err.message}`);
  process.exit(1);
}

// Collect output files with context.
// Each entry describes one reviewable DOCX: who produced it, for which task,
// whether the run passed, and where the file lives on disk.
const files = [];
for (const r of d.results.results) {
  // `response.output` is expected to be a JSON string. One malformed row must
  // not abort the whole review, so unparseable output is skipped.
  let o;
  try {
    const raw = r.response?.output;
    o = typeof raw === 'string' ? JSON.parse(raw || '{}') : (raw ?? {});
  } catch {
    continue;
  }
  if (o === null || typeof o !== 'object') continue;

  // Only review files that were produced, still exist, and were kept on purpose.
  if (!o.outputFile || !existsSync(o.outputFile)) continue;
  if (!r.vars?.keepFile) continue;

  const provider = r.provider?.label || '?';
  const task = r.vars?.task || '?';
  const taskShort = (r.test?.description || task).substring(0, 50);
  const status = r.error ? 'ERROR' : r.success ? 'PASS' : 'FAIL';
  const steps = o.stepCount || 0;
  const path = o.pathUsed || '?';

  files.push({ provider, task, taskShort, status, steps, path, file: o.outputFile });
}

// Group by task for better review flow.
// Keys are the first 40 characters of the task text. The dictionary has no
// prototype so a task key such as "constructor" cannot collide with
// Object.prototype members.
const byTask = Object.create(null);
for (const f of files) {
  const key = f.task.substring(0, 40);
  if (!byTask[key]) byTask[key] = [];
  byTask[key].push(f);
}

// What to look for per task type.
// Each key is a keyword matched case-insensitively against the task text.
// Key order matters: lookups return the first key found in the task text.
const reviewGuide = {
  Replace: [
    'Is "SuperDoc Inc" (or "Apex Holdings") present throughout?',
    'Is the old name ("Iqidis") completely gone?',
    'Is "TechVentures" still intact (collateral)?',
    'Is the formatting preserved? (bold, font, size unchanged)',
    'Are headings, numbering, and styles intact?',
  ],
  section: [
    'Is the "Force Majeure" heading present at the end?',
    'Does it have Heading 1 style (large, bold)?',
    'Is the body paragraph below the heading?',
    'Is the rest of the document unchanged?',
  ],
  placeholder: [
    'Is "Jane Smith" present where "[Candidate Name]" was?',
    'Are ALL occurrences replaced (check header, body, signature)?',
    'Is formatting preserved around the replacement?',
  ],
  table: [
    'Is there a summary table at the end?',
    'Does it have the correct columns and data?',
    'Does it have borders and proper alignment?',
    'Is the rest of the document unchanged?',
  ],
  bold: [
    'Is "TechVentures LLC" displayed in bold?',
    'Are ALL occurrences bold (not just the first)?',
    'Is the text content unchanged (no extra/missing words)?',
  ],
  tracked: [
    'Is there a tracked change showing $500,000 → $750,000?',
    'Is it shown as a suggestion (not a direct edit)?',
    'Enable "Track Changes" view in Word to verify.',
    'Is the rest of the document unchanged?',
  ],
  comment: [
    'Is there a comment on the indemnification clause?',
    'Does the comment text mention "legal review" or similar?',
    'Is the comment anchored to the right text?',
    'Open the Comments pane in Word to verify.',
  ],
  heading: [
    'Is there a properly styled heading (Heading 1)?',
    'Does it appear at the right position in the document?',
    'Is the heading text correct?',
  ],
};

/**
 * Pick the review checklist for a task.
 *
 * Keys of `guides` are matched case-insensitively as substrings of the task
 * text; the first key (in insertion order) that matches wins.
 *
 * @param {string} task - Task description; non-string values are stringified.
 * @param {Record<string, string[]>} [guides=reviewGuide] - Keyword-to-checklist map.
 * @returns {string[]} The matching checklist, or a generic two-item fallback.
 */
function getGuide(task, guides = reviewGuide) {
  // Lower-case once instead of once per key.
  const haystack = String(task ?? '').toLowerCase();
  for (const [key, guide] of Object.entries(guides)) {
    if (haystack.includes(key.toLowerCase())) return guide;
  }
  return ['Check that the edit was applied correctly.', 'Check that unrelated content is unchanged.'];
}

/**
 * Show a blocking dialog via `osascript` with "Skip Rest" and "Next" buttons.
 *
 * The command is run with an argument array (no shell), and both title and
 * message are escaped for an AppleScript string literal, so quotes,
 * apostrophes and backslashes in provider or task names cannot break or
 * inject into the command.
 *
 * @param {string} title - Dialog window title.
 * @param {string} message - Dialog body. Line breaks may be real newlines or
 *   the literal two-character sequence backslash-n (accepted for existing callers).
 * @param {Function} [exec=execFileSync] - Runner with the `execFileSync`
 *   signature; override only for testing.
 * @returns {'skip'|'next'} 'skip' if "Skip Rest" was chosen or the dialog
 *   could not be shown, otherwise 'next'.
 */
function showDialog(title, message, exec = execFileSync) {
  // Escape text for an AppleScript double-quoted string literal.
  const quote = (text) =>
    String(text)
      .replace(/\\n/g, '\n') // normalise literal backslash-n line breaks first
      .replace(/\\/g, '\\\\') // then escape any remaining backslashes
      .replace(/"/g, '\\"')
      .replace(/\r?\n/g, '\\n');

  const script = `button returned of (display dialog "${quote(message)}" with title "${quote(title)}" buttons {"Skip Rest", "Next"} default button "Next")`;

  try {
    const result = String(exec('osascript', ['-e', script], { encoding: 'utf8' })).trim();
    return result === 'Skip Rest' ? 'skip' : 'next';
  } catch (err) {
    // Still best-effort (stop the review), but say why instead of failing silently.
    console.error(`Could not show review dialog: ${err?.message ?? err}`);
    return 'skip';
  }
}

/**
 * Open a file with the `open` command.
 *
 * The path is passed as a separate argument (no shell), so spaces, quotes,
 * `$` and backticks in the path are taken literally.
 *
 * @param {string} path - Path of the file to open.
 * @param {Function} [exec=execFileSync] - Runner with the `execFileSync`
 *   signature; override only for testing.
 * @throws If the `open` command fails.
 */
function openFile(path, exec = execFileSync) {
  exec('open', [path]);
}

// Review flow: walk every task group, open each output file, and block on a
// dialog that lists what to check before moving to the next file.
const taskEntries = Object.entries(byTask);
const total = files.length;
let fileIndex = 0;

// Display order within a task: SuperDoc paths first, then the baselines, for
// easy comparison. Paths not listed here sort last (rank 4).
const pathOrder = { 'superdoc-skill': 0, 'superdoc-cli': 1, raw: 2, 'vendor-skill': 3 };

console.log(`Found ${total} output files across ${taskEntries.length} tasks.`);
console.log('Opening files one at a time with review guidance.');
console.log('');

for (const [, taskFiles] of taskEntries) {
  taskFiles.sort((a, b) => (pathOrder[a.path] ?? 4) - (pathOrder[b.path] ?? 4));

  for (const f of taskFiles) {
    fileIndex++;
    const guide = getGuide(f.task);

    const title = `[${fileIndex}/${total}] ${f.status} — ${f.provider}`;
    const message = [
      `Task: ${f.taskShort}`,
      `Provider: ${f.provider}`,
      `Status: ${f.status} | Path: ${f.path} | Steps: ${f.steps}`,
      '',
      'What to look for:',
      ...guide.map((g, i) => `${i + 1}. ${g}`),
      '',
      `File: ${f.file.split('/').pop()}`,
    ].join('\n');

    console.log(`[${fileIndex}/${total}] ${f.status} ${f.provider.padEnd(22)} ${f.taskShort}`);

    // Open the document first so it is visible while the dialog is up.
    openFile(f.file);
    const action = showDialog(title, message);

    if (action === 'skip') {
      console.log('Skipping remaining files.');
      process.exit(0);
    }
  }
}

console.log('');
console.log('Review complete!');