From b5a133247e44d8cf21bfe0437c79dd4b3d27a9c0 Mon Sep 17 00:00:00 2001 From: AlonePenguin <187998801+AlonePenguin@users.noreply.github.com> Date: Mon, 1 Jun 2026 15:06:59 -0400 Subject: [PATCH] Add chemical identity stereochemistry guard --- .../README.md | 44 +++ .../demo.js | 112 ++++++ .../index.js | 355 ++++++++++++++++++ .../make-demo-video.js | 92 +++++ .../package.json | 15 + .../reports/chemical-identity-dashboard.svg | 30 ++ .../clean-chemical-identity-report.json | 43 +++ .../reports/demo.mp4 | Bin 0 -> 10016 bytes .../risky-chemical-identity-handoff.md | 45 +++ .../risky-chemical-identity-report.json | 246 ++++++++++++ .../sample-data.js | 188 ++++++++++ .../test.js | 41 ++ .../verify-video.js | 37 ++ 13 files changed, 1248 insertions(+) create mode 100644 chemical-identity-stereochemistry-guard/README.md create mode 100644 chemical-identity-stereochemistry-guard/demo.js create mode 100644 chemical-identity-stereochemistry-guard/index.js create mode 100644 chemical-identity-stereochemistry-guard/make-demo-video.js create mode 100644 chemical-identity-stereochemistry-guard/package.json create mode 100644 chemical-identity-stereochemistry-guard/reports/chemical-identity-dashboard.svg create mode 100644 chemical-identity-stereochemistry-guard/reports/clean-chemical-identity-report.json create mode 100644 chemical-identity-stereochemistry-guard/reports/demo.mp4 create mode 100644 chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-handoff.md create mode 100644 chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-report.json create mode 100644 chemical-identity-stereochemistry-guard/sample-data.js create mode 100644 chemical-identity-stereochemistry-guard/test.js create mode 100644 chemical-identity-stereochemistry-guard/verify-video.js diff --git a/chemical-identity-stereochemistry-guard/README.md b/chemical-identity-stereochemistry-guard/README.md new file mode 100644 index 00000000..f1c8b009 --- /dev/null +++ 
b/chemical-identity-stereochemistry-guard/README.md @@ -0,0 +1,44 @@ +# Chemical Identity Stereochemistry Guard + +This self-contained module adds a deterministic chemical identity and stereochemistry gate for SCIBASE knowledge graph workflows. It is scoped to issue #17 and focuses on whether compound nodes and compound graph edges are safe to merge or publish in recommendations. + +The guard does not call external APIs, payment systems, identity providers, live projects, or private data stores. Fixtures are synthetic and every check runs with Node built-ins. + +## What It Checks + +- InChIKey format readiness. +- Isomeric SMILES presence. +- Missing stereochemistry for compounds that require stereochemical identity. +- Salt, hydrate, or other form conflation with parent freebase nodes. +- Isotope-label metadata for labeled tracers. +- DOI-backed chemical identity evidence. +- Synonym collisions across unrelated chemical skeletons. +- `same_as` edges that merge different skeletons, forms, or stereochemical records. +- Assay-context completeness before graph recommendations. +- DOI-backed relationship evidence for compound graph edges. 
+ +## Local Validation + +```sh +npm --prefix chemical-identity-stereochemistry-guard run check +npm --prefix chemical-identity-stereochemistry-guard test +npm --prefix chemical-identity-stereochemistry-guard run demo +npm --prefix chemical-identity-stereochemistry-guard run make-demo-video +npm --prefix chemical-identity-stereochemistry-guard run verify-video +``` + +## Generated Artifacts + +Running the demo writes: + +- `reports/clean-chemical-identity-report.json` +- `reports/risky-chemical-identity-report.json` +- `reports/risky-chemical-identity-handoff.md` +- `reports/chemical-identity-dashboard.svg` +- `reports/demo.mp4` + +The risky packet intentionally demonstrates release blockers: missing stereochemistry, invalid InChIKeys, missing isotope labels, synonym collisions, `same_as` skeleton mismatch, salt-form conflation, incomplete assay context, missing edge evidence, and missing graph nodes. + +## Issue Fit + +This is a distinct Scientific Knowledge Graph Integration slice. It complements the existing broad extraction/navigation, ontology drift, aliasing, biological accession crosswalk, measurement harmonization, geospatial provenance, sample custody/cold-chain, protocol deviation/reagent lot, software dependency, image metadata, funding provenance, temporal consistency, and recommendation visibility/diversity work by focusing specifically on chemical identity and stereochemistry before graph merge or recommendation publication. 
diff --git a/chemical-identity-stereochemistry-guard/demo.js b/chemical-identity-stereochemistry-guard/demo.js
new file mode 100644
index 00000000..683d45dc
--- /dev/null
+++ b/chemical-identity-stereochemistry-guard/demo.js
@@ -0,0 +1,126 @@
+// Demo driver: evaluates the clean and risky sample packets and writes the JSON
+// reports, the Markdown handoff, and the SVG dashboard into ./reports.
+const fs = require("node:fs");
+const path = require("node:path");
+const { evaluateChemicalIdentityGraph } = require("./index");
+const { cleanPacket, riskyPacket } = require("./sample-data");
+
+const reportsDir = path.join(__dirname, "reports");
+fs.mkdirSync(reportsDir, { recursive: true });
+
+const clean = evaluateChemicalIdentityGraph(cleanPacket);
+const risky = evaluateChemicalIdentityGraph(riskyPacket);
+
+/** Writes `value` as pretty-printed JSON (with a trailing newline) into the reports directory. */
+function writeJson(name, value) {
+  fs.writeFileSync(path.join(reportsDir, name), `${JSON.stringify(value, null, 2)}\n`);
+}
+
+/** Escapes the XML metacharacters &, <, > and " so text is safe inside SVG markup. */
+function escapeXml(value) {
+  return String(value)
+    .replace(/&/g, "&amp;")
+    .replace(/</g, "&lt;")
+    .replace(/>/g, "&gt;")
+    .replace(/"/g, "&quot;");
+}
+
+/** Renders up to twelve findings as Markdown table rows (severity, code, remediation). */
+function findingTable(report) {
+  return report.findings
+    .slice(0, 12)
+    .map((finding) => `| ${finding.severity} | ${finding.code} | ${finding.action} |`)
+    .join("\n");
+}
+
+/** Writes the Markdown curator handoff for `report` to risky-chemical-identity-handoff.md. */
+function writeHandoff(report) {
+  const lines = [
+    "# Chemical Identity Graph Handoff",
+    "",
+    `Decision: ${report.summary.decision}`,
+    `Compounds reviewed: ${report.summary.compoundsReviewed}`,
+    `Edges reviewed: ${report.summary.edgesReviewed}`,
+    `Held nodes: ${report.summary.heldNodes}`,
+    `Held edges: ${report.summary.heldEdges}`,
+    `Audit digest: ${report.summary.auditDigest}`,
+    "",
+    "## Priority Findings",
+    "",
+    "| Severity | Code | Remediation |",
+    "| --- | --- | --- |",
+    findingTable(report),
+    "",
+    "## Node Actions",
+    "",
+    "| Compound | Status | Actions |",
+    "| --- | --- | --- |",
+    ...report.compounds.map((node) => `| ${node.id} | ${node.status} | ${node.actions.join(", ") || "none"} |`),
+    "",
+    "## Edge Actions",
+    "",
+    "| Edge | Status | Actions |",
+    "| --- | --- | --- |",
+    ...report.edges.map((edge) => `| ${edge.id} | ${edge.status} | ${edge.actions.join(", ") || "none"} |`),
+    ""
+  ];
+  fs.writeFileSync(path.join(reportsDir, "risky-chemical-identity-handoff.md"), `${lines.join("\n")}\n`);
+}
+
+/** Scales `value / ceiling` onto the 300px bar track, clamped so larger packets cannot overflow it. */
+function barWidth(value, ceiling) {
+  const ratio = Math.min(1, Math.max(0, value / ceiling));
+  return Math.round(ratio * 300);
+}
+
+/** Writes the SVG dashboard: three summary bars plus the top eight risky findings, colored by severity. */
+function writeSvg(cleanReport, riskyReport) {
+  const width = 960;
+  const height = 540;
+  const heldTotal = riskyReport.summary.heldNodes + riskyReport.summary.heldEdges;
+  const cleanWidth = barWidth(cleanReport.summary.findingCount, 16);
+  const criticalWidth = barWidth(riskyReport.summary.criticalFindings, 5);
+  const heldWidth = barWidth(heldTotal, 8);
+  const rows = riskyReport.findings.slice(0, 8).map((finding, index) => {
+    const y = 244 + index * 26;
+    const color = finding.severity === "critical" ? "#991b1b" : finding.severity === "high" ? "#dc2626" : finding.severity === "medium" ? "#d97706" : "#64748b";
+    return `<text x="536" y="${y}" font-size="14" fill="${color}">${escapeXml(finding.code)}</text>`;
+  }).join("\n");
+
+  const svg = `<svg xmlns="http://www.w3.org/2000/svg" width="${width}" height="${height}" viewBox="0 0 ${width} ${height}" font-family="Arial, sans-serif" role="img" aria-label="Chemical identity stereochemistry guard dashboard">
+  <rect width="${width}" height="${height}" fill="#f8fafc"/>
+  <rect x="48" y="44" width="864" height="452" rx="12" fill="#ffffff" stroke="#e2e8f0"/>
+  <rect x="48" y="44" width="864" height="8" fill="#0f172a"/>
+  <text x="96" y="92" font-size="24" font-weight="700" fill="#0f172a">Chemical identity stereochemistry guard</text>
+  <text x="96" y="118" font-size="14" fill="#475569">Checks compound graph nodes before merge or recommendation publication.</text>
+  <text x="96" y="160" font-size="14" fill="#0f172a">Clean graph findings</text>
+  <rect x="96" y="172" width="300" height="24" rx="6" fill="#e2e8f0"/>
+  <rect x="96" y="172" width="${cleanWidth}" height="24" rx="6" fill="#10b981"/>
+  <text x="412" y="190" font-size="14" fill="#0f172a">${cleanReport.summary.findingCount} findings</text>
+  <text x="96" y="240" font-size="14" fill="#0f172a">Risky critical findings</text>
+  <rect x="96" y="252" width="300" height="24" rx="6" fill="#e2e8f0"/>
+  <rect x="96" y="252" width="${criticalWidth}" height="24" rx="6" fill="#ef4444"/>
+  <text x="412" y="270" font-size="14" fill="#0f172a">${riskyReport.summary.criticalFindings} critical</text>
+  <text x="96" y="320" font-size="14" fill="#0f172a">Held nodes and edges</text>
+  <rect x="96" y="332" width="300" height="24" rx="6" fill="#e2e8f0"/>
+  <rect x="96" y="332" width="${heldWidth}" height="24" rx="6" fill="#f59e0b"/>
+  <text x="412" y="350" font-size="14" fill="#0f172a">${heldTotal} held</text>
+  <rect x="512" y="176" width="368" height="288" rx="10" fill="#f8fafc" stroke="#e2e8f0"/>
+  <text x="536" y="214" font-size="16" font-weight="700" fill="#0f172a">Top blockers</text>
+  ${rows}
+  <text x="96" y="482" font-size="13" fill="#475569">Decision: ${escapeXml(riskyReport.summary.decision)} | ${riskyReport.summary.auditDigest.slice(0, 28)}...</text>
+</svg>
+`;
+  fs.writeFileSync(path.join(reportsDir, "chemical-identity-dashboard.svg"), svg);
+}
+
+writeJson("clean-chemical-identity-report.json", clean);
+writeJson("risky-chemical-identity-report.json", risky);
+writeHandoff(risky);
+writeSvg(clean, risky);
+
+console.log("Wrote chemical identity stereochemistry guard reports:");
+console.log(`- ${path.join(reportsDir, "clean-chemical-identity-report.json")}`);
+console.log(`- ${path.join(reportsDir, "risky-chemical-identity-report.json")}`);
+console.log(`- ${path.join(reportsDir, "risky-chemical-identity-handoff.md")}`);
+console.log(`- ${path.join(reportsDir, "chemical-identity-dashboard.svg")}`);
diff --git a/chemical-identity-stereochemistry-guard/index.js b/chemical-identity-stereochemistry-guard/index.js
new file mode 100644
index 00000000..151d2555
--- /dev/null
+++ b/chemical-identity-stereochemistry-guard/index.js
@@ -0,0 +1,407 @@
+const crypto = require("node:crypto");
+
+/** Returns `value` when it is an array, otherwise an empty array. */
+function asArray(value) {
+  return Array.isArray(value) ? value : [];
+}
+
+/** True for non-null, non-array objects, i.e. values usable as compound or edge records. */
+function isRecord(value) {
+  return value !== null && typeof value === "object" && !Array.isArray(value);
+}
+
+/** Serializes `value` as JSON with object keys sorted, so equal data always yields the same string. */
+function stableJson(value) {
+  if (Array.isArray(value)) {
+    return `[${value.map(stableJson).join(",")}]`;
+  }
+  if (value && typeof value === "object") {
+    return `{${Object.keys(value).sort().map((key) => `${JSON.stringify(key)}:${stableJson(value[key])}`).join(",")}}`;
+  }
+  return JSON.stringify(value);
+}
+
+/** Returns the hex SHA-256 digest of the stable JSON form of `value`. */
+function sha256(value) {
+  return crypto.createHash("sha256").update(stableJson(value)).digest("hex");
+}
+
+/** Maps a severity label to a sortable rank (critical 4 ... low 1); unknown labels rank 0. */
+function severityRank(severity) {
+  return { critical: 4, high: 3, medium: 2, low: 1 }[severity] || 0;
+}
+
+/** Appends one finding record to `findings`; `refs` is coerced to an array. */
+function addFinding(findings, severity, code, message, refs, action) {
+  findings.push({
+    severity,
+    code,
+    message,
+    refs: asArray(refs),
+    action
+  });
+}
+
+/** Trims and lower-cases `value`; null and undefined become "". */
+function normalize(value) {
+  return String(value || "").trim().toLowerCase();
+}
+
+/** Returns the first InChIKey block (the text before the first "-"), upper-cased. */
+function inchiSkeleton(inchiKey) {
+  const value = String(inchiKey || "").trim().toUpperCase();
+  return value.includes("-") ? value.split("-")[0] : value;
+}
+
+/** True when the SMILES string contains one of the marker characters "@", "/" or "\". */
+function hasStereoToken(smiles) {
+  return /[@/\\]/.test(String(smiles || ""));
+}
+
+/** Collapses a synonym to lower-case alphanumerics so spelling variants compare equal. */
+function synonymKey(value) {
+  return normalize(value).replace(/[^a-z0-9]/g, "");
+}
+
+/** True when `evidence` has a DOI starting with "10.<4-9 digits>/" and a truthy `sourceType`. */
+function evidenceOk(evidence) {
+  return Boolean(evidence && evidence.doi && /^10\.\d{4,9}\//.test(String(evidence.doi)) && evidence.sourceType);
+}
+
+/** Indexes compound records by their `id`. */
+function nodeById(nodes) {
+  return new Map(asArray(nodes).map((node) => [node.id, node]));
+}
+
+/**
+ * Runs the per-compound identity checks on `node`, appending to `findings` as it goes.
+ * Returns the compound summary `{ id, label, status, actions }`.
+ */
+function evaluateChemicalNode(node, packet, policy, findings) {
+  const refs = [node.id || "compound"];
+  const actions = [];
+
+  if (!node.inchiKey || !/^[A-Z]{14}-[A-Z]{10}-[A-Z]$/.test(String(node.inchiKey))) {
+    addFinding(
+      findings,
+      "high",
+      "INCHIKEY_FORMAT_INVALID",
+      `${node.id || "Compound"} has missing or invalid InChIKey evidence.`,
+      refs,
+      "attach_valid_inchikey_before_graph_merge"
+    );
+    actions.push("attach_valid_inchikey_before_graph_merge");
+  }
+
+  if (!node.isomericSmiles) {
+    addFinding(
+      findings,
+      "high",
+      "ISOMERIC_SMILES_MISSING",
+      `${node.id || "Compound"} lacks an isomeric SMILES string.`,
+      refs,
+      "attach_isomeric_smiles_before_recommendation"
+    );
+    actions.push("attach_isomeric_smiles_before_recommendation");
+  }
+
+  if (node.requiresStereochemistry === true && !hasStereoToken(node.isomericSmiles)) {
+    addFinding(
+      findings,
+      "critical",
+      "STEREOCHEMISTRY_MISSING",
+      `${node.id || "Compound"} requires stereochemistry but the graph node has no stereochemical markers.`,
+      refs,
+      "split_or_hold_compound_until_stereochemistry_is_explicit"
+    );
+    actions.push("split_or_hold_compound_until_stereochemistry_is_explicit");
+  }
+
+  if (node.form && node.parentCompoundId && normalize(node.form) !== "freebase") {
+    const parent = asArray(packet.compounds).find((candidate) => candidate.id === node.parentCompoundId);
+    if (parent && inchiSkeleton(parent.inchiKey) === inchiSkeleton(node.inchiKey) && normalize(parent.form) === "freebase") {
+      addFinding(
+        findings,
+        "medium",
+        "SALT_FORM_NEEDS_SEPARATE_NODE",
+        `${node.id || "Compound"} is a ${node.form} form that should not be silently merged with the freebase node.`,
+        refs.concat([parent.id]),
+        "retain_salt_or_hydrate_form_as_explicit_graph_node"
+      );
+      actions.push("retain_salt_or_hydrate_form_as_explicit_graph_node");
+    }
+  }
+
+  if (node.isotopeLabeled === true && !node.isotopeLabel) {
+    addFinding(
+      findings,
+      "high",
+      "ISOTOPE_LABEL_MISSING",
+      `${node.id || "Compound"} is isotope-labeled but has no isotope label metadata.`,
+      refs,
+      "attach_isotope_label_before_assay_edge_publication"
+    );
+    actions.push("attach_isotope_label_before_assay_edge_publication");
+  }
+
+  if (!evidenceOk(node.evidence)) {
+    addFinding(
+      findings,
+      "high",
+      "CHEMICAL_IDENTITY_EVIDENCE_MISSING",
+      `${node.id || "Compound"} lacks DOI-backed identity evidence.`,
+      refs,
+      "attach_doi_backed_identity_evidence"
+    );
+    actions.push("attach_doi_backed_identity_evidence");
+  }
+
+  const synonymMap = new Map();
+  for (const synonym of asArray(node.synonyms)) {
+    const key = synonymKey(synonym);
+    if (!key) {
+      continue;
+    }
+    synonymMap.set(key, (synonymMap.get(key) || 0) + 1);
+  }
+  for (const [key, count] of synonymMap.entries()) {
+    if (count > 1) {
+      addFinding(
+        findings,
+        "low",
+        "DUPLICATE_NODE_SYNONYM",
+        `${node.id || "Compound"} repeats synonym key ${key}.`,
+        refs,
+        "deduplicate_compound_synonyms"
+      );
+      actions.push("deduplicate_compound_synonyms");
+    }
+  }
+
+  // Status is derived from the action names: any "hold"/"split" action holds the node.
+  return {
+    id: node.id,
+    label: node.label || node.id,
+    status: actions.some((action) => action.includes("hold") || action.includes("split")) ? "hold_identity_node" : actions.length > 0 ? "curator_review" : "publishable",
+    actions: [...new Set(actions)]
+  };
+}
+
+/**
+ * Adds a critical finding for every normalized synonym that is shared by compounds
+ * whose InChIKey first blocks differ.
+ */
+function evaluateSynonymCollisions(compounds, findings) {
+  const owners = new Map();
+  for (const node of asArray(compounds)) {
+    for (const synonym of asArray(node.synonyms)) {
+      const key = synonymKey(synonym);
+      if (!key) {
+        continue;
+      }
+      if (!owners.has(key)) {
+        owners.set(key, []);
+      }
+      owners.get(key).push(node);
+    }
+  }
+
+  for (const [key, nodes] of owners.entries()) {
+    const skeletons = new Set(nodes.map((node) => inchiSkeleton(node.inchiKey)).filter(Boolean));
+    if (nodes.length > 1 && skeletons.size > 1) {
+      addFinding(
+        findings,
+        "critical",
+        "SYNONYM_COLLISION_ACROSS_COMPOUNDS",
+        `Synonym ${key} resolves to multiple chemical skeletons.`,
+        // A compound that repeats the synonym appears several times in `nodes`; list its id once.
+        [...new Set(nodes.map((node) => node.id))],
+        "quarantine_synonym_until_curator_disambiguates_compounds"
+      );
+    }
+  }
+}
+
+/**
+ * Runs the edge checks (endpoints, same_as merges, assay context, DOI evidence),
+ * appending to `findings`. Returns the edge summary `{ id, relationship, status, actions }`.
+ */
+function evaluateGraphEdge(edge, nodes, policy, findings) {
+  const refs = [edge.id || "edge", edge.source, edge.target].filter(Boolean);
+  const source = nodes.get(edge.source);
+  const target = nodes.get(edge.target);
+  const actions = [];
+
+  // tested_in_assay edges may point at a target that is not a compound node; this early return omits `relationship`.
+  if (!source || (!target && edge.relationship !== "tested_in_assay")) {
+    addFinding(
+      findings,
+      "critical",
+      "CHEMICAL_EDGE_NODE_MISSING",
+      `${edge.id || "Edge"} references a missing chemical node.`,
+      refs,
+      "block_edge_until_both_chemical_nodes_exist"
+    );
+    actions.push("block_edge_until_both_chemical_nodes_exist");
+    return { id: edge.id, status: "hold_graph_edge", actions };
+  }
+
+  if (edge.relationship === "same_as") {
+    const sameSkeleton = inchiSkeleton(source.inchiKey) === inchiSkeleton(target.inchiKey);
+    if (!sameSkeleton) {
+      addFinding(
+        findings,
+        "critical",
+        "SAME_AS_SKELETON_MISMATCH",
+        `${edge.id || "Edge"} marks two different chemical skeletons as same_as.`,
+        refs,
+        "split_same_as_edge_into_distinct_compound_nodes"
+      );
+      actions.push("split_same_as_edge_into_distinct_compound_nodes");
+    }
+    if (normalize(source.form) !== normalize(target.form) && policy.mergeSaltForms !== true) {
+      addFinding(
+        findings,
+        "medium",
+        "SAME_AS_FORM_CONFLATION",
+        `${edge.id || "Edge"} conflates ${source.form || "unspecified"} and ${target.form || "unspecified"} forms.`,
+        refs,
+        "model_compound_form_relationship_instead_of_same_as"
+      );
+      actions.push("model_compound_form_relationship_instead_of_same_as");
+    }
+    if (source.requiresStereochemistry || target.requiresStereochemistry) {
+      if (source.isomericSmiles !== target.isomericSmiles) {
+        addFinding(
+          findings,
+          "high",
+          "SAME_AS_STEREOCHEMISTRY_MISMATCH",
+          `${edge.id || "Edge"} links stereochemically distinct compound records.`,
+          refs,
+          "hold_same_as_edge_for_stereochemistry_review"
+        );
+        actions.push("hold_same_as_edge_for_stereochemistry_review");
+      }
+    }
+  }
+
+  if (edge.relationship === "tested_in_assay") {
+    if (!edge.assayContext || !edge.assayContext.matrix || !edge.assayContext.concentrationUnit) {
+      addFinding(
+        findings,
+        "high",
+        "ASSAY_CONTEXT_INCOMPLETE",
+        `${edge.id || "Edge"} lacks assay matrix or concentration-unit context.`,
+        refs,
+        "attach_assay_context_before_recommendation"
+      );
+      actions.push("attach_assay_context_before_recommendation");
+    }
+    if (edge.assayContext && edge.assayContext.compatibleForms && !asArray(edge.assayContext.compatibleForms).map(normalize).includes(normalize(source.form || "freebase"))) {
+      addFinding(
+        findings,
+        "medium",
+        "ASSAY_FORM_CONTEXT_MISMATCH",
+        `${edge.id || "Edge"} uses a compound form that is incompatible with the assay context.`,
+        refs,
+        "suppress_assay_recommendation_until_form_context_matches"
+      );
+      actions.push("suppress_assay_recommendation_until_form_context_matches");
+    }
+  }
+
+  if (!evidenceOk(edge.evidence)) {
+    addFinding(
+      findings,
+      "high",
+      "EDGE_EVIDENCE_DOI_MISSING",
+      `${edge.id || "Edge"} lacks DOI-backed relationship evidence.`,
+      refs,
+      "attach_doi_backed_edge_evidence"
+    );
+    actions.push("attach_doi_backed_edge_evidence");
+  }
+
+  return {
+    id: edge.id,
+    relationship: edge.relationship,
+    status: actions.some((action) => action.includes("block") || action.includes("hold") || action.includes("split")) ? "hold_graph_edge" : actions.length > 0 ? "curator_review" : "publishable",
+    actions: [...new Set(actions)]
+  };
+}
+
+/**
+ * Evaluates a chemical identity packet and returns `{ summary, compounds, edges, findings }`.
+ * A missing packet is treated as empty; compound or edge entries that are not objects
+ * are reported as a GRAPH_RECORD_MALFORMED finding instead of throwing.
+ */
+function evaluateChemicalIdentityGraph(packet) {
+  const input = isRecord(packet) ? packet : {};
+  const findings = [];
+  const policy = {
+    mergeSaltForms: false,
+    ...(input.policy || {})
+  };
+  const compounds = asArray(input.compounds).filter(isRecord);
+  const edges = asArray(input.edges).filter(isRecord);
+  const malformedCount = asArray(input.compounds).length + asArray(input.edges).length - compounds.length - edges.length;
+  if (malformedCount > 0) {
+    addFinding(
+      findings,
+      "high",
+      "GRAPH_RECORD_MALFORMED",
+      `${malformedCount} compound or edge entries are not objects and were skipped.`,
+      [],
+      "repair_malformed_graph_records_before_review"
+    );
+  }
+  const graph = { ...input, compounds, edges };
+  const nodeMap = nodeById(compounds);
+  const compoundSummaries = compounds.map((node) => evaluateChemicalNode(node, graph, policy, findings));
+  evaluateSynonymCollisions(compounds, findings);
+  const edgeSummaries = edges.map((edge) => evaluateGraphEdge(edge, nodeMap, policy, findings));
+
+  const criticalFindings = findings.filter((finding) => finding.severity === "critical").length;
+  const highOrCriticalFindings = findings.filter((finding) => severityRank(finding.severity) >= severityRank("high")).length;
+  const heldNodes = compoundSummaries.filter((node) => node.status === "hold_identity_node").length;
+  const heldEdges = edgeSummaries.filter((edge) => edge.status === "hold_graph_edge").length;
+  let decision = "publish_chemical_graph";
+
+  if (criticalFindings > 0 || heldNodes > 0 || heldEdges > 0) {
+    decision = "hold_chemical_graph";
+  } else if (highOrCriticalFindings > 0) {
+    decision = "route_to_chemical_curator";
+  }
+
+  // The digest covers the policy, the summaries and the sorted finding codes, not the finding messages.
+  const auditSubject = {
+    graphId: input.graphId,
+    policy,
+    compoundSummaries,
+    edgeSummaries,
+    findingCodes: findings.map((finding) => finding.code).sort()
+  };
+
+  return {
+    summary: {
+      decision,
+      graphId: input.graphId || "chemical-graph",
+      compoundsReviewed: compoundSummaries.length,
+      edgesReviewed: edgeSummaries.length,
+      heldNodes,
+      heldEdges,
+      findingCount: findings.length,
+      criticalFindings,
+      highOrCriticalFindings,
+      auditDigest: `sha256:${sha256(auditSubject)}`
+    },
+    compounds: compoundSummaries,
+    edges: edgeSummaries,
+    findings: findings.sort((a, b) => severityRank(b.severity) - severityRank(a.severity) || a.code.localeCompare(b.code))
+  };
+}
+
+module.exports = {
+  evaluateChemicalIdentityGraph,
+  sha256
+};
diff --git a/chemical-identity-stereochemistry-guard/make-demo-video.js b/chemical-identity-stereochemistry-guard/make-demo-video.js
new file mode 100644
index 00000000..a0658a7d
--- /dev/null
+++ b/chemical-identity-stereochemistry-guard/make-demo-video.js
@@ -0,0 +1,110 @@
+// Renders the demo animation as raw PPM frames and encodes them to reports/demo.mp4
+// with ffmpeg (binary taken from FFMPEG_PATH, falling back to "ffmpeg" on PATH).
+const fs = require("node:fs");
+const path = require("node:path");
+const { spawnSync } = require("node:child_process");
+const { evaluateChemicalIdentityGraph } = require("./index");
+const { cleanPacket, riskyPacket } = require("./sample-data");
+
+const reportsDir = path.join(__dirname, "reports");
+const framesDir = path.join(reportsDir, "frames");
+fs.mkdirSync(framesDir, { recursive: true });
+
+const clean = evaluateChemicalIdentityGraph(cleanPacket);
+const risky = evaluateChemicalIdentityGraph(riskyPacket);
+const width = 960;
+const height = 540;
+const frames = 72;
+const fps = 18;
+
+/** Clamps `value` into the range [0, 1]. */
+function clamp01(value) {
+  return Math.min(1, Math.max(0, value));
+}
+
+/** Writes one RGB pixel into the frame buffer; coordinates outside the frame are ignored. */
+function setPixel(buffer, x, y, r, g, b) {
+  if (x < 0 || y < 0 || x >= width || y >= height) {
+    return;
+  }
+  const offset = (y * width + x) * 3;
+  buffer[offset] = r;
+  buffer[offset + 1] = g;
+  buffer[offset + 2] = b;
+}
+
+/** Fills the w-by-h rectangle whose top-left corner is (x, y) with one RGB color. */
+function fillRect(buffer, x, y, w, h, r, g, b) {
+  for (let row = y; row < y + h; row += 1) {
+    for (let col = x; col < x + w; col += 1) {
+      setPixel(buffer, col, row, r, g, b);
+    }
+  }
+}
+
+/** Draws `count` 34px squares in a staggered row starting at (x, y); `color` is [r, g, b]. */
+function drawNodes(buffer, x, y, count, color) {
+  for (let index = 0; index < count; index += 1) {
+    fillRect(buffer, x + index * 54, y + (index % 2) * 28, 34, 34, color[0], color[1], color[2]);
+  }
+}
+
+/** Renders frame `index` at animation `progress` (0..1) and writes it as a binary PPM (P6) file. */
+function writeFrame(index, progress) {
+  const buffer = Buffer.alloc(width * height * 3, 248);
+  fillRect(buffer, 0, 0, width, height, 248, 250, 252);
+  fillRect(buffer, 48, 44, 864, 452, 255, 255, 255);
+  fillRect(buffer, 48, 44, 864, 8, 15, 23, 42);
+
+  // Clamp both the animation factor and the data ratio so a bar never grows past its 300px track.
+  const cleanWidth = Math.floor(300 * clamp01(progress * 1.5) * clamp01(Math.max(0.04, clean.summary.compoundsReviewed / 5)));
+  const riskyWidth = Math.floor(300 * clamp01((progress - 0.1) * 1.4) * clamp01(risky.summary.findingCount / 16));
+  const heldWidth = Math.floor(300 * clamp01((progress - 0.2) * 1.3) * clamp01((risky.summary.heldNodes + risky.summary.heldEdges) / 8));
+
+  fillRect(buffer, 96, 126, 300, 42, 226, 232, 240);
+  fillRect(buffer, 96, 126, cleanWidth, 42, 16, 185, 129);
+  fillRect(buffer, 96, 222, 300, 42, 226, 232, 240);
+  fillRect(buffer, 96, 222, riskyWidth, 42, 239, 68, 68);
+  fillRect(buffer, 96, 318, 300, 42, 226, 232, 240);
+  fillRect(buffer, 96, 318, heldWidth, 42, 245, 158, 11);
+
+  drawNodes(buffer, 110, 406, risky.summary.compoundsReviewed, [99, 102, 241]);
+  drawNodes(buffer, 536, 214, risky.summary.criticalFindings, [153, 27, 27]);
+  drawNodes(buffer, 536, 324, Math.min(8, risky.summary.findingCount), [220, 38, 38]);
+  fillRect(buffer, 536, 436, Math.floor(310 * progress), 14, 37, 99, 235);
+
+  const header = Buffer.from(`P6\n${width} ${height}\n255\n`, "ascii");
+  fs.writeFileSync(path.join(framesDir, `frame-${String(index).padStart(3, "0")}.ppm`), Buffer.concat([header, buffer]));
+}
+
+for (let index = 0; index < frames; index += 1) {
+  writeFrame(index, index / (frames - 1));
+}
+
+const output = path.join(reportsDir, "demo.mp4");
+const result = spawnSync(process.env.FFMPEG_PATH || "ffmpeg", [
+  "-y",
+  "-framerate",
+  String(fps),
+  "-i",
+  path.join(framesDir, "frame-%03d.ppm"),
+  "-pix_fmt",
+  "yuv420p",
+  "-movflags",
+  "+faststart",
+  output
+], { stdio: "inherit" });
+
+fs.rmSync(framesDir, { recursive: true, force: true });
+
+// spawnSync sets `error` when the process could not be started (for example ffmpeg is not installed).
+if (result.error) {
+  console.error(`Unable to run ffmpeg: ${result.error.message}`);
+  process.exit(1);
+}
+
+if (result.status !== 0) {
+  process.exit(result.status || 1);
+}
+
+console.log(`Wrote ${output}`);
diff --git a/chemical-identity-stereochemistry-guard/package.json b/chemical-identity-stereochemistry-guard/package.json
new file mode 100644
index 00000000..d9e3d684
--- /dev/null
+++ b/chemical-identity-stereochemistry-guard/package.json
@@ -0,0 +1,15 @@
+{ + "name": "chemical-identity-stereochemistry-guard", + "version": "1.0.0", + "description": "Dependency-free chemical identity and stereochemistry graph guard for SCIBASE knowledge graph workflows.", + "main": "index.js", + "scripts": { + "check": "node test.js", + "test": "node test.js", + "demo": "node demo.js", + "make-demo-video": "node make-demo-video.js", + "verify-video": "node verify-video.js" + }, + "license": "MIT", + "private": true +} diff --git a/chemical-identity-stereochemistry-guard/reports/chemical-identity-dashboard.svg b/chemical-identity-stereochemistry-guard/reports/chemical-identity-dashboard.svg new file mode 100644 index 00000000..c330dbbc --- /dev/null +++ b/chemical-identity-stereochemistry-guard/reports/chemical-identity-dashboard.svg @@ -0,0 +1,30 @@ + + + + + Chemical identity stereochemistry guard + Checks compound graph nodes before merge or recommendation publication. + Clean graph findings + + + 0 findings + Risky critical findings + + + 4 critical + Held nodes and edges + + + 3 held + + Top blockers + CHEMICAL_EDGE_NODE_MISSING +SAME_AS_SKELETON_MISMATCH +STEREOCHEMISTRY_MISSING +SYNONYM_COLLISION_ACROSS_COMPOUNDS +ASSAY_CONTEXT_INCOMPLETE +CHEMICAL_IDENTITY_EVIDENCE_MISSING +EDGE_EVIDENCE_DOI_MISSING +INCHIKEY_FORMAT_INVALID + Decision: hold_chemical_graph | sha256:0128a62f30867cfec310d... 
+ diff --git a/chemical-identity-stereochemistry-guard/reports/clean-chemical-identity-report.json b/chemical-identity-stereochemistry-guard/reports/clean-chemical-identity-report.json new file mode 100644 index 00000000..f952b2d0 --- /dev/null +++ b/chemical-identity-stereochemistry-guard/reports/clean-chemical-identity-report.json @@ -0,0 +1,43 @@ +{ + "summary": { + "decision": "publish_chemical_graph", + "graphId": "KG-CHEM-CLEAN", + "compoundsReviewed": 2, + "edgesReviewed": 2, + "heldNodes": 0, + "heldEdges": 0, + "findingCount": 0, + "criticalFindings": 0, + "highOrCriticalFindings": 0, + "auditDigest": "sha256:3c544a96d9020ddb66cd94e0c2e4a6b810605af9484ae41cd68d022d7667aafe" + }, + "compounds": [ + { + "id": "cmpd-warfarin-r", + "label": "R-warfarin", + "status": "publishable", + "actions": [] + }, + { + "id": "cmpd-warfarin-sodium", + "label": "Warfarin sodium", + "status": "publishable", + "actions": [] + } + ], + "edges": [ + { + "id": "edge-warfarin-form", + "relationship": "form_of", + "status": "publishable", + "actions": [] + }, + { + "id": "edge-warfarin-assay", + "relationship": "tested_in_assay", + "status": "publishable", + "actions": [] + } + ], + "findings": [] +} diff --git a/chemical-identity-stereochemistry-guard/reports/demo.mp4 b/chemical-identity-stereochemistry-guard/reports/demo.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c83d547a7949587c08ec0fc02470b7c8940d11c7 GIT binary patch literal 10016 zcmcI}2{@E(+y6ZV*|#W48A+DMGWI3gWJ`n+Nrk~!hZ!?t7h@?zPqt(W*`>v0eQPP;C|KN(gL&zN3k9bcpR)l zo9yo5;Rq_+-tL>AzlVL$VQY-vfrNu~;0qpS@gm_-TXkAXGQ|!L>ZN1~`PYQNI$7|f zffBxc$K32d)&klf=1tku;4uRFin3C&%2G0D85G{d&R14Zep7kti+5uKR1|@hHo^fC z*!T+ZIf3$am)0z2&{qgT!d{TjL3Uo5Bm_k7JTcI4pmuF>5w_;GqmZ_fiaE9g2l0|A zn+b=TaC{&dTDZ;)c)$|@UvNJV8V>t^>1u8Dfl1jMW7#e@ZGrCpum4Xy4%iMXu3vWl zsm;CZ=NCTQgk$?<|DVSH#Q#tF|J(8ZslWfJ{(p+^{}hiOtP%Uo8Te<7Y_4Q59|(Cc z=D$4#7hqkn$bvE&2wa{9LI83QNEi??Ab~*CfS`e3w@WEdW&u$F0*~DQ0h 
z9MT7X($33)0_VcY9k*G+V}idQY~U~?VTnZe?D^-CblEyRg^MTz7;v8hcGDPuLJ&A% z9Ki`A13qXtA_hki4jhfBJMKUWY{t9vbQ>GRJ-TDn!(B%!k|aK&x`d`@;A03SiLxh< za3~o?1r$k6R#p~eC#Rr@R=|RRqf$UY`najCzN9<~W1$T+9dPzwK$}4H!{Zz&C|NXG zPEr;vs{lHkDHNirw6u?pj}&|`6Yy9MDFVq!8ulXPOmW8p9RiW!LhvA~qU^DDSbKFD z6gYg<s5CYc0pWuO0mz9x_kwH0P$rKwR+0BIrYit2{5^V^Mj$|A~T~ZcB zaV7x^0HlP%69{ftXV9|w)hUZ2<6Z1QFuyv`C=b%66MGkTECmk4#e;$);juso^xNUR zNLW7`dxAR=O95?rkRxzocJTlZK#_!nbsR}pcN`gH%g%=A2kI^k>R_^Lunt%vOvBE` z&IL<`ow$&2o5Xx@E>6xAJ1|Bd;yi4e2t+Wt)ky@VZa6=HtuCv8-mcn!+oigUGKy@E z^T64AQPkzpaF`@4oKq5x>h>f@lsmW!;b2;zp)M;Yg+_V8WYp18 zil9n_fj3L6uezcN=pa*YM0I(T3lYo`SP@_n0T$L1EC6^qAu0qh2H+yYp~D;N1I3Hn zUH32c$==q>%n+068a(8oVzbJ&>{<)o6SqIP!vuNnp|Y;R%}&}ULaSdn_d_DBRiEmW zKWXIJmhUnM5b&HscXSPFK@h7OD=J}m;4kL(zdfDVPVP35u+n)}0Cq2Zw_Jc~ZeW7c zj>NCbVE-T?gXy=mVm-+;^!Ggl>3Gp?K{Z-qhSJ)_{sBLfHLrRLd1YGsoV(t$5vx5b zhMY30Jg5iJ%!nw*lYQ@ozbe^4irw=*suF1w4wUFqxqwC67_pw3>BKJzfj%?9TY~bm(qoD&Do{h3SS36JAeIR z-VSO|r$u(;TnoISQgGo(aS|N|dv^7nDRDV9Mwf=hmHt|{$|o{*2a{CmXUT8w$c>@t zhnMcgh`slj6euF9kj6h8P?OMi#M3z$k~y8Z7gTVZ^^7$FOK(XbmGhl<)6hP&u(+KPMN%h(%E?OYP&4@q_0Ze9KxjzRQDuKS2aF5 zufGxdZf-Y!o=5mBzsn%RQ0dh37ny|y!~Kq8TGT0@bs?ExzM=ANF-_P(CjREG1~KC? 
zQzu6z2&s9XE{l)4)0i^|Ki4-vGj*PhoT;9be(@`%@N?GQI8hRJ4D(2{$MU(;p#~mq z%XK+R4H2|?_@)T|)#ke!?X;yfNxt~b6Y|L!ES!Qxu?5`zJp&H(8ElRWs?)lP96n{s z#whJL!VhJIcl06)XLdwI7haAduN{>N96j-QS9nY4sqD1xEw2(*Rdel*_B|QuWgJj9 zm9C;y{rThKnGHEtacFKRqALoah7`C$qj!aeZOm^4IWjdCoEs+B9USTT@vOyO+EeSCd`n1z#pU&l<6heO>bw}aAcekHMJshrTs~f%M~FtK9F94^5ybN$k95IeB{8R}p~3L5bn5DZ zyH{?Cd`xLOmYdx{#%j^7b$ zvZ~07{cz*-v0(gO$Y$7UVI=th#QAM3wAYm_bvO0+{fC{BZ&eDB7KhBNPJAzrn-%Yy zU0ap?tkq~s%zC0rLpSzGewn3;zXE0I6Or$9zH**R)AT@FaM-I#o}Z%ul@7~{Y3g@x zHa9p5e|T)5dAwL_=H)%Px94V!J)4<6-)(whvv1!7=hJ%Ej^hz6zJJAYIk;f);* zm+nsa)+-Gj&?Z_|<}fYwD!Yut9C&L$*RYSCea$P-d`60@f$_e0?yZ1|P}F@S`=i(Ay9Po943?@hR$uAo-`KtUH^*(~hQ-e#?*ev*Mh5QH5|k1;#A4|v$VF$# zqA&YER;?q>7&>D7w7NpK??{Asw}xkerM|M4Y{`oH7(%5ag&Py&DS`9(e1v1#3fzE@ zh;c#MYsaUa&f-K3wEK)Nj54mduJE5GMkGX$Rd(u1lW@D`SF4_v9F7~hcLiQ~S1?#e8+ujxU9ulw2e#yQ#J$s%Z!%xIUOXbXsIc?yScv{H{ka zWxONL8fZ*gi#cg89ia0!Xl5*n>;6(#dU;<*@IoaW=Ttm4hciZe4xsE;gDcPBt$?AM!#Tu-s#sJA+u2hrkBUYbS+5+?xg4R6;v0Ms zd({3}K^{j8=L6N_mqZsQA2K9NdvQ-|k4;}y-JtRQo6kn3r;XZV+_+2IN3DYDUzC6K zE;ZAQk}a&5J@eU>Q}}}Qsl5Z=H;PW(izJ8Dm`pw3<2&X-<)#Wy*CapDhD?>c)JQ|| z%2_QgQpI!PT3)7xpuU+vymb#C{H1@&g zC|{SpAIRMp2!*ZH2tKU$;{ z-g%$CK^$8-a#B|7jh|&t4Rw9#o^zet`DEtoyY;;y@;j{)?+y!I@EqLv!0vqCa??&>L9{Q2AE@<#abP-}-+f_cilF=qjyMik%7*e;SjH+B%R zK=VS*4HlYl-XoD(bg9&``kJ@xh2lQ)Nm-2KuNH=5oJ>zYn!OhAc`AHkKe34Mx)0aX zy(;(XJ(oQ`of5ONRNZ4dLz)tEoOrQ6=GsE_-rijO91c>#v-mzFyZG+QU#~G6-PFvA zf@GO(XO}NIKAqL6hoC55XrI0-YOi8gED`>VqvP**FK&`!96Zi!{-CM|fgmUXlX?KJ zU9(=41Rlh(ln%&bU}3%AK{I@8LoXn#{IA6~%n(Dioze(Xs%fm%*v^%=HA2}<69?aD zQg)2zw7PP4UCJ#Ok#Hpx3LKd=fykwRo({Fnh29p(_{l4%xHWsCpzxHf<$ z*;!=f0KGX>3%iN*xqv!B?@~SkEX**e+G>IcV&UNT<0b*yepjj}+EB{;=boEp3)I)g zrs|(_dvpI>MasBUIJ|HZ?{4I@%Dl5gK~7WGywv#eZC2c>cA;S0bN8IAxawu#XFxYh zD$d|5`yn$rwO_z?K`fL)B+*wgBd7V%ewtFBBqtlX59iOYFuo%Z-}xTwzPEp2I)gBy zQ8N(f72WIV!9!DgjCJ%`gCuJL!tmM6w+S{s8_C@|7%C(@IR22~!46Eub83hV{78&K zWQ4mO+2pN~wGmT%>yo8KEgVvKM94?Ry=Xz86O2iX*bYf^o2y+A8c&t|?0CwJM!wOt z)czD-D=)jI%7_p%t+hDa-yPz6>wQ|gubFlSmG_U?#@sS}dA_{--K?<*VV&2)Aki*Y 
zbAO|7%MOiOY?_urD`M<(U z_fRVL(^$x_JQD^UoRJVMm3TUAVVq$JUQ)i8)LWae(g0D%l7wPjjnblqziTZ;a?AiV<0<@U`4O2)1-_Lr8LxFY)uiEU?l-AW ze0_?Fd>WkPC5`$~!bJZ~iHkMpW$q8Z;9UvE4BB{z*1TIriT7unSw66pKxB+<5#0O2 zr0NWw&P3L@Y~MsvoermxBHwFT*;)uXu4?2Ia{lrjPN07tlR9Nkh;dFaVhG+Qs^8nydCssh z0_}868XAO;|1Eq}{plSeUI|ujl9`ShDMYRck5<((A&T$Eyg!_0vLCmrCcy1#>*9Wo zpD~_GIC_Y&D_zz(IJm<&)rZVH8r4k5$=vgf|J5hc-HxREnb)q5U+fUMQhnKFh}a~; z!lrQO-BGQwNul+}frUDbni9NS{oYBur%qJQlFBLg2}i)xD$GwlEJo@yy+}Q#tp9|E>ZoUBrO_iKP-8nBSM1R96d5ev6|hKm92hDi zG6|b$pj}eAnSHi%kofdi(YtWtj*WyXUDyQ<=|}HFCCNLyEFSK#N&O*5M1?Uw-X-&z zrxe(xVA3dmBt>Lw#2puYQ||LCrqHpG)Gx#A^HFbG^B{t@cX7-Pd zrptG$Sy<#e#7+8PAj_D_7oPrg)t(>CeVCsDamo;#{*JI6X{eJjjR~ zeB2^4lkRkg*vOi1X!KflV0FzbX0f*pE*NJ-TkmVaBq5tcMGT+4GJQQuXJy8gPGxVG z?>f%AH=Lb%AKs$!h|DqyeB{tBE}OP|;keo_;M&SdI^s)Vp`94z+C8MH)^$;b+V;Lp z`VKOZ>&J2wByJ1rKx*%8r#4({e0*FptrTfEcxJ#d#AKq<1<~_^SEi~CpD9rY?JBY) zT|8b(r||hy0>m~7Z1XW`m4BobsxN;Qb~dHoyFPkFxdgg%soyiA$a2reIVo1s^@7{B zvklp2o$~4-5BJrXTblcjLW1UrK}E!n#5?qAGu^#&=^H4UG6NxwA-@1SMTy*av+F{% zBQG{WEg!V)$@$UCL_K3a?<3slMwe_AAkdRQw$tpSoQ|tE3J-46{1mv-S-Fm2_~_*h zNqL^&7UVq_h_iSNzuz;$WOn|O=cuF-saF>&FX$B97x}?qwjgYqt3(y(%~TtXEfk(4 zHM^IbVaz?1zLO`cu;6G=WYr*6|OL z*TGT_37++ja=UI(y|c|##e4tTiO}}Z_V2_^3w=!5${$(YF*brP;aV;&4eURz)qJvV z%{s=r)37YNXuMRaF@NpDPsWR{2_<_Qcl%d1t{Emhv5Nl|Y7LyLVlw!CZNLw_9sAD8 zUV9eq*83BC#I8}GsZO$>Y)8(AyEwgzjnmhk20UFEp+Gcdt%eP=yIMCx&rsbCom>}r zprdUtw~B?ryLWJ3b+pR<28P%vjSKf1oatJI4DTP^VFWBLVNx~!J#-{;*yu9}`P&K! 
zn!akzpn7dcomz2cKI?BaokGq&4q{bTy3=%wA0l?%z%ILNxRm0*cC(E~%+SvFWrFNE zjufwYkFMc>JD7VsawS$ew^oSrq3;j)R93XPcI%0~^n;*R?Td=TZ&4dxH_ zf+qS#^Q4%Knj(3>dvhb<7tM-@jKHl0P+?eb+^~aFpq!sBGh}gUarZGcO7v&DmY%p<>39J;&CRT9IAV!eEN8Tz!}>v zTjo$l+V@YhSi~Dh<+gh`fR!C4)&9>+PRuqAm)|<9eqQ0W<@K7^BJsLfV#l6_v34oa zqpKnfkJA1;=kA?D8*?Oq{j7+%)T}JBcxHKNwut;9#_#4}a6DUtf;=LgGW}e%LJsnL z2D}``WQ6=HOS5zzEPBmytazDb$Pv9K(_1FPvrdGQu@Q(%J0e=1g;g*tH-4`~PKtdu zq;e5~xdI|1Ybz-arN-yY45;SrHSO*ja?{PtmZv}ZePO(PbMn%?$%EHVZXAUy_jy;S z8dJmI!%GH{k-e3ZB&`DTIu-|i2RJ8n0`GrldM-7br<-tERCsWhMbMPzD%FJwJ*EO9 z3MRGS&y;k)DXB9}nI5=^q5Kdi<9C&+eS?V9zjIUNoSFcNRdlMq=jFbyGj~1}>sOfYh(^X7@FFfb5Tr1qe2{7G29*z1+!@3Q`S ze#y=p0ss3ThsYS-P80dVHP|4dYsQR3?k5n8GEEGZY&hCNc4W0^xH41tDD>*yU49iQ|pqdZAv6FdKc zQg%IxqgiCWc2zpNxg~6`*NZ0P+G!5^db?ER;@yi?G%G{FN>3wKuAM&Ft@pT~Pmb}b zKb6td{$6$Gr#L(#{BIDrr-^Ox^_CL}`ih_%RC<4z{N>J$6K~snUi#|Rr19?FDIqp= zdnv`TmfFbEC$2~f|NK6INt61MaLw;4<9id~TdS~qP}XDOmoLF}Y07gCXEieT?)qce zX*W9WUF`ZA&2nz0wN8tqkeEwb?IZy}jWL-<|4h3fnD$!6)*oQn*BQc3r$_9$)@)}R z*S&y;Vy?b=kuZP|7+=om^up>AXGSSww_j=3%4?n^*6g|f5zqBAMj|e5(R15c0Nxz2 z4ihv3CkCq$`NQ0+e7r*#cWdDd#KZd!EbbLQev)-oLhjn{1JS+=$(I4OG%H!-31T0y4mjGA8rtueD%d zXk)mXclqU)Sb$(bI-SP4u{V$vS`!3u(QW)@^? 
z5vS3J9t^(o+~6)wPQ@J0fZD3@hni+4W>S^k86M`=6~_~d?jN|{P}QaP;Cc-wu`2X- zw{Po!Xcs^|jLB^KHUCf>`m5kqokiX;(SqyPFBTP7^1^$QHu6}H%3e2gt2=LRe+^>T zvj?9aQ(|^2R>pRV$G6SNMk4k_&oJg&WlOx*(W-n|zW@2VH*Lx@SDMcJ-OO-=-{?h7 zMb7t|#lYk*MCOl8>cFI#z?k+fN`kpDxL0q-+k9kB##^>aPmXwr4X<9K%{u-Js2{_m zE&W&80tKH-a0@ZQ!bznw{nnyC*?p_SrBE$z9Ot;F+oN}O`4+$Z`BiP~@GYHo?rY5Q z;ZLpP6|(@EA|_+^A9OoR#za~4QIok|ed?!5jqo{R2egaBTkT7y(^c!y7v0A7=K1%H%n{G@SEY}F6Rv^)B40FvC|P=Hy7n;3V@L&B18E%u5s@o zN7Np@9}co+sMIQ58;Of)Ooo_w$Z2!)pGn+*Kit_w`#vbq)TTAg`opbsQK>!vVTwsr z`!D>t1PY4s^2XAn9+7khsED9_0q$0DOtA=wr3)i9K}Tyi&KbrdQ#?3Ant%GE-h98( zVh|i*bE)TW^{!}`iyS7yYJ25^N%$VGKNuxZ>+9C%X7J88rM_Z+exol+;wZ0SWtGDF z5V>cXGRb^rff+d31Ab!@(ZcGaR;>)P8FVxh2vMW)9L7gCAG%b*aZ)Rv8{X4rFsZiN z!TW4Id4d-@oS9ka0*NlRv>o>d#O(CLJX8}F(!iavsi>Fz6ZCQ2jP*pr`#(8)?+g+; zrPO=<*|oKsQTpC_@uw!mBMJ+U-@V28Jps5nCL`mIx%4tk|Lo@;$K5|p$AsUz)@M;x zn${z8BUxUJ_L*R4D>Xl9vW)gj3x_DF762$CGOB-@(hE;nS$t~FP&uUiLT4s-dzc?w z&k7WeJ(+uqFPkg=(d|h0!Rw(tHHisSU@M8psQE1ylxV4ya^&qp$?2-gmtVDTi4xBL zNWl+@3PLx`mke)jmQ}IjjJ2r`oO3m=?SGbS?p9qnd+>qdPQ=bUDGLfFCNbW)OL7@ z0lFjGbZs`)V@fyaUbs+iC?rJ+4+%C_EVG8V;vPjWzVIIV(pK%%eD6iHwAbD<`{hKM zgsvRAY(1r_CVY8)XfFOn4!o1lm`u8VU$T+Xl$eJfi)GF)b5a+@^ejXY19=9Sn`8K8 zJ#q>+cb^&}^U!Z`+lfG3`b+WcklYOcNq490R)4X~+ZsM`wb`2C>dibxeHXPO5j-MS LtW=J)$bJ2Pjzb97 literal 0 HcmV?d00001 diff --git a/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-handoff.md b/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-handoff.md new file mode 100644 index 00000000..3cd529f0 --- /dev/null +++ b/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-handoff.md @@ -0,0 +1,45 @@ +# Chemical Identity Graph Handoff + +Decision: hold_chemical_graph +Compounds reviewed: 5 +Edges reviewed: 4 +Held nodes: 1 +Held edges: 2 +Audit digest: sha256:0128a62f30867cfec310d4ff265ff6e88ad056a911cbc36f61046aa3e030e7a0 + +## Priority Findings + +| Severity | Code | Remediation | +| --- | --- | --- | +| critical | CHEMICAL_EDGE_NODE_MISSING | 
block_edge_until_both_chemical_nodes_exist | +| critical | SAME_AS_SKELETON_MISMATCH | split_same_as_edge_into_distinct_compound_nodes | +| critical | STEREOCHEMISTRY_MISSING | split_or_hold_compound_until_stereochemistry_is_explicit | +| critical | SYNONYM_COLLISION_ACROSS_COMPOUNDS | quarantine_synonym_until_curator_disambiguates_compounds | +| high | ASSAY_CONTEXT_INCOMPLETE | attach_assay_context_before_recommendation | +| high | CHEMICAL_IDENTITY_EVIDENCE_MISSING | attach_doi_backed_identity_evidence | +| high | EDGE_EVIDENCE_DOI_MISSING | attach_doi_backed_edge_evidence | +| high | INCHIKEY_FORMAT_INVALID | attach_valid_inchikey_before_graph_merge | +| high | ISOMERIC_SMILES_MISSING | attach_isomeric_smiles_before_recommendation | +| high | ISOTOPE_LABEL_MISSING | attach_isotope_label_before_assay_edge_publication | +| high | SAME_AS_STEREOCHEMISTRY_MISMATCH | hold_same_as_edge_for_stereochemistry_review | +| medium | ASSAY_FORM_CONTEXT_MISMATCH | suppress_assay_recommendation_until_form_context_matches | + +## Node Actions + +| Compound | Status | Actions | +| --- | --- | --- | +| cmpd-thalidomide-ambiguous | hold_identity_node | split_or_hold_compound_until_stereochemistry_is_explicit, attach_doi_backed_identity_evidence, deduplicate_compound_synonyms | +| cmpd-alpha-other | publishable | none | +| cmpd-caffeine-citrate | curator_review | retain_salt_or_hydrate_form_as_explicit_graph_node | +| cmpd-caffeine-freebase | publishable | none | +| cmpd-tracer | curator_review | attach_valid_inchikey_before_graph_merge, attach_isomeric_smiles_before_recommendation, attach_isotope_label_before_assay_edge_publication | + +## Edge Actions + +| Edge | Status | Actions | +| --- | --- | --- | +| edge-bad-same-as | hold_graph_edge | split_same_as_edge_into_distinct_compound_nodes, hold_same_as_edge_for_stereochemistry_review | +| edge-form-conflation | curator_review | model_compound_form_relationship_instead_of_same_as | +| edge-assay-incomplete | curator_review | 
attach_assay_context_before_recommendation, suppress_assay_recommendation_until_form_context_matches, attach_doi_backed_edge_evidence | +| edge-missing-target | hold_graph_edge | block_edge_until_both_chemical_nodes_exist | + diff --git a/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-report.json b/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-report.json new file mode 100644 index 00000000..c501892c --- /dev/null +++ b/chemical-identity-stereochemistry-guard/reports/risky-chemical-identity-report.json @@ -0,0 +1,246 @@ +{ + "summary": { + "decision": "hold_chemical_graph", + "graphId": "KG-CHEM-RISK", + "compoundsReviewed": 5, + "edgesReviewed": 4, + "heldNodes": 1, + "heldEdges": 2, + "findingCount": 15, + "criticalFindings": 4, + "highOrCriticalFindings": 11, + "auditDigest": "sha256:0128a62f30867cfec310d4ff265ff6e88ad056a911cbc36f61046aa3e030e7a0" + }, + "compounds": [ + { + "id": "cmpd-thalidomide-ambiguous", + "label": "Thalidomide", + "status": "hold_identity_node", + "actions": [ + "split_or_hold_compound_until_stereochemistry_is_explicit", + "attach_doi_backed_identity_evidence", + "deduplicate_compound_synonyms" + ] + }, + { + "id": "cmpd-alpha-other", + "label": "Alpha unrelated scaffold", + "status": "publishable", + "actions": [] + }, + { + "id": "cmpd-caffeine-citrate", + "label": "Caffeine citrate", + "status": "curator_review", + "actions": [ + "retain_salt_or_hydrate_form_as_explicit_graph_node" + ] + }, + { + "id": "cmpd-caffeine-freebase", + "label": "Caffeine", + "status": "publishable", + "actions": [] + }, + { + "id": "cmpd-tracer", + "label": "Tracer X", + "status": "curator_review", + "actions": [ + "attach_valid_inchikey_before_graph_merge", + "attach_isomeric_smiles_before_recommendation", + "attach_isotope_label_before_assay_edge_publication" + ] + } + ], + "edges": [ + { + "id": "edge-bad-same-as", + "relationship": "same_as", + "status": "hold_graph_edge", + "actions": [ + 
"split_same_as_edge_into_distinct_compound_nodes", + "hold_same_as_edge_for_stereochemistry_review" + ] + }, + { + "id": "edge-form-conflation", + "relationship": "same_as", + "status": "curator_review", + "actions": [ + "model_compound_form_relationship_instead_of_same_as" + ] + }, + { + "id": "edge-assay-incomplete", + "relationship": "tested_in_assay", + "status": "curator_review", + "actions": [ + "attach_assay_context_before_recommendation", + "suppress_assay_recommendation_until_form_context_matches", + "attach_doi_backed_edge_evidence" + ] + }, + { + "id": "edge-missing-target", + "status": "hold_graph_edge", + "actions": [ + "block_edge_until_both_chemical_nodes_exist" + ] + } + ], + "findings": [ + { + "severity": "critical", + "code": "CHEMICAL_EDGE_NODE_MISSING", + "message": "edge-missing-target references a missing chemical node.", + "refs": [ + "edge-missing-target", + "cmpd-tracer", + "cmpd-missing" + ], + "action": "block_edge_until_both_chemical_nodes_exist" + }, + { + "severity": "critical", + "code": "SAME_AS_SKELETON_MISMATCH", + "message": "edge-bad-same-as marks two different chemical skeletons as same_as.", + "refs": [ + "edge-bad-same-as", + "cmpd-thalidomide-ambiguous", + "cmpd-alpha-other" + ], + "action": "split_same_as_edge_into_distinct_compound_nodes" + }, + { + "severity": "critical", + "code": "STEREOCHEMISTRY_MISSING", + "message": "cmpd-thalidomide-ambiguous requires stereochemistry but the graph node has no stereochemical markers.", + "refs": [ + "cmpd-thalidomide-ambiguous" + ], + "action": "split_or_hold_compound_until_stereochemistry_is_explicit" + }, + { + "severity": "critical", + "code": "SYNONYM_COLLISION_ACROSS_COMPOUNDS", + "message": "Synonym alphacompound resolves to multiple chemical skeletons.", + "refs": [ + "cmpd-thalidomide-ambiguous", + "cmpd-thalidomide-ambiguous", + "cmpd-alpha-other" + ], + "action": "quarantine_synonym_until_curator_disambiguates_compounds" + }, + { + "severity": "high", + "code": 
"ASSAY_CONTEXT_INCOMPLETE", + "message": "edge-assay-incomplete lacks assay matrix or concentration-unit context.", + "refs": [ + "edge-assay-incomplete", + "cmpd-caffeine-citrate", + "assay-dose-response-7" + ], + "action": "attach_assay_context_before_recommendation" + }, + { + "severity": "high", + "code": "CHEMICAL_IDENTITY_EVIDENCE_MISSING", + "message": "cmpd-thalidomide-ambiguous lacks DOI-backed identity evidence.", + "refs": [ + "cmpd-thalidomide-ambiguous" + ], + "action": "attach_doi_backed_identity_evidence" + }, + { + "severity": "high", + "code": "EDGE_EVIDENCE_DOI_MISSING", + "message": "edge-assay-incomplete lacks DOI-backed relationship evidence.", + "refs": [ + "edge-assay-incomplete", + "cmpd-caffeine-citrate", + "assay-dose-response-7" + ], + "action": "attach_doi_backed_edge_evidence" + }, + { + "severity": "high", + "code": "INCHIKEY_FORMAT_INVALID", + "message": "cmpd-tracer has missing or invalid InChIKey evidence.", + "refs": [ + "cmpd-tracer" + ], + "action": "attach_valid_inchikey_before_graph_merge" + }, + { + "severity": "high", + "code": "ISOMERIC_SMILES_MISSING", + "message": "cmpd-tracer lacks an isomeric SMILES string.", + "refs": [ + "cmpd-tracer" + ], + "action": "attach_isomeric_smiles_before_recommendation" + }, + { + "severity": "high", + "code": "ISOTOPE_LABEL_MISSING", + "message": "cmpd-tracer is isotope-labeled but has no isotope label metadata.", + "refs": [ + "cmpd-tracer" + ], + "action": "attach_isotope_label_before_assay_edge_publication" + }, + { + "severity": "high", + "code": "SAME_AS_STEREOCHEMISTRY_MISMATCH", + "message": "edge-bad-same-as links stereochemically distinct compound records.", + "refs": [ + "edge-bad-same-as", + "cmpd-thalidomide-ambiguous", + "cmpd-alpha-other" + ], + "action": "hold_same_as_edge_for_stereochemistry_review" + }, + { + "severity": "medium", + "code": "ASSAY_FORM_CONTEXT_MISMATCH", + "message": "edge-assay-incomplete uses a compound form that is incompatible with the assay context.", 
+ "refs": [ + "edge-assay-incomplete", + "cmpd-caffeine-citrate", + "assay-dose-response-7" + ], + "action": "suppress_assay_recommendation_until_form_context_matches" + }, + { + "severity": "medium", + "code": "SALT_FORM_NEEDS_SEPARATE_NODE", + "message": "cmpd-caffeine-citrate is a citrate salt form that should not be silently merged with the freebase node.", + "refs": [ + "cmpd-caffeine-citrate", + "cmpd-caffeine-freebase" + ], + "action": "retain_salt_or_hydrate_form_as_explicit_graph_node" + }, + { + "severity": "medium", + "code": "SAME_AS_FORM_CONFLATION", + "message": "edge-form-conflation conflates citrate salt and freebase forms.", + "refs": [ + "edge-form-conflation", + "cmpd-caffeine-citrate", + "cmpd-caffeine-freebase" + ], + "action": "model_compound_form_relationship_instead_of_same_as" + }, + { + "severity": "low", + "code": "DUPLICATE_NODE_SYNONYM", + "message": "cmpd-thalidomide-ambiguous repeats synonym key alphacompound.", + "refs": [ + "cmpd-thalidomide-ambiguous" + ], + "action": "deduplicate_compound_synonyms" + } + ] +} diff --git a/chemical-identity-stereochemistry-guard/sample-data.js b/chemical-identity-stereochemistry-guard/sample-data.js new file mode 100644 index 00000000..7edd8b70 --- /dev/null +++ b/chemical-identity-stereochemistry-guard/sample-data.js @@ -0,0 +1,188 @@ +const cleanPacket = { + graphId: "KG-CHEM-CLEAN", + policy: { + mergeSaltForms: false + }, + compounds: [ + { + id: "cmpd-warfarin-r", + label: "R-warfarin", + inchiKey: "PJVWKTKQMONHTI-HNNXBMFYSA-N", + isomericSmiles: "CC(=O)CC(C1=CC=CC=C1)C2=C(C=CC(=C2O)O)C(=O)O[C@H]3CCCO3", + requiresStereochemistry: true, + form: "freebase", + synonyms: ["R-warfarin"], + evidence: { + doi: "10.1000/warfarin.identity", + sourceType: "curated-chemistry" + } + }, + { + id: "cmpd-warfarin-sodium", + label: "Warfarin sodium", + inchiKey: "PJVWKTKQMONHTI-HNNXBMFYSA-N", + isomericSmiles: "CC(=O)CC(C1=CC=CC=C1)C2=C(C=CC(=C2O)O)C(=O)O[C@H]3CCCO3.[Na+]", + requiresStereochemistry: true, + 
form: "sodium salt", + synonyms: ["warfarin sodium"], + evidence: { + doi: "10.1000/warfarin.sodium", + sourceType: "curated-chemistry" + } + } + ], + edges: [ + { + id: "edge-warfarin-form", + source: "cmpd-warfarin-sodium", + target: "cmpd-warfarin-r", + relationship: "form_of", + evidence: { + doi: "10.1000/warfarin.sodium", + sourceType: "curated-chemistry" + } + }, + { + id: "edge-warfarin-assay", + source: "cmpd-warfarin-r", + target: "assay-anticoagulation-1", + relationship: "tested_in_assay", + assayContext: { + matrix: "plasma", + concentrationUnit: "uM", + compatibleForms: ["freebase"] + }, + evidence: { + doi: "10.1000/warfarin.assay", + sourceType: "assay-publication" + } + } + ] +}; + +const riskyPacket = { + graphId: "KG-CHEM-RISK", + policy: { + mergeSaltForms: false + }, + compounds: [ + { + id: "cmpd-thalidomide-ambiguous", + label: "Thalidomide", + inchiKey: "UEJJHQNACJXSKW-UHFFFAOYSA-N", + isomericSmiles: "O=C1NC(=O)C(c2ccccc2)N1", + requiresStereochemistry: true, + form: "freebase", + synonyms: ["thalidomide", "alpha compound", "alpha-compound"], + evidence: { + doi: "", + sourceType: "" + } + }, + { + id: "cmpd-alpha-other", + label: "Alpha unrelated scaffold", + inchiKey: "BSYNRYMUTXBXSQ-UHFFFAOYSA-N", + isomericSmiles: "CC(C)C1=CC=CC=C1", + requiresStereochemistry: false, + form: "freebase", + synonyms: ["Alpha Compound"], + evidence: { + doi: "10.1000/alpha.identity", + sourceType: "curated-chemistry" + } + }, + { + id: "cmpd-caffeine-citrate", + label: "Caffeine citrate", + parentCompoundId: "cmpd-caffeine-freebase", + inchiKey: "RYYVLZVUVIJVGH-UHFFFAOYSA-N", + isomericSmiles: "Cn1cnc2n(C)c(=O)n(C)c(=O)c12.O=C(O)CC(O)(CC(=O)O)C(=O)O", + requiresStereochemistry: false, + form: "citrate salt", + synonyms: ["caffeine citrate"], + evidence: { + doi: "10.1000/caffeine.citrate", + sourceType: "curated-chemistry" + } + }, + { + id: "cmpd-caffeine-freebase", + label: "Caffeine", + inchiKey: "RYYVLZVUVIJVGH-UHFFFAOYSA-N", + isomericSmiles: 
"Cn1cnc2n(C)c(=O)n(C)c(=O)c12", + requiresStereochemistry: false, + form: "freebase", + synonyms: ["caffeine"], + evidence: { + doi: "10.1000/caffeine.freebase", + sourceType: "curated-chemistry" + } + }, + { + id: "cmpd-tracer", + label: "Tracer X", + inchiKey: "BADKEY", + isomericSmiles: "", + requiresStereochemistry: false, + isotopeLabeled: true, + isotopeLabel: "", + form: "freebase", + synonyms: ["Tracer X"], + evidence: { + doi: "10.1000/tracer.identity", + sourceType: "curated-chemistry" + } + } + ], + edges: [ + { + id: "edge-bad-same-as", + source: "cmpd-thalidomide-ambiguous", + target: "cmpd-alpha-other", + relationship: "same_as", + evidence: { + doi: "10.1000/sameas.bad", + sourceType: "imported-graph" + } + }, + { + id: "edge-form-conflation", + source: "cmpd-caffeine-citrate", + target: "cmpd-caffeine-freebase", + relationship: "same_as", + evidence: { + doi: "10.1000/caffeine.citrate", + sourceType: "imported-graph" + } + }, + { + id: "edge-assay-incomplete", + source: "cmpd-caffeine-citrate", + target: "assay-dose-response-7", + relationship: "tested_in_assay", + assayContext: { + compatibleForms: ["freebase"] + }, + evidence: { + doi: "", + sourceType: "" + } + }, + { + id: "edge-missing-target", + source: "cmpd-tracer", + target: "cmpd-missing", + relationship: "same_as", + evidence: { + doi: "10.1000/tracer.edge", + sourceType: "imported-graph" + } + } + ] +}; + +module.exports = { + cleanPacket, + riskyPacket +}; diff --git a/chemical-identity-stereochemistry-guard/test.js b/chemical-identity-stereochemistry-guard/test.js new file mode 100644 index 00000000..cbc199df --- /dev/null +++ b/chemical-identity-stereochemistry-guard/test.js @@ -0,0 +1,41 @@ +const assert = require("node:assert/strict"); +const { evaluateChemicalIdentityGraph, sha256 } = require("./index"); +const { cleanPacket, riskyPacket } = require("./sample-data"); + +const clean = evaluateChemicalIdentityGraph(cleanPacket); +assert.equal(clean.summary.decision, 
"publish_chemical_graph"); +assert.equal(clean.summary.findingCount, 0); +assert.equal(clean.summary.compoundsReviewed, 2); +assert.equal(clean.summary.edgesReviewed, 2); +assert.ok(clean.summary.auditDigest.startsWith("sha256:")); + +const risky = evaluateChemicalIdentityGraph(riskyPacket); +assert.equal(risky.summary.decision, "hold_chemical_graph"); +assert.equal(risky.summary.compoundsReviewed, 5); +assert.equal(risky.summary.edgesReviewed, 4); +assert.ok(risky.summary.heldNodes >= 1); +assert.ok(risky.summary.heldEdges >= 2); +assert.ok(risky.summary.findingCount >= 13); +assert.ok(risky.summary.criticalFindings >= 4); +assert.ok(risky.summary.highOrCriticalFindings >= 9); + +const findingCodes = new Set(risky.findings.map((finding) => finding.code)); +assert.ok(findingCodes.has("STEREOCHEMISTRY_MISSING")); +assert.ok(findingCodes.has("CHEMICAL_IDENTITY_EVIDENCE_MISSING")); +assert.ok(findingCodes.has("SYNONYM_COLLISION_ACROSS_COMPOUNDS")); +assert.ok(findingCodes.has("SAME_AS_SKELETON_MISMATCH")); +assert.ok(findingCodes.has("SAME_AS_FORM_CONFLATION")); +assert.ok(findingCodes.has("ASSAY_CONTEXT_INCOMPLETE")); +assert.ok(findingCodes.has("ASSAY_FORM_CONTEXT_MISMATCH")); +assert.ok(findingCodes.has("EDGE_EVIDENCE_DOI_MISSING")); +assert.ok(findingCodes.has("CHEMICAL_EDGE_NODE_MISSING")); +assert.ok(findingCodes.has("INCHIKEY_FORMAT_INVALID")); +assert.ok(findingCodes.has("ISOMERIC_SMILES_MISSING")); +assert.ok(findingCodes.has("ISOTOPE_LABEL_MISSING")); + +const firstDigest = evaluateChemicalIdentityGraph(riskyPacket).summary.auditDigest; +const secondDigest = evaluateChemicalIdentityGraph(riskyPacket).summary.auditDigest; +assert.equal(firstDigest, secondDigest); +assert.equal(sha256({ b: 2, a: 1 }), sha256({ a: 1, b: 2 })); + +console.log("chemical identity stereochemistry guard tests passed"); diff --git a/chemical-identity-stereochemistry-guard/verify-video.js b/chemical-identity-stereochemistry-guard/verify-video.js new file mode 100644 index 
00000000..39af983c --- /dev/null +++ b/chemical-identity-stereochemistry-guard/verify-video.js @@ -0,0 +1,37 @@ +const assert = require("node:assert/strict"); +const fs = require("node:fs"); +const path = require("node:path"); +const { spawnSync } = require("node:child_process"); + +const videoPath = path.join(__dirname, "reports", "demo.mp4"); +assert.ok(fs.existsSync(videoPath), "reports/demo.mp4 must exist"); +assert.ok(fs.statSync(videoPath).size > 5000, "reports/demo.mp4 should not be empty"); + +const probe = spawnSync(process.env.FFPROBE_PATH || "ffprobe", [ + "-v", + "error", + "-select_streams", + "v:0", + "-show_entries", + "stream=codec_name,width,height,r_frame_rate:format=duration", + "-of", + "json", + videoPath +], { encoding: "utf8" }); + +if (probe.status !== 0) { + process.stderr.write(probe.stderr || "ffprobe failed\n"); + process.exit(probe.status || 1); +} + +const metadata = JSON.parse(probe.stdout); +const stream = metadata.streams && metadata.streams[0]; +assert.equal(stream.codec_name, "h264"); +assert.equal(stream.width, 960); +assert.equal(stream.height, 540); +assert.equal(stream.r_frame_rate, "18/1"); + +const duration = Number(metadata.format && metadata.format.duration); +assert.ok(duration >= 3.9 && duration <= 4.2, `unexpected duration ${duration}`); + +console.log(`demo.mp4 verified: ${stream.codec_name}, ${stream.width}x${stream.height}, ${duration.toFixed(3)}s, ${stream.r_frame_rate}`);