Skip to content

Commit 64daa3b

Browse files
committed
* Started work on the new claim-gen/junto importer logic. (It is significantly simplified, both because support for many deprecated fields could be dropped and because it is a fresh rewrite.) [note: not yet tested]
1 parent e74fceb commit 64daa3b

4 files changed

Lines changed: 336 additions & 294 deletions

File tree

Packages/client/Source/UI/@Shared/Maps/Node/NodeUI_Menu/MI_ImportSubtree.tsx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ import {Assert} from "react-vextensions/Dist/Internals/FromJSVE";
2525
import {Command, CreateAccessor, GetAsync} from "mobx-graphlink";
2626
import {MAX_TIMEOUT_DURATION} from "ui-debug-kit";
2727
import {RunCommand_AddChildNode} from "Utils/DB/Command.js";
28-
import {CG_Debate, CG_Node} from "Utils/DataFormats/JSON/ClaimGen/DataModel.js";
2928
import {GetResourcesInImportSubtree_CG} from "Utils/DataFormats/JSON/ClaimGen/ImportHelpers.js";
3029
import {CommandEntry, RunCommandBatch, RunCommandBatchResult} from "Utils/DB/RunCommandBatch.js";
3130
import {MI_SharedProps} from "../NodeUI_Menu.js";
3231
import {DMSubtreeData} from "../../../../../Utils/DataFormats/JSON/DM/DMSubtreeData.js";
3332
import {GetResourcesInImportSubtree_JsonDm} from "../../../../../Utils/DataFormats/JSON/DM/DMImportHelpers.js";
3433
import {PolicyPicker, PolicyPicker_Button} from "../../../../Database/Policies/PolicyPicker.js";
34+
import {CG_Node} from "../../../../../Utils/DataFormats/JSON/ClaimGen/DataModel.js";
3535

3636
@Observer
3737
export class MI_ImportSubtree extends BaseComponent<MI_SharedProps, {}, ImportResource> {
@@ -117,7 +117,7 @@ class ImportSubtreeUI extends BaseComponent<
117117
sourceText_parseError: string|n,
118118
forJSONDM_subtreeData: DMSubtreeData|n,
119119
forJSONDMFS_subtreeData: FS_NodeL3|n,
120-
forJSONCG_subtreeData: CG_Debate|n,
120+
forJSONCG_subtreeData: CG_Node|n,
121121
forCSVSL_subtreeData: CSV_SL_Row[]|n,
122122

123123
// right-panel
@@ -281,15 +281,15 @@ class ImportSubtreeUI extends BaseComponent<
281281
}
282282
this.SetState(newState);
283283
} else if (uiState.sourceType == DataExchangeFormat.json_cg) {
284-
let subtreeData_new: CG_Debate|n = null;
284+
let subtreeData_new: CG_Node|n = null;
285285
try {
286286
const rawData = FromJSON(newSourceText);
287287
if ("questions" in rawData) {
288-
subtreeData_new = rawData as CG_Debate;
288+
subtreeData_new = rawData as CG_Node;
289289
} else if ("positions" in rawData) {
290290
subtreeData_new = {
291291
questions: [rawData],
292-
} as CG_Debate;
292+
} as CG_Node;
293293
}
294294
newState.forJSONCG_subtreeData = subtreeData_new;
295295
newState.sourceText_parseError = null;
Lines changed: 101 additions & 177 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,121 @@
1-
import {Attachment, AttachmentType, DescriptionAttachment, QuoteAttachment, ReferencesAttachment, Source, SourceChain, SourceType} from "dm_common";
2-
import {IsString} from "js-vextensions";
1+
import {Attachment, AttachmentType, DescriptionAttachment, NodeType, QuoteAttachment, ReferencesAttachment, Source, SourceChain, SourceType} from "dm_common";
2+
import {Assert, IsString} from "js-vextensions";
33

4-
export type CG_RefURLOrQuoteOld = string | CG_QuoteOld;
5-
// non-standard quote structure, as exported from alt claimgen instance (ie. the non-claim-miner one)
6-
export class CG_QuoteOld {
7-
quote: string;
8-
url: string;
9-
}
4+
export class CG_Node {
5+
/**
 * Creates a node-object for an entry that was a plain string in one of the string collections.
 * Copies `data` onto the new instance, then records the originating collection in
 * `_isSyntheticNodeObj_fromStringCollection`, which is the field that `GetNodeType` inspects.
 * @param data Field values to copy onto the new instance (eg. `{text: "..."}`).
 * @param _collectionFieldName_forSyntheticallyCreatedNodeObj Name of the string collection the entry came from.
 */
constructor(data: Partial<CG_Node>, _collectionFieldName_forSyntheticallyCreatedNodeObj: "atomic_claims" | "counter_claims" | "examples") {
	// note: the key must be spelled out explicitly; shorthand `{param}` would store the value under the parameter's name, which nothing reads
	Object.assign(this, data, {_isSyntheticNodeObj_fromStringCollection: _collectionFieldName_forSyntheticallyCreatedNodeObj});
}
108

11-
export class CG_Quote {
12-
quote: string;
13-
source: Source;
14-
extras?: object; // eg. {claimMiner: {id: "123"}}
15-
}
9+
/**
 * Works out which node-type the given claim-gen node maps to.
 * Precedence: a non-null `atomic_claims` collection first, then the first populated text field
 * (question, position, category, claim, argument), then the origin marker of a synthetic `text` node.
 * @param node The claim-gen node to classify.
 * @returns The node-type matched by the first applicable rule.
 * @throws Error if none of the rules match.
 */
static GetNodeType(node: CG_Node): NodeType {
	// special case: a non-null `atomic_claims` collection marks a multi-premise argument-node (checked before any text field)
	if (node.atomic_claims) return NodeType.argument;

	// otherwise, the first populated text field (in this fixed order) decides the type
	const typeByTextField: Array<["question" | "position" | "category" | "claim" | "argument", NodeType]> = [
		["question", NodeType.category],
		["position", NodeType.claim],
		["category", NodeType.category],
		["claim", NodeType.claim],
		["argument", NodeType.claim],
	];
	for (const [fieldName, nodeType] of typeByTextField) {
		if (node[fieldName]) return nodeType;
	}

	// nodes that only carry `text` are classified by the string collection they were sourced from (every known collection maps to a claim)
	const sourceCollection = node._isSyntheticNodeObj_fromStringCollection;
	const fromKnownCollection = sourceCollection == "atomic_claims" || sourceCollection == "counter_claims" || sourceCollection == "examples";
	if (node.text && fromKnownCollection) return NodeType.claim;

	throw new Error("Cannot discern node-type for CG_Node: " + JSON.stringify(node));
}
}
1626

17-
// "source" entries are supposed to be imported as just attachments on the node itself (atm, always as Quote attachments)
18-
// v6+?
19-
// Note: Is this supposed to simply be a replacement of the CG_Quote structure? Dunno... (my guess is CG_Quote was claimminer's export-model, but CG_Source is claimgen's take on it)
20-
export class CG_Source {
21-
text: string;
22-
url: string;
23-
score: number;
24-
}
27+
// special fields (added by importer itself, during interpretation process)
28+
// ==========
29+
30+
_isSyntheticNodeObj_fromStringCollection?: "atomic_claims" | "counter_claims" | "examples";
31+
32+
// text/title fields (these fields are named differently, but they're all equivalent, as just the "text" of the node)
33+
// ==========
2534

26-
export abstract class CG_Node {
27-
id?: string; // deprecated, and ignored (and console.warn is called if input json uses this; new approach is to use extras.TOOL_NAMESPACE.id)
28-
narrative?: string;
29-
reference_urls?: CG_RefURLOrQuoteOld[];
30-
quotes?: CG_Quote[];
31-
sources?: CG_Source[]; // v6
32-
extras?: object; // eg. {claimMiner: {id: "123"}}
33-
34-
//abstract GetTitle(): string;
35-
/** Get the regular, "standalone" text of the claim. (stored in debate-map as text_base) */
36-
static GetTitle_Main(node: CG_Node): string {
35+
question?: string;
36+
position?: string;
37+
category?: string;
38+
claim?: string;
39+
argument?: string;
40+
text?: string; // synthetically added by the importer, for plain string entries in the "examples", "atomic_claims", and "counter_claims" arrays
41+
42+
/**
 * Gets the text of the node, read from whichever of the equivalent text fields is set.
 * The first non-nullish field wins, in this order: question, position, category, claim, argument, text.
 * (`text` is included so that node-objects created from plain string entries also yield their text.)
 * @param node The claim-gen node to read.
 * @returns The trimmed text, or "" when no text field is set.
 */
static GetText(node: CG_Node): string {
	const rawText = node.question ?? node.position ?? node.category ?? node.claim ?? node.argument ?? node.text;
	// fsr, some json files contain line-breaks at start or end, so clean this up
	return (rawText ?? "").trim();
}
45-
/** Get the "narrative" text of the claim, as displayed in the papers app. (stored in debate-map as text_narration) */
46-
static GetTitle_Narrative(node: CG_Node) {
47-
const result = (node.narrative ?? "").trim(); // fsr, some json files contain line-breaks at start or end, so clean this up
48-
return result.length ? result : null;
48+
49+
// children fields
50+
// ==========
51+
52+
// arrays of objects (these fields are named differently, but they're all equivalent, as just "children nodes" of the node)
53+
positions?: CG_Node[];
54+
categories?: CG_Node[];
55+
claims?: CG_Node[];
56+
arguments?: CG_Node[];
57+
58+
// arrays of strings
59+
atomic_claims?: string[];
60+
counter_claims?: string[];
61+
examples?: string[];
62+
63+
/**
 * Collects the children of the node from all of its child collections.
 * Entries of the object collections (positions, categories, claims, arguments) are returned as-is;
 * entries of the string collections (atomic_claims, counter_claims, examples) are wrapped in new
 * CG_Node objects carrying the string as `text` and the collection name as origin.
 * @param node The claim-gen node whose children are wanted.
 * @returns The string-sourced children (atomic_claims, then counter_claims, then examples), followed by the object children.
 * @throws If an entry of a string collection fails the `IsString` assertion.
 */
static GetChildren(node: CG_Node): CG_Node[] {
	const childrenFromObjects = [
		node.positions,
		node.categories,
		node.claims,
		node.arguments,
	].flatMap(a=>a ?? []);

	const childrenFromSimpleStrings: CG_Node[] = [];
	for (const collectionName of ["atomic_claims", "counter_claims", "examples"] as const) {
		const entries = node[collectionName];
		if (!entries) continue;
		for (const entryText of entries) {
			// report the collection that actually failed the check, not an unrelated one
			Assert(IsString(entryText), `Expected "${collectionName}" to be an array of strings, but found: ${JSON.stringify(entries)}`);
			childrenFromSimpleStrings.push(new CG_Node({text: entryText}, collectionName));
		}
	}

	// add the simple-string-children first, since they're shallower/non-recursive
	return [...childrenFromSimpleStrings, ...childrenFromObjects];
}
94+
95+
// other fields
96+
// ==========
97+
98+
sources?: CG_Source[]; // these get imported as attachments (each as a "quote" attachment, but also one "references" attachment gets added as the first attachment, containing all source urls)
99+
50100
static GetAttachments(node: CG_Node) {
51101
const result = [] as Attachment[];
52102

53-
const referenceURLs = node.reference_urls && node.reference_urls.length > 0 ? node.reference_urls.filter(a=>IsString(a)) as string[] : [];
54-
const oldQuotes = node.reference_urls && node.reference_urls.length > 0 ? node.reference_urls.filter(a=>!IsString(a)) as CG_QuoteOld[] : [];
55-
if (referenceURLs.length > 0) {
103+
const sources = node.sources ?? [];
104+
const sourceURLs = sources.map(a=>a.url).filter(a=>IsString(a) && a.trim().length > 0);
105+
106+
if (sourceURLs.length > 0) {
56107
result.push(new Attachment({
57-
references: {
58-
sourceChains: referenceURLs.map(url=>{
108+
references: new ReferencesAttachment({
109+
sourceChains: sourceURLs.map(url=>{
59110
return new SourceChain([
60-
{type: SourceType.webpage, link: url},
111+
new Source({type: SourceType.webpage, link: url}),
61112
]);
62113
}),
63-
},
64-
}));
65-
}
66-
67-
for (const quoteOld of oldQuotes) {
68-
result.push(new Attachment({
69-
quote: new QuoteAttachment({
70-
content: quoteOld.quote,
71-
sourceChains: [
72-
new SourceChain([
73-
{type: SourceType.webpage, link: quoteOld.url},
74-
]),
75-
],
76-
}),
77-
}));
78-
}
79-
for (const quote of node.quotes ?? []) {
80-
result.push(new Attachment({
81-
quote: new QuoteAttachment({
82-
content: quote.quote,
83-
sourceChains: quote.source != null ? [
84-
new SourceChain([quote.source]),
85-
] : [],
86114
}),
87-
extras: quote.extras,
88115
}));
89116
}
90117

91-
// not sure yet if sources are just the "new quotes", or something different/extra (see comment on CG_Source class for more thoughts)
92-
// so for now, do a separate loop to add them
93-
for (const source of node.sources ?? []) {
118+
for (const source of sources) {
94119
result.push(new Attachment({
95120
quote: new QuoteAttachment({
96121
content: source.text,
@@ -103,111 +128,10 @@ export abstract class CG_Node {
103128
}));
104129
}
105130

106-
if (CG_Evidence.is(node)) {
107-
const evidence = node as CG_Evidence;
108-
if (evidence.url) {
109-
const sourceChains = evidence.url != null ? [
110-
new SourceChain([
111-
new Source({type: SourceType.webpage, link: evidence.url}),
112-
]),
113-
] : [];
114-
/*if (evidence.text) {
115-
result.push(new Attachment({
116-
references: new ReferencesAttachment({
117-
sourceChains,
118-
}),
119-
}));
120-
} else if (evidence.quote) {*/
121-
result.push(new Attachment({
122-
quote: new QuoteAttachment({
123-
content: evidence.quote,
124-
sourceChains,
125-
}),
126-
}));
127-
}
128-
if (evidence.reasoning) {
129-
result.push(new Attachment({
130-
description: new DescriptionAttachment({
131-
text: evidence.reasoning,
132-
}),
133-
}));
134-
}
135-
}
136-
137131
return result;
138132
}
139133
}
140-
141-
export class CG_Debate extends CG_Node {
142-
name: string;
143-
questions: CG_Question[];
144-
}
145-
export class CG_Question extends CG_Node {
146-
// v1
147-
questionText?: string; // deprecated, but processed atm
148-
// v2
149-
question?: string;
150-
151-
positions: CG_Position[];
152-
}
153-
export class CG_Position extends CG_Node {
154-
position: string;
155-
categories: CG_Category[];
156-
}
157-
export class CG_Category extends CG_Node {
158-
category: string;
159-
claims: CG_Claim[];
160-
}
161-
export class CG_Claim extends CG_Node {
162-
// v1
163-
claim?: string;
164-
165-
// v2
166-
argument?: string; // deprecated, but processed atm
167-
/*generated?: string;
168-
valid?: boolean;
169-
similarity?: boolean;
170-
edited?: boolean;*/
171-
172-
// v3 (tool extending claim-gen)
173-
arguments?: (string | CG_Argument)[];
174-
175-
// v4
176-
counter_claim?: string;
177-
178-
// v5
179-
examples?: (string | CG_Argument)[];
180-
counter_claims?: (string | CG_Argument)[];
181-
182-
// v7+
183-
atomic_claims?: (string | CG_AtomicClaim)[];
184-
}
185-
186-
// the distinction between "argument" and "example" is a bit unclear to me in the claimgen model; merging them atm
187-
export class CG_Argument extends CG_Node {
188-
// when in "arguments" collection
189-
argument?: string;
190-
191-
// when in "examples" collection
192-
original_example?: string; // <v6
193-
example?: string; // v6(a)
194-
text?: string; // v6(b)
195-
evidence?: CG_Evidence[]; // <v6? [maybe not deprecated, ie. maybe can occur alongside sources]
196-
}
197-
198-
// "evidence" entries are supposed to be imported as separate nodes (eg. since supports/refutes needs a link-polarity to distinguish)
199-
export class CG_Evidence extends CG_Node {
200-
quote: string;
134+
export class CG_Source {
135+
text: string;
201136
url: string;
202-
stance: "supports" | "refutes";
203-
reasoning: string;
204-
205-
static is(node: CG_Node) {
206-
return (node as any).stance != null;
207-
}
208-
}
209-
210-
// v7+
211-
export class CG_AtomicClaim extends CG_Node {
212-
text?: string;
213137
}

0 commit comments

Comments
 (0)