InternScience
diff --git a/examples/evaluate/evaluate_kg/kg_evaluation_config.yaml b/examples/evaluate/evaluate_kg/kg_evaluation_config.yaml
Lines changed: 3 additions & 4 deletions
diff --git a/examples/evaluate/evaluate_qa/qa_evaluation_config.yaml b/examples/evaluate/evaluate_qa/qa_evaluation_config.yaml
Lines changed: 7 additions & 6 deletions
diff --git a/examples/evaluate/evaluate_triple/evaluate_triple.sh b/examples/evaluate/evaluate_triple/evaluate_triple.sh
Lines changed: 2 additions & 0 deletions
diff --git a/examples/evaluate/evaluate_triple/triple_evaluation_config.yaml b/examples/evaluate/evaluate_triple/triple_evaluation_config.yaml
Lines changed: 46 additions & 0 deletions
diff --git a/graphgen/bases/__init__.py b/graphgen/bases/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/graphgen/bases/base_evaluator.py b/graphgen/bases/base_evaluator.py
Lines changed: 21 additions & 2 deletions
diff --git a/graphgen/bases/base_operator.py b/graphgen/bases/base_operator.py
Lines changed: 18 additions & 1 deletion
diff --git a/graphgen/bases/datatypes.py b/graphgen/bases/datatypes.py
Lines changed: 7 additions & 0 deletions
diff --git a/graphgen/models/__init__.py b/graphgen/models/__init__.py
Lines changed: 0 additions & 1 deletion
diff --git a/graphgen/models/evaluator/__init__.py b/graphgen/models/evaluator/__init__.py
Lines changed: 2 additions & 1 deletion
@@ -10,7 +10,7 @@ nodes:
     dependencies: []
     params:
       input_path:
-        - examples/input_examples/extract_demo.txt
+        - examples/input_examples/jsonl_demo.jsonl
 
   - id: chunk
     op_name: chunk
@@ -39,7 +39,6 @@ nodes:
     dependencies:
       - build_kg
     params:
+      target: kg
       metrics:
-        - kg_structure
-        - kg_accuracy
-        - kg_consistency
+        - structure
@@ -1,7 +1,7 @@
 global_params:
   working_dir: cache
-  graph_backend: kuzu # graph database backend, support: kuzu, networkx
-  kv_backend: rocksdb # key-value store backend, support: rocksdb, json_kv
+  graph_backend: networkx # graph database backend, support: kuzu, networkx
+  kv_backend: json_kv # key-value store backend, support: rocksdb, json_kv
 
 nodes:
   - id: read_files # id is unique in the pipeline, and can be referenced by other steps
@@ -89,10 +89,11 @@ nodes:
       batch_size: 128
     save_output: true
     params:
+      target: qa
       metrics:
-        - qa_length
-        - qa_mtld
-        # - qa_reward_score
-        # - qa_uni_score
+        - length
+        - mtld
+        # - reward_score
+        # - uni_score
       mtld_params:
         threshold: 0.7
@@ -0,0 +1,2 @@
+python3 -m graphgen.run \
+--config_file examples/evaluate/evaluate_triple/triple_evaluation_config.yaml
@@ -0,0 +1,46 @@
+global_params:
+  working_dir: cache
+  graph_backend: networkx # graph database backend, support: kuzu, networkx
+  kv_backend: json_kv # key-value store backend, support: rocksdb, json_kv
+
+nodes:
+  - id: read
+    op_name: read
+    type: source
+    dependencies: []
+    params:
+      input_path:
+        - examples/input_examples/jsonl_demo.jsonl
+
+  - id: chunk
+    op_name: chunk
+    type: map_batch
+    dependencies:
+      - read
+    execution_params:
+      replicas: 4
+    params:
+      chunk_size: 20480 # larger chunk size for better context
+      chunk_overlap: 2000
+
+  - id: build_kg
+    op_name: build_kg
+    type: map_batch
+    dependencies:
+      - chunk
+    execution_params:
+      replicas: 1
+      batch_size: 128
+
+  - id: evaluate
+    op_name: evaluate
+    type: aggregate
+    save_output: true
+    dependencies:
+      - build_kg
+    params:
+      target: triple
+      src_namespace: chunk
+      tgt_namespace: build_kg
+      metrics:
+        - accuracy
@@ -1,3 +1,4 @@
+from .base_evaluator import BaseKGEvaluator, BaseQAEvaluator, BaseTripleEvaluator
 from .base_extractor import BaseExtractor
 from .base_generator import BaseGenerator
 from .base_kg_builder import BaseKGBuilder
@@ -9,5 +10,4 @@
 from .base_splitter import BaseSplitter
 from .base_storage import BaseGraphStorage, BaseKVStorage, StorageNameSpace
 from .base_tokenizer import BaseTokenizer
-from .base_evaluator import BaseEvaluator
 from .datatypes import Chunk, Config, Node, QAPair, Token
@@ -1,10 +1,29 @@
 from abc import ABC, abstractmethod
+from typing import Any
+
+from .base_storage import BaseGraphStorage
 from .datatypes import QAPair
 
 
-class BaseEvaluator(ABC):
+class BaseQAEvaluator(ABC):
     @abstractmethod
-    def evaluate(self, pair: QAPair) -> float:
+    async def evaluate(self, pair: QAPair) -> dict[str, float]:
         """
-        Evaluate the text and return a score.
+        Evaluate a QA pair and return a dict of float scores.
         """
+
+
+class BaseKGEvaluator(ABC):  # interface for evaluators that score an entire graph store
+    @abstractmethod
+    def evaluate(self, kg: BaseGraphStorage) -> dict[str, Any]:  # synchronous, unlike the async QA/triple evaluators
+        """
+        Evaluate the whole graph and return a dict of scores.
+        """
+
+
+class BaseTripleEvaluator(ABC):  # interface for evaluators that score one node/edge at a time
+    @abstractmethod
+    async def evaluate(self, unit: dict) -> dict[str, float]:
+        """
+        Evaluate a single node/edge unit and return a dict of float scores.
+        """
@@ -1,12 +1,25 @@
 import inspect
 import os
 from abc import ABC, abstractmethod
-from typing import Iterable, Union, Tuple
+from typing import Iterable, Tuple, Union
 
+import numpy as np
 import pandas as pd
 import ray
 
 
+def convert_to_serializable(obj):  # recursively replace numpy values with plain Python ones
+    if isinstance(obj, np.ndarray):
+        return obj.tolist()  # whole array -> (nested) Python list
+    if isinstance(obj, np.generic):
+        return obj.item()  # numpy scalar -> Python scalar
+    if isinstance(obj, dict):
+        return {k: convert_to_serializable(v) for k, v in obj.items()}  # values converted; keys kept as-is
+    if isinstance(obj, list):
+        return [convert_to_serializable(v) for v in obj]  # element-wise, preserves order
+    return obj  # NOTE(review): tuples, sets and other types pass through unconverted
+
+
 class BaseOperator(ABC):
     def __init__(
         self,
@@ -21,6 +34,7 @@ def __init__(
         log_dir = os.path.join(working_dir, "logs")
         self.op_name = op_name or self.__class__.__name__
         self.working_dir = working_dir
+        self.kv_backend = kv_backend
         self.kv_storage = init_storage(
             backend=kv_backend, working_dir=working_dir, namespace=self.op_name
         )
@@ -118,6 +132,9 @@ def split(self, batch: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
         return to_process, pd.DataFrame(recovered_chunks)
 
     def store(self, results: list, meta_update: dict):
+        results = convert_to_serializable(results)
+        meta_update = convert_to_serializable(meta_update)
+
         batch = {res["_trace_id"]: res for res in results}
         self.kv_storage.upsert(batch)
 
 
@@ -31,6 +31,13 @@ class QAPair:
     question: str
     answer: str
 
+    @staticmethod
+    def from_dict(data: dict) -> "QAPair":  # build a QAPair from a plain dict
+        return QAPair(
+            question=data.get("question", ""),  # missing key -> empty string
+            answer=data.get("answer", ""),  # missing key -> empty string
+        )
+
 
 @dataclass
 class Token:
 
@@ -1,6 +1,5 @@
 from .evaluator import (
     AccuracyEvaluator,
-    ConsistencyEvaluator,
     LengthEvaluator,
     MTLDEvaluator,
     RewardEvaluator,
 
@@ -1,2 +1,3 @@
-from .kg import AccuracyEvaluator, ConsistencyEvaluator, StructureEvaluator
+from .kg import StructureEvaluator
 from .qa import LengthEvaluator, MTLDEvaluator, RewardEvaluator, UniEvaluator
+from .triple import AccuracyEvaluator
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+python3 -m graphgen.run \`
	`2`	`+--config_file examples/evaluate/evaluate_triple/triple_evaluation_config.yaml`