feat: add richer feedback (counterexamples and truth-table cell details)

ashwin6-dev · ashwin6-dev · commit fda77f1b42bd · 2026-02-19T06:33:33.000Z
diff --git a/evaluation_function/domain/evaluators.py b/evaluation_function/domain/evaluators.py
@@ -76,21 +76,31 @@ def __init__(self, formula1: Formula, formula2: Formula):
         self._formula2 = formula2
 
     def evaluate(self) -> bool:
+        ok, _ = self.evaluate_with_counterexample()
+        return ok
+
+    def evaluate_with_counterexample(self) -> tuple[bool, dict | None]:
+        """Returns (are_equivalent, counterexample_or_none). Counterexample has assignment, response_value, expected_value."""
         atoms1 = _extract_atoms(self._formula1)
         atoms2 = _extract_atoms(self._formula2)
         all_atoms = list(atoms1 | atoms2)
-        
+
         for assignment_values in product([False, True], repeat=len(all_atoms)):
             assignment_dict = {atom: val for atom, val in zip(all_atoms, assignment_values)}
             assignment = Assignment(assignment_dict)
-            
+
             evaluator1 = FormulaEvaluator(self._formula1, assignment)
             evaluator2 = FormulaEvaluator(self._formula2, assignment)
-            
-            if evaluator1.evaluate() != evaluator2.evaluate():
-                return False
-        
-        return True
+            v1, v2 = evaluator1.evaluate(), evaluator2.evaluate()
+
+            if v1 != v2:
+                assignment_str = {atom.name: val for atom, val in assignment_dict.items()}
+                return False, {
+                    "assignment": assignment_str,
+                    "response_value": v1,
+                    "expected_value": v2,
+                }
+        return True, None
 
 
 class SatisfiabilityEvaluator:
@@ -117,15 +127,21 @@ def __init__(self, formula: Formula):
         self._formula = formula
 
     def evaluate(self) -> bool:
+        ok, _ = self.evaluate_with_counterexample()
+        return ok
+
+    def evaluate_with_counterexample(self) -> tuple[bool, dict | None]:
+        """Returns (is_tautology, counterexample_or_none). Counterexample has assignment and formula_value."""
         atoms = _extract_atoms(self._formula)
         all_atoms = list(atoms)
-        
+
         for assignment_values in product([False, True], repeat=len(all_atoms)):
             assignment_dict = {atom: val for atom, val in zip(all_atoms, assignment_values)}
             assignment = Assignment(assignment_dict)
-            
+
             evaluator = FormulaEvaluator(self._formula, assignment)
-            if not evaluator.evaluate():
-                return False
-        
-        return True
+            val = evaluator.evaluate()
+            if not val:
+                assignment_str = {atom.name: v for atom, v in assignment_dict.items()}
+                return False, {"assignment": assignment_str, "formula_value": val}
+        return True, None
diff --git a/evaluation_function/evaluation.py b/evaluation_function/evaluation.py
@@ -44,7 +44,7 @@ def evaluation_function(
         if not isinstance(answer, dict):
             return Result(
                 is_correct=False,
-                feedback_items=[("incorrect input", f"missing answer object. got {answer}")]
+                feedback_items=[("incorrect input", "missing answer object")]
             )
 
         # If response is a string, parse it as JSON
@@ -67,15 +67,15 @@ def evaluation_function(
         formula = formula_parser(response_formula)
 
         # Answer shape: satisfiability (bool), tautology (bool), equivalent (None|str), validTruthTable (bool)
-        satisfiability = answer.get("satisfiability", answer.get("satisability", False)) is True
+        satisfiability = answer.get("satisfiability", False) is True
         tautology = answer.get("tautology", False) is True
         equivalent = answer.get("equivalent")
         if equivalent is not None and not isinstance(equivalent, str):
             equivalent = None
         elif equivalent is not None and isinstance(equivalent, str) and equivalent.strip() == "":
             equivalent = None
-        # validTruthTable (bool) or truthTable (None|dict) for backward compat
-        has_truth_table = answer.get("validTruthTable", False) is True or answer.get("truthTable") is not None
+            
+        has_truth_table = answer.get("validTruthTable", False) is True
         has_equivalence = equivalent is not None
 
         num_selected = sum([satisfiability, tautology, has_equivalence, has_truth_table])
@@ -114,16 +114,49 @@ def evaluation_function(
                 return truth_table_result
 
         is_correct = False
+        feedback = []
+
         if has_equivalence:
             answer_formula = formula_parser(equivalent)
-            is_correct = EquivalenceEvaluator(formula, answer_formula).evaluate()
+            ev = EquivalenceEvaluator(formula, answer_formula)
+            is_correct, counterex = ev.evaluate_with_counterexample()
+            if not is_correct:
+                feedback.append((
+                    "equivalence",
+                    f"Comparing your formula \"{response_formula}\" with expected \"{equivalent}\". They are not equivalent."
+                ))
+                if counterex:
+                    asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
+                    feedback.append((
+                        "counterexample",
+                        f"Under assignment ({asn}): your formula = {counterex['response_value']}, expected formula = {counterex['expected_value']}."
+                    ))
         elif tautology:
-            is_correct = TautologyEvaluator(formula).evaluate()
+            ev = TautologyEvaluator(formula)
+            is_correct, counterex = ev.evaluate_with_counterexample()
+            if not is_correct:
+                feedback.append((
+                    "tautology",
+                    f"Formula \"{response_formula}\" is not a tautology."
+                ))
+                if counterex:
+                    asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
+                    feedback.append((
+                        "counterexample",
+                        f"Under assignment ({asn}) the formula evaluates to False."
+                    ))
         elif satisfiability:
             is_correct = SatisfiabilityEvaluator(formula).evaluate()
+            if not is_correct:
+                feedback.append((
+                    "satisfiability",
+                    f"Formula \"{response_formula}\" is not satisfiable: no assignment of the atoms makes it true."
+                ))
         elif has_truth_table:
             is_correct = True  # already validated above
 
+        if feedback:
+            return Result(is_correct=False, feedback_items=feedback)
         return Result(is_correct=is_correct)
 
     except Exception as e:
diff --git a/evaluation_function/truth_table/evaluate.py b/evaluation_function/truth_table/evaluate.py
@@ -139,10 +139,16 @@ def evaluate_truth_table(variables: list[str], cells: list[list[str]], num_atoms
                 continue
 
             assignment = Assignment(atoms_mapping)
-            if FormulaEvaluator(formula, assignment).evaluate() != cells[i][j]:
+            expected = FormulaEvaluator(formula, assignment).evaluate()
+            got = cells[i][j]
+            if expected != got:
+                formula_str = variables[j] if j < len(variables) else f"column {j+1}"
                 return Result(
                     is_correct=False,
-                    feedback_items=[(Exception, "incorrect cell value")]
+                    feedback_items=[(
+                        Exception,
+                        f"incorrect cell value at row {i+1}, column {j+1} (formula \"{formula_str}\"): expected {'T' if expected else 'F'}, got {'T' if got else 'F'}."
+                    )]
                 )