Commit fda77f1
feat: added richer feedback
1 parent 2c70b61 commit fda77f1

3 files changed: 76 additions & 21 deletions

evaluation_function/domain/evaluators.py

Lines changed: 29 additions & 13 deletions
@@ -76,21 +76,31 @@ def __init__(self, formula1: Formula, formula2: Formula):
         self._formula2 = formula2
 
     def evaluate(self) -> bool:
+        ok, _ = self.evaluate_with_counterexample()
+        return ok
+
+    def evaluate_with_counterexample(self) -> tuple[bool, dict | None]:
+        """Returns (are_equivalent, counterexample_or_none). Counterexample has assignment, response_value, expected_value."""
         atoms1 = _extract_atoms(self._formula1)
         atoms2 = _extract_atoms(self._formula2)
         all_atoms = list(atoms1 | atoms2)
-
+
         for assignment_values in product([False, True], repeat=len(all_atoms)):
             assignment_dict = {atom: val for atom, val in zip(all_atoms, assignment_values)}
             assignment = Assignment(assignment_dict)
-
+
             evaluator1 = FormulaEvaluator(self._formula1, assignment)
             evaluator2 = FormulaEvaluator(self._formula2, assignment)
-
-            if evaluator1.evaluate() != evaluator2.evaluate():
-                return False
-
-        return True
+            v1, v2 = evaluator1.evaluate(), evaluator2.evaluate()
+
+            if v1 != v2:
+                assignment_str = {atom.name: val for atom, val in assignment_dict.items()}
+                return False, {
+                    "assignment": assignment_str,
+                    "response_value": v1,
+                    "expected_value": v2,
+                }
+        return True, None
 
 
 class SatisfiabilityEvaluator:
@@ -117,15 +127,21 @@ def __init__(self, formula: Formula):
         self._formula = formula
 
     def evaluate(self) -> bool:
+        ok, _ = self.evaluate_with_counterexample()
+        return ok
+
+    def evaluate_with_counterexample(self) -> tuple[bool, dict | None]:
+        """Returns (is_tautology, counterexample_or_none). Counterexample has assignment and formula_value."""
         atoms = _extract_atoms(self._formula)
         all_atoms = list(atoms)
-
+
         for assignment_values in product([False, True], repeat=len(all_atoms)):
             assignment_dict = {atom: val for atom, val in zip(all_atoms, assignment_values)}
             assignment = Assignment(assignment_dict)
-
+
             evaluator = FormulaEvaluator(self._formula, assignment)
-            if not evaluator.evaluate():
-                return False
-
-        return True
+            val = evaluator.evaluate()
+            if not val:
+                assignment_str = {atom.name: v for atom, v in assignment_dict.items()}
+                return False, {"assignment": assignment_str, "formula_value": val}
+        return True, None
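
Both evaluators use the same technique: enumerate all 2**n truth assignments with itertools.product and return at the first assignment where the two formulas disagree (or where the single formula is false). A minimal, self-contained sketch of the equivalence search, with formulas as plain callables standing in for the repo's Formula / FormulaEvaluator / Assignment types (the helper name find_equivalence_counterexample is invented for illustration):

from itertools import product

def find_equivalence_counterexample(atoms, f1, f2):
    """Brute-force truth-table search. atoms: list of atom names;
    f1, f2: callables mapping a {name: bool} assignment to a bool.
    Returns (True, None) if f1 and f2 agree on every assignment,
    else (False, counterexample) in the same shape as the diff above."""
    for values in product([False, True], repeat=len(atoms)):
        assignment = dict(zip(atoms, values))
        v1, v2 = f1(assignment), f2(assignment)
        if v1 != v2:
            return False, {
                "assignment": assignment,
                "response_value": v1,
                "expected_value": v2,
            }
    return True, None

# p -> q is equivalent to ~p | q, but not to its converse q -> p.
implication = lambda a: (not a["p"]) or a["q"]
converse = lambda a: (not a["q"]) or a["p"]
print(find_equivalence_counterexample(["p", "q"], implication, converse))
# (False, {'assignment': {'p': False, 'q': True}, 'response_value': True, 'expected_value': False})

The search is exponential in the number of atoms, which is acceptable for the small classroom formulas this function grades.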

evaluation_function/evaluation.py

Lines changed: 39 additions & 6 deletions
@@ -44,7 +44,7 @@ def evaluation_function(
     if not isinstance(answer, dict):
         return Result(
             is_correct=False,
-            feedback_items=[("incorrect input", f"missing answer object. got {answer}")]
+            feedback_items=[("incorrect input", "missing answer object")]
         )
 
     # If response is a string, parse it as JSON
@@ -67,15 +67,15 @@ def evaluation_function(
         formula = formula_parser(response_formula)
 
         # Answer shape: satisfiability (bool), tautology (bool), equivalent (None|str), validTruthTable (bool)
-        satisfiability = answer.get("satisfiability", answer.get("satisability", False)) is True
+        satisfiability = answer.get("satisfiability", False) is True
         tautology = answer.get("tautology", False) is True
         equivalent = answer.get("equivalent")
         if equivalent is not None and not isinstance(equivalent, str):
             equivalent = None
         elif equivalent is not None and isinstance(equivalent, str) and equivalent.strip() == "":
             equivalent = None
-        # validTruthTable (bool) or truthTable (None|dict) for backward compat
-        has_truth_table = answer.get("validTruthTable", False) is True or answer.get("truthTable") is not None
+
+        has_truth_table = answer.get("validTruthTable", False) is True
         has_equivalence = equivalent is not None
 
         num_selected = sum([satisfiability, tautology, has_equivalence, has_truth_table])
@@ -114,16 +114,49 @@ def evaluation_function(
             return truth_table_result
 
         is_correct = False
+        feedback = []
+
         if has_equivalence:
             answer_formula = formula_parser(equivalent)
-            is_correct = EquivalenceEvaluator(formula, answer_formula).evaluate()
+            ev = EquivalenceEvaluator(formula, answer_formula)
+            is_correct, counterex = ev.evaluate_with_counterexample()
+            if not is_correct:
+                feedback.append((
+                    "equivalence",
+                    f"Comparing your formula \"{response_formula}\" with expected \"{equivalent}\". They are not equivalent."
+                ))
+                if counterex:
+                    asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
+                    feedback.append((
+                        "counterexample",
+                        f"Under assignment ({asn}): your formula = {counterex['response_value']}, expected formula = {counterex['expected_value']}."
+                    ))
         elif tautology:
-            is_correct = TautologyEvaluator(formula).evaluate()
+            ev = TautologyEvaluator(formula)
+            is_correct, counterex = ev.evaluate_with_counterexample()
+            if not is_correct:
+                feedback.append((
+                    "tautology",
+                    f"Formula \"{response_formula}\" is not a tautology."
+                ))
+                if counterex:
+                    asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
+                    feedback.append((
+                        "counterexample",
+                        f"Under assignment ({asn}) the formula evaluates to False."
+                    ))
         elif satisfiability:
             is_correct = SatisfiabilityEvaluator(formula).evaluate()
+            if not is_correct:
+                feedback.append((
+                    "satisfiability",
+                    f"Formula \"{response_formula}\" is not satisfiable: no assignment of the atoms makes it true."
+                ))
         elif has_truth_table:
             is_correct = True  # already validated above
 
+        if feedback:
+            return Result(is_correct=False, feedback_items=feedback)
         return Result(is_correct=is_correct)
 
     except Exception as e:
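
To see what the new feedback looks like, here is the counterexample message assembled exactly as in the diff, applied to a hypothetical counterexample dict (values invented for illustration; in the real flow the dict comes from evaluate_with_counterexample()):

# Hypothetical counterexample in the shape evaluate_with_counterexample() returns.
counterex = {
    "assignment": {"q": True, "p": False},
    "response_value": True,
    "expected_value": False,
}

# Same formatting as the diff: sorting the atom names makes the message deterministic.
asn = ", ".join(f"{k}={counterex['assignment'][k]}" for k in sorted(counterex["assignment"]))
print(f"Under assignment ({asn}): your formula = {counterex['response_value']}, "
      f"expected formula = {counterex['expected_value']}.")
# Under assignment (p=False, q=True): your formula = True, expected formula = False.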

evaluation_function/truth_table/evaluate.py

Lines changed: 8 additions & 2 deletions
@@ -139,10 +139,16 @@ def evaluate_truth_table(variables: list[str], cells: list[list[str]], num_atoms
                 continue
 
             assignment = Assignment(atoms_mapping)
-            if FormulaEvaluator(formula, assignment).evaluate() != cells[i][j]:
+            expected = FormulaEvaluator(formula, assignment).evaluate()
+            got = cells[i][j]
+            if expected != got:
+                formula_str = variables[j] if j < len(variables) else f"column {j+1}"
                 return Result(
                     is_correct=False,
-                    feedback_items=[(Exception, "incorrect cell value")]
+                    feedback_items=[(
+                        Exception,
+                        f"incorrect cell value at row {i+1}, column {j+1} (formula \"{formula_str}\"): expected {'T' if expected else 'F'}, got {'T' if got else 'F'}."
+                    )]
                 )
 
 
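
The same change in miniature: the sketch below reproduces the row/column/expected-vs-got message with plain lists in place of the repo's Formula, Assignment, and Result types. first_bad_cell and evaluate_cell are invented names, and cells are assumed to hold booleans, as the comparison in the diff implies.

def first_bad_cell(variables, cells, evaluate_cell):
    """evaluate_cell(i, j) -> the expected bool for row i, column j
    (stand-in for FormulaEvaluator(formula, assignment).evaluate())."""
    for i, row in enumerate(cells):
        for j, got in enumerate(row):
            expected = evaluate_cell(i, j)
            if expected != got:
                formula_str = variables[j] if j < len(variables) else f"column {j+1}"
                return (f"incorrect cell value at row {i+1}, column {j+1} "
                        f"(formula \"{formula_str}\"): "
                        f"expected {'T' if expected else 'F'}, got {'T' if got else 'F'}.")
    return None

# Truth table for "p and q" with the last result cell deliberately wrong.
rows = [(False, False), (False, True), (True, False), (True, True)]
cells = [[p, q, p and q] for p, q in rows]
cells[3][2] = False  # error at row 4, column 3
print(first_bad_cell(["p", "q", "p and q"], cells,
                     lambda i, j: (rows[i][0], rows[i][1], rows[i][0] and rows[i][1])[j]))
# incorrect cell value at row 4, column 3 (formula "p and q"): expected T, got F.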
