@@ -44,7 +44,7 @@ def evaluation_function(
4444 if not isinstance (answer , dict ):
4545 return Result (
4646 is_correct = False ,
47- feedback_items = [("incorrect input" , f "missing answer object. got { answer } " )]
47+ feedback_items = [("incorrect input" , "missing answer object" )]
4848 )
4949
5050 # If response is a string, parse it as JSON
@@ -67,15 +67,15 @@ def evaluation_function(
6767 formula = formula_parser (response_formula )
6868
6969 # Answer shape: satisfiability (bool), tautology (bool), equivalent (None|str), validTruthTable (bool)
70- satisfiability = answer .get ("satisfiability" , answer . get ( "satisability" , False ) ) is True
70+ satisfiability = answer .get ("satisfiability" , False ) is True
7171 tautology = answer .get ("tautology" , False ) is True
7272 equivalent = answer .get ("equivalent" )
7373 if equivalent is not None and not isinstance (equivalent , str ):
7474 equivalent = None
7575 elif equivalent is not None and isinstance (equivalent , str ) and equivalent .strip () == "" :
7676 equivalent = None
77- # validTruthTable (bool) or truthTable (None|dict) for backward compat
78- has_truth_table = answer .get ("validTruthTable" , False ) is True or answer . get ( "truthTable" ) is not None
77+
78+ has_truth_table = answer .get ("validTruthTable" , False ) is True
7979 has_equivalence = equivalent is not None
8080
8181 num_selected = sum ([satisfiability , tautology , has_equivalence , has_truth_table ])
@@ -114,16 +114,49 @@ def evaluation_function(
114114 return truth_table_result
115115
116116 is_correct = False
117+ feedback = []
118+
117119 if has_equivalence :
118120 answer_formula = formula_parser (equivalent )
119- is_correct = EquivalenceEvaluator (formula , answer_formula ).evaluate ()
121+ ev = EquivalenceEvaluator (formula , answer_formula )
122+ is_correct , counterex = ev .evaluate_with_counterexample ()
123+ if not is_correct :
124+ feedback .append ((
125+ "equivalence" ,
126+ f"Comparing your formula \" { response_formula } \" with expected \" { equivalent } \" . They are not equivalent."
127+ ))
128+ if counterex :
129+ asn = ", " .join (f"{ k } ={ counterex ['assignment' ][k ]} " for k in sorted (counterex ["assignment" ]))
130+ feedback .append ((
131+ "counterexample" ,
132+ f"Under assignment ({ asn } ): your formula = { counterex ['response_value' ]} , expected formula = { counterex ['expected_value' ]} ."
133+ ))
120134 elif tautology :
121- is_correct = TautologyEvaluator (formula ).evaluate ()
135+ ev = TautologyEvaluator (formula )
136+ is_correct , counterex = ev .evaluate_with_counterexample ()
137+ if not is_correct :
138+ feedback .append ((
139+ "tautology" ,
140+ f"Formula \" { response_formula } \" is not a tautology."
141+ ))
142+ if counterex :
143+ asn = ", " .join (f"{ k } ={ counterex ['assignment' ][k ]} " for k in sorted (counterex ["assignment" ]))
144+ feedback .append ((
145+ "counterexample" ,
146+ f"Under assignment ({ asn } ) the formula evaluates to False."
147+ ))
122148 elif satisfiability :
123149 is_correct = SatisfiabilityEvaluator (formula ).evaluate ()
150+ if not is_correct :
151+ feedback .append ((
152+ "satisfiability" ,
153+ f"Formula \" { response_formula } \" is not satisfiable: no assignment of the atoms makes it true."
154+ ))
124155 elif has_truth_table :
125156 is_correct = True # already validated above
126157
158+ if feedback :
159+ return Result (is_correct = False , feedback_items = feedback )
127160 return Result (is_correct = is_correct )
128161
129162 except Exception as e :
0 commit comments