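"""Evaluation script for grounding predictions against gold section titles.

Scores clustered predictions (one "representative" per segment) against gold
titles, reporting exact/partial section-match precision, recall, and F1 per
class, and compares them to most-frequent-class and random baselines.
"""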
import json
import os
from argparse import ArgumentParser

import numpy as np
import pandas as pd

# print floats with three decimal places in console output
pd.set_option("display.precision", 3)
COLUMNS_PER_CLASS_RESULTS = ["gold title", "class size in segments", "exact segment match accuracy",
                             "class size in sections",
                             "exact section match precision", "exact section match recall", "exact section match f1",
                             "partial section match precision", "partial section match recall", "partial section match f1"]
def eval_df(predictions, gold_labels, title_mapping, out, quiet=False):
    """Score predictions against gold labels and write per-class results to `out`."""
    predictions = predictions.copy()  # avoid mutating the caller's frame
    # segments left unclustered (community == -1) have no usable representative
    predictions.loc[predictions["community"] == -1, "representative"] = "NA"
    predictions = predictions.merge(gold_labels, on=["filename", "title_text", "section_text", "title_index", "normalized_index", "original_title_line"], how="left")
    predictions = predictions.dropna(subset=["gold_cluster"])
    # map each representative to its gold title ("NA" if unmapped)
    predictions["predicted_mapping"] = predictions["representative"].map(lambda rep: title_mapping.get(rep, "NA"))
    predictions["prediction_correct"] = predictions["gold_title"] == predictions["predicted_mapping"]
    # gold classes that no representative maps to can never be predicted correctly
    missing_gold_class = set(predictions["gold_title"].unique()).difference(set(title_mapping.values()))
    df_per_gold_title = pd.DataFrame(columns=COLUMNS_PER_CLASS_RESULTS)
    predictions_no_missing_class = predictions[~predictions["gold_title"].isin(missing_gold_class)]
    predictions_no_missing_pred = predictions[predictions["predicted_mapping"] != "NA"]
    # per-segment accuracy within each gold class
    accuracy_per_gold_title = predictions_no_missing_class.groupby("gold_title")["prediction_correct"].mean()
    # partial section match: at least one segment of the section is correct
    intersection_recall_df = predictions_no_missing_class.groupby(["gold_title", "filename"])["prediction_correct"].sum() > 0
    intersection_recall_df = intersection_recall_df.groupby("gold_title").mean()
    # exact section match: every segment of the section is correct
    exact_section_match_recall = predictions_no_missing_class.groupby(["gold_title", "filename"])["prediction_correct"].sum() >= predictions_no_missing_class.groupby(["gold_title", "filename"])["prediction_correct"].size()
    exact_section_match_recall = exact_section_match_recall.groupby("gold_title").mean()
    intersection_precision_df = predictions_no_missing_pred.groupby(["predicted_mapping", "filename"])["prediction_correct"].sum() > 0
    intersection_precision_df = intersection_precision_df.groupby("predicted_mapping").mean()
    exact_section_match_precision = predictions_no_missing_pred.groupby(["predicted_mapping", "filename"])["prediction_correct"].sum() >= predictions_no_missing_pred.groupby(["predicted_mapping", "filename"])["prediction_correct"].size()
    exact_section_match_precision = exact_section_match_precision.groupby("predicted_mapping").mean()
    if len(intersection_precision_df) == 1:
        # only a single class was predicted: pad the precision series so the
        # F1 computation below stays index-aligned with the recall series
        exact_section_match_precision, intersection_precision_df = handle_only_single_class_predicted(
            exact_section_match_precision, intersection_precision_df, exact_section_match_recall.index)
    f1_score_exact = 2 * (exact_section_match_precision * exact_section_match_recall) / (exact_section_match_precision + exact_section_match_recall)
    f1_score_partial = 2 * (intersection_precision_df * intersection_recall_df) / (intersection_precision_df + intersection_recall_df)
    # assemble the per-class results table, row-aligned on the gold-title index
    df_per_gold_title["partial section match recall"] = intersection_recall_df[accuracy_per_gold_title.index].values
    df_per_gold_title["exact section match recall"] = exact_section_match_recall[accuracy_per_gold_title.index].values
    df_per_gold_title["partial section match precision"] = intersection_precision_df[accuracy_per_gold_title.index].values
    df_per_gold_title["partial section match f1"] = f1_score_partial[accuracy_per_gold_title.index].values
    df_per_gold_title["exact section match f1"] = f1_score_exact[accuracy_per_gold_title.index].values
    df_per_gold_title["exact section match precision"] = exact_section_match_precision[accuracy_per_gold_title.index].values
    df_per_gold_title["gold title"] = accuracy_per_gold_title.index
    df_per_gold_title["exact segment match accuracy"] = accuracy_per_gold_title.values
    class_size = predictions_no_missing_class.groupby("gold_title").size()
    df_per_gold_title["class size in segments"] = class_size[accuracy_per_gold_title.index].values
    df_per_gold_title["class size in sections"] = predictions_no_missing_class.groupby("gold_title")["filename"].nunique()[accuracy_per_gold_title.index].values
    # the micro average weights each class by its size in segments
    weighted_mean_exact = (f1_score_exact * class_size).sum() / class_size.sum()
    weighted_mean_partial = (f1_score_partial * class_size).sum() / class_size.sum()
    df_per_gold_title.to_csv(out, index=False, float_format='%.3f')
    columns_accumulated = ["category", "f1 macro (equal)", "f1 micro (weighted)"]
    df_accumulated = pd.DataFrame(columns=columns_accumulated)
    df_accumulated["category"] = ["exact match", "partial match"]
    df_accumulated["f1 macro (equal)"] = [f1_score_exact.mean(), f1_score_partial.mean()]
    df_accumulated["f1 micro (weighted)"] = [weighted_mean_exact, weighted_mean_partial]
    if not quiet:
        print()
        print(df_accumulated)
    df_accumulated.to_markdown(out.replace('.csv', '_accumulated.md'), index=False)
    return df_accumulated
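
# A minimal usage sketch for eval_df, assuming hypothetical single-row inputs
# (real frames come from the predictions/gold CSVs and must carry all of the
# merge columns listed above):
#
#   pred = pd.DataFrame({"filename": ["a.txt"], "title_text": ["intro"],
#                        "section_text": ["..."], "title_index": [0],
#                        "normalized_index": [0], "original_title_line": [0],
#                        "community": [0], "representative": ["intro"]})
#   gold = pred.drop(columns=["community", "representative"]).assign(
#       gold_cluster=0, gold_title="Introduction")
#   eval_df(pred, gold, {"intro": "Introduction"}, "eval_demo.csv")
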
def handle_only_single_class_predicted(exact_section_match_precision, intersection_precision_df, indices):
    """Extend the precision series to cover every gold class in `indices`,
    defaulting classes that were never predicted to a precision of 1."""
    intersection_precision = []
    exact_precision = []
    for gold in indices:
        if gold in intersection_precision_df:
            intersection_precision.append(intersection_precision_df[gold])
            exact_precision.append(exact_section_match_precision[gold])
        else:
            # class never predicted: no false positives, so precision defaults to 1
            intersection_precision.append(1)
            exact_precision.append(1)
    intersection_precision_df = pd.Series(intersection_precision, index=indices)
    exact_section_match_precision = pd.Series(exact_precision, index=indices)
    return exact_section_match_precision, intersection_precision_df
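
# For example (hypothetical values): if only "Introduction" was ever predicted,
#   handle_only_single_class_predicted(
#       pd.Series({"Introduction": 0.8}), pd.Series({"Introduction": 0.9}),
#       pd.Index(["Introduction", "Methods"]))
# yields precision series over both classes, with "Methods" padded to 1.
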
def get_mapping(gold_df, path_to_mapping=""):
    """Load the representative-to-gold-title mapping from JSON, or fall back
    to an identity mapping over the gold titles."""
    if path_to_mapping:
        with open(path_to_mapping, "r") as f:
            toc_mapping = json.load(f)
    else:
        toc_mapping = {cls: cls for cls in gold_df["gold_title"].unique()}
    return toc_mapping
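
# The mapping file is a flat JSON object from cluster representatives to gold
# titles, e.g. (hypothetical): {"intro": "Introduction", "methods": "Methods"}
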
def main(pred_df, gold_df, toc_mapping, out_dir):
    # evaluate the actual predictions against gold
    out_path = os.path.join(out_dir, "predictions_eval.csv")
    eval_df(pred_df, gold_df, toc_mapping, out_path)

    # most-frequent-class baseline: predict the representative of the largest gold class
    reverse_mapping = {v: k for k, v in toc_mapping.items()}
    count_per_class = gold_df["gold_title"].value_counts()
    most_frequent_class = count_per_class.index[0]
    num_most_frequent_class = count_per_class[most_frequent_class]
    percentage_most_frequent_class = num_most_frequent_class / count_per_class.sum() * 100
    matching_representative_key = reverse_mapping[most_frequent_class]
    predict_most_frequent_class = pred_df.copy()
    predict_most_frequent_class["representative"] = matching_representative_key
    print("\n\nPREDICT MOST FREQUENT CLASS")
    print(f"that is, \"{most_frequent_class}\", with {num_most_frequent_class} instances ({percentage_most_frequent_class:.3f}%)")
    out_path = os.path.join(out_dir, "most_frequent_class_baseline.csv")
    eval_df(predict_most_frequent_class, gold_df, toc_mapping, out_path)

    # random baseline: 100 runs of uniformly random gold titles per segment
    print("\n\nRANDOM PREDICTIONS")
    out_dir_random = os.path.join(out_dir, "random_baseline")
    os.makedirs(out_dir_random, exist_ok=True)
    all_random_runs = []
    for i in range(100):
        random_predictions = pred_df.copy()
        predicting_labels = pd.Series(
            np.random.choice(gold_df["gold_title"].dropna().unique(), len(random_predictions)))
        prediction_reps = predicting_labels.apply(lambda x: reverse_mapping.get(x, "NA"))
        random_predictions["representative"] = prediction_reps.values
        out_path = os.path.join(out_dir_random, f"eval_{i}.csv")
        df_accum = eval_df(random_predictions, gold_df, toc_mapping, out_path, quiet=True)
        all_random_runs.append(df_accum)
    # report the mean over all random runs
    all_random_runs = pd.concat(all_random_runs)
    all_random_runs = all_random_runs.groupby("category").mean()
    print("\n\nRANDOM PREDICTIONS MEAN")
    print(all_random_runs)
    # keep the category index so the row labels survive in the saved table
    all_random_runs.to_markdown(os.path.join(out_dir, "random_baseline_accum_mean.md"))

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("--predictions", type=str, required=True,
                        help="path to predictions csv (meta_filtered.csv)")
    parser.add_argument("--gold", type=str, required=True,
                        help="path to gold csv. "
                             "Expected to match the predictions csv format, with an additional 'gold_title' column")
    parser.add_argument("--toc_mapping", type=str, default="",
                        help="path to json file with mapping from representatives to gold titles. "
                             "If not provided, assumes the gold titles match the representatives")
    parser.add_argument("--out_dir", type=str, required=True,
                        help="path to output directory where all evaluation files will be saved")
    args = parser.parse_args()

    predictions = pd.read_csv(args.predictions, index_col=False)
    gold = pd.read_csv(args.gold, index_col=False)
    mapping_toc = get_mapping(gold, path_to_mapping=args.toc_mapping)
    os.makedirs(args.out_dir, exist_ok=True)
    main(predictions, gold, mapping_toc, args.out_dir)
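
# Example invocation (hypothetical paths):
#   python grounding_eval.py --predictions runs/meta_filtered.csv \
#       --gold data/gold_sections.csv --toc_mapping data/toc_mapping.json \
#       --out_dir eval_out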