Skip to content

Commit 061d4d5

Browse files
author
amolina
committed
Merge branch 'main' of github.com:CVC-DAG/anyscript
2 parents 80f3ffb + e554de5 commit 061d4d5

2 files changed

Lines changed: 10 additions & 13 deletions

File tree

anyscript.eval/compute.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,14 @@ def load_json(path):
4848
book_id_pages[book.strip()] = list_pages
4949

5050
# A queries map és on va lo de les LUT de pagines o de books en cas de que la response sigui amb les queries originals
51-
map_at_k, recall_at_k = compute_map_recall_at_k(df_books, k=10, queries_map=None, evaluate_page=False, book_to_author_map=book_id_to_author, book_to_pages_map=book_id_pages)
51+
map_at_k, recall_at_k = compute_map_recall_at_k(df_books, k=50, queries_map=None, evaluate_page=False, book_to_author_map=book_id_to_author, book_to_pages_map=book_id_pages)
5252

5353
print(' THe mean recall at k book level is: ', recall_at_k)
5454
print(' THe mean map at k book level is: ', map_at_k)
5555

5656

5757
# A queries map és on va lo de les LUT de pagines o de books en cas de que la response sigui amb les queries originals
58-
map_at_k, recall_at_k = compute_map_recall_at_k(df_pages, k=10, queries_map=None, evaluate_page=True, book_to_author_map=book_id_to_author, book_to_pages_map=book_id_pages)
58+
map_at_k, recall_at_k = compute_map_recall_at_k(df_pages, k=50, queries_map=None, evaluate_page=True, book_to_author_map=book_id_to_author, book_to_pages_map=book_id_pages)
5959

6060
print(' THe mean recall at k page level is: ', recall_at_k)
6161
print(' THe mean map at k page level is: ', map_at_k)
@@ -68,10 +68,10 @@ def load_json(path):
6868
ideal_rankings = load_json(IDEAL_RANKNGS)
6969
ideal_rankings_books = load_json(IDEAL_RANKNGS_BOOKS)
7070

71-
ndcg_at_k = compute_nDCG(df_pages, ideal_rankings, k=10, book_to_author=book_id_to_author, lut_full_catalog=lut_full_catalog, queries_map=lut_pages)
71+
ndcg_at_k = compute_nDCG(df_pages, ideal_rankings, k=50, book_to_author=book_id_to_author, lut_full_catalog=lut_full_catalog, queries_map=lut_pages)
7272

7373
print(' THe mean nDCG at k page level is: ', ndcg_at_k)
7474

75-
ndcg_at_k = compute_nDCG(df_books, ideal_rankings_books, k=10, book_to_author=book_id_to_author, lut_full_catalog=lut_full_catalog, queries_map=lut_books)
75+
ndcg_at_k = compute_nDCG(df_books, ideal_rankings_books, k=50, book_to_author=book_id_to_author, lut_full_catalog=lut_full_catalog, queries_map=lut_books)
7676

7777
print(' THe mean nDCG at k book level is: ', ndcg_at_k)

anyscript.eval/evaluation_functions.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,10 @@ def get_book_id_from_filename(filename):
6969
return filename.split('_')[0]
7070

7171

72-
def retrieve_page_true_relevant_documents(query_filename: str, book_to_pages_map: dict):
    """Return the ground-truth relevant pages for a page-level query.

    The book id is the filename prefix before the first underscore
    (same convention as get_book_id_from_filename, inlined here);
    the relevant documents are that book's pages from the LUT.

    Args:
        query_filename: query image filename, e.g. "<book_id>_<page>.png".
        book_to_pages_map: LUT mapping book id -> list of page identifiers.

    Returns:
        The list of pages belonging to the query's book.

    Raises:
        KeyError: if the book id is not present in book_to_pages_map.
    """
    query_book = query_filename.split('_')[0]
    return book_to_pages_map[query_book]
7976

8077
def retrieve_book_true_relevant_documents(query_filename:str, book_to_author_map:dict, book_to_pages_map:dict):
8178
book_id = get_book_id_from_filename(query_filename)
@@ -101,12 +98,12 @@ def compute_map_recall_at_k(response: pd.DataFrame, k:int=100, queries_map=None,
10198

10299
## Extract relevant Documents
103100
if evaluate_page:
104-
relevant_documents = retrieve_page_true_relevant_documents(query_img)
101+
relevant_documents = retrieve_page_true_relevant_documents(query_img, book_to_pages_map)
105102
else:
106103
assert book_to_author_map is not None, "book_to_author_map must be provided for book-level evaluation"
107104
relevant_documents = retrieve_book_true_relevant_documents(query_img, book_to_author_map, book_to_pages_map)
108105

109-
num_relevant_documents = len(relevant_documents)+1
106+
num_relevant_documents = len(relevant_documents)
110107

111108
# SORT and FILTER predictions based in K
112109
filtered_response_sorted = filtered_response.sort_values("similarity", ascending=False).head(k)
@@ -137,7 +134,7 @@ def compute_relevance_gt(query_page,
137134
date_query = int(lut_full_catalog[query_book]["date"][0])
138135
date_candidate = int(lut_full_catalog[candidate_book]["date"][0])
139136

140-
epoch_score = max(0.0, 20 - abs(date_query - date_candidate) / 20)
137+
epoch_score = max(0.0, (20 - abs(date_query - date_candidate)) / 20)
141138
except:
142139
epoch_score = 0
143140

0 commit comments

Comments
 (0)