Skip to content

Commit b260458

Browse files
committed
mv analysis scripts to analysis
1 parent 5d76b93 commit b260458

6 files changed

Lines changed: 61 additions & 3 deletions

File tree

analysis/calculate_AP.py

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
import os
2+
import pandas as pd
3+
import numpy as np
4+
5+
6+
def average_precisions(y_true, y_pred):
    """Compute the per-class average precision (area under the PR curve).

    For every class (column), samples are ranked by descending predicted
    score, cumulative precision and recall are computed along that ranking,
    precision is replaced by its monotonically non-increasing envelope, and
    the area is accumulated over the points where recall changes.

    Args:
        y_true: 2-D array indexed as (sample, class). Entries equal to 1 are
            counted as positives and entries equal to 0 as negatives.
        y_pred: 2-D array of scores indexed the same way as ``y_true``.

    Returns:
        A list with one average-precision value per class. A class that has
        samples but no positive ground-truth entry has undefined recall and
        yields NaN (the value the unguarded 0/0 division used to produce,
        now returned without a RuntimeWarning).
    """
    _, classes = y_true.shape
    ap_per_class = []
    eps = np.finfo(np.float64).eps

    for index in range(classes):
        # Rank the samples from the highest to the lowest score of this class.
        order = np.argsort(-y_pred[:, index])
        y_true_cls = y_true[order, index]

        npos = np.sum(y_true_cls)
        if y_true_cls.size and npos == 0:
            # Recall would be 0/0 for every rank; report NaN explicitly
            # instead of relying on a warning-raising division.
            ap_per_class.append(np.float64(np.nan))
            continue

        # Cumulative true/false positive counts along the ranking.
        tp = np.cumsum(y_true_cls == 1)
        fp = np.cumsum(y_true_cls == 0)

        rec = tp * 1.0 / npos

        # tp + fp can only be zero when a label is neither 0 nor 1; the eps
        # floor keeps the division defined in that case.
        prec = tp * 1.0 / np.maximum(tp + fp, eps)

        # Add sentinels so the curve spans recall 0..1.
        mrec = np.concatenate(([0.0], rec, [1.0]))
        mpre = np.concatenate(([0.0], prec, [0.0]))

        # Precision envelope: each point takes the maximum precision found
        # at or to the right of it (running maximum from the right).
        mpre = np.maximum.accumulate(mpre[::-1])[::-1]

        # To calculate the area under the PR curve, look for points where
        # the X axis (recall) changes value ...
        i = np.where(mrec[1:] != mrec[:-1])[0]

        # ... and sum (delta recall) * precision.
        ap_per_class.append(np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]))

    return ap_per_class
45+
46+
def main(d):
    """Load every non-submission CSV found in a directory into one DataFrame.

    Args:
        d: Path of the directory to scan. Every entry whose name does not
            end with "_submission.csv" is read with ``pandas.read_csv``.

    Returns:
        A DataFrame holding the rows of all files read, stacked in sorted
        file-name order under a fresh 0..n-1 index. An empty DataFrame is
        returned when the directory contains no matching file.
    """
    # Sort the names: os.listdir returns them in arbitrary order, which
    # would make the row order differ between runs/machines.
    files = sorted(f for f in os.listdir(d) if not f.endswith("_submission.csv"))
    frames = [pd.read_csv(os.path.join(d, f)) for f in files]

    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every call; a single concat does the same job in one pass.
    # pd.concat rejects an empty list, hence the explicit fallback.
    df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    # Disabled post-processing kept from the original:
    # df['GT cell label'] = [f.split("\n")[0].split(" ")[-1] for f in df['GT cell label']]

    return df
54+
55+
if __name__ == "__main__":
    import sys

    # Directory to process: the first command-line argument when one is
    # given, otherwise the original hard-coded location.
    d = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/home/trangle/HPA_SingleCellClassification/examples/bestfitting"
    )
    main(d)
File renamed without changes.
Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -261,8 +261,9 @@ def prepare_meta_publicHPAv21():
261261
thres_vector = thres_classes.best_threshold
262262
il_labels = tmp[[l+'_y' for l in LABEL_ALIASE_LIST]].values
263263
sc_labels = tmp[[l+'_x' for l in LABEL_ALIASE_LIST]].values
264-
sc_labels = np.array([sc_labels.transpose()[c] > thres_vector[c] for c in range(19)]).transpose()
265-
#sc_labels = il_labels*sc_labels
264+
sc_labels = np.array([sc_labels.transpose()[c] > (2*thres_vector[c]) for c in range(19)]).transpose()
265+
sc_labels = il_labels*sc_labels
266+
print(sc_labels.shape)
266267
sc_labels = pd.DataFrame(sc_labels.astype('uint8'))
267268
sc_labels.columns = LABEL_ALIASE_LIST
268269
# Merge the calculated sc_labels back with meta
@@ -305,7 +306,7 @@ def prepare_meta_publicHPAv21():
305306
df_c.target.value_counts()
306307
print('Keeping these final columns:')
307308
print(df_c.columns)
308-
df_c.to_csv(f'{d}/sl_pHPA_15_0.05_euclidean_100000_rmoutliers_ilsc_3d_bbox_metav21_individualthresholds.csv', index=False)
309+
df_c.to_csv(f'{d}/sl_pHPA_15_0.05_euclidean_100000_rmoutliers_ilsc_3d_bbox_metav21_individualthresholds_il.csv', index=False)
309310
#df_c.to_csv(f'{d}/sl_pHPA_15_0.05_euclidean_100000_rmoutliers_ilsc_3d_bbox_metav21_potentialnewlabels.csv', index=False)
310311

311312

File renamed without changes.

0 commit comments

Comments
 (0)