-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathmain_tils_train_test.py
More file actions
266 lines (233 loc) · 14.8 KB
/
main_tils_train_test.py
File metadata and controls
266 lines (233 loc) · 14.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
'''
Main script to train and test TILs detection.
train: the model is trained using pan-cancer TCGA datasets
test: includes internal testing and external testing
main functions: see Transfer_Learning_PyTorch_V01.py class
author: Hongming Xu, CCF, 2019
email: mxu@ualberta.ca
environment: pytorch
'''
import os
# GPU selection is done through environment variables, set here ahead of the
# project import below (presumably so they are in place before any CUDA
# initialisation happens - TODO confirm).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1" # The GPU id to use, usually either "0" or "1";
import sys
import pandas as pd
import time
import argparse
# Root used to build every relative data/model path in this script, and to
# locate the xhm_deep_learning package folders added to sys.path below.
rela_path='../../' # change the path adaptively
sys.path.insert(0,rela_path+'xhm_deep_learning/models')
sys.path.insert(0,rela_path+'xhm_deep_learning/functions')
#sys.path.append('/home/xuh3/projects/xhm_deep_learning/models') # linux absolute path
from Transfer_Learning_PyTorch import Transfer_Learning_PyTorch # Transfer_Learning_PyTorch is the project-defined class
# Run-mode switches: choose between training, internal testing, external
# testing and end-to-end testing. Each switch is checked independently in the
# __main__ section, so more than one mode can be enabled at once.
training=False # used for model development by developer
testing_int=False # for debugging, used by developer
testing_ext=False # used by developer
testing_end_to_end=True # end_to_end testing: input-> the folder containing wsis
# easily used by users: output-> the prediction probability maps
def _expand_output_paths(output_path, count):
    """Return exactly one output location per dataset folder.

    Args:
        output_path: ``None`` (the dataset defines no output folder), a single
            path string shared by every folder, or a sequence holding one path
            per folder.
        count: number of dataset folders that will be processed.

    Returns:
        A list of length ``count`` whose items are path strings or ``None``.

    Raises:
        ValueError: if a sequence is given whose length differs from ``count``.
    """
    if output_path is None:
        return [None] * count
    if isinstance(output_path, str):
        # Indexing a bare string would hand single characters to the model,
        # so a shared folder is repeated once per dataset folder instead.
        return [output_path] * count
    paths = list(output_path)
    if len(paths) != count:
        raise ValueError('expected {} output paths but got {}'.format(count, len(paths)))
    return paths


def _run_training():
    """Train one model per hyper-parameter combination and log the results.

    Every combination of frozen-layer percentile, optimizer, learning rate and
    batch size is trained, then evaluated with ``test_model``. The collected
    accuracies and training times are written to ``logs.xlsx`` in the model
    folder once the whole grid has finished.
    """
    data_dir = rela_path + 'data/pan_cancer_tils/data_v02/'  # not color normalized version
    model_dir = rela_path + 'data/pan_cancer_tils/models/resnet34/'

    # parameter settings
    model_name = ['resnet34', 'shufflenet', 'resnet18']  # only model_name[0] is trained
    frozen_per = [0, 0.8]  # percentile of frozen trainable layers, typically 0,0.5,0.8 [0,1]
    optimizer = ['sgd', 'adam']
    learning_rate = [0.001, 0.0001, 0.00001]
    batch_size = [4, 16, 32, 64]
    load_data = 'v1'  # change this one according to different applications
    num_workers = 10
    epochs = 100
    imagenet_init = True  # False - weights are randomly initialized, tune_all_layers will be run
    num_early_stoping = 5
    zscore = False

    model_version = []
    validation_acc = []
    testing_acc = []
    training_time = []

    for fp in frozen_per:
        for op in optimizer:
            for lr in learning_rate:
                for bs in batch_size:
                    start_time = time.time()
                    model_tl = Transfer_Learning_PyTorch(load_data, data_dir, model_dir, model_name[0], bs,
                                                         num_workers, epochs, imagenet_init, fp, op, lr,
                                                         num_early_stoping, zscore)
                    valid_acc, _ = model_tl.train_model()
                    # Measure once so the printed and the logged time agree.
                    elapsed_minutes = (time.time() - start_time) / 60
                    print("---{} minutes---".format(elapsed_minutes))
                    training_time.append(elapsed_minutes)

                    model_version.append("{}_{}_{}_{}_{}.pt".format(model_name[0], fp, op, lr, bs))
                    # presumably a torch tensor (it exposes .cpu().numpy()) - verify against train_model
                    validation_acc.append(valid_acc.cpu().numpy().tolist())

                    # Re-create the model object in test configuration for this combination.
                    model_tl = Transfer_Learning_PyTorch(load_data=load_data, test_dir=data_dir,
                                                         model_dir=model_dir, model_name=model_name[0],
                                                         batch_size=bs, fp=fp, op=op, lr=lr)
                    test_acc = model_tl.test_model()
                    testing_acc.append(test_acc.cpu().numpy().tolist())

    data = {'Models': model_version, 'Valid Acc': validation_acc,
            'Test Acc': testing_acc, 'Training Time': training_time}
    pd.DataFrame(data).to_excel(model_dir + 'logs.xlsx')


def _run_internal_test():
    """Run ``test_model`` for the single resnet18 configuration (for debugging)."""
    data_dir = rela_path + 'data/pan_cancer_tils/data_v02/'  # not color normalized version
    model_dir = rela_path + 'data/pan_cancer_tils/models/resnet18/'

    model_tl = Transfer_Learning_PyTorch(load_data='v1', test_dir=data_dir, model_dir=model_dir,
                                         model_name='resnet18', batch_size=4, fp=0, op='adam', lr=0.0001)
    model_tl.test_model()


def _run_external_test():
    """Run ``test_model_external`` on every folder of the selected external dataset.

    Raises:
        RuntimeError: if ``dataset`` does not name an entry of the table below.
        ValueError: if the selected entry lists a different number of tile
            folders, slide folders and output folders.
    """
    # best_resnet18='resnet18_0_adam_0.0001_4.pt'
    # Selector for the dataset to test: set it to one of the keys of `datasets`.
    dataset = None

    tcga_tiles = rela_path + 'data/tcga_coad_read_data/coad_read_tissue_tiles/'
    datasets = {
        'kang_colon': {
            'test_path': ['../../../data/pan_cancer_tils/data_yonsei_v01/181119_v2/',
                          '../../../data/pan_cancer_tils/data_yonsei_v01/181211_v2/',
                          '../../../data/pan_cancer_tils/data_yonsei_v01/Kang_MSI_WSI_2019_10_07_v2/'],
            'wsi_path': ['../../../data/kang_colon_slide/181119/',
                         '../../../data/kang_colon_slide/181211/',
                         '../../../data/kang_colon_slide/Kang_MSI_WSI_2019_10_07/'],
            'output_path': None,  # no output folder was ever defined for this dataset
            'wsi_ext': '.mrxs',
        },
        'lee_gastric': {
            'test_path': ['../../../data/pan_cancer_tils/data_lee_gastric/'],
            'wsi_path': ['../../../data/lee_gastric_slide/Stomach_Immunotherapy/'],
            'output_path': None,  # no output folder was ever defined for this dataset
            'wsi_ext': '.tiff',
        },
        'tcga_coad_read': {
            'test_path': [tcga_tiles + 'tcga_coad_a1/',
                          tcga_tiles + 'tcga_coad_a2/',
                          tcga_tiles + 'tcga_coad_b/',
                          tcga_tiles + 'tcga_coad_uncertain/',
                          tcga_tiles + 'tcga_read/'],
            'wsi_path': [rela_path + 'data/tcga_coad_slide/tcga_coad/quality_a1/',
                         rela_path + 'data/tcga_coad_slide/tcga_coad/quality_a2/',
                         rela_path + 'data/tcga_coad_slide/tcga_coad/quality_b/',
                         rela_path + 'data/tcga_coad_slide/tcga_coad/quality_uncertain/',
                         rela_path + 'data/tcga_read_slide/dataset/'],
            # one folder shared by all five cohorts; expanded per folder below
            'output_path': rela_path + 'data/tcga_coad_read_data/coad_read_tils_preds/pred_files/',
            'wsi_ext': '.svs',
        },
        'lee_colon': {
            'test_path': [rela_path + 'data/lee_colon_data/all_tiles_tils/'],
            'wsi_path': [rela_path + 'data/lee_colon_data/wsi_tumor_files/'],
            'output_path': [rela_path + 'data/lee_colon_data/tils_pred/pred_excels/'],
            'wsi_ext': '.tiff',
        },
        'cheong_stomach': {
            'test_path': ['../../data/cheong_stomach_stage4/all_tiles_tils/biopsy/',
                          '../../data/cheong_stomach_stage4/all_tiles_tils/surgery/'],
            'wsi_path': ['../../data/Stomach_Cancer_Stage4_Immunotherapy/biopsy_45pts/',
                         '../../data/Stomach_Cancer_Stage4_Immunotherapy/surgical_19pts/'],
            'output_path': ['../../data/cheong_stomach_stage4/tils_pred/pred_excels/biopsy/',
                            '../../data/cheong_stomach_stage4/tils_pred/pred_excels/surgery/'],
            'wsi_ext': '.czi',
        },
    }

    selected = datasets.get(dataset)
    if selected is None:
        raise RuntimeError('processing dataset selection is not correct~~~~~~~~~')

    test_dirs = selected['test_path']
    wsi_dirs = selected['wsi_path']
    if len(wsi_dirs) != len(test_dirs):
        raise ValueError('expected {} wsi paths but got {}'.format(len(test_dirs), len(wsi_dirs)))
    out_dirs = _expand_output_paths(selected['output_path'], len(test_dirs))

    for test_dir, wsi_dir, out_dir in zip(test_dirs, wsi_dirs, out_dirs):
        start_time = time.time()
        # best resnet18
        model_kwargs = dict(test_dir=test_dir, model_dir=rela_path + 'data/pan_cancer_tils/models/resnet18/',
                            model_name='resnet18', batch_size=4, fp=0, op='adam', lr=0.0001,
                            num_workers=10, wsi_path=wsi_dir, wsi_ext=selected['wsi_ext'])
        if out_dir is not None:
            # Only forwarded when the dataset defines one; otherwise the
            # constructor's own default for output_path applies.
            model_kwargs['output_path'] = out_dir
        model_tl = Transfer_Learning_PyTorch(**model_kwargs)
        model_tl.test_model_external()
        print("---{} minutes---".format((time.time() - start_time) / 60))


def _run_end_to_end():
    """Run ``test_end_to_end`` on every slide folder of the selected dataset.

    Input is a folder containing WSIs; output is the prediction probability
    maps (see the ``testing_end_to_end`` switch).

    Raises:
        RuntimeError: if ``dataset`` does not name an entry of the table below.
        ValueError: if the selected entry lists a different number of slide
            folders and output folders.
    """
    # Selector for the dataset to process: set it to one of the keys of `datasets`.
    dataset = 'gastric_trial_czi'

    cuda_id = 0
    class_name = ['others', 'tils']  # see data fold names
    class_interest = 1

    immuno = rela_path + 'data/stomach_cancer_immunotherapy/'
    # images are stored at labshared server
    classic_share = '/mnt/isilon/data/w_QHS/hwangt-share/Datasets/CLASSIC_Stomach_Cancer_Image/'
    classic_maps = rela_path + 'data/CLASSIC_stomach_cancer_image/til_maps/'
    leica_batches = ['1-100', '101-200', '201-300', '301-400', '401-500', '501-600', '601-622']
    czi_batches = [
        # 'sample_1_175',     # currently disabled
        # 'sample_176_375',   # currently disabled
        'sample_376_556',
        'sample_557_622',
    ]

    datasets = {
        'Stomach_Immunotherapy_stmary': {
            'wsi_path': [immuno + 'Stomach_Immunotherapy_stmary/'],
            'output_path': [immuno + 'tils_maps/Stomach_Immunotherapy_stmary/'],
            'wsi_ext': '.tiff',
        },
        'GC_SM2_stmary': {
            'wsi_path': [immuno + 'GC_SM2_stmary/'],
            'output_path': [immuno + 'tils_maps/GC_SM2_stmary/'],
            'wsi_ext': '.svs',
        },
        'Stomach_Cancer_Stage4_Immunotherapy': {
            'wsi_path': [immuno + 'Stomach_Cancer_Stage4_Immunotherapy/biopsy_45pts/',
                         immuno + 'Stomach_Cancer_Stage4_Immunotherapy/surgical_19pts/'],
            'output_path': [immuno + 'tils_maps/Stomach_Cancer_Stage4_Immunotherapy/biopsy_45pts/',
                            immuno + 'tils_maps/Stomach_Cancer_Stage4_Immunotherapy/surgical_19pts/'],
            'wsi_ext': '.czi',
        },
        'tcga_blca': {
            'wsi_path': [rela_path + 'data/tcga_blca_slide/blca_wsi/',
                         rela_path + 'data/tcga_blca_slide/blca_wsi2/'],
            'output_path': [rela_path + 'data/tcga_blca_slide/til_maps/wsi/',
                            rela_path + 'data/tcga_blca_slide/til_maps/wsi2/'],
            'wsi_ext': '.svs',
        },
        'tcga_stad': {
            'wsi_path': [rela_path + 'data/tcga_stad_slide/wsis/'],
            'output_path': [rela_path + 'data/tcga_stad_slide/til_maps/wsis/'],
            'wsi_ext': '.svs',
        },
        'colon_IHC': {
            'wsi_path': [rela_path + 'data/kang_colon_slide/colon_IHC_JK/'],
            'output_path': [rela_path + 'data/kang_colon_slide/colon_IHC_JK/til_maps/'],
            'wsi_ext': 'HE.mrxs',
        },
        'tcga_luad': {
            'wsi_path': [rela_path + 'data/tcga_luad_slide/LUAD_wsi/'],
            'output_path': [rela_path + 'data/tcga_luad_slide/til_maps/'],
            'wsi_ext': '.svs',
        },
        'gastric_trial': {
            'wsi_path': [classic_share + 'LEICA/' + batch + '/' for batch in leica_batches],
            'output_path': [classic_maps + batch + '/' for batch in leica_batches],
            'wsi_ext': '.svs',
        },
        'gastric_trial_czi': {
            'wsi_path': [classic_share + batch + '/' for batch in czi_batches],
            'output_path': [classic_maps + batch + '/' for batch in czi_batches],
            'wsi_ext': '.czi',
        },
    }

    selected = datasets.get(dataset)
    if selected is None:
        raise RuntimeError('processing dataset selection is not correct~~~~~~~~~')

    wsi_dirs = selected['wsi_path']
    out_dirs = _expand_output_paths(selected['output_path'], len(wsi_dirs))

    for wsi_dir, out_dir in zip(wsi_dirs, out_dirs):
        start_time = time.time()
        # best resnet18
        model_tl = Transfer_Learning_PyTorch(model_dir=rela_path + 'data/pan_cancer_tils/models/resnet18/',
                                             model_name='resnet18', batch_size=4, fp=0, op='adam',
                                             lr=0.0001, num_workers=20, wsi_path=wsi_dir,
                                             wsi_ext=selected['wsi_ext'], output_path=out_dir,
                                             cuda_id=cuda_id, class_num=len(class_name),
                                             class_interest=class_interest, tile_size=[112, 112])
        model_tl.test_end_to_end()
        print("---{} minutes---".format((time.time() - start_time) / 60))


if __name__ == '__main__':
    # Each mode is checked independently, in the same order as before, so
    # several switches may be enabled for a single run.
    if training:
        _run_training()
    if testing_int:  # for debugging
        _run_internal_test()
    if testing_ext:
        _run_external_test()
    if testing_end_to_end:
        _run_end_to_end()