Skip to content

Commit b01d93a

Browse files
committed
ADD: add multifile and multichannel flag to local datastore
Adds flags to the local datastore init signaling that the data is either multichannel or multi-file (each directory has a sample with multiple volumes). Signed-off-by: Cavan Riley <cavan-riley@uiowa.edu>
1 parent 058ddfc commit b01d93a

11 files changed

Lines changed: 293 additions & 31 deletions

File tree

monailabel/datastore/cvat.py

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
import tempfile
1717
import time
1818
import urllib.parse
19+
from typing import Any, Dict
1920

2021
import numpy as np
2122
import requests
@@ -318,6 +319,32 @@ def download_from_cvat(self, max_retry_count=5, retry_wait_time=10):
318319
retry_count += 1
319320
return None
320321

322+
def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
323+
"""
324+
Not implemented for this datastore
325+
326+
Abstract method for adding a directory to cvat
327+
"""
328+
raise NotImplementedError("This datastore does not support adding directories")
329+
330+
def get_is_multichannel(self) -> bool:
331+
"""
332+
Not implemented for this datastore
333+
334+
Returns whether the application's studies is directed at multichannel (4D) data
335+
"""
336+
logger.info("The function get_is_multichannel is not implemented for this datastore")
337+
return False
338+
339+
def get_is_multi_file(self) -> bool:
340+
"""
341+
Not implemented for this datastore
342+
343+
Returns whether the application's studies is directed at directories containing multiple images per sample
344+
"""
345+
logger.info("The function get_is_multi_file is not implemented for this datastore")
346+
return False
347+
321348

322349
"""
323350
def main():

monailabel/datastore/dicom.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -264,3 +264,29 @@ def _download_labeled_data(self):
264264
def datalist(self, full_path=True) -> List[Dict[str, Any]]:
265265
self._download_labeled_data()
266266
return super().datalist(full_path)
267+
268+
def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
269+
"""
270+
Not implemented
271+
272+
Abstract method for adding a directory to DICOMWeb
273+
"""
274+
raise NotImplementedError("This datastore does not support adding directories")
275+
276+
def get_is_multichannel(self) -> bool:
277+
"""
278+
Not implemented for this datastore
279+
280+
Returns whether the application's studies is directed at multichannel (4D) data
281+
"""
282+
logger.info("The function get_is_multichannel is not implemented for this datastore")
283+
return False
284+
285+
def get_is_multi_file(self) -> bool:
286+
"""
287+
Not implemented for this datastore
288+
289+
Returns whether the application's studies is directed at directories containing multiple images per sample
290+
"""
291+
logger.info("The function get_is_multi_file is not implemented for this datastore")
292+
return False

monailabel/datastore/dsa.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -270,6 +270,32 @@ def status(self) -> Dict[str, Any]:
270270
def json(self):
271271
return self.datalist()
272272

273+
def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
274+
"""
275+
Not implemented for this datastore
276+
277+
Abstract method for adding a directory to dsa
278+
"""
279+
raise NotImplementedError("This datastore does not support adding directories")
280+
281+
def get_is_multichannel(self) -> bool:
282+
"""
283+
Not implemented for this datastore
284+
285+
Returns whether the application's studies is directed at multichannel (4D) data
286+
"""
287+
logger.info("The function get_is_multichannel is not implemented for this datastore")
288+
return False
289+
290+
def get_is_multi_file(self) -> bool:
291+
"""
292+
Not implemented for this datastore
293+
294+
Returns whether the application's studies is directed at directories containing multiple images per sample
295+
"""
296+
logger.info("The function get_is_multi_file is not implemented for this datastore")
297+
return False
298+
273299

274300
"""
275301
def main():

monailabel/datastore/local.py

Lines changed: 85 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -102,9 +102,11 @@ def __init__(
102102
images_dir: str = ".",
103103
labels_dir: str = "labels",
104104
datastore_config: str = "datastore_v2.json",
105-
extensions=("*.nii.gz", "*.nii"),
105+
extensions=("*.nii.gz", "*.nii", "*.nrrd"),
106106
auto_reload=False,
107107
read_only=False,
108+
multichannel: bool = False,
109+
multi_file: bool = False,
108110
):
109111
"""
110112
Creates a `LocalDataset` object
@@ -124,6 +126,14 @@ def __init__(
124126
self._ignore_event_config = False
125127
self._config_ts = 0
126128
self._auto_reload = auto_reload
129+
if multichannel and multi_file:
130+
raise ValueError(
131+
"multichannel and multi_file are mutually exclusive: "
132+
"multichannel expects a single 4D NIfTI volume per sample, "
133+
"while multi_file expects a directory of separate modality files."
134+
)
135+
self._multichannel: bool = multichannel
136+
self._multi_file: bool = multi_file
127137

128138
logging.getLogger("filelock").setLevel(logging.ERROR)
129139

@@ -256,6 +266,18 @@ def datalist(self, full_path=True) -> List[Dict[str, Any]]:
256266
ds = json.loads(json.dumps(ds).replace(f"{self._datastore_path.rstrip(os.pathsep)}{os.pathsep}", ""))
257267
return ds
258268

269+
def get_is_multichannel(self) -> bool:
270+
"""
271+
Returns whether the dataset is multichannel or not
272+
"""
273+
return self._multichannel
274+
275+
def get_is_multi_file(self) -> bool:
276+
"""
277+
Returns whether the dataset is multi-file or not
278+
"""
279+
return self._multi_file
280+
259281
def get_image(self, image_id: str, params=None) -> Any:
260282
"""
261283
Retrieve image object based on image id
@@ -431,6 +453,43 @@ def refresh(self):
431453
"""
432454
self._reconcile_datastore()
433455

456+
def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
457+
"""
458+
Add a directory to the datastore
459+
460+
:param directory_id: the directory id
461+
:param filename: the filename
462+
:param info: additional info
463+
464+
:return: directory id
465+
"""
466+
id = os.path.basename(os.path.normpath(filename))
467+
if not directory_id:
468+
directory_id = id
469+
470+
logger.info(f"Adding Image: {directory_id} => {filename}")
471+
name = directory_id
472+
dest = os.path.realpath(os.path.join(self._datastore.image_path(), name))
473+
474+
with FileLock(self._lock_file):
475+
logger.debug("Acquired the lock!")
476+
if os.path.isdir(filename):
477+
if os.path.exists(dest):
478+
shutil.rmtree(dest)
479+
shutil.copytree(filename, dest)
480+
else:
481+
shutil.copy2(filename, dest)
482+
483+
info = info if info else {}
484+
info["ts"] = int(time.time())
485+
info["name"] = name
486+
487+
# images = get_directory_contents(filename)
488+
self._datastore.objects[directory_id] = ImageLabelModel(image=DataModel(info=info, ext=""))
489+
self._update_datastore_file(lock=False)
490+
logger.debug("Released the lock!")
491+
return directory_id
492+
434493
def add_image(self, image_id: str, image_filename: str, image_info: Dict[str, Any]) -> str:
435494
id, image_ext = self._to_id(os.path.basename(image_filename))
436495
if not image_id:
@@ -552,10 +611,17 @@ def _list_files(self, path, patterns):
552611
files = os.listdir(path)
553612

554613
filtered = dict()
555-
for pattern in patterns:
556-
matching = fnmatch.filter(files, pattern)
557-
for file in matching:
558-
filtered[os.path.basename(file)] = file
614+
if not self._multi_file:
615+
for pattern in patterns:
616+
matching = fnmatch.filter(files, pattern)
617+
for file in matching:
618+
filtered[os.path.basename(file)] = file
619+
else:
620+
ignored = {"labels", ".lock", os.path.basename(self._datastore_config_path).lower()}
621+
for file in files:
622+
abs_file = os.path.join(path, file)
623+
if os.path.isdir(abs_file) and file.lower() not in ignored:
624+
filtered[os.path.basename(file)] = file
559625
return filtered
560626

561627
def _reconcile_datastore(self):
@@ -585,24 +651,26 @@ def _add_non_existing_images(self) -> int:
585651
invalidate = 0
586652
self._init_from_datastore_file()
587653

588-
local_images = self._list_files(self._datastore.image_path(), self._extensions)
654+
local_files = self._list_files(self._datastore.image_path(), self._extensions)
589655

590-
image_ids = list(self._datastore.objects.keys())
591-
for image_file in local_images:
592-
image_id, image_ext = self._to_id(image_file)
593-
if image_id not in image_ids:
594-
logger.info(f"Adding New Image: {image_id} => {image_file}")
656+
ids = list(self._datastore.objects.keys())
657+
for file in local_files:
658+
if self._multi_file:
659+
# Directories have no extension — use the name as-is
660+
file_id = file
661+
file_ext_str = ""
662+
else:
663+
file_id, file_ext_str = self._to_id(file)
595664

596-
name = self._filename(image_id, image_ext)
597-
image_info = {
665+
if file_id not in ids:
666+
logger.info(f"Adding New Image: {file_id} => {file}")
667+
name = self._filename(file_id, file_ext_str)
668+
file_info = {
598669
"ts": int(time.time()),
599-
# "checksum": file_checksum(os.path.join(self._datastore.image_path(), name)),
600670
"name": name,
601671
}
602-
603672
invalidate += 1
604-
self._datastore.objects[image_id] = ImageLabelModel(image=DataModel(info=image_info, ext=image_ext))
605-
673+
self._datastore.objects[file_id] = ImageLabelModel(image=DataModel(info=file_info, ext=file_ext_str))
606674
return invalidate
607675

608676
def _add_non_existing_labels(self, tag) -> int:

monailabel/datastore/xnat.py

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -386,6 +386,32 @@ def __upload_assessment(self, aiaa_model_name, image_id, file_path, type):
386386

387387
self._request_put(url, data, type=type)
388388

389+
def add_directory(self, directory_id: str, filename: str, info: Dict[str, Any]) -> str:
390+
"""
391+
Not implemented for this datastore
392+
393+
Abstract method for adding a directory to xnat
394+
"""
395+
raise NotImplementedError("This datastore does not support adding directories")
396+
397+
def get_is_multichannel(self) -> bool:
398+
"""
399+
Not implemented for this datastore
400+
401+
Returns whether the application's studies is directed at multichannel (4D) data
402+
"""
403+
logger.info("The function get_is_multichannel is not implemented for this datastore")
404+
return False
405+
406+
def get_is_multi_file(self) -> bool:
407+
"""
408+
Not implemented for this datastore
409+
410+
Returns whether the application's studies is directed at directories containing multiple images per sample
411+
"""
412+
logger.info("The function get_is_multi_file is not implemented for this datastore")
413+
return False
414+
389415

390416
"""
391417
def main():

monailabel/endpoints/datastore.py

Lines changed: 19 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -66,20 +66,28 @@ def add_image(
6666
user: Optional[str] = None,
6767
):
6868
logger.info(f"Image: {image}; File: {file}; params: {params}")
69-
file_ext = "".join(pathlib.Path(file.filename).suffixes) if file.filename else ".nii.gz"
7069

71-
image_id = image if image else os.path.basename(file.filename).replace(file_ext, "")
70+
instance: MONAILabelApp = app_instance()
71+
if instance.datastore().get_is_multi_file():
72+
raise HTTPException(
73+
status_code=400,
74+
detail="Multi-file datastore does not support single-file uploads. "
75+
"Data must be pre-staged as sample subdirectories on the server filesystem.",
76+
)
77+
78+
file_ext = "".join(pathlib.Path(file.filename).suffixes) if file.filename else ".nii.gz"
79+
id = image if image else os.path.basename(file.filename).replace(file_ext, "")
7280
image_file = tempfile.NamedTemporaryFile(suffix=file_ext).name
7381

7482
with open(image_file, "wb") as buffer:
7583
shutil.copyfileobj(file.file, buffer)
7684
background_tasks.add_task(remove_file, image_file)
7785

78-
instance: MONAILabelApp = app_instance()
7986
save_params: Dict[str, Any] = json.loads(params) if params else {}
8087
if user:
8188
save_params["user"] = user
82-
image_id = instance.datastore().add_image(image_id, image_file, save_params)
89+
90+
image_id = instance.datastore().add_image(id, image_file, save_params)
8391
return {"image": image_id}
8492

8593

@@ -134,9 +142,14 @@ def download_image(image: str, check_only=False, check_sum=None):
134142
instance: MONAILabelApp = app_instance()
135143
image = instance.datastore().get_image_uri(image)
136144

145+
if os.path.isdir(image):
146+
raise HTTPException(
147+
status_code=400,
148+
detail="Image may be a multi-file sample (directory). Single-file download is not supported for this sample.",
149+
)
137150
if not os.path.isfile(image):
138-
logger.error(f"Image NOT Found or is a directory: {image}")
139-
raise HTTPException(status_code=404, detail="Image NOT Found or is a directory")
151+
logger.error(f"Image NOT Found: {image}")
152+
raise HTTPException(status_code=404, detail="Image NOT Found")
140153

141154
if check_only:
142155
if check_sum:

monailabel/interfaces/app.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,9 @@ def __init__(
9090
self.app_dir = app_dir
9191
self.studies = studies
9292
self.conf = conf if conf else {}
93-
93+
self.multichannel: bool = strtobool(conf.get("multichannel", False))
94+
self.multi_file: bool = strtobool(conf.get("multi_file", False))
95+
self.input_channels = conf.get("input_channels", False)
9496
self.name = name
9597
self.description = description
9698
self.version = version
@@ -146,6 +148,8 @@ def init_datastore(self) -> Datastore:
146148
extensions=settings.MONAI_LABEL_DATASTORE_FILE_EXT,
147149
auto_reload=settings.MONAI_LABEL_DATASTORE_AUTO_RELOAD,
148150
read_only=settings.MONAI_LABEL_DATASTORE_READ_ONLY,
151+
multichannel=self.multichannel,
152+
multi_file=self.multi_file,
149153
)
150154

151155
def init_remote_datastore(self) -> Datastore:
@@ -282,6 +286,9 @@ def infer(self, request, datastore=None):
282286
)
283287

284288
request = copy.deepcopy(request)
289+
request["multi_file"] = self.multi_file
290+
request["multichannel"] = self.multichannel
291+
request["input_channels"] = self.input_channels
285292
request["description"] = task.description
286293

287294
image_id = request["image"]
@@ -292,7 +299,7 @@ def infer(self, request, datastore=None):
292299
else:
293300
request["image"] = datastore.get_image_uri(request["image"])
294301

295-
if os.path.isdir(request["image"]):
302+
if os.path.isdir(request["image"]) and not self.multi_file:
296303
logger.info("Input is a Directory; Consider it as DICOM")
297304

298305
logger.debug(f"Image => {request['image']}")
@@ -431,6 +438,10 @@ def train(self, request):
431438
)
432439

433440
request = copy.deepcopy(request)
441+
# 4D image support, send train task information regarding data
442+
request["multi_file"] = self.multi_file
443+
request["multichannel"] = self.multichannel
444+
request["input_channels"] = self.input_channels
434445
result = task(request, self.datastore())
435446

436447
# Run all scoring methods

0 commit comments

Comments
 (0)