From b4a0c1d083f827a26563121fbd6828117f7d35c0 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Wed, 6 May 2026 19:40:24 +0200 Subject: [PATCH 1/4] feat: add gigapath model Co-authored-by: Copilot --- .../applications/prov-gigapath.yaml | 28 +++++ helm/rayservice/values.yaml | 1 + models/prov_gigapath.py | 110 ++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 helm/rayservice/applications/prov-gigapath.yaml create mode 100644 models/prov_gigapath.py diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml new file mode 100644 index 0000000..97f9a5c --- /dev/null +++ b/helm/rayservice/applications/prov-gigapath.yaml @@ -0,0 +1,28 @@ +- name: prov-gigapath + import_path: models.prov_gigapath:app + route_prefix: /prov-gigapath + runtime_env: + config: + setup_timeout_seconds: 1800 + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/gigapath.zip?v1 + deployments: + - name: ProvGigaPath + max_ongoing_requests: 1024 + max_queued_requests: 2048 + autoscaling_config: + min_replicas: 0 + max_replicas: 4 + target_ongoing_requests: 256 + ray_actor_options: + num_cpus: 4 + num_gpus: 1 + memory: 8589934592 + runtime_env: + env_vars: + HF_HOME: /mnt/huggingface_cache + user_config: + tile_size: 224 + max_batch_size: 512 + batch_wait_timeout_s: 0.1 + model: + repo_id: prov-gigapath/prov-gigapath diff --git a/helm/rayservice/values.yaml b/helm/rayservice/values.yaml index b6e24b7..6e62751 100644 --- a/helm/rayservice/values.yaml +++ b/helm/rayservice/values.yaml @@ -6,4 +6,5 @@ applications: - episeg-1 - heatmap-builder - prostate-classifier-1 + - prov-gigapath - virchow2 diff --git a/models/prov_gigapath.py b/models/prov_gigapath.py new file mode 100644 index 0000000..4e02cc5 --- /dev/null +++ b/models/prov_gigapath.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +import asyncio +from typing import TYPE_CHECKING, Any, TypedDict + +import 
lz4.frame +import numpy as np +from fastapi import FastAPI, Request, Response +from ray import serve + + +if TYPE_CHECKING: + import torch + + +class Config(TypedDict): + tile_size: int + model: dict[str, Any] + max_batch_size: int + batch_wait_timeout_s: float + + +fastapi = FastAPI() + + +@serve.deployment(num_replicas="auto") +@serve.ingress(fastapi) +class ProvGigaPath: + """GigaPath tile encoder for pathology.""" + + model: torch.nn.Module + transforms: Any + tile_size: int + + def __init__(self) -> None: + import torch + + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + def reconfigure(self, config: Config) -> None: + import timm + from timm.data.config import resolve_data_config + from timm.data.transforms_factory import create_transform + + self.tile_size = config["tile_size"] + model_config = dict(config["model"]) + repo_id = model_config["repo_id"] + + self.model = timm.create_model( + f"hf-hub:{repo_id}", + pretrained=True, + num_classes=0, + ) + self.model = self.model.to(self.device).eval() + + self.transforms = create_transform( + **resolve_data_config(self.model.pretrained_cfg, model=self.model) + ) + + self.predict.set_max_batch_size(config["max_batch_size"]) # type: ignore[attr-defined] + self.predict.set_batch_wait_timeout_s(config["batch_wait_timeout_s"]) # type: ignore[attr-defined] + + @serve.batch + async def predict(self, inputs: list[torch.Tensor]) -> list[torch.Tensor]: + import torch + + tensors = torch.stack(inputs).to(self.device) + device_type = self.device.type + + # PyTorch autocast does not support float16 on CPU (throws RuntimeError). + # bfloat16 is the only supported low-precision option for CPU inference. 
+ autocast_dtype = torch.float16 if device_type == "cuda" else torch.bfloat16 + + with ( + torch.inference_mode(), + torch.autocast(device_type=device_type, dtype=autocast_dtype), + ): + output = self.model(tensors) + + return list(output) + + @fastapi.post("/") + async def root(self, request: Request) -> Response: + from PIL import Image + + data = await asyncio.to_thread(lz4.frame.decompress, await request.body()) + image = np.frombuffer(data, dtype=np.uint8).reshape( + self.tile_size, self.tile_size, 3 + ) + + output_dtype = np.dtype( + request.headers.get("x-output-dtype", "float32").lower() + ) + + tensor = self.transforms(Image.fromarray(image)) + + raw_output: torch.Tensor = await self.predict(tensor) + result = raw_output.cpu().numpy().astype(output_dtype, copy=False) + output_shape = str(result.shape) + + return Response( + content=lz4.frame.compress(result.tobytes()), + media_type="application/octet-stream", + headers={ + "x-output-shape": output_shape, + }, + ) + + +app = ProvGigaPath.bind() # type: ignore[attr-defined] From 4c9dedf44fae0604306d307ed1d39903dde05331 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Thu, 7 May 2026 14:20:43 +0200 Subject: [PATCH 2/4] chore: raise max_queued_requests limits for all applications --- helm/rayservice/applications/episeg-1.yaml | 2 +- helm/rayservice/applications/heatmap-builder.yaml | 2 +- helm/rayservice/applications/prostate-classifier-1.yaml | 2 +- helm/rayservice/applications/prov-gigapath.yaml | 2 +- helm/rayservice/applications/virchow2.yaml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/helm/rayservice/applications/episeg-1.yaml b/helm/rayservice/applications/episeg-1.yaml index 5677799..5c8e9db 100644 --- a/helm/rayservice/applications/episeg-1.yaml +++ b/helm/rayservice/applications/episeg-1.yaml @@ -6,7 +6,7 @@ deployments: - name: SemanticSegmentation max_ongoing_requests: 16 - max_queued_requests: 32 + max_queued_requests: 128 autoscaling_config: min_replicas: 0 max_replicas: 4 diff --git 
a/helm/rayservice/applications/heatmap-builder.yaml b/helm/rayservice/applications/heatmap-builder.yaml index 13b90cb..f39bf49 100644 --- a/helm/rayservice/applications/heatmap-builder.yaml +++ b/helm/rayservice/applications/heatmap-builder.yaml @@ -6,7 +6,7 @@ deployments: - name: HeatmapBuilder max_ongoing_requests: 16 - max_queued_requests: 32 + max_queued_requests: 128 autoscaling_config: min_replicas: 0 max_replicas: 4 diff --git a/helm/rayservice/applications/prostate-classifier-1.yaml b/helm/rayservice/applications/prostate-classifier-1.yaml index a177c43..6cd33b2 100644 --- a/helm/rayservice/applications/prostate-classifier-1.yaml +++ b/helm/rayservice/applications/prostate-classifier-1.yaml @@ -6,7 +6,7 @@ deployments: - name: BinaryClassifier max_ongoing_requests: 512 - max_queued_requests: 1024 + max_queued_requests: 4096 autoscaling_config: min_replicas: 0 max_replicas: 4 diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml index 97f9a5c..3219939 100644 --- a/helm/rayservice/applications/prov-gigapath.yaml +++ b/helm/rayservice/applications/prov-gigapath.yaml @@ -8,7 +8,7 @@ deployments: - name: ProvGigaPath max_ongoing_requests: 1024 - max_queued_requests: 2048 + max_queued_requests: 8192 autoscaling_config: min_replicas: 0 max_replicas: 4 diff --git a/helm/rayservice/applications/virchow2.yaml b/helm/rayservice/applications/virchow2.yaml index cf797d8..eaac069 100644 --- a/helm/rayservice/applications/virchow2.yaml +++ b/helm/rayservice/applications/virchow2.yaml @@ -8,7 +8,7 @@ deployments: - name: Virchow2 max_ongoing_requests: 1024 - max_queued_requests: 2048 + max_queued_requests: 8192 autoscaling_config: min_replicas: 0 max_replicas: 4 From 80efa006eda23b61a5f527ecc1a3ae419825dc32 Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Thu, 7 May 2026 17:56:54 +0200 Subject: [PATCH 3/4] correct url --- helm/rayservice/applications/prov-gigapath.yaml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml index 3219939..f0eb4db 100644 --- a/helm/rayservice/applications/prov-gigapath.yaml +++ b/helm/rayservice/applications/prov-gigapath.yaml @@ -4,7 +4,7 @@ runtime_env: config: setup_timeout_seconds: 1800 - working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/gigapath.zip?v1 + working_dir: https://github.com/RationAI/model-service/archive/refs/heads/main.zip deployments: - name: ProvGigaPath max_ongoing_requests: 1024 From a101567b7cfff2b7a5ad359e0fa9f8f4ff3a921b Mon Sep 17 00:00:00 2001 From: JiriStipek <567776@mail.muni.cz> Date: Sun, 10 May 2026 19:53:44 +0200 Subject: [PATCH 4/4] fix issues based on HF docs --- models/prov_gigapath.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/models/prov_gigapath.py b/models/prov_gigapath.py index 4e02cc5..b59ed11 100644 --- a/models/prov_gigapath.py +++ b/models/prov_gigapath.py @@ -39,22 +39,30 @@ def __init__(self) -> None: def reconfigure(self, config: Config) -> None: import timm - from timm.data.config import resolve_data_config - from timm.data.transforms_factory import create_transform + from torchvision import transforms self.tile_size = config["tile_size"] model_config = dict(config["model"]) repo_id = model_config["repo_id"] self.model = timm.create_model( - f"hf-hub:{repo_id}", + f"hf_hub:{repo_id}", pretrained=True, - num_classes=0, ) self.model = self.model.to(self.device).eval() - self.transforms = create_transform( - **resolve_data_config(self.model.pretrained_cfg, model=self.model) + # Based on the HF documentation + self.transforms = transforms.Compose( + [ + transforms.Resize( + 256, interpolation=transforms.InterpolationMode.BICUBIC + ), + transforms.CenterCrop(224), + transforms.ToTensor(), + transforms.Normalize( + mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225) + ), + ] ) 
self.predict.set_max_batch_size(config["max_batch_size"]) # type: ignore[attr-defined] @@ -65,18 +73,8 @@ async def predict(self, inputs: list[torch.Tensor]) -> list[torch.Tensor]: import torch tensors = torch.stack(inputs).to(self.device) - device_type = self.device.type - - # PyTorch autocast does not support float16 on CPU (throws RuntimeError). - # bfloat16 is the only supported low-precision option for CPU inference. - autocast_dtype = torch.float16 if device_type == "cuda" else torch.bfloat16 - - with ( - torch.inference_mode(), - torch.autocast(device_type=device_type, dtype=autocast_dtype), - ): + with torch.inference_mode(): output = self.model(tensors) - return list(output) @fastapi.post("/")