From b4a0c1d083f827a26563121fbd6828117f7d35c0 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@mail.muni.cz>
Date: Wed, 6 May 2026 19:40:24 +0200
Subject: [PATCH 1/4] feat: add gigapath model

Co-authored-by: Copilot <copilot@github.com>
---
 .../applications/prov-gigapath.yaml           |  28 +++++
 helm/rayservice/values.yaml                   |   1 +
 models/prov_gigapath.py                       | 110 ++++++++++++++++++
 3 files changed, 139 insertions(+)
 create mode 100644 helm/rayservice/applications/prov-gigapath.yaml
 create mode 100644 models/prov_gigapath.py

diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml
new file mode 100644
index 0000000..97f9a5c
--- /dev/null
+++ b/helm/rayservice/applications/prov-gigapath.yaml
@@ -0,0 +1,28 @@
+- name: prov-gigapath
+  import_path: models.prov_gigapath:app
+  route_prefix: /prov-gigapath
+  runtime_env:
+    config:
+      setup_timeout_seconds: 1800
+    working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/gigapath.zip?v1
+  deployments:
+    - name: ProvGigaPath
+      max_ongoing_requests: 1024
+      max_queued_requests: 2048
+      autoscaling_config:
+        min_replicas: 0
+        max_replicas: 4
+        target_ongoing_requests: 256
+      ray_actor_options:
+        num_cpus: 4
+        num_gpus: 1
+        memory: 8589934592
+        runtime_env:
+          env_vars:
+            HF_HOME: /mnt/huggingface_cache
+      user_config:
+        tile_size: 224
+        max_batch_size: 512
+        batch_wait_timeout_s: 0.1
+        model:
+          repo_id: prov-gigapath/prov-gigapath
diff --git a/helm/rayservice/values.yaml b/helm/rayservice/values.yaml
index b6e24b7..6e62751 100644
--- a/helm/rayservice/values.yaml
+++ b/helm/rayservice/values.yaml
@@ -6,4 +6,5 @@ applications:
   - episeg-1
   - heatmap-builder
   - prostate-classifier-1
+  - prov-gigapath
   - virchow2
diff --git a/models/prov_gigapath.py b/models/prov_gigapath.py
new file mode 100644
index 0000000..4e02cc5
--- /dev/null
+++ b/models/prov_gigapath.py
@@ -0,0 +1,110 @@
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING, Any, TypedDict
+
+import lz4.frame
+import numpy as np
+from fastapi import FastAPI, Request, Response
+from ray import serve
+
+
+if TYPE_CHECKING:
+    import torch
+
+
+class Config(TypedDict):
+    tile_size: int
+    model: dict[str, Any]
+    max_batch_size: int
+    batch_wait_timeout_s: float
+
+
+fastapi = FastAPI()
+
+
+@serve.deployment(num_replicas="auto")
+@serve.ingress(fastapi)
+class ProvGigaPath:
+    """GigaPath tile encoder for pathology."""
+
+    model: torch.nn.Module
+    transforms: Any
+    tile_size: int
+
+    def __init__(self) -> None:
+        import torch
+
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    def reconfigure(self, config: Config) -> None:
+        import timm
+        from timm.data.config import resolve_data_config
+        from timm.data.transforms_factory import create_transform
+
+        self.tile_size = config["tile_size"]
+        model_config = dict(config["model"])
+        repo_id = model_config["repo_id"]
+
+        self.model = timm.create_model(
+            f"hf-hub:{repo_id}",
+            pretrained=True,
+            num_classes=0,
+        )
+        self.model = self.model.to(self.device).eval()
+
+        self.transforms = create_transform(
+            **resolve_data_config(self.model.pretrained_cfg, model=self.model)
+        )
+
+        self.predict.set_max_batch_size(config["max_batch_size"])  # type: ignore[attr-defined]
+        self.predict.set_batch_wait_timeout_s(config["batch_wait_timeout_s"])  # type: ignore[attr-defined]
+
+    @serve.batch
+    async def predict(self, inputs: list[torch.Tensor]) -> list[torch.Tensor]:
+        import torch
+
+        tensors = torch.stack(inputs).to(self.device)
+        device_type = self.device.type
+
+        # PyTorch autocast does not support float16 on CPU (throws RuntimeError).
+        # bfloat16 is the only supported low-precision option for CPU inference.
+        autocast_dtype = torch.float16 if device_type == "cuda" else torch.bfloat16
+
+        with (
+            torch.inference_mode(),
+            torch.autocast(device_type=device_type, dtype=autocast_dtype),
+        ):
+            output = self.model(tensors)
+
+        return list(output)
+
+    @fastapi.post("/")
+    async def root(self, request: Request) -> Response:
+        """Run one LZ4-compressed RGB tile through the model.
+
+        The body is a raw uint8 ``tile_size x tile_size x 3`` buffer; the
+        ``x-output-dtype`` header (default ``float32``) picks the reply dtype.
+        """
+        from PIL import Image
+
+        data = await asyncio.to_thread(lz4.frame.decompress, await request.body())
+        shape = (self.tile_size, self.tile_size, 3)
+        image = np.frombuffer(data, dtype=np.uint8).reshape(shape)
+        dtype_name = request.headers.get("x-output-dtype", "float32").lower()
+        output_dtype = np.dtype(dtype_name)
+        tensor = self.transforms(Image.fromarray(image))
+        raw_output: torch.Tensor = await self.predict(tensor)
+        result = raw_output.cpu().numpy().astype(output_dtype, copy=False)
+
+        # Compress off the event loop, as decompression does, so pending
+        # requests can keep filling the next batch.
+        payload = await asyncio.to_thread(lz4.frame.compress, result.tobytes())
+        return Response(
+            content=payload,
+            media_type="application/octet-stream",
+            headers={"x-output-shape": str(result.shape)},
+        )
+
+
+app = ProvGigaPath.bind()  # type: ignore[attr-defined]

From 4c9dedf44fae0604306d307ed1d39903dde05331 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@mail.muni.cz>
Date: Thu, 7 May 2026 14:20:43 +0200
Subject: [PATCH 2/4] chore: raise max_queued_requests for all applications

---
 helm/rayservice/applications/episeg-1.yaml              | 2 +-
 helm/rayservice/applications/heatmap-builder.yaml       | 2 +-
 helm/rayservice/applications/prostate-classifier-1.yaml | 2 +-
 helm/rayservice/applications/prov-gigapath.yaml         | 2 +-
 helm/rayservice/applications/virchow2.yaml              | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/helm/rayservice/applications/episeg-1.yaml b/helm/rayservice/applications/episeg-1.yaml
index 5677799..5c8e9db 100644
--- a/helm/rayservice/applications/episeg-1.yaml
+++ b/helm/rayservice/applications/episeg-1.yaml
@@ -6,7 +6,7 @@
   deployments:
     - name: SemanticSegmentation
       max_ongoing_requests: 16
-      max_queued_requests: 32
+      max_queued_requests: 128
       autoscaling_config:
         min_replicas: 0
         max_replicas: 4
diff --git a/helm/rayservice/applications/heatmap-builder.yaml b/helm/rayservice/applications/heatmap-builder.yaml
index 13b90cb..f39bf49 100644
--- a/helm/rayservice/applications/heatmap-builder.yaml
+++ b/helm/rayservice/applications/heatmap-builder.yaml
@@ -6,7 +6,7 @@
   deployments:
     - name: HeatmapBuilder
       max_ongoing_requests: 16
-      max_queued_requests: 32
+      max_queued_requests: 128
       autoscaling_config:
         min_replicas: 0
         max_replicas: 4
diff --git a/helm/rayservice/applications/prostate-classifier-1.yaml b/helm/rayservice/applications/prostate-classifier-1.yaml
index a177c43..6cd33b2 100644
--- a/helm/rayservice/applications/prostate-classifier-1.yaml
+++ b/helm/rayservice/applications/prostate-classifier-1.yaml
@@ -6,7 +6,7 @@
   deployments:
     - name: BinaryClassifier
       max_ongoing_requests: 512
-      max_queued_requests: 1024
+      max_queued_requests: 4096
       autoscaling_config:
         min_replicas: 0
         max_replicas: 4
diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml
index 97f9a5c..3219939 100644
--- a/helm/rayservice/applications/prov-gigapath.yaml
+++ b/helm/rayservice/applications/prov-gigapath.yaml
@@ -8,7 +8,7 @@
   deployments:
     - name: ProvGigaPath
       max_ongoing_requests: 1024
-      max_queued_requests: 2048
+      max_queued_requests: 8192
       autoscaling_config:
         min_replicas: 0
         max_replicas: 4
diff --git a/helm/rayservice/applications/virchow2.yaml b/helm/rayservice/applications/virchow2.yaml
index cf797d8..eaac069 100644
--- a/helm/rayservice/applications/virchow2.yaml
+++ b/helm/rayservice/applications/virchow2.yaml
@@ -8,7 +8,7 @@
   deployments:
     - name: Virchow2
       max_ongoing_requests: 1024
-      max_queued_requests: 2048
+      max_queued_requests: 8192
       autoscaling_config:
         min_replicas: 0
         max_replicas: 4

From 80efa006eda23b61a5f527ecc1a3ae419825dc32 Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@mail.muni.cz>
Date: Thu, 7 May 2026 17:56:54 +0200
Subject: [PATCH 3/4] fix: point prov-gigapath working_dir at the main branch archive

---
 helm/rayservice/applications/prov-gigapath.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/helm/rayservice/applications/prov-gigapath.yaml b/helm/rayservice/applications/prov-gigapath.yaml
index 3219939..f0eb4db 100644
--- a/helm/rayservice/applications/prov-gigapath.yaml
+++ b/helm/rayservice/applications/prov-gigapath.yaml
@@ -4,7 +4,7 @@
   runtime_env:
     config:
       setup_timeout_seconds: 1800
-    working_dir: https://github.com/RationAI/model-service/archive/refs/heads/feature/gigapath.zip?v1
+    working_dir: https://github.com/RationAI/model-service/archive/refs/heads/main.zip
   deployments:
     - name: ProvGigaPath
       max_ongoing_requests: 1024

From a101567b7cfff2b7a5ad359e0fa9f8f4ff3a921b Mon Sep 17 00:00:00 2001
From: JiriStipek <567776@mail.muni.cz>
Date: Sun, 10 May 2026 19:53:44 +0200
Subject: [PATCH 4/4] fix: follow HF docs for gigapath loading, transforms and inference

---
 models/prov_gigapath.py | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/models/prov_gigapath.py b/models/prov_gigapath.py
index 4e02cc5..b59ed11 100644
--- a/models/prov_gigapath.py
+++ b/models/prov_gigapath.py
@@ -39,22 +39,30 @@ def __init__(self) -> None:
 
     def reconfigure(self, config: Config) -> None:
         import timm
-        from timm.data.config import resolve_data_config
-        from timm.data.transforms_factory import create_transform
+        from torchvision import transforms
 
         self.tile_size = config["tile_size"]
         model_config = dict(config["model"])
         repo_id = model_config["repo_id"]
 
         self.model = timm.create_model(
-            f"hf-hub:{repo_id}",
+            f"hf_hub:{repo_id}",
             pretrained=True,
-            num_classes=0,
         )
         self.model = self.model.to(self.device).eval()
 
-        self.transforms = create_transform(
-            **resolve_data_config(self.model.pretrained_cfg, model=self.model)
+        # Based on the HF documentation
+        self.transforms = transforms.Compose(
+            [
+                transforms.Resize(
+                    256, interpolation=transforms.InterpolationMode.BICUBIC
+                ),
+                transforms.CenterCrop(224),
+                transforms.ToTensor(),
+                transforms.Normalize(
+                    mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)
+                ),
+            ]
         )
 
         self.predict.set_max_batch_size(config["max_batch_size"])  # type: ignore[attr-defined]
@@ -65,18 +73,8 @@ async def predict(self, inputs: list[torch.Tensor]) -> list[torch.Tensor]:
         import torch
 
         tensors = torch.stack(inputs).to(self.device)
-        device_type = self.device.type
-
-        # PyTorch autocast does not support float16 on CPU (throws RuntimeError).
-        # bfloat16 is the only supported low-precision option for CPU inference.
-        autocast_dtype = torch.float16 if device_type == "cuda" else torch.bfloat16
-
-        with (
-            torch.inference_mode(),
-            torch.autocast(device_type=device_type, dtype=autocast_dtype),
-        ):
+        with torch.inference_mode():
             output = self.model(tensors)
-
         return list(output)
 
     @fastapi.post("/")