Fix: Improve HTTP API structure and async handler usage (#569) #2063
New file: health router (`@@ -0,0 +1,55 @@`)

```python
from typing import Any, Optional

from fastapi import APIRouter, Depends
from starlette.responses import JSONResponse

from inference.core.env import DOCKER_SOCKET_PATH
from inference.core.managers.metrics import get_container_stats
from inference.core.utils.container import is_docker_socket_mounted


def create_health_router(model_init_state: Optional[Any] = None) -> APIRouter:
    router = APIRouter()

    @router.get("/device/stats", summary="Device/container statistics")
    def device_stats():
        not_configured_error_message = {
            "error": "Device statistics endpoint is not enabled.",
            "hint": (
                "Mount the Docker socket and point to its location when running the docker "
                "container to collect device stats "
                "(i.e. `docker run ... -v /var/run/docker.sock:/var/run/docker.sock "
                "-e DOCKER_SOCKET_PATH=/var/run/docker.sock ...`)."
            ),
        }
        if not DOCKER_SOCKET_PATH:
            return JSONResponse(
                status_code=404,
                content=not_configured_error_message,
            )
        if not is_docker_socket_mounted(docker_socket_path=DOCKER_SOCKET_PATH):
            return JSONResponse(
                status_code=500,
                content=not_configured_error_message,
            )

        container_stats = get_container_stats(docker_socket_path=DOCKER_SOCKET_PATH)
        return JSONResponse(status_code=200, content=container_stats)

    @router.get("/readiness", status_code=200)
    def readiness(state: Any = Depends(lambda: model_init_state)):
        """Readiness endpoint for Kubernetes readiness probe."""
        if state is None:
            return {"status": "ready"}
        with state.lock:
            if state.is_ready:
                return {"status": "ready"}
            return JSONResponse(
                content={"status": "not ready"}, status_code=503
            )

    @router.get("/healthz", status_code=200)
    def healthz():
        """Health endpoint for Kubernetes liveness probe."""
        return {"status": "healthy"}

    return router
```
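As a sanity check, here is a minimal wiring sketch (hypothetical, not part of the diff) that mounts this router and exercises the probe endpoints with FastAPI's `TestClient`:

```python
# Hypothetical usage sketch, not part of the PR diff.
# create_health_router comes from the new module shown above.
from fastapi import FastAPI
from fastapi.testclient import TestClient

app = FastAPI()
app.include_router(create_health_router(model_init_state=None))

client = TestClient(app)
assert client.get("/healthz").json() == {"status": "healthy"}
# With model_init_state=None, the readiness probe reports ready immediately.
assert client.get("/readiness").json() == {"status": "ready"}
```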
New file: inference router (`@@ -0,0 +1,208 @@`)

```python
from typing import List, Optional, Union

from fastapi import APIRouter, BackgroundTasks

from inference.core import logger
from inference.core.entities.requests.inference import (
    ClassificationInferenceRequest,
    DepthEstimationRequest,
    InferenceRequest,
    InstanceSegmentationInferenceRequest,
    KeypointsDetectionInferenceRequest,
    ObjectDetectionInferenceRequest,
)
from inference.core.entities.responses.inference import (
    ClassificationInferenceResponse,
    DepthEstimationResponse,
    InferenceResponse,
    InstanceSegmentationInferenceResponse,
    KeypointsDetectionInferenceResponse,
    MultiLabelClassificationInferenceResponse,
    ObjectDetectionInferenceResponse,
    StubResponse,
)
from inference.core.env import DEPTH_ESTIMATION_ENABLED
from inference.core.interfaces.http.error_handlers import with_route_exceptions
from inference.core.interfaces.http.orjson_utils import orjson_response
from inference.core.managers.base import ModelManager
from inference.core.utils.model_alias import resolve_roboflow_model_alias
```

**Contributor:** This was moved.
```python
from inference.usage_tracking.collector import usage_collector


def create_inference_router(
    model_manager: ModelManager,
) -> APIRouter:
    router = APIRouter()
```

**Contributor:** With this modular approach we could also improve the docs; this would group the doc endpoints nicely.
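The contributor's accompanying snippet was not captured in this view; one plausible reading (an assumption, not the original suggestion) is to tag each router so its endpoints are grouped in the OpenAPI docs:

```python
# Hypothetical illustration of the grouping idea; the tag names are
# assumptions, not taken from the contributor's (elided) snippet.
from fastapi import FastAPI

app = FastAPI()
app.include_router(create_health_router(), tags=["health"])
app.include_router(create_info_router(), tags=["server-info"])
# The inference router would need a real ModelManager instance:
# app.include_router(create_inference_router(model_manager), tags=["inference"])
```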
```python
    def process_inference_request(
        inference_request: InferenceRequest,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
        **kwargs,
    ) -> InferenceResponse:
```

**Contributor:** From what I see, the docstrings were removed. Maybe it's a good opportunity to add docstrings for the public functions in the modules you created. WDYT?
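For illustration, a docstring along these lines could address the comment (the wording is assumed, not the author's):

```python
# Illustrative only; the docstring wording is an assumption, and the types
# come from the imports shown earlier in this diff.
def process_inference_request(
    inference_request: InferenceRequest,
    countinference: Optional[bool] = None,
    service_secret: Optional[str] = None,
    **kwargs,
) -> InferenceResponse:
    """Resolve the model alias, register the model, and run sync inference.

    Args:
        inference_request: Request carrying the model_id, api_key, and inputs.
        countinference: Optional flag forwarded to model_manager.add_model.
        service_secret: Optional secret forwarded to model_manager.add_model.
        **kwargs: Extra options forwarded to infer_from_request_sync.

    Returns:
        The inference response serialized with orjson_response.
    """
    ...
```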
```python
        de_aliased_model_id = resolve_roboflow_model_alias(
            model_id=inference_request.model_id
        )
        model_manager.add_model(
            de_aliased_model_id,
            inference_request.api_key,
            countinference=countinference,
            service_secret=service_secret,
        )
        resp = model_manager.infer_from_request_sync(
            de_aliased_model_id,
            inference_request,
            **kwargs,
        )
        return orjson_response(resp)

    @router.post(
        "/infer/object_detection",
        response_model=Union[
            ObjectDetectionInferenceResponse,
            List[ObjectDetectionInferenceResponse],
            StubResponse,
        ],
        summary="Object detection infer",
        description="Run inference with the specified object detection model",
        response_model_exclude_none=True,
    )
    @with_route_exceptions
    @usage_collector("request")
    def infer_object_detection(
```

**Contributor:** As mentioned previously, let's bring back docstrings on the functions.
```python
        inference_request: ObjectDetectionInferenceRequest,
        background_tasks: BackgroundTasks,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
    ):
        logger.debug("Reached /infer/object_detection")
        return process_inference_request(
            inference_request,
            active_learning_eligible=True,
            background_tasks=background_tasks,
            countinference=countinference,
            service_secret=service_secret,
        )

    @router.post(
        "/infer/instance_segmentation",
        response_model=Union[InstanceSegmentationInferenceResponse, StubResponse],
        summary="Instance segmentation infer",
        description="Run inference with the specified instance segmentation model",
    )
    @with_route_exceptions
    @usage_collector("request")
    def infer_instance_segmentation(
        inference_request: InstanceSegmentationInferenceRequest,
        background_tasks: BackgroundTasks,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
    ):
        logger.debug("Reached /infer/instance_segmentation")
        return process_inference_request(
            inference_request,
            active_learning_eligible=True,
            background_tasks=background_tasks,
            countinference=countinference,
            service_secret=service_secret,
        )

    @router.post(
        "/infer/semantic_segmentation",
        response_model=Union[InstanceSegmentationInferenceResponse, StubResponse],
```

**Contributor:** Suggested change to the `response_model` above.
| summary="Semantic segmentation infer", | ||||||
| description="Run inference with the specified semantic segmentation model", | ||||||
| ) | ||||||
| @with_route_exceptions | ||||||
| @usage_collector("request") | ||||||
| def infer_semantic_segmentation( | ||||||
| inference_request, | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing type hint, please double check the code to see if there are other places like that. |
||||||
```python
        background_tasks: BackgroundTasks,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
    ):
        logger.debug("Reached /infer/semantic_segmentation")
        return process_inference_request(
            inference_request,
            active_learning_eligible=True,
            background_tasks=background_tasks,
            countinference=countinference,
            service_secret=service_secret,
        )

    @router.post(
        "/infer/classification",
        response_model=Union[
            ClassificationInferenceResponse,
            MultiLabelClassificationInferenceResponse,
            StubResponse,
        ],
        summary="Classification infer",
        description="Run inference with the specified classification model",
    )
    @with_route_exceptions
    @usage_collector("request")
    def infer_classification(
        inference_request: ClassificationInferenceRequest,
        background_tasks: BackgroundTasks,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
    ):
        logger.debug("Reached /infer/classification")
        return process_inference_request(
            inference_request,
            active_learning_eligible=True,
            background_tasks=background_tasks,
            countinference=countinference,
            service_secret=service_secret,
        )

    @router.post(
        "/infer/keypoints_detection",
        response_model=Union[KeypointsDetectionInferenceResponse, StubResponse],
        summary="Keypoints detection infer",
        description="Run inference with the specified keypoints detection model",
    )
    @with_route_exceptions
    @usage_collector("request")
    def infer_keypoints(
        inference_request: KeypointsDetectionInferenceRequest,
        countinference: Optional[bool] = None,
        service_secret: Optional[str] = None,
    ):
        logger.debug("Reached /infer/keypoints_detection")
        return process_inference_request(
            inference_request,
            countinference=countinference,
            service_secret=service_secret,
        )

    if DEPTH_ESTIMATION_ENABLED:

        @router.post(
            "/infer/depth-estimation",
            response_model=DepthEstimationResponse,
            summary="Depth Estimation",
            description="Run the depth estimation model to generate a depth map.",
        )
        @with_route_exceptions
        def depth_estimation(
            inference_request: DepthEstimationRequest,
            countinference: Optional[bool] = None,
            service_secret: Optional[str] = None,
        ):
            logger.debug("Reached /infer/depth-estimation")
            depth_model_id = inference_request.model_id
            model_manager.add_model(
                depth_model_id,
                inference_request.api_key,
                countinference=countinference,
                service_secret=service_secret,
            )
            response = model_manager.infer_from_request_sync(
                depth_model_id, inference_request
            )
            return response

    return router
```
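On the missing type hint flagged for `infer_semantic_segmentation` above, a hypothetical fix might look like the following; no dedicated semantic segmentation request type appears in this diff's imports, so the base `InferenceRequest` here is an assumption:

```python
# Hypothetical annotation; a dedicated semantic segmentation request type is
# not imported in this diff, so the base InferenceRequest is used instead.
def infer_semantic_segmentation(
    inference_request: InferenceRequest,
    background_tasks: BackgroundTasks,
    countinference: Optional[bool] = None,
    service_secret: Optional[str] = None,
):
    ...
```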
New file: info router (`@@ -0,0 +1,68 @@`)

```python
from typing import Optional

from fastapi import APIRouter, HTTPException, Query

rom inference.core.version import __version__
```

**Contributor (suggested change):**

```diff
-rom inference.core.version import __version__
+from inference.core.version import __version__
```
```python
from inference.core.devices.utils import GLOBAL_INFERENCE_SERVER_ID
from inference.core.entities.responses.server_state import ServerVersionInfo


def create_info_router() -> APIRouter:
    router = APIRouter()

    @router.get(
        "/info",
        response_model=ServerVersionInfo,
        summary="Info",
        description="Get the server name and version number",
    )
    def root():
        """Endpoint to get the server name and version number.

        Returns:
            ServerVersionInfo: The server version information.
        """
        return ServerVersionInfo(
            name="Roboflow Inference Server",
            version=__version__,
            uuid=GLOBAL_INFERENCE_SERVER_ID,
        )

    @router.get(
        "/logs",
        summary="Get Recent Logs",
        description="Get recent application logs for debugging",
    )
    def get_logs(
        limit: Optional[int] = Query(
            100, description="Maximum number of log entries to return"
        ),
        level: Optional[str] = Query(
            None,
            description="Filter by log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
        ),
        since: Optional[str] = Query(
            None, description="Return logs since this ISO timestamp"
        ),
    ):
        """Only available when ENABLE_IN_MEMORY_LOGS is set to 'true'."""
```

**Contributor:** The docstrings and the FastAPI param/endpoint descriptions have two different roles: one provides input for the API documentation, the other directly comments the code (helpful when using an IDE). So let's leave both. Unfortunately we have inconsistencies throughout this API; for example, here we are missing any response description.
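A sketch of the reviewer's point (the wording and status codes are assumptions, not from the PR): document the response shapes through FastAPI's `responses` parameter while keeping the docstring for IDE users.

```python
# Sketch only; descriptions and status codes are assumed, not from the PR.
from fastapi import APIRouter

router = APIRouter()

@router.get(
    "/logs",
    summary="Get Recent Logs",
    description="Get recent application logs for debugging",
    responses={
        200: {"description": "Recent log entries plus a total count."},
        404: {"description": "In-memory logging is not enabled."},
    },
)
def get_logs_sketch():
    """Docstring kept alongside the OpenAPI metadata, per the review comment."""
    return {"logs": [], "total_count": 0}
```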
```python
        from inference.core.logging.memory_handler import (
            get_recent_logs,
            is_memory_logging_enabled,
        )

        if not is_memory_logging_enabled():
            raise HTTPException(
                status_code=404, detail="Logs endpoint not available"
            )

        try:
            logs = get_recent_logs(limit=limit or 100, level=level, since=since)
            return {"logs": logs, "total_count": len(logs)}
        except (ImportError, ModuleNotFoundError):
            raise HTTPException(
                status_code=500, detail="Logging system not properly initialized"
            )

    return router
```
**Contributor:** Always add a new line at the end of the file.