dstackai · peterschmidt85 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026 · Apr 14, 2026
diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md
@@ -517,6 +517,20 @@ Fleet my-gcp-fleet deleted
 Alternatively, you can delete a fleet by passing the fleet name  to `dstack fleet delete`.
 To terminate and delete specific instances from a fleet, pass `-i INSTANCE_NUM`.
 
+### List offers
+
+To inspect offers available through a fleet, pass `--fleet` to `dstack offer`.
+
+<div class="termy">
+
+```shell
+$ dstack offer --gpu H100 --fleet my-fleet
+```
+
+</div>
+
+Use `--group-by gpu,backend` to aggregate offers.
+
 !!! info "What's next?"
     1. Check [dev environments](dev-environments.md), [tasks](tasks.md), and
     [services](services.md)

diff --git a/docs/docs/guides/protips.md b/docs/docs/guides/protips.md
@@ -458,6 +458,9 @@ Getting offers...
 
 </div>
 
+By default, `dstack offer` ignores fleet configurations and shows all available offers that match the request.
+To inspect offers available through a specific fleet, pass `--fleet NAME`.
+
 ??? info "Grouping offers"
     Use `--group-by` to aggregate offers. Accepted values: `gpu`, `backend`, `region`, and `count`.
 

diff --git a/docs/docs/guides/troubleshooting.md b/docs/docs/guides/troubleshooting.md
@@ -54,7 +54,15 @@ If you run `dstack apply` and don't see any instance offers, it means that
 `dstack` could not find instances that match the requirements in your configuration.
 Below are some of the reasons why this might happen.
 
-> Feel free to use `dstack offer` to view available offers.
+Feel free to use `dstack offer` to inspect available offers:
+
+```shell
+# All matching offers, ignoring fleet configurations
+$ dstack offer --gpu H100
+
+# Offers available through a specific fleet
+$ dstack offer --gpu H100 --fleet my-fleet
+```
 
 #### Cause 1: No backends
 

diff --git a/docs/docs/reference/cli/dstack/offer.md b/docs/docs/reference/cli/dstack/offer.md
@@ -4,9 +4,6 @@ Displays available offers (hardware configurations) from configured backends or
 
 The output shows backend, region, instance type, resources, spot availability, and pricing.
 
-!!! info "Experimental"
-    `dstack offer` command is currently an experimental feature. Backward compatibility is not guaranteed across releases.
-
 ## Usage
 
 This command accepts most of the same arguments as [`dstack apply`](apply.md).
@@ -20,9 +17,28 @@ $ dstack offer --help
 
 </div>
 
+## Fleet offers
+
+By default, `dstack offer` ignores fleet configurations and shows all available offers that match the request.
+
+Use `--fleet` to inspect offers available through specific fleets. With a single `--fleet` flag,
+`dstack offer` shows offers available through that fleet. When `--fleet` is passed multiple times,
+it combines offers available through the selected fleets.
+
+<div class="termy">
+
+```shell
+$ dstack offer --gpu H100 --fleet my-fleet
+```
+
+</div>
+
+The same fleet filtering applies to `--group-by` output, e.g. `--group-by gpu,backend`
+or `--group-by gpu,backend,region`.
+
 ## Examples
 
-### Filtering offers
+### Filtering offers { #list-gpu-offers }
 
 The `--gpu` flag accepts the same specification format as the `gpu` property in [`dev environment`](../../../concepts/dev-environments.md), [`task`](../../../concepts/tasks.md), 
 [`service`](../../../concepts/services.md), and [`fleet`](../../../concepts/fleets.md) configurations.

diff --git a/skills/dstack/SKILL.md b/skills/dstack/SKILL.md
@@ -459,7 +459,7 @@ dstack stop my-run-name --abort
 
 ### List offers
 
-Offers represent available instance configurations available for provisioning across backends. `dstack offer` lists offers regardless of configured fleets.
+Offers represent instance configurations available for provisioning across backends. By default, `dstack offer` ignores fleet configurations and shows all available offers that match the request. Use `--fleet` to inspect offers available through specific fleets.
 
 ```bash
 # Filter by specific backend
@@ -474,10 +474,18 @@ dstack offer --gpu 24GB..80GB
 # Combine filters
 dstack offer --backend aws --gpu A100:80GB
 
+# Limit to a specific fleet
+dstack offer --fleet my-fleet
+
+# Combine offers from multiple fleets
+dstack offer --fleet my-fleet --fleet other-fleet
+
 # JSON output (for troubleshooting/scripting)
 dstack offer --json
 ```
 
+With a single `--fleet` flag, `dstack offer` shows offers available through that fleet. When `--fleet` is passed multiple times, it combines offers available through the selected fleets. Identical backend offers are shown once, while matching existing instances stay separate.
+
 **Max offers:** By default, `dstack offer` returns first N offers (output also includes the total number). Use `--max-offers N` to increase the limit.
 **Grouping:** Prefer `--group-by gpu` (other supported values: `gpu,backend`, `gpu,backend,region`) for aggregated output across all offers, not `--max-offers`.
 

diff --git a/src/dstack/_internal/cli/commands/offer.py b/src/dstack/_internal/cli/commands/offer.py
@@ -15,11 +15,8 @@
 from dstack._internal.core.models.configurations import ApplyConfigurationType, TaskConfiguration
 from dstack._internal.core.models.gpus import GpuGroup
 from dstack._internal.core.models.runs import RunSpec
-from dstack._internal.utils.logging import get_logger
 from dstack.api.utils import load_profile
 
-logger = get_logger(__name__)
-
 
 class OfferConfigurator(BaseRunConfigurator):
     TYPE = ApplyConfigurationType.TASK
@@ -77,11 +74,6 @@ def _register(self):
 
     def _command(self, args: argparse.Namespace):
         super()._command(args)
-        if args.fleets:
-            logger.warning(
-                "Specifying `--fleet` in `dstack offer` has no defined effect"
-                " and may be disallowed in a future release"
-            )
         # Set image and user so that the server (a) does not default gpu.vendor
         # to nvidia — `dstack offer` should show all vendors, and (b) does not
         # attempt to pull image config from the Docker registry.
@@ -114,7 +106,11 @@ def _command(self, args: argparse.Namespace):
                         run_spec,
                         max_offers=args.max_offers,
                     )
-                    print_run_plan(run_plan, include_run_properties=False)
+                    print_run_plan(
+                        run_plan,
+                        include_run_properties=False,
+                        show_offer_fleet_hint=run_spec.merged_profile.fleets is None,
+                    )
         else:
             if args.group_by:
                 gpus = self._list_gpus(args, run_spec)

diff --git a/src/dstack/_internal/cli/services/profile.py b/src/dstack/_internal/cli/services/profile.py
@@ -70,7 +70,7 @@ def register_profile_args(parser: argparse.ArgumentParser):
         action="append",
         metavar="NAME",
         dest="fleets",
-        help="Consider only instances from the specified fleet(s) for reuse",
+        help="Consider only the specified fleet(s)",
     )
     fleets_group_exc = fleets_group.add_mutually_exclusive_group()
     fleets_group_exc.add_argument(

diff --git a/src/dstack/_internal/cli/utils/run.py b/src/dstack/_internal/cli/utils/run.py
@@ -55,6 +55,12 @@ class RunWaitStatus(str, Enum):
     WAITING_FOR_SCHEDULE = "waiting for schedule"
 
 
+_OFFER_FLEET_HINT = (
+    "Hint: Existing fleets are ignored, and all available offers are shown."
+    " To filter by fleet, pass --fleet NAME."
+)
+
+
 def print_offers_json(run_plan: RunPlan, run_spec):
     """Print offers information in JSON format."""
     job_plan = run_plan.job_plans[0]
@@ -92,6 +98,7 @@ def print_run_plan(
     include_run_properties: bool = True,
     no_fleets: bool = False,
     verbose: bool = False,
+    show_offer_fleet_hint: bool = False,
 ):
     run_spec = run_plan.get_effective_run_spec()
     job_plan = run_plan.job_plans[0]
@@ -171,9 +178,9 @@ def th(s: str) -> str:
     offers.add_column("PRICE", style="grey58", ratio=1)
     offers.add_column()
 
-    job_plan.offers = job_plan.offers[:max_offers] if max_offers else job_plan.offers
+    displayed_offers = job_plan.offers[:max_offers] if max_offers else job_plan.offers
 
-    for i, offer in enumerate(job_plan.offers, start=1):
+    for i, offer in enumerate(displayed_offers, start=1):
         r = offer.instance.resources
 
         instance = offer.instance.name
@@ -188,19 +195,32 @@ def th(s: str) -> str:
             format_instance_availability(offer.availability),
             style=None if i == 1 or not include_run_properties else "secondary",
         )
-    if job_plan.total_offers > len(job_plan.offers):
+    if job_plan.total_offers > len(displayed_offers):
         offers.add_row("", "...", style="secondary")
 
     console.print(props)
     console.print()
-    if len(job_plan.offers) > 0:
+    if len(displayed_offers) > 0:
+        show_offer_fleet_hint_before_table = (
+            show_offer_fleet_hint
+            and job_plan.total_offers <= len(displayed_offers)
+            and len(displayed_offers) < 3
+        )
+        show_offer_fleet_hint_after_table = (
+            show_offer_fleet_hint and not show_offer_fleet_hint_before_table
+        )
+        if show_offer_fleet_hint_before_table:
+            console.print(f"[secondary]{_OFFER_FLEET_HINT}[/]")
+            console.print()
         console.print(offers)
-        if job_plan.total_offers > len(job_plan.offers):
+        if job_plan.total_offers > len(displayed_offers):
             console.print(
-                f"[secondary] Shown {len(job_plan.offers)} of {job_plan.total_offers} offers, "
+                f"[secondary] Shown {len(displayed_offers)} of {job_plan.total_offers} offers, "
                 f"${job_plan.max_price:3f}".rstrip("0").rstrip(".")
                 + "max[/]"
             )
+        if show_offer_fleet_hint_after_table:
+            console.print(f"[secondary]{_OFFER_FLEET_HINT}[/]")
         console.print()
     else:
         console.print(NO_FLEETS_WARNING if no_fleets else NO_OFFERS_WARNING)

diff --git a/src/dstack/_internal/server/routers/gpus.py b/src/dstack/_internal/server/routers/gpus.py
@@ -2,8 +2,10 @@
 
 from fastapi import APIRouter, Depends
 from packaging.version import Version
+from sqlalchemy.ext.asyncio import AsyncSession
 
 from dstack._internal.server.compatibility.gpus import patch_list_gpus_response
+from dstack._internal.server.db import get_session
 from dstack._internal.server.models import ProjectModel, UserModel
 from dstack._internal.server.schemas.gpus import ListGpusRequest, ListGpusResponse
 from dstack._internal.server.security.permissions import ProjectMember
@@ -23,10 +25,16 @@
 @project_router.post("/list", response_model=ListGpusResponse, response_model_exclude_none=True)
 async def list_gpus(
     body: ListGpusRequest,
+    session: Annotated[AsyncSession, Depends(get_session)],
     client_version: Annotated[Optional[Version], Depends(get_client_version)],
     user_project: Tuple[UserModel, ProjectModel] = Depends(ProjectMember()),
 ) -> ListGpusResponse:
     _, project = user_project
-    resp = await list_gpus_grouped(project=project, run_spec=body.run_spec, group_by=body.group_by)
+    resp = await list_gpus_grouped(
+        session=session,
+        project=project,
+        run_spec=body.run_spec,
+        group_by=body.group_by,
+    )
     patch_list_gpus_response(resp, client_version)
     return resp
diff --git a/src/dstack/_internal/server/services/gpus.py b/src/dstack/_internal/server/services/gpus.py
@@ -1,5 +1,7 @@
 from typing import Dict, List, Literal, Optional, Tuple
 
+from sqlalchemy.ext.asyncio import AsyncSession
+
 from dstack._internal.core.backends.base.backend import Backend
 from dstack._internal.core.errors import ServerClientError
 from dstack._internal.core.models.backends.base import BackendType
@@ -10,17 +12,22 @@
 from dstack._internal.core.models.runs import Requirements, RunSpec, get_policy_map
 from dstack._internal.server.models import ProjectModel
 from dstack._internal.server.schemas.gpus import ListGpusResponse
+from dstack._internal.server.services.jobs import get_jobs_from_run_spec
 from dstack._internal.server.services.offers import get_offers_by_requirements
+from dstack._internal.server.services.runs.plan import (
+    get_backend_offers_in_run_candidate_fleets,
+)
 from dstack._internal.utils.common import get_or_error
 
 
 async def list_gpus_grouped(
+    session: AsyncSession,
     project: ProjectModel,
     run_spec: RunSpec,
     group_by: Optional[List[Literal["backend", "region", "count"]]] = None,
 ) -> ListGpusResponse:
     """Retrieves available GPU specifications based on a run spec, with optional grouping."""
-    offers = await _get_gpu_offers(project=project, run_spec=run_spec)
+    offers = await _get_gpu_offers(session=session, project=project, run_spec=run_spec)
     backend_gpus = _process_offers_into_backend_gpus(offers)
     group_by_set = set(group_by) if group_by else set()
     if "region" in group_by_set and "backend" not in group_by_set:
@@ -47,10 +54,24 @@ async def list_gpus_grouped(
 
 
 async def _get_gpu_offers(
-    project: ProjectModel, run_spec: RunSpec
+    session: AsyncSession,
+    project: ProjectModel,
+    run_spec: RunSpec,
 ) -> List[Tuple[Backend, InstanceOfferWithAvailability]]:
     """Fetches all available instance offers that match the run spec's GPU requirements."""
     profile = run_spec.merged_profile
+    if profile.fleets is not None:
+        jobs = await get_jobs_from_run_spec(run_spec=run_spec, secrets={}, replica_num=0)
+        if len(jobs) == 0:
+            return []
+        return await get_backend_offers_in_run_candidate_fleets(
+            session=session,
+            project=project,
+            run_spec=run_spec,
+            job=jobs[0],
+            volumes=None,
+            max_offers_per_fleet=None,
+        )
     requirements = Requirements(
         resources=run_spec.configuration.resources,
         max_price=profile.max_price,