diff --git a/pyproject.toml b/pyproject.toml
index 897ac15..dc75397 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,12 +31,6 @@ dependencies = [
     # against 0.0.8) and ``tests/test_gpt_oss_harmony_parity.py`` passes on it,
     # so the older harmony is safe.
     "openai-harmony>=0.0.4",
-    # Crusoe's Rust BPE tokenizer; ~10x faster encode vs HF's tokenizers.
-    # ``load_tokenizer`` patches it in by default for every supported model
-    # except a small denylist (DeepSeek-V3 family). The patch is bracketed
-    # around ``from_pretrained``, so subsequent ``AutoTokenizer`` calls
-    # outside the renderers package stay vanilla.
-    "fastokens>=0.2.0",
     # ``BaseRendererConfig`` inherits from ``pydantic_config.BaseConfig`` so
     # the typed-config surface stays uniform with prime-rl / verifiers config
     # bases. Transitively brings pydantic, which ``renderers.configs`` also
@@ -103,12 +97,6 @@ required-version = ">=0.11.1"
 exclude-newer = "7 days"
 
 [tool.uv.exclude-newer-package]
-# fastokens 0.2.0 was published on 2026-05-17 and contains the
-# ``unpatch_transformers`` fix (crusoecloud/fastokens#32) needed for
-# MiniMax-M2's slow→fast tokenizer conversion path. Exempting it from
-# the project-wide 7-day cutoff lets the lockfile pick it up immediately
-# while the rest of the dependency graph stays gated.
-fastokens = false
 # PrimeIntellect-published packages in this project's dependency closure —
 # fast-track so first-party releases can land same-day. Only packages that
 # appear in `uv tree` are listed.
diff --git a/renderers/base.py b/renderers/base.py
index ed5bc7e..64a8760 100644
--- a/renderers/base.py
+++ b/renderers/base.py
@@ -1,8 +1,6 @@
 from __future__ import annotations
 
-import contextlib
 import enum
-import io
 import logging
 import queue
 import threading
@@ -1163,29 +1161,6 @@ def _model_has_vision_config(model_name: str) -> bool:
 }
 
 
-# Models for which ``fastokens`` is known to diverge from vanilla
-# ``transformers.AutoTokenizer`` and therefore must NOT be patched.
-# Empirical audit ran each entry of ``MODEL_RENDERER_MAP`` through both
-# backends. The entries below fail to load under fastokens (DeepSeek-V3
-# family — Metaspace pretokenizer not yet implemented).
-FASTOKENS_INCOMPATIBLE: frozenset[str] = frozenset(
-    {
-        # fastokens: ``ValueError: pre-tokenizer error: unsupported
-        # pre-tokenizer type: Metaspace`` — DeepSeek's tokenizer uses
-        # SentencePiece-style Metaspace pretokenization which fastokens
-        # doesn't yet implement.
-        "deepseek-ai/DeepSeek-V3",
-        "deepseek-ai/DeepSeek-V3-Base",
-        "deepseek-ai/DeepSeek-R1",
-        "deepseek-ai/DeepSeek-R1-0528",
-    }
-)
-
-
-_FASTOKENS_PATCH_LOCK = threading.Lock()
-_FASTOKENS_ANNOUNCED = False
-
-
 def _tokenizer_source_for(model_name_or_path: str) -> str:
     return TOKENIZER_SOURCE_OVERRIDES.get(model_name_or_path, model_name_or_path)
 
@@ -1222,48 +1197,6 @@ def _preserve_requested_tokenizer_name(
     return tokenizer
 
 
-def _patched_load(model_name_or_path: str, **kwargs):
-    """Run ``AutoTokenizer.from_pretrained`` with fastokens patched in
-    process-locally — patch around the load, unpatch right after.
-
-    fastokens captures the loaded backend on a per-tokenizer basis, so
-    after we unpatch the returned tokenizer object continues to use
-    fastokens for ``encode``/``decode`` while subsequent
-    ``AutoTokenizer.from_pretrained`` calls (outside our control) go
-    back to vanilla. This keeps the global side effect minimal.
-
-    fastokens itself prints ``[fastokens] patch_transformers: ...`` to
-    stdout on every patch/unpatch call. Building a pool of size N would
-    therefore emit ~N lines (more under thread contention, where some
-    threads see ``already patched``). We swallow those prints under a
-    lock — ``contextlib.redirect_stdout`` swaps ``sys.stdout``
-    process-wide, so the lock keeps unrelated stdout writes from other
-    threads from disappearing into our buffer. The patch/unpatch calls
-    are cheap; only the brief patch+unpatch is serialized, the actual
-    ``from_pretrained`` still runs concurrently across pool slots. A
-    single ``logger.info`` is emitted on the first patch so the fast
-    path is still discoverable in logs.
-    """
-    import fastokens
-
-    global _FASTOKENS_ANNOUNCED
-
-    with _FASTOKENS_PATCH_LOCK:
-        with contextlib.redirect_stdout(io.StringIO()):
-            fastokens.patch_transformers()
-        if not _FASTOKENS_ANNOUNCED:
-            logger.info(
-                "fastokens enabled — tokenizers load through the Rust BPE fast path (~10x encode speedup)."
-            )
-            _FASTOKENS_ANNOUNCED = True
-    try:
-        return _load_tokenizer_via_auto(model_name_or_path, **kwargs)
-    finally:
-        with _FASTOKENS_PATCH_LOCK:
-            with contextlib.redirect_stdout(io.StringIO()):
-                fastokens.unpatch_transformers()
-
-
 def _load_fast_tokenizer_directly(
     model_name_or_path: str, revision: str | None
 ) -> Any | None:
@@ -1323,36 +1256,14 @@ def _load_tokenizer_via_auto(model_name_or_path: str, **kwargs) -> Any:
         return tok
 
 
-def load_tokenizer(
-    model_name_or_path: str,
-    *,
-    use_fastokens: bool = True,
-):
-    """Load a tokenizer with the renderers-package security + perf policy.
+def load_tokenizer(model_name_or_path: str):
+    """Load a tokenizer with the renderers-package security policy.
 
-    **Security** — default ``trust_remote_code=False``. Models listed in
+    Loads default to ``trust_remote_code=False``. Models listed in
     ``TRUSTED_REVISIONS`` (Moonshot Kimi-K2 family) load with
     ``trust_remote_code=True`` AND a pinned ``revision=<sha>`` so
     transformers only executes the reviewed commit's tokenizer Python.
 
-    **Performance** — ``use_fastokens=True`` (default) routes the load
-    through ``fastokens.patch_transformers()`` so the resulting tokenizer
-    encodes ~10x faster than vanilla ``tokenizers``. The patch is
-    bracketed: it's applied before ``from_pretrained`` and removed
-    immediately after, so global ``AutoTokenizer.from_pretrained`` calls
-    elsewhere in the user's process are not affected.
-
-    Models in ``FASTOKENS_INCOMPATIBLE`` (DeepSeek-V3 family) skip the
-    patch — fastokens currently fails to load them. Pass
-    ``use_fastokens=False`` to force the vanilla backend for any other
-    model.
-
-    Unknown / fine-tuned model paths fall through to
-    ``trust_remote_code=False`` and the patched-load fast path. If
-    fastokens raises during the patched load (e.g. an unknown
-    pre-tokenizer type), we automatically retry with the vanilla
-    backend and emit an INFO log.
-
     ``AutoTokenizer.from_pretrained`` eagerly builds the model config to
     resolve the tokenizer class. If that construction raises on a
     modeling-only concern the tokenizer doesn't need (e.g. RoPE
@@ -1367,28 +1278,7 @@ def load_tokenizer(
     """
     load_name_or_path = _tokenizer_source_for(model_name_or_path)
     kwargs = _tokenizer_load_kwargs(load_name_or_path)
-
-    if not use_fastokens or load_name_or_path in FASTOKENS_INCOMPATIBLE:
-        tok = _load_tokenizer_via_auto(load_name_or_path, **kwargs)
-        return _preserve_requested_tokenizer_name(
-            tok,
-            requested_name_or_path=model_name_or_path,
-            loaded_name_or_path=load_name_or_path,
-        )
-
-    try:
-        tok = _patched_load(load_name_or_path, **kwargs)
-    except Exception as exc:
-        logger.info(
-            "fastokens could not load %r (%s: %s); falling back to vanilla "
-            "AutoTokenizer. Add this model to FASTOKENS_INCOMPATIBLE in "
-            "renderers.base to suppress the retry.",
-            load_name_or_path,
-            type(exc).__name__,
-            str(exc)[:160],
-        )
-        tok = _load_tokenizer_via_auto(load_name_or_path, **kwargs)
-
+    tok = _load_tokenizer_via_auto(load_name_or_path, **kwargs)
     return _preserve_requested_tokenizer_name(
         tok,
         requested_name_or_path=model_name_or_path,
@@ -1718,104 +1608,28 @@ def trim_to_turn_close(
     return previous_ids
 
 
-# Per-model offset-aware tokenizer cache. ``attribute_text_segments``
-# uses the fast HuggingFace tokenizer's ``offset_mapping`` to attribute
-# each token to its source text segment under one BPE pass. Fastokens
-# (the Rust BPE we patch in by default for ~10x faster encode) does not
-# track character offsets — the patched tokenizer's
-# ``return_offsets_mapping=True`` raises ``NotImplementedError``. So we
-# keep a parallel vanilla tokenizer per model purely for offset queries.
-# Memory cost is one extra tokenizer per *unique* model name across all
-# pools / renderers (the cache is process-global), independent of pool
-# size.
-_offset_tokenizers: dict[str, Any] = {}
-_offset_tokenizers_lock = threading.Lock()
-
-
 def _get_offset_tokenizer(tokenizer):
-    """Return a tokenizer that supports ``return_offsets_mapping=True``.
-
-    If ``tokenizer`` itself supports offsets, returns it unchanged.
-    Otherwise loads a vanilla (non-fastokens) tokenizer from
-    ``tokenizer.name_or_path`` and caches it. Raises if the tokenizer
-    has no usable ``name_or_path`` — hand-coded renderers always pass
-    a tokenizer loaded via ``load_tokenizer`` which does set it.
+    """Return ``tokenizer`` after verifying ``return_offsets_mapping=True`` works.
+
+    Hand-coded renderers concatenate scaffold + body in one BPE pass to
+    preserve cross-boundary merges, then attribute each resulting token
+    back to its source segment via the fast tokenizer's
+    ``offset_mapping`` (see :func:`attribute_text_segments`). The
+    contract: every BYO tokenizer must be a fast tokenizer with offset
+    support. Tokenizers loaded via :func:`load_tokenizer` are
+    ``PreTrainedTokenizerFast`` instances that satisfy this trivially.
     """
-    # Cheap probe: does this tokenizer already provide offsets?
     try:
         tokenizer("a", add_special_tokens=False, return_offsets_mapping=True)
-        return tokenizer
-    except (NotImplementedError, ValueError, TypeError):
-        pass
-
-    name_or_path = getattr(tokenizer, "name_or_path", "")
-    if not name_or_path:
+    except (NotImplementedError, ValueError, TypeError) as exc:
         raise RuntimeError(
-            "Cannot construct an offset-aware tokenizer: the supplied "
-            "tokenizer has no ``name_or_path`` to fall back on. Pass a "
-            "tokenizer loaded via ``renderers.base.load_tokenizer``."
-        )
-
-    with _offset_tokenizers_lock:
-        cached = _offset_tokenizers.get(name_or_path)
-        if cached is not None:
-            return cached
-
-        load_name_or_path = _tokenizer_source_for(name_or_path)
-        kwargs = _tokenizer_load_kwargs(load_name_or_path)
-
-        def _has_offsets(tok) -> bool:
-            if not getattr(tok, "is_fast", False):
-                return False
-            try:
-                tok("a", add_special_tokens=False, return_offsets_mapping=True)
-                return True
-            except (NotImplementedError, ValueError, TypeError):
-                return False
-
-        # We want HF's Rust tokenizer with offset tracking, not the fastokens
-        # shim. The shim is installed by a *process-global* monkeypatch that
-        # ``load_tokenizer`` toggles per pool-slot load, so a plain reload here
-        # can race a concurrent slot's open patch window and silently pick up
-        # the offset-less shim (then get cached, poisoning the process). So:
-        # load, verify offsets, and if missing, reload with the patch forced
-        # off — serialized against pool patch/unpatch via ``_FASTOKENS_PATCH_LOCK``
-        # so no concurrent window can swap the shim back in mid-load — then
-        # restore the prior patch state. Never cache a non-offset tokenizer.
-        offset_tok = _load_tokenizer_via_auto(load_name_or_path, **kwargs)
-        offset_tok = _preserve_requested_tokenizer_name(
-            offset_tok,
-            requested_name_or_path=name_or_path,
-            loaded_name_or_path=load_name_or_path,
-        )
-        if not _has_offsets(offset_tok):
-            import fastokens
-
-            with _FASTOKENS_PATCH_LOCK:
-                was_patched = bool(getattr(fastokens, "_patched", False))
-                if was_patched:
-                    with contextlib.redirect_stdout(io.StringIO()):
-                        fastokens.unpatch_transformers()
-                try:
-                    offset_tok = _load_tokenizer_via_auto(load_name_or_path, **kwargs)
-                    offset_tok = _preserve_requested_tokenizer_name(
-                        offset_tok,
-                        requested_name_or_path=name_or_path,
-                        loaded_name_or_path=load_name_or_path,
-                    )
-                finally:
-                    if was_patched:
-                        with contextlib.redirect_stdout(io.StringIO()):
-                            fastokens.patch_transformers()
-        if not _has_offsets(offset_tok):
-            raise RuntimeError(
-                f"Could not load an offset-capable tokenizer for {name_or_path!r}: "
-                "offset_mapping is unavailable even with the fastokens patch off. "
-                "Hand-coded renderers require a fast tokenizer for body/scaffold "
-                "attribution."
-            )
-        _offset_tokenizers[name_or_path] = offset_tok
-        return offset_tok
+            "Hand-coded renderers require a fast tokenizer with "
+            "``return_offsets_mapping=True`` support for body/scaffold "
+            "attribution. Pass a tokenizer loaded via "
+            "``renderers.base.load_tokenizer``, or any "
+            "``transformers.PreTrainedTokenizerFast`` instance."
+        ) from exc
+    return tokenizer
 
 
 def attribute_text_segments(
@@ -1839,14 +1653,13 @@ def attribute_text_segments(
     tokens (rare; usually pre-tokenizer artefacts) are attributed to
     the most recently entered segment.
 
-    Requires a HuggingFace fast tokenizer with offset tracking. The
-    ``fastokens`` patch ``load_tokenizer`` applies by default does
-    **not** track offsets — when that's the case we transparently load
-    a vanilla offset-capable tokenizer for the same model and cache it
-    (see :func:`_get_offset_tokenizer`). Hand-coded renderers are only
-    registered for model families that ship a fast tokenizer, so a
-    silent slow-tokenizer fallback isn't supported — BPE drift at the
-    wrap/body boundary would defeat the whole point.
+    Requires a HuggingFace fast tokenizer with offset tracking. Every
+    model in ``MODEL_RENDERER_MAP`` ships one, so the offset lookup
+    always succeeds for tokenizers obtained via :func:`load_tokenizer`.
+    A BYO tokenizer must be a ``PreTrainedTokenizerFast`` (or anything
+    else accepting ``return_offsets_mapping=True``); slow tokenizers
+    aren't supported — BPE drift at the wrap/body boundary would
+    defeat the whole point.
 
     Empty input or empty joined text returns an empty list.
     """
diff --git a/tests/test_load_tokenizer.py b/tests/test_load_tokenizer.py
index b2e49c8..ea15d6a 100644
--- a/tests/test_load_tokenizer.py
+++ b/tests/test_load_tokenizer.py
@@ -12,6 +12,8 @@
 from types import SimpleNamespace
 from unittest.mock import patch
 
+import pytest
+
 from renderers import base
 from renderers.base import TOKENIZER_SOURCE_OVERRIDES, TRUSTED_REVISIONS, load_tokenizer
 
@@ -81,7 +83,7 @@ def test_meta_llama_loads_tokenizer_from_unsloth_mirror(mock_from_pretrained):
     mirror = "unsloth/Llama-3.2-1B-Instruct"
     mock_from_pretrained.return_value = SimpleNamespace(name_or_path=mirror)
 
-    tok = load_tokenizer(canonical, use_fastokens=False)
+    tok = load_tokenizer(canonical)
 
     args, kwargs = mock_from_pretrained.call_args
     assert args == (mirror,)
@@ -120,42 +122,22 @@ def test_tokenizer_source_overrides_are_exact_llama_mirrors():
     }
 
 
-def test_offset_tokenizer_uses_unsloth_mirror_for_meta_llama(monkeypatch):
-    """Offset-tokenizer reloads must use the same unrestricted source
-    override, otherwise Llama rendering can hit the gated Meta repo after
-    the initial tokenizer load succeeds."""
+def test_get_offset_tokenizer_rejects_offsetless_byo():
+    """BYO tokenizers without ``return_offsets_mapping`` support raise a
+    clear error. Hand-coded renderers concatenate scaffold + body in one
+    BPE pass and attribute tokens via the fast tokenizer's offset map;
+    no transparent reload-from-name_or_path fallback exists. The
+    contract is: pass a fast tokenizer or get a loud ``RuntimeError`` from
+    this check, not silent BPE drift at the wrap/body boundary."""
 
     class _NoOffsets:
-        name_or_path = "meta-llama/Llama-3.2-1B-Instruct"
-
-        def __call__(self, *args, **kwargs):
-            raise NotImplementedError("fastokens shim has no offsets")
-
-    class _OffsetTokenizer:
-        is_fast = True
-
-        def __init__(self, name_or_path: str):
-            self.name_or_path = name_or_path
+        name_or_path = "anywhere/anything"
 
         def __call__(self, *args, **kwargs):
-            return {"offset_mapping": [(0, 1)]}
-
-    calls = []
-
-    def _fake_load(name_or_path, **kwargs):
-        calls.append((name_or_path, kwargs))
-        return _OffsetTokenizer(name_or_path)
-
-    base._offset_tokenizers.clear()
-    monkeypatch.setattr(base, "_load_tokenizer_via_auto", _fake_load)
-
-    try:
-        tok = base._get_offset_tokenizer(_NoOffsets())
-    finally:
-        base._offset_tokenizers.clear()
+            raise NotImplementedError("BYO tokenizer has no offsets")
 
-    assert calls == [("unsloth/Llama-3.2-1B-Instruct", {"trust_remote_code": False})]
-    assert tok.name_or_path == "meta-llama/Llama-3.2-1B-Instruct"
+    with pytest.raises(RuntimeError, match="fast tokenizer.*offsets"):
+        base._get_offset_tokenizer(_NoOffsets())
 
 
 # ---------------------------------------------------------------------------
diff --git a/tests/test_load_tokenizer_fastokens.py b/tests/test_load_tokenizer_fastokens.py
deleted file mode 100644
index 28c42a0..0000000
--- a/tests/test_load_tokenizer_fastokens.py
+++ /dev/null
@@ -1,213 +0,0 @@
-"""Coverage for the fastokens fast-path in ``renderers.base.load_tokenizer``.
-
-``load_tokenizer`` defaults to routing every supported model through
-``fastokens.patch_transformers()`` for ~10x faster encode. Models in
-``FASTOKENS_INCOMPATIBLE`` skip the patch (DeepSeek's Metaspace
-pretokenizer isn't supported). Callers can opt out per-call with
-``use_fastokens=False``.
-
-These tests pin the policy:
-
-1. The denylist contains the empirically-verified incompat models —
-   adding to it should be a deliberate review action.
-2. With ``use_fastokens=True`` (the default) on a compatible model, the
-   resulting tokenizer's backend is the fastokens shim. Encode output
-   stays byte-identical to vanilla.
-3. With ``use_fastokens=False``, the resulting tokenizer is vanilla.
-4. For incompat models, the fast path is silently skipped and the
-   tokenizer still loads + encodes correctly.
-5. The fastokens patch is removed immediately after the load so it
-   doesn't leak into the caller's process — subsequent
-   ``AutoTokenizer.from_pretrained`` calls outside ``load_tokenizer``
-   use vanilla.
-"""
-
-from __future__ import annotations
-
-import pytest
-from transformers import AutoTokenizer
-
-from renderers.base import (
-    FASTOKENS_INCOMPATIBLE,
-    load_tokenizer,
-)
-
-
-# ---------------------------------------------------------------------------
-# Denylist shape
-# ---------------------------------------------------------------------------
-
-
-def test_fastokens_incompatible_is_explicit_set():
-    """The denylist is small and audited — pinning the exact contents
-    catches accidental drift. Adding/removing entries should be a
-    deliberate action with a parity probe."""
-    assert FASTOKENS_INCOMPATIBLE == frozenset(
-        {
-            "deepseek-ai/DeepSeek-V3",
-            "deepseek-ai/DeepSeek-V3-Base",
-            "deepseek-ai/DeepSeek-R1",
-            "deepseek-ai/DeepSeek-R1-0528",
-        }
-    )
-
-
-# ---------------------------------------------------------------------------
-# Fast path (compatible model — Qwen3.5-9B as representative)
-# ---------------------------------------------------------------------------
-
-
-_FAST_MODEL = "Qwen/Qwen3.5-9B"
-
-
-def _backend_class_name(tok) -> str:
-    """Return the class name of the underlying backend object so tests
-    can tell vanilla from fastokens-shimmed tokenizers."""
-    backend = getattr(tok, "_tokenizer", None)
-    return type(backend).__name__ if backend is not None else type(tok).__name__
-
-
-def test_default_uses_fastokens_on_compatible_model():
-    tok = load_tokenizer(_FAST_MODEL)
-    # The shim type is named ``_TokenizerShim`` (see fastokens._compat);
-    # match by name so we don't import private fastokens internals.
-    assert "Shim" in _backend_class_name(tok), (
-        f"Expected fastokens shim backend, got {_backend_class_name(tok)!r}"
-    )
-
-
-def test_explicit_off_returns_vanilla_backend():
-    tok = load_tokenizer(_FAST_MODEL, use_fastokens=False)
-    assert "Shim" not in _backend_class_name(tok), (
-        f"Expected vanilla backend, got {_backend_class_name(tok)!r}"
-    )
-
-
-def test_fast_and_vanilla_encode_identically_on_compatible_model():
-    fast = load_tokenizer(_FAST_MODEL)
-    vanilla = load_tokenizer(_FAST_MODEL, use_fastokens=False)
-    samples = [
-        "Hello, world!",
-        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
-        "🌍 emoji + 中文 + tabs\there",
-        " ".join([f"word_{i}" for i in range(50)]),
-    ]
-    for s in samples:
-        assert fast.encode(s, add_special_tokens=False) == vanilla.encode(
-            s, add_special_tokens=False
-        ), f"encode diverged on {s!r}"
-
-
-# ---------------------------------------------------------------------------
-# Denylist: incompat models silently skip the patch and still load.
-# ---------------------------------------------------------------------------
-
-
-@pytest.mark.parametrize("model", sorted(FASTOKENS_INCOMPATIBLE))
-def test_incompat_model_loads_via_vanilla_backend(model):
-    """For models we know diverge / fail under fastokens, the fast path
-    must be skipped so the load still succeeds with a vanilla backend."""
-    if "DeepSeek" in model:
-        # Skip if upstream gating / size makes the load impractical here.
-        # We only care that the path doesn't try fastokens. Probe the
-        # tokenizer_config to make sure the repo is reachable; if not,
-        # skip rather than fail (CI without HF auth, network issues).
-        from huggingface_hub import HfApi
-
-        try:
-            HfApi().repo_info(model)
-        except Exception as e:
-            pytest.skip(f"{model}: repo unreachable in this env ({e})")
-    tok = load_tokenizer(model)
-    assert "Shim" not in _backend_class_name(tok), (
-        f"{model}: should NOT have been patched; got {_backend_class_name(tok)!r}"
-    )
-    # And it still encodes.
-    ids = tok.encode("hello", add_special_tokens=False)
-    assert len(ids) > 0
-
-
-# ---------------------------------------------------------------------------
-# Patch must not leak: AutoTokenizer.from_pretrained calls OUTSIDE
-# load_tokenizer should still produce a vanilla tokenizer.
-# ---------------------------------------------------------------------------
-
-
-def test_patch_is_unloaded_after_call():
-    """``load_tokenizer`` brackets the fastokens patch. After it returns
-    a fastokens-shimmed tokenizer, a fresh ``AutoTokenizer.from_pretrained``
-    call must NOT pick up the patch — the user's process stays clean."""
-    fast = load_tokenizer(_FAST_MODEL)
-    assert "Shim" in _backend_class_name(fast), "preconditions: fast path active"
-
-    # Now call AutoTokenizer.from_pretrained directly. It MUST be vanilla.
-    direct = AutoTokenizer.from_pretrained(_FAST_MODEL, trust_remote_code=False)
-    assert "Shim" not in _backend_class_name(direct), (
-        f"fastokens patch leaked into user-side AutoTokenizer call: "
-        f"got {_backend_class_name(direct)!r}"
-    )
-
-
-# ---------------------------------------------------------------------------
-# Failure-mode fallback: if fastokens raises during the patched load,
-# load_tokenizer falls back to vanilla without surfacing the error.
-# ---------------------------------------------------------------------------
-
-
-def test_fallback_on_fastokens_load_error(monkeypatch):
-    """Simulate fastokens raising during patched load — load_tokenizer
-    should fall back to vanilla and return a working tokenizer."""
-    import renderers.base as rb
-
-    def _boom(*args, **kwargs):
-        raise ValueError("simulated fastokens failure: unsupported pre-tokenizer")
-
-    monkeypatch.setattr(rb, "_patched_load", _boom)
-
-    tok = load_tokenizer(_FAST_MODEL)  # default use_fastokens=True
-    # The vanilla fallback ran — backend is not a fastokens shim.
-    assert "Shim" not in _backend_class_name(tok)
-    # Still works.
-    assert len(tok.encode("hi", add_special_tokens=False)) > 0
-
-
-# ---------------------------------------------------------------------------
-# Print suppression: fastokens itself prints "[fastokens]
-# patch_transformers: ..." on every patch/unpatch call. Building a
-# RendererPool of size N would emit ~N lines (the pool factory calls
-# load_tokenizer once per slot). load_tokenizer swallows that stdout
-# chatter and emits a single INFO log on the first patch instead.
-# ---------------------------------------------------------------------------
-
-
-def test_no_fastokens_stdout_chatter(capsys, caplog):
-    """``load_tokenizer`` must not leak ``[fastokens]`` prints onto
-    stdout, and must emit exactly one INFO log per process announcing
-    the fast path (not once per call)."""
-    import logging
-
-    import renderers.base as rb
-
-    # Reset the process-wide "announced" flag so this test sees the
-    # first-call log even if another test loaded a tokenizer earlier.
-    rb._FASTOKENS_ANNOUNCED = False
-
-    with caplog.at_level(logging.INFO, logger="renderers.base"):
-        load_tokenizer(_FAST_MODEL)
-        load_tokenizer(_FAST_MODEL)
-
-    captured = capsys.readouterr()
-    assert "[fastokens]" not in captured.out, (
-        f"fastokens print leaked to stdout: {captured.out!r}"
-    )
-    assert "[fastokens]" not in captured.err, (
-        f"fastokens print leaked to stderr: {captured.err!r}"
-    )
-
-    fastokens_info = [
-        r for r in caplog.records if "fastokens enabled" in r.getMessage()
-    ]
-    assert len(fastokens_info) == 1, (
-        f"expected exactly one fastokens INFO log across two loads, "
-        f"got {len(fastokens_info)}"
-    )
diff --git a/uv.lock b/uv.lock
index eea8022..2c6f5e6 100644
--- a/uv.lock
+++ b/uv.lock
@@ -9,12 +9,11 @@ resolution-markers = [
 ]
 
 [options]
-exclude-newer = "2026-06-19T02:55:59.889910839Z"
+exclude-newer = "2026-06-19T02:36:32.208558271Z"
 exclude-newer-span = "P7D"
 
 [options.exclude-newer-package]
 prime-pydantic-config = false
-fastokens = false
 
 [[package]]
 name = "annotated-doc"
@@ -268,35 +267,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" },
 ]
 
-[[package]]
-name = "fastokens"
-version = "0.2.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/14/8e/7e88ec1d48db5a6e8d8d44318ce285e38c04b81508bdc2a60e17045a116f/fastokens-0.2.0.tar.gz", hash = "sha256:ef0e175de5c8cb1b616b3210d75dce1fab78e35fc02f77f03f7847d4678be686", size = 675822, upload-time = "2026-05-17T10:32:55.642Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/e8/14/3d640cbe3c866ee6c113ea4ca37c16c5aa44be6412918928bbd3f3b739ef/fastokens-0.2.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:e7db86c2785a502e3cd993c7d3a91c46e2751f94d1f446caa9600e9cf3dafbd1", size = 3078350, upload-time = "2026-05-17T10:32:41.313Z" },
-    { url = "https://files.pythonhosted.org/packages/bc/bf/33ca3798842fb8bdacf03a97c874454d50fe328d44e0d3c3ba7a633fd3ab/fastokens-0.2.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa8dcbc6ad3f7a7e9f5bf1ef3cfc9f0c04f0c8779be9e38864815dc567b27de4", size = 2983397, upload-time = "2026-05-17T10:32:38.823Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/0a/1bfd13fb855bce3ce50faa644d6e0e19343035706c2628e17da1247bbf50/fastokens-0.2.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09b399600ae5beaa34e45afd05c6835b8569861955dfdc3e05afba25fe414e5a", size = 3309835, upload-time = "2026-05-17T10:32:33.427Z" },
-    { url = "https://files.pythonhosted.org/packages/32/ce/33292977a81011ffc59cee1c20b6f8d0dbd9cb39209c327dc74dc542178d/fastokens-0.2.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f363b1e89fe4ed979224c326b25b33b28e1172134f3e7959238276170f12328e", size = 3615173, upload-time = "2026-05-17T10:32:30.898Z" },
-    { url = "https://files.pythonhosted.org/packages/07/1c/1bef8b4831bf9220ae182fcee3e250273b7f537c81aec96041f6eb4bc990/fastokens-0.2.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d79358faa11658d8908fbdfab321b6ae6a0ff52ae3cdce7a0fda5c30629a276", size = 3295485, upload-time = "2026-05-17T10:32:36.109Z" },
-    { url = "https://files.pythonhosted.org/packages/92/11/49afb739800b6a82af04fa1e991b68669c789ebeaa3bcbc96c2544c8ff9c/fastokens-0.2.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:cf6ae284b70d65989548a8143e1f463f31126f5650ad45ce6af05b83e4afb3e7", size = 3329524, upload-time = "2026-05-17T10:32:44.607Z" },
-    { url = "https://files.pythonhosted.org/packages/f0/d2/f9b1c01535a0ce994d30e86cf49616023246134d2a406b74c5dd49df7825/fastokens-0.2.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:40fa091ff12aa8ac00fa8e2133c7256b96209827b88de5120eb1361600d7f68f", size = 3149194, upload-time = "2026-05-17T10:32:47.596Z" },
-    { url = "https://files.pythonhosted.org/packages/bd/0b/95b2e5d25efc684988918658c90445e0e5f1dc4fee7dae5edd586b4feaba/fastokens-0.2.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:f643f4739b20c86ed21e2c46e7fe203e6a621e21124074f138819423882e3aba", size = 3401011, upload-time = "2026-05-17T10:32:50.13Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/31/80b74196524d7a9576fa96685a39ee1f333ffd12eb29dbeb5e65c6e3dcb2/fastokens-0.2.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3cd4effdc6610cbee0be717032652a1997976f1a2f21949435d925c7982cf6f3", size = 3593240, upload-time = "2026-05-17T10:32:52.661Z" },
-    { url = "https://files.pythonhosted.org/packages/59/96/12814aa955b7277adb07a8b36176181f247f3b9cf973d05b34294df6a72c/fastokens-0.2.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d29eb1d608977d63fc4679d6ce2b360fdb8b0ea8d66a0eaf804f7ac31ba52a3a", size = 3089011, upload-time = "2026-05-17T10:32:43.16Z" },
-    { url = "https://files.pythonhosted.org/packages/c0/05/553b59c6a8542ad7d443306fc1fd7bb012e4aae459ccf9ab422ea4c681b7/fastokens-0.2.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:f8ec7a20322aafe245201727c7a507b87333c51dc580940b9ee5456dbfe3963c", size = 2992964, upload-time = "2026-05-17T10:32:39.992Z" },
-    { url = "https://files.pythonhosted.org/packages/13/66/a53c2309003510de7c189ba8a9d2ea5a1833e9b447d9f69b95449c3dfe34/fastokens-0.2.0-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dc4c0a25726620b1cc4cfb7e9dc1a53b17bbe3f2f9adbaa57b8375b5f36ac5d4", size = 3316289, upload-time = "2026-05-17T10:32:34.557Z" },
-    { url = "https://files.pythonhosted.org/packages/b4/54/e0e4318ee1ad0b5196df72cf93615bba0b81f7869d659a44ccc475969151/fastokens-0.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:160253f8d30747cf66e7ed895c513e16f7b173dd9e644fa641e2eecbd43a616a", size = 3303534, upload-time = "2026-05-17T10:32:37.462Z" },
-    { url = "https://files.pythonhosted.org/packages/64/44/bfff90e4b1a43c17edf7305dafbd56dc992bbe832cc08da78f1f50104c2d/fastokens-0.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:b61b9fe5b41e0bb36ad86e7551dc53293c9833909ef07b1cdbaa2055b06c3b3e", size = 3254096, upload-time = "2026-05-17T10:32:28.489Z" },
-    { url = "https://files.pythonhosted.org/packages/df/89/bec1a0368100c5f1134ab1ce588d71f88697fadbbad5f26bb130eda00fe8/fastokens-0.2.0-cp39-abi3-manylinux_2_28_armv7l.whl", hash = "sha256:8f138fc64e355589be43068e6996ac0bfcfd872cd75fb5793d11d06cb9c0ac9b", size = 3000177, upload-time = "2026-05-17T10:32:29.745Z" },
-    { url = "https://files.pythonhosted.org/packages/f2/fd/cbe8a033e2ef565ce5aef4e02316054b04adad96ee9805cba74a50a84ed9/fastokens-0.2.0-cp39-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:c1a6cd93d16e880d81deb12519cc2eb20149d7423321cf00a34ce25ea9c9ef15", size = 3626561, upload-time = "2026-05-17T10:32:32.087Z" },
-    { url = "https://files.pythonhosted.org/packages/05/bf/1cad7f0e8d03f5f5b2b417cda8859e4d968d2eebdca0cd336b23d7dbbdbb/fastokens-0.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:01b9bdba818d7b2c67d57d9917faf7a1dad32ece0734440130de94ad768b819f", size = 3336689, upload-time = "2026-05-17T10:32:46.21Z" },
-    { url = "https://files.pythonhosted.org/packages/05/56/0030c6e67c60ee88b74336d1bb03eb556b2afa60445268921ad02d9cb3b3/fastokens-0.2.0-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:9aec70b85a06bf2ac95dd76e07e404040dbcfb06eb165f29bc61ef2ab68dc87a", size = 3154666, upload-time = "2026-05-17T10:32:48.835Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/5b/02cc5d501ccc2fbd4b068a7aa34a35347bdb3a4189b96af647ef0407af87/fastokens-0.2.0-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:8c41f5278ac53853249c1170d653ee0afaa0906c9bc32c8d25c9443b6d667298", size = 3406657, upload-time = "2026-05-17T10:32:51.392Z" },
-    { url = "https://files.pythonhosted.org/packages/97/d7/f5fb2564e16b1f5733e05c41b090f95a3fe767f6b888ba7d864193bc5447/fastokens-0.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d068bc50082ad67d5d542847075f1f7b8d10f703274e56e241312f18b4d9e772", size = 3598064, upload-time = "2026-05-17T10:32:54.109Z" },
-    { url = "https://files.pythonhosted.org/packages/0a/39/2098de2aa01c3e2ef62b066926d70015f88845e3ac1a976ba1cc3a363c05/fastokens-0.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:5729c44ce1d60cb03e506731dcb17d8c2d69c267098c3ff35ca8be37d618714d", size = 2767905, upload-time = "2026-05-17T10:32:56.759Z" },
-]
-
 [[package]]
 name = "filelock"
 version = "3.29.0"
@@ -1380,7 +1350,6 @@ wheels = [
 name = "renderers"
 source = { editable = "." }
 dependencies = [
-    { name = "fastokens" },
     { name = "jinja2" },
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -1405,7 +1374,6 @@ dev = [
 
 [package.metadata]
 requires-dist = [
-    { name = "fastokens", specifier = ">=0.2.0" },
     { name = "jinja2" },
     { name = "numpy" },
     { name = "openai", specifier = ">=1.108.1" },