@@ -26,7 +26,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
2626from diffusers import BlockRefinementScheduler, LLaDA2Pipeline
2727
2828model_id = "inclusionAI/LLaDA2.1-mini"
29- model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, dtype=torch.bfloat16, device_map="auto")
29+ model = AutoModelForCausalLM.from_pretrained(
30+     model_id, trust_remote_code=True, dtype=torch.bfloat16, device_map="auto"
31+ )
3032tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
3133scheduler = BlockRefinementScheduler()
3234
@@ -46,18 +48,21 @@ print(output.texts[0])
4648
4749## Callbacks
4850
49- Callbacks run after each refinement step and can inspect or modify the current tokens.
51+ Callbacks run after each refinement step. Pass `callback_on_step_end_tensor_inputs` to select which tensors are
52+ included in `callback_kwargs`. In the current implementation, `block_x` (the sequence window being refined) and
53+ `transfer_index` (mask-filling commit mask) are provided; return `{"block_x": ...}` from the callback to replace the
54+ window.
5055
5156``` py
5257def on_step_end(pipe, step, timestep, callback_kwargs):
53-     cur_x = callback_kwargs["cur_x"]
54-     # Inspect or modify `cur_x` here.
55-     return {"cur_x": cur_x}
58+     block_x = callback_kwargs["block_x"]
59+     # Inspect or modify `block_x` here.
60+     return {"block_x": block_x}
5661
5762out = pipe(
5863    prompt="Write a short poem.",
5964    callback_on_step_end=on_step_end,
60-     callback_on_step_end_tensor_inputs=["cur_x"],
65+     callback_on_step_end_tensor_inputs=["block_x"],
6166)
6267```
6368
@@ -68,11 +73,13 @@ LLaDA2.1 models support two modes:
6873| Mode | `threshold` | `editing_threshold` | `max_post_steps` |
6974|------|-------------|---------------------|------------------|
7075| Quality | 0.7 | 0.5 | 16 |
71- | Speed | 0.5 | 0.0 | 16 |
76+ | Speed | 0.5 | `None` | 16 |
77+
78+ Pass `editing_threshold=None`, `0.0`, or a negative value to turn off post-mask editing.
7279
73- For LLaDA2.0 models, disable editing by passing `editing_threshold=None`.
80+ For LLaDA2.0 models, disable editing by passing `editing_threshold=None` or `0.0`.
7481
75- For all models: `block_length=32`, `temperature=0.0`, `steps=32`.
82+ For all models: `block_length=32`, `temperature=0.0`, `num_inference_steps=32`.
7683
7784## LLaDA2Pipeline
7885[[ autodoc]] LLaDA2Pipeline
0 commit comments