Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2f10d83
feat: add Qwen Image 2512 txt2img support
lstein Mar 27, 2026
8b9e36f
chore: ruff & lint:prettier
lstein Mar 28, 2026
25b45ca
fix: remove unused frontend exports (zQwenImageVariantType, isQwenIma…
lstein Mar 28, 2026
66e9f87
fix: make QwenImage variant optional to fix model detection tags
lstein Mar 28, 2026
556db02
fix: restore Qwen Image Edit starter models with distinct variable names
lstein Mar 28, 2026
f3dfbd5
fix: restore correct GGUF filenames in Qwen Image Edit starter model …
lstein Mar 28, 2026
6a19ad5
fix: skip reference images in graph for non-edit Qwen Image models
lstein Mar 28, 2026
058df87
fix: only set zero_cond_t=True for edit-variant GGUF models
lstein Mar 28, 2026
b41bee7
fix: recall Qwen Image advanced params (component source, quantizatio…
lstein Mar 28, 2026
18d038c
fix: remove unnecessary async from QwenImageComponentSource parse
lstein Mar 28, 2026
2aeb2fd
fix: prevent Flux LoRAs from being detected as Qwen Image LoRAs
lstein Mar 28, 2026
5c6ca30
chore: ruff
lstein Mar 28, 2026
2fcedc7
fix: don't force reference image to output aspect ratio in VAE encoding
lstein Mar 28, 2026
6963cd9
Fix SIGINT shutdown during active inference (#8993)
JPPhoto Mar 28, 2026
89d4296
fix: clip denoise schedule by denoising_start/end, block GGUF enqueue…
lstein Mar 29, 2026
bf9addb
refactor: rename Qwen Image Edit node titles/descriptions to Qwen Image
lstein Mar 30, 2026
b24e170
fix: skip negative text encoder node when CFG <= 1
lstein Mar 30, 2026
c03f1aa
feat: support Kohya-format Qwen Image LoRAs (LoKR)
lstein Mar 30, 2026
13a4b76
fix: generate noise in float32 instead of float16
lstein Apr 1, 2026
ed268b1
Feature (frontend): Add invisible watermark decoder node. (#8967)
lstein Apr 4, 2026
474d85e
feat: add bulk reidentify action for models (#8951) (#8952)
Pfannkuchensack Apr 4, 2026
07c9436
chore(frontend): typegen
lstein Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions invokeai/app/api/routers/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,19 @@ class BulkDeleteModelsResponse(BaseModel):
failed: List[dict] = Field(description="List of failed deletions with error messages")


class BulkReidentifyModelsRequest(BaseModel):
    """Request body for bulk model reidentification."""

    # Model keys as stored in the model record store; each referenced model is re-probed on disk.
    keys: List[str] = Field(description="List of model keys to reidentify")


class BulkReidentifyModelsResponse(BaseModel):
    """Response body for bulk model reidentification."""

    # Keys whose configs were successfully re-probed and replaced in the store.
    succeeded: List[str] = Field(description="List of successfully reidentified model keys")
    # One entry per failure, shaped {"key": <model key>, "error": <message>}.
    failed: List[dict] = Field(description="List of failed reidentifications with error messages")


@model_manager_router.post(
"/i/bulk_delete",
operation_id="bulk_delete_models",
Expand Down Expand Up @@ -557,6 +570,67 @@ async def bulk_delete_models(
return BulkDeleteModelsResponse(deleted=deleted, failed=failed)


@model_manager_router.post(
    "/i/bulk_reidentify",
    operation_id="bulk_reidentify_models",
    responses={
        200: {"description": "Models reidentified (possibly with some failures)"},
    },
    status_code=200,
)
async def bulk_reidentify_models(
    current_admin: AdminUserOrDefault,
    request: BulkReidentifyModelsRequest = Body(description="List of model keys to reidentify"),
) -> BulkReidentifyModelsResponse:
    """
    Reidentify multiple models by re-probing their weights files.

    Each key is looked up in the model record store, its on-disk weights are re-probed to
    produce a fresh config, user-editable fields (name, description, cover image, trigger
    phrases, source) are carried over from the old config, and the record is replaced.
    A failure on one key does not abort the rest.

    Returns a list of successfully reidentified keys and failed reidentifications with error messages.
    """
    logger = ApiDependencies.invoker.services.logger
    store = ApiDependencies.invoker.services.model_manager.store
    models_path = ApiDependencies.invoker.services.configuration.models_path

    succeeded: List[str] = []
    failed: List[dict] = []

    for key in request.keys:
        try:
            config = store.get_model(key)
            # Stored paths may be absolute (already under models_path) or relative to it.
            config_path = pathlib.Path(config.path)
            model_path = config_path if config_path.is_relative_to(models_path) else models_path / config.path
            mod = ModelOnDisk(model_path)
            result = ModelConfigFactory.from_model_on_disk(mod)
            if result.config is None:
                raise InvalidModelException("Unable to identify model format")

            # Retain user-editable fields from the original config so reidentification
            # does not clobber user customizations or the record's identity/source.
            result.config.path = config.path
            result.config.key = config.key
            result.config.name = config.name
            result.config.description = config.description
            result.config.cover_image = config.cover_image
            if hasattr(config, "trigger_phrases") and hasattr(result.config, "trigger_phrases"):
                result.config.trigger_phrases = config.trigger_phrases
            result.config.source = config.source
            result.config.source_type = config.source_type

            store.replace_model(config.key, result.config)
            succeeded.append(key)
            logger.info(f"Reidentified model: {key}")
        except Exception as e:
            # Single handler: the original had separate UnknownModelException and
            # Exception clauses with identical bodies. Unknown keys and probe/replace
            # failures are all recorded per-key and the loop continues.
            logger.error(f"Failed to reidentify model {key}: {str(e)}")
            failed.append({"key": key, "error": str(e)})

    logger.info(f"Bulk reidentify completed: {len(succeeded)} succeeded, {len(failed)} failed")
    return BulkReidentifyModelsResponse(succeeded=succeeded, failed=failed)


@model_manager_router.delete(
"/i/{key}/image",
operation_id="delete_model_image",
Expand Down
21 changes: 20 additions & 1 deletion invokeai/app/invocations/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.primitives import ImageOutput, StringOutput
from invokeai.app.services.image_records.image_records_common import ImageCategory
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.misc import SEED_MAX
Expand Down Expand Up @@ -581,6 +581,25 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
return ImageOutput.build(image_dto)


@invocation(
    "decode_watermark",
    title="Decode Invisible Watermark",
    tags=["image", "watermark"],
    category="image",
    version="1.0.0",
)
class DecodeInvisibleWatermarkInvocation(BaseInvocation):
    """Decode an invisible watermark from an image.

    Counterpart to the watermark-embedding step: loads the referenced image and
    extracts the embedded payload as a string output.
    """

    # The image to scan for an embedded invisible watermark.
    image: ImageField = InputField(description="The image to decode the watermark from")
    # Expected payload length in bytes; passed through to the decoder.
    length: int = InputField(default=8, description="The expected watermark length in bytes")

    def invoke(self, context: InvocationContext) -> StringOutput:
        # Fetch the PIL image by name, decode, and return the payload string.
        image = context.images.get_pil(self.image.image_name)
        watermark = InvisibleWatermark.decode_watermark(image, self.length)
        return StringOutput(value=watermark)


@invocation(
"mask_edge",
title="Mask Edge",
Expand Down
96 changes: 64 additions & 32 deletions invokeai/app/invocations/qwen_image_denoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@

@invocation(
"qwen_image_denoise",
title="Denoise - Qwen Image Edit",
title="Denoise - Qwen Image",
tags=["image", "qwen_image"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Run the denoising process with a Qwen Image Edit model."""
"""Run the denoising process with a Qwen Image model."""

# If latents is provided, this means we are doing image-to-image.
latents: Optional[LatentsField] = InputField(
Expand Down Expand Up @@ -132,7 +132,7 @@ def _get_noise(
seed: int,
) -> torch.Tensor:
rand_device = "cpu"
rand_dtype = torch.float16
rand_dtype = torch.float32

return torch.randn(
batch_size,
Expand Down Expand Up @@ -270,7 +270,7 @@ def _run_diffusion(self, context: InvocationContext):

# Try to load the scheduler config from the model's directory (Diffusers models
# have a scheduler/ subdir). For GGUF models this path doesn't exist, so fall
# back to instantiating the scheduler with the known Qwen Image Edit defaults.
# back to instantiating the scheduler with the known Qwen Image defaults.
model_path = context.models.get_absolute_path(context.models.get_config(self.transformer.transformer))
scheduler_path = model_path / "scheduler"
if scheduler_path.is_dir() and (scheduler_path / "scheduler_config.json").exists():
Expand Down Expand Up @@ -304,8 +304,19 @@ def _run_diffusion(self, context: InvocationContext):
init_sigmas = np.linspace(1.0, 1.0 / self.steps, self.steps).tolist()
scheduler.set_timesteps(sigmas=init_sigmas, mu=mu, device=device)

timesteps_sched = scheduler.timesteps
sigmas_sched = scheduler.sigmas
# Clip the schedule based on denoising_start/denoising_end to support img2img strength.
# The scheduler's sigmas go from high (noisy) to 0 (clean). We clip to the fractional range.
sigmas_sched = scheduler.sigmas # (N+1,) including terminal 0
if self.denoising_start > 0 or self.denoising_end < 1:
total_sigmas = len(sigmas_sched) - 1 # exclude terminal
start_idx = int(round(self.denoising_start * total_sigmas))
end_idx = int(round(self.denoising_end * total_sigmas))
sigmas_sched = sigmas_sched[start_idx : end_idx + 1] # +1 to include the next sigma for dt
# Rebuild timesteps from clipped sigmas (exclude terminal 0)
timesteps_sched = sigmas_sched[:-1] * scheduler.config.num_train_timesteps
else:
timesteps_sched = scheduler.timesteps

total_steps = len(timesteps_sched)

cfg_scale = self._prepare_cfg_scale(total_steps)
Expand Down Expand Up @@ -353,29 +364,44 @@ def _run_diffusion(self, context: InvocationContext):
# Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4)
latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width)

# Pack reference image latents and concatenate along the sequence dimension.
# The edit transformer always expects [noisy_patches ; ref_patches] in its sequence.
if ref_latents is not None:
_, ref_ch, rh, rw = ref_latents.shape
if rh != latent_height or rw != latent_width:
ref_latents = torch.nn.functional.interpolate(
ref_latents, size=(latent_height, latent_width), mode="bilinear"
# Determine whether the model uses reference latent conditioning (zero_cond_t).
# Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence.
# Txt2img models (zero_cond_t=False) only take noisy patches.
has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr(
transformer_info.model.config, "zero_cond_t", False
)
use_ref_latents = has_zero_cond_t

ref_latents_packed = None
if use_ref_latents:
if ref_latents is not None:
_, ref_ch, rh, rw = ref_latents.shape
if rh != latent_height or rw != latent_width:
ref_latents = torch.nn.functional.interpolate(
ref_latents, size=(latent_height, latent_width), mode="bilinear"
)
else:
# No reference image provided — use zeros so the model still gets the
# expected sequence layout.
ref_latents = torch.zeros(
1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
)
ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)

# img_shapes tells the transformer the spatial layout of patches.
if use_ref_latents:
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
(1, latent_height // 2, latent_width // 2),
]
]
else:
# No reference image provided — use zeros so the model still gets the
# expected sequence layout.
ref_latents = torch.zeros(
1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
)
ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)

# img_shapes tells the transformer the spatial layout of noisy and reference patches.
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
(1, latent_height // 2, latent_width // 2),
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
]
]
]

# Prepare inpaint extension (operates in 4D space, so unpack/repack around it)
inpaint_mask = self._prep_inpaint_mask(context, noise) # noise has the right 4D shape
Expand Down Expand Up @@ -422,14 +448,16 @@ def _run_diffusion(self, context: InvocationContext):
)
)

scheduler.set_begin_index(0)

for step_idx, t in enumerate(tqdm(timesteps_sched)):
# The pipeline passes timestep / 1000 to the transformer
timestep = t.expand(latents.shape[0]).to(inference_dtype)

# Concatenate noisy and reference patches along the sequence dim
model_input = torch.cat([latents, ref_latents_packed], dim=1)
# For edit models: concatenate noisy and reference patches along the sequence dim
# For txt2img models: just use noisy patches
if ref_latents_packed is not None:
model_input = torch.cat([latents, ref_latents_packed], dim=1)
else:
model_input = latents

noise_pred_cond = transformer(
hidden_states=model_input,
Expand Down Expand Up @@ -457,8 +485,12 @@ def _run_diffusion(self, context: InvocationContext):
else:
noise_pred = noise_pred_cond

# Use the scheduler's step method — exactly matching the pipeline
latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
# Euler step using the (possibly clipped) sigma schedule
sigma_curr = sigmas_sched[step_idx]
sigma_next = sigmas_sched[step_idx + 1]
dt = sigma_next - sigma_curr
latents = latents.to(torch.float32) + dt * noise_pred.to(torch.float32)
latents = latents.to(inference_dtype)

if inpaint_extension is not None:
sigma_next = sigmas_sched[step_idx + 1].item()
Expand Down
6 changes: 3 additions & 3 deletions invokeai/app/invocations/qwen_image_image_to_latents.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@

@invocation(
"qwen_image_i2l",
title="Image to Latents - Qwen Image Edit",
title="Image to Latents - Qwen Image",
tags=["image", "latents", "vae", "i2l", "qwen_image"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates latents from an image using the Qwen Image Edit VAE."""
"""Generates latents from an image using the Qwen Image VAE."""

image: ImageField = InputField(description="The image to encode.")
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
Expand All @@ -51,7 +51,7 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso

image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
with torch.inference_mode():
# The Qwen Image Edit VAE expects 5D input: (B, C, num_frames, H, W)
# The Qwen Image VAE expects 5D input: (B, C, num_frames, H, W)
if image_tensor.dim() == 4:
image_tensor = image_tensor.unsqueeze(2)

Expand Down
6 changes: 3 additions & 3 deletions invokeai/app/invocations/qwen_image_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

@invocation(
"qwen_image_l2i",
title="Latents to Image - Qwen Image Edit",
title="Latents to Image - Qwen Image",
tags=["latents", "image", "vae", "l2i", "qwen_image"],
category="latents",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents using the Qwen Image Edit VAE."""
"""Generates an image from latents using the Qwen Image VAE."""

latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
Expand All @@ -56,7 +56,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
TorchDevice.empty_cache()

with torch.inference_mode(), tiling_context:
# The Qwen Image Edit VAE uses per-channel latents_mean / latents_std
# The Qwen Image VAE uses per-channel latents_mean / latents_std
# instead of a single scaling_factor.
# Latents are 5D: (B, C, num_frames, H, W) — the unpack from the
# denoise step already produces this shape.
Expand Down
10 changes: 5 additions & 5 deletions invokeai/app/invocations/qwen_image_lora_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

@invocation_output("qwen_image_lora_loader_output")
class QwenImageLoRALoaderOutput(BaseInvocationOutput):
"""Qwen Image Edit LoRA Loader Output"""
"""Qwen Image LoRA Loader Output"""

transformer: Optional[TransformerField] = OutputField(
default=None, description=FieldDescriptions.transformer, title="Transformer"
Expand All @@ -24,14 +24,14 @@ class QwenImageLoRALoaderOutput(BaseInvocationOutput):

@invocation(
"qwen_image_lora_loader",
title="Apply LoRA - Qwen Image Edit",
title="Apply LoRA - Qwen Image",
tags=["lora", "model", "qwen_image"],
category="model",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLoRALoaderInvocation(BaseInvocation):
"""Apply a LoRA model to a Qwen Image Edit transformer."""
"""Apply a LoRA model to a Qwen Image transformer."""

lora: ModelIdentifierField = InputField(
description=FieldDescriptions.lora_model,
Expand Down Expand Up @@ -72,14 +72,14 @@ def invoke(self, context: InvocationContext) -> QwenImageLoRALoaderOutput:

@invocation(
"qwen_image_lora_collection_loader",
title="Apply LoRA Collection - Qwen Image Edit",
title="Apply LoRA Collection - Qwen Image",
tags=["lora", "model", "qwen_image"],
category="model",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLoRACollectionLoader(BaseInvocation):
"""Applies a collection of LoRAs to a Qwen Image Edit transformer."""
"""Applies a collection of LoRAs to a Qwen Image transformer."""

loras: Optional[LoRAField | list[LoRAField]] = InputField(
default=None, description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
Expand Down
Loading
Loading