Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2f10d83
feat: add Qwen Image 2512 txt2img support
lstein Mar 27, 2026
8b9e36f
chore: ruff & lint:prettier
lstein Mar 28, 2026
25b45ca
fix: remove unused frontend exports (zQwenImageVariantType, isQwenIma…
lstein Mar 28, 2026
66e9f87
fix: make QwenImage variant optional to fix model detection tags
lstein Mar 28, 2026
556db02
fix: restore Qwen Image Edit starter models with distinct variable names
lstein Mar 28, 2026
f3dfbd5
fix: restore correct GGUF filenames in Qwen Image Edit starter model …
lstein Mar 28, 2026
6a19ad5
fix: skip reference images in graph for non-edit Qwen Image models
lstein Mar 28, 2026
058df87
fix: only set zero_cond_t=True for edit-variant GGUF models
lstein Mar 28, 2026
b41bee7
fix: recall Qwen Image advanced params (component source, quantizatio…
lstein Mar 28, 2026
18d038c
fix: remove unnecessary async from QwenImageComponentSource parse
lstein Mar 28, 2026
2aeb2fd
fix: prevent Flux LoRAs from being detected as Qwen Image LoRAs
lstein Mar 28, 2026
5c6ca30
chore: ruff
lstein Mar 28, 2026
2fcedc7
fix: don't force reference image to output aspect ratio in VAE encoding
lstein Mar 28, 2026
6963cd9
Fix SIGINT shutdown during active inference (#8993)
JPPhoto Mar 28, 2026
89d4296
fix: clip denoise schedule by denoising_start/end, block GGUF enqueue…
lstein Mar 29, 2026
bf9addb
refactor: rename Qwen Image Edit node titles/descriptions to Qwen Image
lstein Mar 30, 2026
b24e170
fix: skip negative text encoder node when CFG <= 1
lstein Mar 30, 2026
c03f1aa
feat: support Kohya-format Qwen Image LoRAs (LoKR)
lstein Mar 30, 2026
13a4b76
fix: generate noise in float32 instead of float16
lstein Apr 1, 2026
ed268b1
Feature (frontend): Add invisible watermark decoder node. (#8967)
lstein Apr 4, 2026
474d85e
feat: add bulk reidentify action for models (#8951) (#8952)
Pfannkuchensack Apr 4, 2026
07c9436
chore(frontend): typegen
lstein Apr 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 74 additions & 0 deletions invokeai/app/api/routers/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,19 @@ class BulkDeleteModelsResponse(BaseModel):
failed: List[dict] = Field(description="List of failed deletions with error messages")


class BulkReidentifyModelsRequest(BaseModel):
    """Request body for bulk model reidentification."""

    # Model keys as stored in the model record store; each referenced model is re-probed on disk.
    keys: List[str] = Field(description="List of model keys to reidentify")


class BulkReidentifyModelsResponse(BaseModel):
    """Response body for bulk model reidentification."""

    # Keys whose configs were successfully re-probed and replaced in the store.
    succeeded: List[str] = Field(description="List of successfully reidentified model keys")
    # One entry per failure, shaped {"key": <model key>, "error": <message>}.
    failed: List[dict] = Field(description="List of failed reidentifications with error messages")


@model_manager_router.post(
"/i/bulk_delete",
operation_id="bulk_delete_models",
Expand Down Expand Up @@ -557,6 +570,67 @@ async def bulk_delete_models(
return BulkDeleteModelsResponse(deleted=deleted, failed=failed)


@model_manager_router.post(
    "/i/bulk_reidentify",
    operation_id="bulk_reidentify_models",
    responses={
        200: {"description": "Models reidentified (possibly with some failures)"},
    },
    status_code=200,
)
async def bulk_reidentify_models(
    current_admin: AdminUserOrDefault,
    request: BulkReidentifyModelsRequest = Body(description="List of model keys to reidentify"),
) -> BulkReidentifyModelsResponse:
    """
    Reidentify multiple models by re-probing their weights files.

    Each key is looked up in the model record store, its on-disk weights are re-probed to
    produce a fresh config, user-editable fields (name, description, cover image, trigger
    phrases, source) are carried over from the old config, and the record is replaced.
    A failure on one key does not abort the rest.

    Returns a list of successfully reidentified keys and failed reidentifications with error messages.
    """
    logger = ApiDependencies.invoker.services.logger
    store = ApiDependencies.invoker.services.model_manager.store
    models_path = ApiDependencies.invoker.services.configuration.models_path

    succeeded: List[str] = []
    failed: List[dict] = []

    for key in request.keys:
        try:
            config = store.get_model(key)
            # Stored paths may be absolute (already under models_path) or relative to it.
            config_path = pathlib.Path(config.path)
            model_path = config_path if config_path.is_relative_to(models_path) else models_path / config.path
            mod = ModelOnDisk(model_path)
            result = ModelConfigFactory.from_model_on_disk(mod)
            if result.config is None:
                raise InvalidModelException("Unable to identify model format")

            # Retain user-editable fields from the original config so reidentification
            # does not clobber user customizations or the record's identity/source.
            result.config.path = config.path
            result.config.key = config.key
            result.config.name = config.name
            result.config.description = config.description
            result.config.cover_image = config.cover_image
            if hasattr(config, "trigger_phrases") and hasattr(result.config, "trigger_phrases"):
                result.config.trigger_phrases = config.trigger_phrases
            result.config.source = config.source
            result.config.source_type = config.source_type

            store.replace_model(config.key, result.config)
            succeeded.append(key)
            logger.info(f"Reidentified model: {key}")
        except Exception as e:
            # Single handler: the original had separate UnknownModelException and
            # Exception clauses with identical bodies. Unknown keys and probe/replace
            # failures are all recorded per-key and the loop continues.
            logger.error(f"Failed to reidentify model {key}: {str(e)}")
            failed.append({"key": key, "error": str(e)})

    logger.info(f"Bulk reidentify completed: {len(succeeded)} succeeded, {len(failed)} failed")
    return BulkReidentifyModelsResponse(succeeded=succeeded, failed=failed)


@model_manager_router.delete(
"/i/{key}/image",
operation_id="delete_model_image",
Expand Down
21 changes: 20 additions & 1 deletion invokeai/app/invocations/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
WithBoard,
WithMetadata,
)
from invokeai.app.invocations.primitives import ImageOutput
from invokeai.app.invocations.primitives import ImageOutput, StringOutput
from invokeai.app.services.image_records.image_records_common import ImageCategory
from invokeai.app.services.shared.invocation_context import InvocationContext
from invokeai.app.util.misc import SEED_MAX
Expand Down Expand Up @@ -581,6 +581,25 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
return ImageOutput.build(image_dto)


@invocation(
    "decode_watermark",
    title="Decode Invisible Watermark",
    tags=["image", "watermark"],
    category="image",
    version="1.0.0",
)
class DecodeInvisibleWatermarkInvocation(BaseInvocation):
    """Decode an invisible watermark from an image.

    Counterpart to the watermark-embedding step: loads the referenced image and
    extracts the embedded payload as a string output.
    """

    # The image to scan for an embedded invisible watermark.
    image: ImageField = InputField(description="The image to decode the watermark from")
    # Expected payload length in bytes; passed through to the decoder.
    length: int = InputField(default=8, description="The expected watermark length in bytes")

    def invoke(self, context: InvocationContext) -> StringOutput:
        # Fetch the PIL image by name, decode, and return the payload string.
        image = context.images.get_pil(self.image.image_name)
        watermark = InvisibleWatermark.decode_watermark(image, self.length)
        return StringOutput(value=watermark)


@invocation(
"mask_edge",
title="Mask Edge",
Expand Down
96 changes: 64 additions & 32 deletions invokeai/app/invocations/qwen_image_denoise.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@

@invocation(
"qwen_image_denoise",
title="Denoise - Qwen Image Edit",
title="Denoise - Qwen Image",
tags=["image", "qwen_image"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageDenoiseInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Run the denoising process with a Qwen Image Edit model."""
"""Run the denoising process with a Qwen Image model."""

# If latents is provided, this means we are doing image-to-image.
latents: Optional[LatentsField] = InputField(
Expand Down Expand Up @@ -132,7 +132,7 @@ def _get_noise(
seed: int,
) -> torch.Tensor:
rand_device = "cpu"
rand_dtype = torch.float16
rand_dtype = torch.float32

return torch.randn(
batch_size,
Expand Down Expand Up @@ -270,7 +270,7 @@ def _run_diffusion(self, context: InvocationContext):

# Try to load the scheduler config from the model's directory (Diffusers models
# have a scheduler/ subdir). For GGUF models this path doesn't exist, so fall
# back to instantiating the scheduler with the known Qwen Image Edit defaults.
# back to instantiating the scheduler with the known Qwen Image defaults.
model_path = context.models.get_absolute_path(context.models.get_config(self.transformer.transformer))
scheduler_path = model_path / "scheduler"
if scheduler_path.is_dir() and (scheduler_path / "scheduler_config.json").exists():
Expand Down Expand Up @@ -304,8 +304,19 @@ def _run_diffusion(self, context: InvocationContext):
init_sigmas = np.linspace(1.0, 1.0 / self.steps, self.steps).tolist()
scheduler.set_timesteps(sigmas=init_sigmas, mu=mu, device=device)

timesteps_sched = scheduler.timesteps
sigmas_sched = scheduler.sigmas
# Clip the schedule based on denoising_start/denoising_end to support img2img strength.
# The scheduler's sigmas go from high (noisy) to 0 (clean). We clip to the fractional range.
sigmas_sched = scheduler.sigmas # (N+1,) including terminal 0
if self.denoising_start > 0 or self.denoising_end < 1:
total_sigmas = len(sigmas_sched) - 1 # exclude terminal
start_idx = int(round(self.denoising_start * total_sigmas))
end_idx = int(round(self.denoising_end * total_sigmas))
sigmas_sched = sigmas_sched[start_idx : end_idx + 1] # +1 to include the next sigma for dt
# Rebuild timesteps from clipped sigmas (exclude terminal 0)
timesteps_sched = sigmas_sched[:-1] * scheduler.config.num_train_timesteps
else:
timesteps_sched = scheduler.timesteps

total_steps = len(timesteps_sched)

cfg_scale = self._prepare_cfg_scale(total_steps)
Expand Down Expand Up @@ -353,29 +364,44 @@ def _run_diffusion(self, context: InvocationContext):
# Pack latents into 2x2 patches: (B, C, H, W) -> (B, H/2*W/2, C*4)
latents = self._pack_latents(latents, 1, out_channels, latent_height, latent_width)

# Pack reference image latents and concatenate along the sequence dimension.
# The edit transformer always expects [noisy_patches ; ref_patches] in its sequence.
if ref_latents is not None:
_, ref_ch, rh, rw = ref_latents.shape
if rh != latent_height or rw != latent_width:
ref_latents = torch.nn.functional.interpolate(
ref_latents, size=(latent_height, latent_width), mode="bilinear"
# Determine whether the model uses reference latent conditioning (zero_cond_t).
# Edit models (zero_cond_t=True) expect [noisy_patches ; ref_patches] in the sequence.
# Txt2img models (zero_cond_t=False) only take noisy patches.
has_zero_cond_t = getattr(transformer_info.model, "zero_cond_t", False) or getattr(
transformer_info.model.config, "zero_cond_t", False
)
use_ref_latents = has_zero_cond_t

ref_latents_packed = None
if use_ref_latents:
if ref_latents is not None:
_, ref_ch, rh, rw = ref_latents.shape
if rh != latent_height or rw != latent_width:
ref_latents = torch.nn.functional.interpolate(
ref_latents, size=(latent_height, latent_width), mode="bilinear"
)
else:
# No reference image provided — use zeros so the model still gets the
# expected sequence layout.
ref_latents = torch.zeros(
1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
)
ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)

# img_shapes tells the transformer the spatial layout of patches.
if use_ref_latents:
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
(1, latent_height // 2, latent_width // 2),
]
]
else:
# No reference image provided — use zeros so the model still gets the
# expected sequence layout.
ref_latents = torch.zeros(
1, out_channels, latent_height, latent_width, device=device, dtype=inference_dtype
)
ref_latents_packed = self._pack_latents(ref_latents, 1, out_channels, latent_height, latent_width)

# img_shapes tells the transformer the spatial layout of noisy and reference patches.
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
(1, latent_height // 2, latent_width // 2),
img_shapes = [
[
(1, latent_height // 2, latent_width // 2),
]
]
]

# Prepare inpaint extension (operates in 4D space, so unpack/repack around it)
inpaint_mask = self._prep_inpaint_mask(context, noise) # noise has the right 4D shape
Expand Down Expand Up @@ -422,14 +448,16 @@ def _run_diffusion(self, context: InvocationContext):
)
)

scheduler.set_begin_index(0)

for step_idx, t in enumerate(tqdm(timesteps_sched)):
# The pipeline passes timestep / 1000 to the transformer
timestep = t.expand(latents.shape[0]).to(inference_dtype)

# Concatenate noisy and reference patches along the sequence dim
model_input = torch.cat([latents, ref_latents_packed], dim=1)
# For edit models: concatenate noisy and reference patches along the sequence dim
# For txt2img models: just use noisy patches
if ref_latents_packed is not None:
model_input = torch.cat([latents, ref_latents_packed], dim=1)
else:
model_input = latents

noise_pred_cond = transformer(
hidden_states=model_input,
Expand Down Expand Up @@ -457,8 +485,12 @@ def _run_diffusion(self, context: InvocationContext):
else:
noise_pred = noise_pred_cond

# Use the scheduler's step method — exactly matching the pipeline
latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
# Euler step using the (possibly clipped) sigma schedule
sigma_curr = sigmas_sched[step_idx]
sigma_next = sigmas_sched[step_idx + 1]
dt = sigma_next - sigma_curr
latents = latents.to(torch.float32) + dt * noise_pred.to(torch.float32)
latents = latents.to(inference_dtype)

if inpaint_extension is not None:
sigma_next = sigmas_sched[step_idx + 1].item()
Expand Down
6 changes: 3 additions & 3 deletions invokeai/app/invocations/qwen_image_image_to_latents.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@

@invocation(
"qwen_image_i2l",
title="Image to Latents - Qwen Image Edit",
title="Image to Latents - Qwen Image",
tags=["image", "latents", "vae", "i2l", "qwen_image"],
category="image",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates latents from an image using the Qwen Image Edit VAE."""
"""Generates latents from an image using the Qwen Image VAE."""

image: ImageField = InputField(description="The image to encode.")
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
Expand All @@ -51,7 +51,7 @@ def vae_encode(vae_info: LoadedModel, image_tensor: torch.Tensor) -> torch.Tenso

image_tensor = image_tensor.to(device=TorchDevice.choose_torch_device(), dtype=vae.dtype)
with torch.inference_mode():
# The Qwen Image Edit VAE expects 5D input: (B, C, num_frames, H, W)
# The Qwen Image VAE expects 5D input: (B, C, num_frames, H, W)
if image_tensor.dim() == 4:
image_tensor = image_tensor.unsqueeze(2)

Expand Down
6 changes: 3 additions & 3 deletions invokeai/app/invocations/qwen_image_latents_to_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@

@invocation(
"qwen_image_l2i",
title="Latents to Image - Qwen Image Edit",
title="Latents to Image - Qwen Image",
tags=["latents", "image", "vae", "l2i", "qwen_image"],
category="latents",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):
"""Generates an image from latents using the Qwen Image Edit VAE."""
"""Generates an image from latents using the Qwen Image VAE."""

latents: LatentsField = InputField(description=FieldDescriptions.latents, input=Input.Connection)
vae: VAEField = InputField(description=FieldDescriptions.vae, input=Input.Connection)
Expand All @@ -56,7 +56,7 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
TorchDevice.empty_cache()

with torch.inference_mode(), tiling_context:
# The Qwen Image Edit VAE uses per-channel latents_mean / latents_std
# The Qwen Image VAE uses per-channel latents_mean / latents_std
# instead of a single scaling_factor.
# Latents are 5D: (B, C, num_frames, H, W) — the unpack from the
# denoise step already produces this shape.
Expand Down
10 changes: 5 additions & 5 deletions invokeai/app/invocations/qwen_image_lora_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

@invocation_output("qwen_image_lora_loader_output")
class QwenImageLoRALoaderOutput(BaseInvocationOutput):
"""Qwen Image Edit LoRA Loader Output"""
"""Qwen Image LoRA Loader Output"""

transformer: Optional[TransformerField] = OutputField(
default=None, description=FieldDescriptions.transformer, title="Transformer"
Expand All @@ -24,14 +24,14 @@ class QwenImageLoRALoaderOutput(BaseInvocationOutput):

@invocation(
"qwen_image_lora_loader",
title="Apply LoRA - Qwen Image Edit",
title="Apply LoRA - Qwen Image",
tags=["lora", "model", "qwen_image"],
category="model",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLoRALoaderInvocation(BaseInvocation):
"""Apply a LoRA model to a Qwen Image Edit transformer."""
"""Apply a LoRA model to a Qwen Image transformer."""

lora: ModelIdentifierField = InputField(
description=FieldDescriptions.lora_model,
Expand Down Expand Up @@ -72,14 +72,14 @@ def invoke(self, context: InvocationContext) -> QwenImageLoRALoaderOutput:

@invocation(
"qwen_image_lora_collection_loader",
title="Apply LoRA Collection - Qwen Image Edit",
title="Apply LoRA Collection - Qwen Image",
tags=["lora", "model", "qwen_image"],
category="model",
version="1.0.0",
classification=Classification.Prototype,
)
class QwenImageLoRACollectionLoader(BaseInvocation):
"""Applies a collection of LoRAs to a Qwen Image Edit transformer."""
"""Applies a collection of LoRAs to a Qwen Image transformer."""

loras: Optional[LoRAField | list[LoRAField]] = InputField(
default=None, description="LoRA models and weights. May be a single LoRA or collection.", title="LoRAs"
Expand Down
Loading
Loading