Skip to content

Commit f48e390

Browse files
Support AliMama SD3 and Flux inpaint controlnets.
Use the ControlNetInpaintingAliMamaApply node.
1 parent 369a6dd commit f48e390

4 files changed

Lines changed: 68 additions & 11 deletions

File tree

comfy/cldm/mmdit.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ class ControlNet(comfy.ldm.modules.diffusionmodules.mmdit.MMDiT):
66
def __init__(
77
self,
88
num_blocks = None,
9+
control_latent_channels = None,
910
dtype = None,
1011
device = None,
1112
operations = None,
@@ -17,10 +18,13 @@ def __init__(
1718
for _ in range(len(self.joint_blocks)):
1819
self.controlnet_blocks.append(operations.Linear(self.hidden_size, self.hidden_size, device=device, dtype=dtype))
1920

21+
if control_latent_channels is None:
22+
control_latent_channels = self.in_channels
23+
2024
self.pos_embed_input = comfy.ldm.modules.diffusionmodules.mmdit.PatchEmbed(
2125
None,
2226
self.patch_size,
23-
self.in_channels,
27+
control_latent_channels,
2428
self.hidden_size,
2529
bias=True,
2630
strict_img_size=False,

comfy/controlnet.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,19 @@ def __init__(self, device=None):
7979
self.previous_controlnet = None
8080
self.extra_conds = []
8181
self.strength_type = StrengthType.CONSTANT
82+
self.concat_mask = False
83+
self.extra_concat_orig = []
84+
self.extra_concat = None
8285

83-
def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0), vae=None):
86+
def set_cond_hint(self, cond_hint, strength=1.0, timestep_percent_range=(0.0, 1.0), vae=None, extra_concat=None):
    """Store the conditioning hint and its application settings on this control.

    Args:
        cond_hint: The hint to store as ``cond_hint_original``.
        strength: Value stored as ``self.strength``.
        timestep_percent_range: ``(start, end)`` pair stored as
            ``self.timestep_percent_range``.
        vae: Stored as ``self.vae`` only when ``self.latent_format`` is set.
        extra_concat: Optional sequence of extra inputs kept in
            ``self.extra_concat_orig``. ``None`` (the default) means no
            extras.

    Returns:
        ``self``, so the call can be chained.
    """
    self.cond_hint_original = cond_hint
    self.strength = strength
    self.timestep_percent_range = timestep_percent_range
    if self.latent_format is not None:
        self.vae = vae

    # A None sentinel replaces the former mutable ``[]`` default, and the
    # caller's sequence is copied so later appends never leak back into it.
    self.extra_concat_orig = [] if extra_concat is None else list(extra_concat)

    # A control that concatenates a mask but was given none falls back to a
    # single 1x1x1x1 tensor holding 1.0.
    if self.concat_mask and not self.extra_concat_orig:
        self.extra_concat_orig.append(torch.tensor([[[[1.0]]]]))
    return self
9096

9197
def pre_run(self, model, percent_to_timestep_function):
@@ -100,9 +106,9 @@ def set_previous_controlnet(self, controlnet):
100106
def cleanup(self):
    """Reset per-run state on this control and on any chained previous control."""
    previous = self.previous_controlnet
    if previous is not None:
        # Clean the chained control before resetting this one.
        previous.cleanup()

    # Drop the stored hint, the extra concat data and the timestep range.
    self.cond_hint = self.extra_concat = self.timestep_range = None
107113

108114
def get_models(self):
@@ -123,6 +129,8 @@ def copy_to(self, c):
123129
c.vae = self.vae
124130
c.extra_conds = self.extra_conds.copy()
125131
c.strength_type = self.strength_type
132+
c.concat_mask = self.concat_mask
133+
c.extra_concat_orig = self.extra_concat_orig.copy()
126134

127135
def inference_memory_requirements(self, dtype):
128136
if self.previous_controlnet is not None:
@@ -175,7 +183,7 @@ def set_extra_arg(self, argument, value=None):
175183

176184

177185
class ControlNet(ControlBase):
178-
def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, device=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT):
186+
def __init__(self, control_model=None, global_average_pooling=False, compression_ratio=8, latent_format=None, device=None, load_device=None, manual_cast_dtype=None, extra_conds=["y"], strength_type=StrengthType.CONSTANT, concat_mask=False):
179187
super().__init__(device)
180188
self.control_model = control_model
181189
self.load_device = load_device
@@ -189,6 +197,7 @@ def __init__(self, control_model=None, global_average_pooling=False, compression
189197
self.latent_format = latent_format
190198
self.extra_conds += extra_conds
191199
self.strength_type = strength_type
200+
self.concat_mask = concat_mask
192201

193202
def get_control(self, x_noisy, t, cond, batched_number):
194203
control_prev = None
@@ -220,6 +229,13 @@ def get_control(self, x_noisy, t, cond, batched_number):
220229
comfy.model_management.load_models_gpu(loaded_models)
221230
if self.latent_format is not None:
222231
self.cond_hint = self.latent_format.process_in(self.cond_hint)
232+
if len(self.extra_concat_orig) > 0:
233+
to_concat = []
234+
for c in self.extra_concat_orig:
235+
c = comfy.utils.common_upscale(c, self.cond_hint.shape[3], self.cond_hint.shape[2], self.upscale_algorithm, "center")
236+
to_concat.append(comfy.utils.repeat_to_batch_size(c, self.cond_hint.shape[0]))
237+
self.cond_hint = torch.cat([self.cond_hint] + to_concat, dim=1)
238+
223239
self.cond_hint = self.cond_hint.to(device=self.device, dtype=dtype)
224240
if x_noisy.shape[0] != self.cond_hint.shape[0]:
225241
self.cond_hint = broadcast_image_to(self.cond_hint, x_noisy.shape[0], batched_number)
@@ -410,12 +426,17 @@ def load_controlnet_mmdit(sd):
410426
for k in sd:
411427
new_sd[k] = sd[k]
412428

413-
control_model = comfy.cldm.mmdit.ControlNet(num_blocks=num_blocks, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
429+
concat_mask = False
430+
control_latent_channels = new_sd.get("pos_embed_input.proj.weight").shape[1]
431+
if control_latent_channels == 17: #inpaint controlnet
432+
concat_mask = True
433+
434+
control_model = comfy.cldm.mmdit.ControlNet(num_blocks=num_blocks, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
414435
control_model = controlnet_load_state_dict(control_model, new_sd)
415436

416437
latent_format = comfy.latent_formats.SD3()
417438
latent_format.shift_factor = 0 #SD3 controlnet weirdness
418-
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
439+
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype)
419440
return control
420441

421442

@@ -450,13 +471,16 @@ def load_controlnet_flux_instantx(sd):
450471
num_union_modes = new_sd[union_cnet].shape[0]
451472

452473
control_latent_channels = new_sd.get("pos_embed_input.weight").shape[1] // 4
474+
concat_mask = False
475+
if control_latent_channels == 17:
476+
concat_mask = True
453477

454478
control_model = comfy.ldm.flux.controlnet.ControlNetFlux(latent_input=True, num_union_modes=num_union_modes, control_latent_channels=control_latent_channels, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config)
455479
control_model = controlnet_load_state_dict(control_model, new_sd)
456480

457481
latent_format = comfy.latent_formats.Flux()
458482
extra_conds = ['y', 'guidance']
459-
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
483+
control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds)
460484
return control
461485

462486
def convert_mistoline(sd):

comfy_extras/nodes_controlnet.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
from comfy.cldm.control_types import UNION_CONTROLNET_TYPES
2+
import nodes
3+
import comfy.utils
24

35
class SetUnionControlNetType:
46
@classmethod
@@ -22,6 +24,33 @@ def set_controlnet_type(self, control_net, type):
2224

2325
return (control_net,)
2426

27+
class ControlNetInpaintingAliMamaApply(nodes.ControlNetApplyAdvanced):
    """Apply an inpainting controlnet using an image and a mask.

    The mask is inverted, the image is multiplied by the resized and rounded
    inverted mask, and the inverted mask itself is handed to
    ``apply_controlnet`` through ``extra_concat``.
    """

    FUNCTION = "apply_inpaint_controlnet"

    CATEGORY = "conditioning/controlnet"

    @classmethod
    def INPUT_TYPES(s):
        """Return the required inputs of the node."""
        required = {
            "positive": ("CONDITIONING", ),
            "negative": ("CONDITIONING", ),
            "control_net": ("CONTROL_NET", ),
            "vae": ("VAE", ),
            "image": ("IMAGE", ),
            "mask": ("MASK", ),
            "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
            "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}),
            "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}),
        }
        return {"required": required}

    def apply_inpaint_controlnet(self, positive, negative, control_net, vae, image, mask, strength, start_percent, end_percent):
        """Mask the image and delegate to ``apply_controlnet``.

        Returns whatever ``apply_controlnet`` returns.
        """
        # Reshape the mask to (-1, 1, H, W) and invert it.
        mask_h, mask_w = mask.shape[-2], mask.shape[-1]
        inverted = 1.0 - mask.reshape((-1, 1, mask_h, mask_w))

        # Resize to the image's dims 2 and 1, then round.
        # NOTE(review): presumably image is (batch, height, width, channels) - confirm.
        resized = comfy.utils.common_upscale(inverted, image.shape[2], image.shape[1], "bilinear", "center")
        binary = resized.round()

        # Move the single mask channel last and repeat it across the image channels.
        per_channel = binary.movedim(1, -1).repeat(1, 1, 1, image.shape[3])
        masked_image = image * per_channel

        # The un-resized inverted mask is what gets concatenated by the controlnet.
        return self.apply_controlnet(positive, negative, control_net, masked_image, strength, start_percent, end_percent, vae=vae, extra_concat=[inverted])
50+
51+
52+
2553
NODE_CLASS_MAPPINGS = {
2654
"SetUnionControlNetType": SetUnionControlNetType,
55+
"ControlNetInpaintingAliMamaApply": ControlNetInpaintingAliMamaApply,
2756
}

nodes.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -824,7 +824,7 @@ def INPUT_TYPES(s):
824824

825825
CATEGORY = "conditioning/controlnet"
826826

827-
def apply_controlnet(self, positive, negative, control_net, image, strength, start_percent, end_percent, vae=None):
827+
def apply_controlnet(self, positive, negative, control_net, image, strength, start_percent, end_percent, vae=None, extra_concat=[]):
828828
if strength == 0:
829829
return (positive, negative)
830830

@@ -841,7 +841,7 @@ def apply_controlnet(self, positive, negative, control_net, image, strength, sta
841841
if prev_cnet in cnets:
842842
c_net = cnets[prev_cnet]
843843
else:
844-
c_net = control_net.copy().set_cond_hint(control_hint, strength, (start_percent, end_percent), vae)
844+
c_net = control_net.copy().set_cond_hint(control_hint, strength, (start_percent, end_percent), vae=vae, extra_concat=extra_concat)
845845
c_net.set_previous_controlnet(prev_cnet)
846846
cnets[prev_cnet] = c_net
847847

0 commit comments

Comments
 (0)