From 62f2dd2e9382610eb21a1310a9cb19222c7c4031 Mon Sep 17 00:00:00 2001 From: Juan Treminio Date: Sat, 23 May 2026 12:53:06 -0500 Subject: [PATCH 1/6] Adds Lens support --- docs/Model Support.md | 17 ++++++++++++ .../ComfyUIAPIAbstractBackend.cs | 1 + .../ComfyUIBackend/WorkflowGenerator.cs | 2 +- .../WorkflowGeneratorModelSupport.cs | 26 ++++++++++++++++--- src/Text2Image/T2IModelClassSorter.cs | 6 +++++ src/Text2Image/T2IParamTypes.cs | 5 +++- 6 files changed, 52 insertions(+), 5 deletions(-) diff --git a/docs/Model Support.md b/docs/Model Support.md index 9be1a8a86..b0010bc4e 100644 --- a/docs/Model Support.md +++ b/docs/Model Support.md @@ -20,6 +20,7 @@ [Anima](#anima) | DiT | 2026 | Circlestone Labs | 2B | WTF | Modern, very small, decent for anime | [ERNIE](#ernie) | DiT | 2026 | Baidu | 8B | Minimal | Modern, intelligent, good quality, fast | [HiDream O1](#hidream-o1) | "Pixel UiT" | 2026 | HiDream | 8B | Minimal | Modern, intelligent, fast, decent quality | +[Lens](#lens) | MMDiT | 2026 | Microsoft | 3.8B | Minimal | Modern, Great Quality, lightweight | Old or bad options also tracked listed via [Obscure Model Support](/docs/Obscure%20Model%20Support.md): @@ -618,6 +619,22 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended - **Dev Lora:** - A dev lora can be downloaded here [Kijai/hidream-O1-image_comfy](). It allows use of the base model with the distilled behavior from the Dev model. 8 steps will generate a coherent image of lower quality, 16 steps seems closer to original quality. Use CFG Scale 1. +# Lens + +- Microsoft's [Lens]() is supported in SwarmUI! +- It is a 3.8B MMDiT model, with a base model and an official turbo distill designed to run fast. + - The "Turbo" model (in fat BF16) can be downloaded here [Comfy-Org/Lens - turbo]() + - Or the base version (in fat BF16) [Comfy-Org/Lens - base]() + - Save in `diffusion_models` +- Uses the Flux.2 VAE, will be downloaded and handled automatically +- Uses the GPT-OSS 20B text encoder, will be downloaded and handled automatically +- **Parameters:** + - **Sampler:** Default is fine. + - **Scheduler:** Default is fine. + - **CFG Scale:** For Turbo, `1`, for base normal CFG ranges (around `5`) + - **Steps:** For Turbo, `4` is recommended, `8` works well. For Base, `20` as normal. + - **Resolution:** Side length `1440` is the standard. + # Video Models - Video models are documented in [Video Model Support](/docs/Video%20Model%20Support.md). diff --git a/src/BuiltinExtensions/ComfyUIBackend/ComfyUIAPIAbstractBackend.cs b/src/BuiltinExtensions/ComfyUIBackend/ComfyUIAPIAbstractBackend.cs index 02176700b..abf94e193 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/ComfyUIAPIAbstractBackend.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/ComfyUIAPIAbstractBackend.cs @@ -1080,6 +1080,7 @@ void copyParam(T2IRegisteredParam param) copyParam(T2IParamTypes.QwenModel); copyParam(T2IParamTypes.MistralModel); copyParam(T2IParamTypes.GemmaModel); + copyParam(T2IParamTypes.GptOssModel); } WorkflowGenerator wg = new() { UserInput = input, ModelFolderFormat = ModelFolderFormat, Features = [.. SupportedFeatures] }; JObject workflow = wg.Generate(); diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs index 9ec850097..95ffd2772 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs @@ -957,7 +957,7 @@ public string CreateKSampler(JArray model, JArray pos, JArray neg, JArray latent } } // TODO: Registry of model default preferences instead of this - else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie() || IsHiDreamO1()) + else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie() || IsHiDreamO1() || IsLens()) { defscheduler ??= "simple"; } diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index 48d60e7fa..95b49bf55 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -91,6 +91,9 @@ public bool IsKontext() /// Returns true if the current model is HiDream-O1 Image. public bool IsHiDreamO1() => IsModelCompatClass(T2IModelClassSorter.CompatHiDreamO1); + /// Returns true if the current model is Lens. + public bool IsLens() => IsModelCompatClass(T2IModelClassSorter.CompatLens); + /// Returns true if the current model supports Flux Guidance. public bool HasFluxGuidance() { @@ -269,7 +272,7 @@ public WGNodeData EmptyImage(int width, int height, int batchSize, string id = n ["width"] = width }, id)); } - else if (IsAnyFlux2() || IsErnie()) + else if (IsAnyFlux2() || IsErnie() || IsLens()) { return resultImage(CreateNode("EmptyFlux2LatentImage", new JObject() { @@ -598,6 +601,11 @@ public string GetMinistral3_3bModel() return RequireClipModel("ministral-3-3b.safetensors", "https://huggingface.co/Comfy-Org/ERNIE-Image/resolve/main/text_encoders/ministral-3-3b.safetensors", "49a750a128863854eac7d85e1a277a7b44bf6ec3646405b84686dfeeca3708ca", T2IParamTypes.MistralModel); } + public string GetGptOss_20bModel() + { + return RequireClipModel("gpt_oss_20b_mxfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/split_files/text_encoders/gpt_oss_20b_mxfp4.safetensors", "f279cf3e73c494f78e0c5e4d35cf665068ae69672f7066813dbb75c021286856", T2IParamTypes.GptOssModel); + } + public string GetClipLModel() { if (g.UserInput.TryGet(T2IParamTypes.ClipLModel, out T2IModel model)) @@ -899,7 +907,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) { dtype = "default"; } - else if (IsZImage() || IsZetaChroma() || IsAnima()) // Model is small and dense, so trust user preferred download format + else if (IsZImage() || IsZetaChroma() || IsAnima() || IsLens()) // Model is small and dense, so trust user preferred download format { dtype = "default"; } @@ -1057,6 +1065,18 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) helpers.LoadClip("flux2", helpers.GetMinistral3_3bModel()); helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae"); } + else if (IsLens()) + { + helpers.LoadClip("lens", helpers.GetGptOss_20bModel()); + helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae"); + string cfgNormNode = CreateNode("CFGNorm", new JObject() + { + ["model"] = LoadingModel, + ["strength"] = 1.0, + ["pre_cfg"] = true + }); + LoadingModel = [cfgNormNode, 0]; + } else if (IsFlux() && (LoadingClip is null || LoadingVAE is null || UserInput.Get(T2IParamTypes.T5XXLModel) is not null || UserInput.Get(T2IParamTypes.ClipLModel) is not null)) { helpers.LoadClip2("flux", helpers.GetT5XXLModel(), helpers.GetClipLModel()); @@ -1323,7 +1343,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) } if (UserInput.TryGet(T2IParamTypes.SigmaShift, out double shiftVal, sectionId: sectionId)) { - if (IsFlux() || IsAnyFlux2()) + if (IsFlux() || IsAnyFlux2() || IsLens()) { string samplingNode = CreateNode("ModelSamplingFlux", new JObject() { diff --git a/src/Text2Image/T2IModelClassSorter.cs b/src/Text2Image/T2IModelClassSorter.cs index 6ceffa9c3..f03a70e48 100644 --- a/src/Text2Image/T2IModelClassSorter.cs +++ b/src/Text2Image/T2IModelClassSorter.cs @@ -71,6 +71,7 @@ public static T2IModelCompatClass CompatZetaChroma = RegisterCompat(new() { ID = "zeta-chroma", ShortCode = "ZChr", LorasTargetTextEnc = false }), CompatAnima = RegisterCompat(new() { ID = "anima", ShortCode = "Anima", LorasTargetTextEnc = false }), CompatHiDreamO1 = RegisterCompat(new() { ID = "hidream-o1", ShortCode = "HiDrO1", LorasTargetTextEnc = false }), + CompatLens = RegisterCompat(new() { ID = "lens", ShortCode = "Lens", LorasTargetTextEnc = false }), // Audio models CompatAceStep15 = RegisterCompat(new() { ID = "ace-step-1_5", ShortCode = "Ace15", IsAudioModel = true }), // Obscure old random ones @@ -158,6 +159,7 @@ bool isFluxLora(JObject h) bool isFlux2KleinLora(JObject h) => hasLoraKey(h, "double_blocks.4.img_attn.proj") && hasLoraKey(h, "double_blocks.4.txt_mlp.2") && hasLoraKey(h, "single_blocks.18.linear1") && hasLoraKey(h, "single_blocks.19.linear2"); bool isFlux2Klein9BLora(JObject h) => hasLoraKey(h, "single_blocks.23.linear1"); bool isFlux2DevLora(JObject h) => hasLoraKey(h, "single_blocks.47.linear2"); + bool isLens(JObject h) => h.ContainsKey("transformer_blocks.0.attn.norm_added_q.weight") && h.ContainsKey("transformer_blocks.0.img_mlp.w1.weight"); bool isSD35Lora(JObject h) => h.ContainsKey("transformer.transformer_blocks.0.attn.to_k.lora_A.weight") && h.ContainsKey("transformer.transformer_blocks.37.attn.to_out.0.lora_B.weight"); bool isMochi(JObject h) => hasKey(h, "blocks.0.attn.k_norm_x.weight"); bool isMochiVae(JObject h) => h.ContainsKey("encoder.layers.4.layers.1.attn_block.attn.qkv.weight") || h.ContainsKey("layers.4.layers.1.attn_block.attn.qkv.weight") || h.ContainsKey("blocks.2.blocks.3.stack.5.weight") || h.ContainsKey("decoder.blocks.2.blocks.3.stack.5.weight"); @@ -478,6 +480,10 @@ JToken GetEmbeddingKey(JObject h) { return isFlux2KleinLora(h) && isFlux2Klein9BLora(h) && !isFlux2DevLora(h); }}); + Register(new() { ID = "lens", CompatClass = CompatLens, Name = "Lens", StandardWidth = 1440, StandardHeight = 1440, IsThisModelOfClass = (m, h) => + { + return isLens(h); + }}); // ====================== Wan Video ====================== Register(new() { ID = "wan-2_1-text2video/vae", CompatClass = CompatWan21, Name = "Wan 2.1 VAE", StandardWidth = 640, StandardHeight = 640, IsThisModelOfClass = (m, h) => { return false; }}); Register(new() { ID = "wan-2_1-text2video-1_3b", CompatClass = CompatWan21_1_3b, Name = "Wan 2.1 Text2Video 1.3B", StandardWidth = 640, StandardHeight = 640, IsThisModelOfClass = (m, h) => diff --git a/src/Text2Image/T2IParamTypes.cs b/src/Text2Image/T2IParamTypes.cs index 6e8c5bdbd..d8b5bcfa4 100644 --- a/src/Text2Image/T2IParamTypes.cs +++ b/src/Text2Image/T2IParamTypes.cs @@ -330,7 +330,7 @@ public static string ApplyStringEdit(string prior, string update) FreeUBlock1, FreeUBlock2, FreeUSkip1, FreeUSkip2, GlobalRegionFactor, EndStepsEarly, SamplerSigmaMin, SamplerSigmaMax, SamplerRho, VideoAugmentationLevel, VideoCFG, VideoMinCFG, Video2VideoCreativity, VideoSwapPercent, VideoExtendSwapPercent, IP2PCFG2, RegionalObjectCleanupFactor, SigmaShift, SegmentThresholdMax, SegmentCFGScale, FluxGuidanceScale, Text2AudioDuration; public static T2IRegisteredParam InitImage, MaskImage, VideoEndFrame; public static T2IRegisteredParam VideoAudioInput, VideoAudioReference; - public static T2IRegisteredParam Model, RefinerModel, VAE, RegionalObjectInpaintingModel, SegmentModel, VideoModel, VideoSwapModel, RefinerVAE, ClipLModel, ClipGModel, ClipVisionModel, T5XXLModel, LLaVAModel, LLaMAModel, QwenModel, MistralModel, GemmaModel, VideoExtendModel, VideoExtendSwapModel; + public static T2IRegisteredParam Model, RefinerModel, VAE, RegionalObjectInpaintingModel, SegmentModel, VideoModel, VideoSwapModel, RefinerVAE, ClipLModel, ClipGModel, ClipVisionModel, T5XXLModel, LLaVAModel, LLaMAModel, QwenModel, MistralModel, GemmaModel, GptOssModel, VideoExtendModel, VideoExtendSwapModel; public static T2IRegisteredParam> Loras, LoraWeights, LoraTencWeights, LoraSectionConfinement; public static T2IRegisteredParam> PromptImages; public static T2IRegisteredParam OutputIntermediateImages, DoNotSave, DoNotSaveIntermediates, ControlNetPreviewOnly, RevisionZeroPrompt, RemoveBackground, NoSeedIncrement, NoPreviews, VideoBoomerang, ModelSpecificEnhancements, UseInpaintingEncode, MaskCompositeUnthresholded, SaveSegmentMask, InitImageRecompositeMask, UseReferenceOnly, RefinerDoTiling, AutomaticVAE, ZeroNegative, FluxDisableGuidance, SmartImagePromptResizing, NoLoadModels, NoInternalSpecialHandling, ForwardRawBackendData, ForwardSwarmData, @@ -715,6 +715,9 @@ static List listVaes(Session s) GemmaModel = Register(new("Gemma Model", "Which Gemma LLM to use as a text encoder, for models that use Gemma (such as Lumina2, LTX2).", "", IgnoreIf: "", Group: GroupAdvancedModelAddons, Subtype: "Clip", Permission: Permissions.ModelParams, Toggleable: true, IsAdvanced: true, OrderPriority: 20, ChangeWeight: 7 )); + GptOssModel = Register(new("GPT-OSS Model", "Which GPT-OSS LLM to use as a text encoder, for Lens-style 'diffusion_models' folder models.", + "", IgnoreIf: "", Group: GroupAdvancedModelAddons, Subtype: "Clip", Permission: Permissions.ModelParams, Toggleable: true, IsAdvanced: true, OrderPriority: 20, ChangeWeight: 7 + )); TorchCompile = Register(new("Torch Compile", "Torch.Compile is a way to dynamically accelerate AI models.\nIt wastes a bit of time (around a minute) on the first call compiling a graph of the generation, and then all subsequent generations run faster thanks to the compiled graph.\nTorch.Compile depends on Triton, which is difficult to install on Windows, easier on Linux.", "Disabled", IgnoreIf: "Disabled", GetValues: _ => ["Disabled", "inductor", "cudagraphs"], OrderPriority: 40, Group: GroupAdvancedModelAddons )); From b3516f3185ff8a62a3abef4dc415a840bf99a567 Mon Sep 17 00:00:00 2001 From: Juan Treminio Date: Sat, 23 May 2026 13:23:39 -0500 Subject: [PATCH 2/6] Adds ModelSamplingFlux --- .../WorkflowGeneratorModelSupport.cs | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index 95b49bf55..eb18ef873 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -1069,13 +1069,22 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) { helpers.LoadClip("lens", helpers.GetGptOss_20bModel()); helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae"); - string cfgNormNode = CreateNode("CFGNorm", new JObject() + string lensSamplingNode = CreateNode("ModelSamplingFlux", new JObject() + { + ["model"] = LoadingModel, + ["width"] = UserInput.GetImageWidth(), + ["height"] = UserInput.GetImageHeight(), + ["max_shift"] = UserInput.Get(T2IParamTypes.SigmaShift, 1.15, sectionId: sectionId), + ["base_shift"] = 0.5 + }); + LoadingModel = [lensSamplingNode, 0]; + string lensCfgNormNode = CreateNode("CFGNorm", new JObject() { ["model"] = LoadingModel, ["strength"] = 1.0, ["pre_cfg"] = true }); - LoadingModel = [cfgNormNode, 0]; + LoadingModel = [lensCfgNormNode, 0]; } else if (IsFlux() && (LoadingClip is null || LoadingVAE is null || UserInput.Get(T2IParamTypes.T5XXLModel) is not null || UserInput.Get(T2IParamTypes.ClipLModel) is not null)) { @@ -1343,7 +1352,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) } if (UserInput.TryGet(T2IParamTypes.SigmaShift, out double shiftVal, sectionId: sectionId)) { - if (IsFlux() || IsAnyFlux2() || IsLens()) + if (IsFlux() || IsAnyFlux2()) { string samplingNode = CreateNode("ModelSamplingFlux", new JObject() { From bc50d598675228d540b7ad0e5e559d3a33a73e0b Mon Sep 17 00:00:00 2001 From: Juan Treminio Date: Sat, 23 May 2026 13:57:48 -0500 Subject: [PATCH 3/6] Docs tweak --- docs/Model Support.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Model Support.md b/docs/Model Support.md index b0010bc4e..e47c136fe 100644 --- a/docs/Model Support.md +++ b/docs/Model Support.md @@ -20,7 +20,7 @@ [Anima](#anima) | DiT | 2026 | Circlestone Labs | 2B | WTF | Modern, very small, decent for anime | [ERNIE](#ernie) | DiT | 2026 | Baidu | 8B | Minimal | Modern, intelligent, good quality, fast | [HiDream O1](#hidream-o1) | "Pixel UiT" | 2026 | HiDream | 8B | Minimal | Modern, intelligent, fast, decent quality | -[Lens](#lens) | MMDiT | 2026 | Microsoft | 3.8B | Minimal | Modern, Great Quality, lightweight | +[Lens](#lens) | MMDiT | 2026 | Microsoft | 3.8B | Minimal | Modern, lightweight | Old or bad options also tracked listed via [Obscure Model Support](/docs/Obscure%20Model%20Support.md): @@ -622,7 +622,7 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended # Lens - Microsoft's [Lens]() is supported in SwarmUI! -- It is a 3.8B MMDiT model, with a base model and an official turbo distill designed to run fast. +- It is a 3.8B model, with a base model and an official turbo distill designed to run fast. - The "Turbo" model (in fat BF16) can be downloaded here [Comfy-Org/Lens - turbo]() - Or the base version (in fat BF16) [Comfy-Org/Lens - base]() - Save in `diffusion_models` From 1b89787ccec9c782a60511593b678bf1694ff6ff Mon Sep 17 00:00:00 2001 From: Juan Treminio Date: Tue, 26 May 2026 07:59:24 -0500 Subject: [PATCH 4/6] Updating per upstream --- docs/Model Support.md | 6 ++++-- .../ComfyUIBackend/WorkflowGeneratorModelSupport.cs | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/Model Support.md b/docs/Model Support.md index 771a13068..6e06b18f4 100644 --- a/docs/Model Support.md +++ b/docs/Model Support.md @@ -623,8 +623,10 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended - Microsoft's [Lens]() is supported in SwarmUI! - It is a 3.8B model, with a base model and an official turbo distill designed to run fast. - - The "Turbo" model (in fat BF16) can be downloaded here [Comfy-Org/Lens - turbo]() - - Or the base version (in fat BF16) [Comfy-Org/Lens - base]() + - The "Turbo" model (in fat BF16) can be downloaded here [Comfy-Org/Lens - turbo]() + - An MXFP8 variant can be downloaded here [Comfy-Org/Lens - turb mxfp8]() + - Or the base version (in fat BF16) [Comfy-Org/Lens - base]() + - An MXFP8 variant can be downloaded here [Comfy-Org/Lens - base mxfp8]() - Save in `diffusion_models` - Uses the Flux.2 VAE, will be downloaded and handled automatically - Uses the GPT-OSS 20B text encoder, will be downloaded and handled automatically diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index eb18ef873..998a22184 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -603,7 +603,7 @@ public string GetMinistral3_3bModel() public string GetGptOss_20bModel() { - return RequireClipModel("gpt_oss_20b_mxfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/split_files/text_encoders/gpt_oss_20b_mxfp4.safetensors", "f279cf3e73c494f78e0c5e4d35cf665068ae69672f7066813dbb75c021286856", T2IParamTypes.GptOssModel); + return RequireClipModel("gpt_oss_20b_nvfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/text_encoders/gpt_oss_20b_nvfp4.safetensors?download=true", "267fb63224796cc3af378abb0ba6b95130c0e53ac642ab7a734034f0cfa2e753", T2IParamTypes.GptOssModel); } public string GetClipLModel() From 524a7f6c2c925016f59c2218695dc3817367ec76 Mon Sep 17 00:00:00 2001 From: Juan Treminio Date: Tue, 26 May 2026 08:05:44 -0500 Subject: [PATCH 5/6] Fix hash --- .../ComfyUIBackend/WorkflowGeneratorModelSupport.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index 998a22184..97330d85b 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -603,7 +603,7 @@ public string GetMinistral3_3bModel() public string GetGptOss_20bModel() { - return RequireClipModel("gpt_oss_20b_nvfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/text_encoders/gpt_oss_20b_nvfp4.safetensors?download=true", "267fb63224796cc3af378abb0ba6b95130c0e53ac642ab7a734034f0cfa2e753", T2IParamTypes.GptOssModel); + return RequireClipModel("gpt_oss_20b_nvfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/text_encoders/gpt_oss_20b_nvfp4.safetensors?download=true", "103d7759c720627e5ffdcb0d885595695085dad4201fa6a522a84d4b86335ca0", T2IParamTypes.GptOssModel); } public string GetClipLModel() From 098f6d5a8c9ff220e21d010cecd971a56ab514cb Mon Sep 17 00:00:00 2001 From: "Alex \"mcmonkey\" Goodwin" Date: Tue, 26 May 2026 20:12:50 -0700 Subject: [PATCH 6/6] notes --- docs/Model Support.md | 9 ++++----- .../ComfyUIBackend/WorkflowGeneratorModelSupport.cs | 4 +++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/Model Support.md b/docs/Model Support.md index 6e06b18f4..10160dc7e 100644 --- a/docs/Model Support.md +++ b/docs/Model Support.md @@ -623,10 +623,9 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended - Microsoft's [Lens]() is supported in SwarmUI! - It is a 3.8B model, with a base model and an official turbo distill designed to run fast. - - The "Turbo" model (in fat BF16) can be downloaded here [Comfy-Org/Lens - turbo]() - - An MXFP8 variant can be downloaded here [Comfy-Org/Lens - turb mxfp8]() - - Or the base version (in fat BF16) [Comfy-Org/Lens - base]() - - An MXFP8 variant can be downloaded here [Comfy-Org/Lens - base mxfp8]() + - The raw base model (FP8) can be downloaded here: [Comfy-Org/Lens]() + - The Turbo model (FP8) can be downloaded here: [Comfy-Org/Lens - Turbo]() + - Or fat BF16 versions [Comfy-Org/Lens - base bf16]() [Comfy-Org/Lens - turbo bf16]() - Save in `diffusion_models` - Uses the Flux.2 VAE, will be downloaded and handled automatically - Uses the GPT-OSS 20B text encoder, will be downloaded and handled automatically @@ -635,7 +634,7 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended - **Scheduler:** Default is fine. - **CFG Scale:** For Turbo, `1`, for base normal CFG ranges (around `5`) - **Steps:** For Turbo, `4` is recommended, `8` works well. For Base, `20` as normal. - - **Resolution:** Side length `1440` is the standard. + - **Resolution:** Side length `1440` is the official default, but 1024 is a reasonable option. It retains coherence down to about 512 and up to about 2048. # Video Models diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index 97330d85b..32e7c4bb8 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -603,7 +603,7 @@ public string GetMinistral3_3bModel() public string GetGptOss_20bModel() { - return RequireClipModel("gpt_oss_20b_nvfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/text_encoders/gpt_oss_20b_nvfp4.safetensors?download=true", "103d7759c720627e5ffdcb0d885595695085dad4201fa6a522a84d4b86335ca0", T2IParamTypes.GptOssModel); + return RequireClipModel("gpt_oss_20b_nvfp4.safetensors", "https://huggingface.co/Comfy-Org/Lens/resolve/main/text_encoders/gpt_oss_20b_nvfp4.safetensors", "103d7759c720627e5ffdcb0d885595695085dad4201fa6a522a84d4b86335ca0", T2IParamTypes.GptOssModel); } public string GetClipLModel() @@ -1069,6 +1069,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) { helpers.LoadClip("lens", helpers.GetGptOss_20bModel()); helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFlux2VAE, "flux-2", "flux2-vae"); + // TODO: SamplingFlux is a dirty node, is this really needed? Or can we do a generic shift? string lensSamplingNode = CreateNode("ModelSamplingFlux", new JObject() { ["model"] = LoadingModel, @@ -1078,6 +1079,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) ["base_shift"] = 0.5 }); LoadingModel = [lensSamplingNode, 0]; + // TODO: Should this CFGNorm be configurable? string lensCfgNormNode = CreateNode("CFGNorm", new JObject() { ["model"] = LoadingModel,