diff --git a/Examples/Examples/Chat/ChatWithImageGenExample.cs b/Examples/Examples/Chat/ChatWithImageGenExample.cs index 56a6d9e4..26b77401 100644 --- a/Examples/Examples/Chat/ChatWithImageGenExample.cs +++ b/Examples/Examples/Chat/ChatWithImageGenExample.cs @@ -2,6 +2,7 @@ using MaIN.Core.Hub; using MaIN.Domain.Models; using MaIN.Domain.Models.Abstract; +using MaIN.Domain.Models.Concrete; namespace Examples.Chat; @@ -11,10 +12,13 @@ public async Task Start() { Console.WriteLine("ChatExample with image gen is running!"); - ModelRegistry.RegisterOrReplace(new GenericLocalModel(Models.Local.Flux1Shnell)); + ModelRegistry.RegisterOrReplace(new StableDiffusion1_5()); + + await AIHub.Model().EnsureDownloadedAsync(Models.Local.StableDiffusion1_5); + var result = await AIHub.Chat() - .WithModel(Models.Local.Flux1Shnell) - .WithMessage("Generate cyberpunk godzilla cat warrior") + .WithModel(Models.Local.StableDiffusion1_5) + .WithMessage("Fluffy cat with a book - anime style") .CompleteAsync(); ImagePreview.ShowImage(result.Message.Image); diff --git a/ImageGen/Dockerfile b/ImageGen/Dockerfile deleted file mode 100644 index d365b5a7..00000000 --- a/ImageGen/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -# Use the official Python image from the Docker Hub -FROM python:3.12-slim - -# Set the working directory in the container -WORKDIR /app - -# Copy the requirements.txt file to the container -COPY requirements.txt . - -# Install the dependencies -RUN pip install --no-cache-dir -r requirements.txt - -# Copy the rest of the application code to the container -COPY . . 
- -# Expose the port that the Flask app runs on -EXPOSE 5000 - -# Command to run the Flask app -CMD ["python", "main.py"] diff --git a/ImageGen/main.py b/ImageGen/main.py deleted file mode 100644 index 70547825..00000000 --- a/ImageGen/main.py +++ /dev/null @@ -1,66 +0,0 @@ -import torch -from diffusers import FluxPipeline -from flask import Flask, jsonify, request, send_file -from flasgger import Swagger, swag_from -from io import BytesIO - -app = Flask(__name__) -swagger = Swagger(app) - -pipe = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-schnell", torch_dtype=torch.float16) -pipe.enable_sequential_cpu_offload() - -@app.route('/health', methods=['GET']) -@swag_from({ - 'parameters': [], - 'responses': { - 200: 'OK' - } -}) -def health(): - return 'OK' - -@app.route('/generate/', methods=['POST']) -@swag_from({ - 'parameters': [ - { - 'name': 'prompt', - 'in': 'path', - 'type': 'string', - 'required': True, - 'description': 'Text prompt to generate an image' - } - ], - 'responses': { - 200: { - 'description': 'Generated image from the prompt', - 'content': { - 'image/png': { - 'schema': { - 'type': 'string', - 'format': 'binary' - } - } - } - } - } -}) -def generate(prompt): - # Generate image using the provided prompt - image = pipe( - prompt, - guidance_scale=0.0, - num_inference_steps=4, - max_sequence_length=256 - ).images[0] - - # Save the image to an in-memory buffer - img_io = BytesIO() - image.save(img_io, 'PNG') - img_io.seek(0) - - # Return the image as a response - return send_file(img_io, mimetype='image/png') - -if __name__ == '__main__': - app.run(host="0.0.0.0", port=5003) \ No newline at end of file diff --git a/ImageGen/requirements.txt b/ImageGen/requirements.txt deleted file mode 100644 index 46da897e..00000000 --- a/ImageGen/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ ---find-links https://download.pytorch.org/whl/torch_stable.html - -diffusers==0.30.2 -flasgger==0.9.7.1 -Flask==3.0.3 -torch==2.3.1+cu118 -transformers -accelerate 
-sentencepiece -protobuf -numpy<2.0 \ No newline at end of file diff --git a/MaIN.Core.E2ETests/ChatTests.cs b/MaIN.Core.E2ETests/ChatTests.cs index 5917ffbc..776a02aa 100644 --- a/MaIN.Core.E2ETests/ChatTests.cs +++ b/MaIN.Core.E2ETests/ChatTests.cs @@ -4,6 +4,7 @@ using MaIN.Domain.Entities; using MaIN.Domain.Models; using MaIN.Domain.Models.Abstract; +using MaIN.Domain.Models.Concrete; namespace MaIN.Core.E2ETests; @@ -127,22 +128,13 @@ Fuzzy match failed! [Fact(Skip = "Require powerful GPU")] public async Task Should_GenerateImage_BasedOnPrompt() { - Assert.True(NetworkHelper.PingHost("127.0.0.1", 5003, 5), "Please make sure ImageGen service is running on port 5003"); + ModelRegistry.RegisterOrReplace(new StableDiffusion1_5()); - const string extension = "png"; - - var fluxModel = new GenericLocalModel("FLUX.1_Shnell"); - ModelRegistry.RegisterOrReplace(fluxModel); var result = await AIHub.Chat() - .WithModel(fluxModel.Id) + .WithModel(Models.Local.StableDiffusion1_5) .WithMessage("Generate cat in Rome. Sightseeing, colloseum, ancient builidngs, Italy.") .CompleteAsync(); - if (string.IsNullOrWhiteSpace(extension) || extension.Contains(".")) - { - throw new ArgumentException("Invalid file extension"); - } - Assert.True(result.Done); Assert.NotNull(result.Message.Image); } diff --git a/Releases/0.10.14.md b/Releases/0.10.14.md new file mode 100644 index 00000000..ce35f6f4 --- /dev/null +++ b/Releases/0.10.14.md @@ -0,0 +1,6 @@ +# 0.10.14 release + +- Local image generation now runs in-process via StableDiffusion.NET (GGUF diffusion models on CPU/CUDA), replacing the external Python/Flask image-gen service. +- Adds local diffusion model catalog (`StableDiffusion1_5`, `Flux1Shnell`, `QwenImage`) with multi-asset downloads (VAE/CLIP/T5/etc) via `AIHub.Model().EnsureDownloadedAsync()`. +- InferPage Settings now lists local diffusion models for the Self backend, tagged "Image Gen", and resolves custom model paths correctly for image generation. 
+- Removes the `ImageGen` Python service, Docker image, and `MaIN__ImageGenUrl` setting. diff --git a/scripts/image_gen_wrapper.sh b/scripts/image_gen_wrapper.sh deleted file mode 100644 index 83ece5f6..00000000 --- a/scripts/image_gen_wrapper.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -# Name the process using exec -a and run the Python script -exec -a "ImageGenAPI" python3.9 ./ImageGen/main.py \ No newline at end of file diff --git a/scripts/install-mcli.ps1 b/scripts/install-mcli.ps1 index fcac69e9..3ea748d7 100644 --- a/scripts/install-mcli.ps1 +++ b/scripts/install-mcli.ps1 @@ -17,15 +17,13 @@ $itemsToCopy = @{ "mcli.ps1", "start.ps1", "start-api.ps1", - "start-image-gen.ps1", "download-models.ps1", "docker-compose.yml", "models_map.txt", ".models" ) Directories = @( - "server", - "ImageGen" + "server" ) } # Copy files diff --git a/scripts/mcli.ps1 b/scripts/mcli.ps1 index 742d0f87..63062a17 100644 --- a/scripts/mcli.ps1 +++ b/scripts/mcli.ps1 @@ -34,9 +34,8 @@ Usage: mcli [options] Commands: - start-demo Start all services (API, image generation, and download models) + start-demo Start all services (API and download models) api Start only the API service - image-gen Start only the image generation service model Download and manage models help Show this help message uninstall Uninstall mcli @@ -45,7 +44,6 @@ Options for 'start': --hard Hard cleanup of containers --no-api Skip starting the API --no-models Skip model downloads - --no-image-gen Skip image generation --models= Specify comma-separated list of models to download Options for 'api': @@ -59,7 +57,6 @@ Options for 'model': Examples: mcli start-demo - mcli start-demo --no-image-gen mcli api --hard mcli model download gemma2-2b-maIN mcli help @@ -81,12 +78,10 @@ Options: --hard Perform hard cleanup of containers before starting --no-api Skip starting the API --no-models Skip model downloads - --no-image-gen Skip image generation --models= Specify comma-separated list of models to download Examples: 
mcli start-demo - mcli start-demo --no-image-gen mcli start-demo --models=gemma2-2b-maIN "@ } @@ -103,17 +98,6 @@ Options: Examples: mcli api mcli api --hard -"@ - } - "image-gen" { - Write-Host @" -mcli image-gen - Start the image generation service - -Usage: - mcli image-gen - -Examples: - mcli image-gen "@ } "model" { @@ -150,9 +134,6 @@ switch ($command) { "api" { & "$PSScriptRoot\start-api.ps1" $arguments } - "image-gen" { - & "$PSScriptRoot\start-image-gen.ps1" $arguments - } "model" { $subcommand = $arguments[0] $modelArgs = $arguments[1..($arguments.Length-1)] diff --git a/scripts/start-image-gen.ps1 b/scripts/start-image-gen.ps1 deleted file mode 100644 index 700d79c8..00000000 --- a/scripts/start-image-gen.ps1 +++ /dev/null @@ -1,70 +0,0 @@ -# Image Generation API Setup Script -$installRoot = if ($global:MCLI_ROOT) { - $global:MCLI_ROOT -} else { - $PSScriptRoot -} - -Push-Location $installRoot -# Python configuration -$pythonVersion = "3.9.13" -$pythonInstallerUrl = "https://www.python.org/ftp/python/$pythonVersion/python-$pythonVersion-amd64.exe" -$installerPath = "$env:TEMP\python-$pythonVersion-installer.exe" - -# Check if Python 3.9 is already installed -$python = Get-Command python -ErrorAction SilentlyContinue -if (-not $python) { - Write-Host "Downloading Python $pythonVersion..." - try { - Invoke-WebRequest $pythonInstallerUrl -OutFile $installerPath - } - catch { - Write-Host "Failed to download Python installer. Error: $_" - exit 1 - } - - Write-Host "Installing Python $pythonVersion..." 
- $installProcess = Start-Process $installerPath -ArgumentList '/quiet InstallAllUsers=1 PrependPath=1 Include_pip=1' -PassThru -Wait - Remove-Item $installerPath - - if ($installProcess.ExitCode -ne 0) { - Write-Host "Python installation failed with exit code $($installProcess.ExitCode)" - exit 1 - } - - # Refresh environment variables - $env:Path = [System.Environment]::GetEnvironmentVariable("Path", "Machine") + ";" + [System.Environment]::GetEnvironmentVariable("Path", "User") -} -else { - Write-Host "Python is already installed." -} - -# Verify Python installation -Write-Host "Verifying Python installation..." -python --version -pip --version - -# Install dependencies -Write-Host "Installing dependencies from requirements.txt..." -try { - pip install --default-timeout=900 -r "./ImageGen/requirements.txt" -} -catch { - Write-Host "Failed to install dependencies. Error: $_" - exit 1 -} - -# Start the API -Write-Host "Starting Image Generation API..." -Start-Process -FilePath "python" -ArgumentList "./ImageGen/main.py" -NoNewWindow -PassThru - -Write-Host "Image Generation API is running. Press Ctrl+C to stop." -try { - # Keep the script running until interrupted - while ($true) { - Start-Sleep -Seconds 60 - } -} -finally { - Write-Host "Stopping Image Generation API..." 
-} \ No newline at end of file diff --git a/scripts/start.ps1 b/scripts/start.ps1 index a4a4a89a..867cc3ce 100644 --- a/scripts/start.ps1 +++ b/scripts/start.ps1 @@ -8,7 +8,6 @@ $hard = $false $models = @() $noApi = $false $apiOnly = $false -$noImageGen = $false $noModels = $false # Parse command-line arguments @@ -32,9 +31,6 @@ foreach ($arg in $arguments) { elseif ($arg -eq '--api-only') { $apiOnly = $true } - elseif ($arg -eq '--no-image-gen') { - $noImageGen = $true - } elseif ($arg -eq '--no-models') { $noModels = $true } @@ -49,7 +45,6 @@ Write-Host "Hard: $hard" Write-Host "Models: $($models -join ', ')" Write-Host "No API: $noApi" Write-Host "API Only: $apiOnly" -Write-Host "No Image Gen: $noImageGen" Write-Host "No Models: $noModels" # Run setup tasks unless --api-only is provided @@ -63,12 +58,6 @@ if (-not $apiOnly) { & "$PSScriptRoot\download-models.ps1" } } - - # Handle Image Generation API unless --no-image-gen is specified - if (-not $noImageGen) { - Write-Host "Starting Image Generation API as a background job..." - Start-Process -FilePath "powershell.exe" -ArgumentList "-NoProfile -ExecutionPolicy Bypass -File `"$PSScriptRoot\start-image-gen.ps1`"" -NoNewWindow - } } # Start API unless --no-api is specified diff --git a/scripts/start.sh b/scripts/start.sh index 84707ff1..45e463d3 100644 --- a/scripts/start.sh +++ b/scripts/start.sh @@ -4,7 +4,6 @@ hard=false models=() noInfra=false infraOnly=false -noImageGen=false # New variable for --no-image-gen # Manually parse the command-line arguments for double-dash parameters for arg in "$@"; do @@ -23,9 +22,6 @@ for arg in "$@"; do --infra-only) infraOnly=true ;; - --no-image-gen) - noImageGen=true - ;; esac done @@ -49,87 +45,6 @@ if [[ $noInfra == false || $infraOnly == true ]]; then sleep 10 echo "Running the Ollama serve." 
sleep 5 - - # Install Python and run Image Gen API if --no-image-gen is not provided - if [[ $noImageGen == false ]]; then -pythonVersion="3.9.13" -pythonInstallerUrl="https://www.python.org/ftp/python/$pythonVersion/Python-$pythonVersion.tgz" - -# Check if Python 3.9 is already installed -if ! command -v python3.9 &> /dev/null || [[ $(python3.9 --version | awk '{print $2}') != $pythonVersion ]]; then - echo "Downloading Python $pythonVersion..." - wget "$pythonInstallerUrl" -O /tmp/Python-$pythonVersion.tgz - tar -xzf /tmp/Python-$pythonVersion.tgz -C /tmp - - cd /tmp/Python-$pythonVersion - - # Ensure necessary build tools are installed - sudo apt update - sudo apt install -y build-essential zlib1g-dev libffi-dev libssl-dev libreadline-dev libbz2-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev liblzma-dev - - # Configure and install - ./configure --enable-optimizations - make -j$(nproc) - sudo make altinstall - - cd - - - # Clean up - rm -rf /tmp/Python-$pythonVersion /tmp/Python-$pythonVersion.tgz - - # Manually check for and add Python3.9 to PATH if it's not already there - pythonPath=$(command -v python3.9) - if [[ -z "$pythonPath" ]]; then - echo "Python installation failed. Exiting." - exit 1 - fi - - # Adding Python to PATH if not already in it - currentPath=$(echo "$PATH") - pythonBinDir=$(dirname "$pythonPath") - - if [[ ! "$currentPath" =~ "$pythonBinDir" ]]; then - echo "Adding Python 3.9 to the PATH..." - echo "export PATH=\"$pythonBinDir:\$PATH\"" >> ~/.bashrc - export PATH="$pythonBinDir:$PATH" - fi -else - echo "Python is already installed." -fi - -# Verify Python and pip installation -echo "Verifying Python installation..." -python3.9 --version -if ! command -v pip3.9 &> /dev/null; then - echo "Installing pip for Python 3.9..." - curl https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py - python3.9 /tmp/get-pip.py - rm /tmp/get-pip.py -fi - -echo "Verifying pip installation..." 
-pip3.9 --version - -# Install packages from requirements.txt -echo "Installing dependencies from requirements.txt..." -pip3.9 install --user --default-timeout=900 -r "./ImageGen/requirements.txt" - - sleep 5 - - logfile="image_gen.log" - -# Assuming Python installation steps here - -# Run the wrapper script using setsid and name the process -echo "Running image generation API in the background with real-time logs..." -setsid ./image_gen_wrapper.sh | tee image_gen.log & -# Continue with the rest of the script -echo "Main script is continuing..." - - sleep 100 - else - echo "--no-image-gen flag provided, skipping image generation API..." - fi fi # Run Docker-related tasks only if --infra-only is not provided diff --git a/src/Dockerfile.inferpage b/src/Dockerfile.inferpage index 6996eed1..2f4ad936 100644 --- a/src/Dockerfile.inferpage +++ b/src/Dockerfile.inferpage @@ -79,6 +79,10 @@ EXPOSE 5555 COPY --from=publish /app/out . +RUN find /app/runtimes/linux-x64/native -maxdepth 2 -mindepth 1 -type d \ + | tee /etc/ld.so.conf.d/llamasharp.conf \ + && ldconfig + ENTRYPOINT ["dotnet", "MaIN.InferPage.dll"] FROM mcr.microsoft.com/dotnet/aspnet:10.0 AS runtime-ollama diff --git a/src/MaIN.Core.UnitTests/LocalModelAssetsTests.cs b/src/MaIN.Core.UnitTests/LocalModelAssetsTests.cs new file mode 100644 index 00000000..80c430e8 --- /dev/null +++ b/src/MaIN.Core.UnitTests/LocalModelAssetsTests.cs @@ -0,0 +1,86 @@ +using MaIN.Domain.Models.Abstract; + +namespace MaIN.Core.UnitTests; + +public class LocalModelAssetsTests +{ + [Fact] + public void RequiredAssets_ForSingleFileModel_ContainsOnlyMainFile() + { + var model = new GenericLocalModel("model.gguf"); + + var assets = model.RequiredAssets.ToList(); + + Assert.Single(assets); + Assert.Equal("model.gguf", assets[0].FileName); + } + + [Fact] + public void RequiredAssets_ForMultiAssetDiffusionModel_IncludesMainFileAndConfiguredAssets() + { + var model = new GenericLocalImageGenerationModel( + "sd3.5_large-Q4_0.gguf", + 
DiffusionArchitecture.SD3, + Vae: new ModelAsset("sd3.5_vae.safetensors"), + ClipL: new ModelAsset("clip_l.safetensors"), + ClipG: new ModelAsset("clip_g.safetensors"), + T5Xxl: new ModelAsset("t5xxl_fp8_e4m3fn.safetensors")); + + var assets = model.RequiredAssets.Select(a => a.FileName).ToList(); + + Assert.Equal( + ["sd3.5_large-Q4_0.gguf", "sd3.5_vae.safetensors", "clip_l.safetensors", "clip_g.safetensors", "t5xxl_fp8_e4m3fn.safetensors"], + assets); + } + + [Fact] + public void IsDownloaded_ForSingleFileModel_TrueOnlyWhenFileExists() + { + var dir = Directory.CreateTempSubdirectory(); + try + { + var model = new GenericLocalModel("model.gguf"); + + Assert.False(model.IsDownloaded(dir.FullName)); + + File.WriteAllText(Path.Combine(dir.FullName, "model.gguf"), "data"); + + Assert.True(model.IsDownloaded(dir.FullName)); + } + finally + { + dir.Delete(recursive: true); + } + } + + [Fact] + public void IsDownloaded_ForMultiAssetModel_RequiresAllAssetsPresent() + { + var dir = Directory.CreateTempSubdirectory(); + try + { + var model = new GenericLocalImageGenerationModel( + "diffusion.gguf", + DiffusionArchitecture.Flux, + Vae: new ModelAsset("ae.safetensors")); + + File.WriteAllText(Path.Combine(dir.FullName, "diffusion.gguf"), "data"); + Assert.False(model.IsDownloaded(dir.FullName)); + + File.WriteAllText(Path.Combine(dir.FullName, "ae.safetensors"), "data"); + Assert.True(model.IsDownloaded(dir.FullName)); + } + finally + { + dir.Delete(recursive: true); + } + } + + [Fact] + public void IsDownloaded_WithoutCustomPathOrBasePath_ReturnsFalse() + { + var model = new GenericLocalModel("model.gguf"); + + Assert.False(model.IsDownloaded(null)); + } +} diff --git a/src/MaIN.Core/.nuspec b/src/MaIN.Core/.nuspec index 0ce3d7b3..f8b64733 100644 --- a/src/MaIN.Core/.nuspec +++ b/src/MaIN.Core/.nuspec @@ -2,7 +2,7 @@ MaIN.NET - 0.10.13 + 0.10.14 Wisedev Wisedev favicon.png diff --git a/src/MaIN.Core/Hub/Contexts/ModelContext.cs b/src/MaIN.Core/Hub/Contexts/ModelContext.cs 
index fa45c80f..16c52dab 100644 --- a/src/MaIN.Core/Hub/Contexts/ModelContext.cs +++ b/src/MaIN.Core/Hub/Contexts/ModelContext.cs @@ -142,10 +142,23 @@ public async Task LoadToCacheAsync(LocalModel model) private async Task DownloadModelAsync(LocalModel localModel, IProgress? progress, CancellationToken cancellationToken) { - if (localModel.DownloadUrl is null) throw new DownloadUrlNullOrEmptyException(); + foreach (var asset in localModel.RequiredAssets) + { + var assetPath = localModel.GetAssetPath(asset, _defaultModelsPath); + if (File.Exists(assetPath)) + { + continue; + } + + if (asset.DownloadUrl is null) throw new DownloadUrlNullOrEmptyException(); - var filePath = GetModelFilePath(localModel); - Console.WriteLine($"Starting download of {localModel.FileName}..."); + await DownloadAssetAsync(asset.DownloadUrl, assetPath, asset.FileName, progress, cancellationToken); + } + } + + private async Task DownloadAssetAsync(Uri downloadUrl, string filePath, string fileName, IProgress? progress, CancellationToken cancellationToken) + { + Console.WriteLine($"Starting download of {fileName}..."); for (int attempt = 0; attempt <= MaxRetryAttempts; attempt++) { @@ -158,7 +171,7 @@ private async Task DownloadModelAsync(LocalModel localModel, IProgress? progress, CancellationToken cancellationToken) diff --git a/src/MaIN.Domain/Configuration/MaINSettings.cs b/src/MaIN.Domain/Configuration/MaINSettings.cs index f53ccc0a..81093763 100644 --- a/src/MaIN.Domain/Configuration/MaINSettings.cs +++ b/src/MaIN.Domain/Configuration/MaINSettings.cs @@ -6,7 +6,6 @@ public class MaINSettings { public BackendType BackendType { get; set; } = BackendType.Self; public string? ModelsPath { get; set; } - public string? ImageGenUrl { get; set; } public string? OpenAiKey { get; set; } public string? GeminiKey { get; set; } public string? 
DeepSeekKey { get; set; } diff --git a/src/MaIN.Domain/Models/Abstract/AIModel.cs b/src/MaIN.Domain/Models/Abstract/AIModel.cs index 74f7451a..9b716321 100644 --- a/src/MaIN.Domain/Models/Abstract/AIModel.cs +++ b/src/MaIN.Domain/Models/Abstract/AIModel.cs @@ -39,6 +39,9 @@ public abstract record AIModel( public bool HasImageGeneration => this is IImageGenerationModel; } +/// A file required on disk for a local model, optionally downloadable from a URL. +public record ModelAsset(string FileName, Uri? DownloadUrl = null); + /// Base class for local models. public abstract record LocalModel( string Id, @@ -59,11 +62,14 @@ public abstract record LocalModel( /// Gets or sets the custom file system path (excluding file name eg. your\path\). If not null will be prioritized over the default base path. public string? CustomPath { get; } = CustomPath; + /// Files required on disk for this model. Defaults to just the main model file; multi-asset models (eg. diffusion models with separate VAE/text-encoder files) override this. + public virtual IEnumerable RequiredAssets => [new ModelAsset(FileName, DownloadUrl)]; + public bool IsDownloaded(string? basePath) { try { - return File.Exists(GetFullPath(basePath)); + return RequiredAssets.All(asset => File.Exists(GetAssetPath(asset, basePath))); } catch (ModelPathNullOrEmptyException) { @@ -79,11 +85,20 @@ public bool IsDownloaded(string? basePath) /// The base path to combine with the file name. If null or empty, the method uses the custom path. /// A string representing the full file path formed by combining the base path and the file name. /// Thrown if both CustomPath and basePath are null or empty. - public string GetFullPath(string? basePath = null) + public string GetFullPath(string? basePath = null) => GetAssetPath(new ModelAsset(FileName, DownloadUrl), basePath); + + /// + /// Combines the specified base path with an asset's file name to generate its full file path. + /// + /// The asset whose file path should be resolved. 
+ /// The base path to combine with the asset's file name. If null or empty, the method uses the custom path. + /// A string representing the full file path formed by combining the base path and the asset's file name. + /// Thrown if both CustomPath and basePath are null or empty. + public string GetAssetPath(ModelAsset asset, string? basePath = null) { return string.IsNullOrEmpty(CustomPath) && string.IsNullOrEmpty(basePath) ? throw new ModelPathNullOrEmptyException() - : Path.Combine((CustomPath ?? basePath)!, FileName); + : Path.Combine((CustomPath ?? basePath)!, asset.FileName); } } @@ -236,6 +251,64 @@ public record GenericLocalVisionReasoningModel( public string? AdditionalPrompt { get; } = AdditionalPrompt; } +/// Base class for local GGUF diffusion (image generation) models. +public abstract record LocalDiffusionModel( + string Id, + string FileName, + Uri? DownloadUrl, + string? Name, + DiffusionArchitecture Architecture, + int Width = 1024, + int Height = 1024, + int Steps = 20, + float CfgScale = 7.0f, + ModelAsset? Vae = null, + ModelAsset? ClipL = null, + ModelAsset? ClipG = null, + ModelAsset? T5Xxl = null, + ModelAsset? Qwen2VL = null, + uint MaxContextWindowSize = ModelDefaults.DefaultMaxContextWindow, + string? Description = null, + string? CustomPath = null +) : LocalModel(Id, FileName, DownloadUrl, Name, MaxContextWindowSize, Description, null, CustomPath), ILocalDiffusionModel +{ + public DiffusionArchitecture Architecture { get; } = Architecture; + public int Width { get; } = Width; + public int Height { get; } = Height; + public int Steps { get; } = Steps; + public float CfgScale { get; } = CfgScale; + public ModelAsset? Vae { get; } = Vae; + public ModelAsset? ClipL { get; } = ClipL; + public ModelAsset? ClipG { get; } = ClipG; + public ModelAsset? T5Xxl { get; } = T5Xxl; + public ModelAsset? Qwen2VL { get; } = Qwen2VL; + + /// The main model file plus any configured VAE/text-encoder assets. 
+ public override IEnumerable RequiredAssets => + new ModelAsset?[] { new ModelAsset(FileName, DownloadUrl), Vae, ClipL, ClipG, T5Xxl, Qwen2VL } + .OfType(); +} + +/// Generic class for runtime defined local diffusion (image generation) models. +public record GenericLocalImageGenerationModel( + string FileName, + DiffusionArchitecture Architecture, + string? Name = null, + string? Id = null, + Uri? DownloadUrl = null, + int Width = 1024, + int Height = 1024, + int Steps = 20, + float CfgScale = 7.0f, + ModelAsset? Vae = null, + ModelAsset? ClipL = null, + ModelAsset? ClipG = null, + ModelAsset? T5Xxl = null, + ModelAsset? Qwen2VL = null, + string? CustomPath = null, + string? Description = null +) : LocalDiffusionModel(Id ?? FileName, FileName, DownloadUrl, Name ?? FileName, Architecture, Width, Height, Steps, CfgScale, Vae, ClipL, ClipG, T5Xxl, Qwen2VL, ModelDefaults.DefaultMaxContextWindow, Description, CustomPath); + public static class ModelDefaults { public const uint DefaultMaxContextWindow = 128000; diff --git a/src/MaIN.Domain/Models/Abstract/IModelCapabilities.cs b/src/MaIN.Domain/Models/Abstract/IModelCapabilities.cs index b35cb5dc..49e62723 100644 --- a/src/MaIN.Domain/Models/Abstract/IModelCapabilities.cs +++ b/src/MaIN.Domain/Models/Abstract/IModelCapabilities.cs @@ -50,3 +50,51 @@ public interface ITTSModel; /// Interface for models that generate images from text prompts. /// public interface IImageGenerationModel; + +/// +/// Diffusion model architecture, used to pick sensible generation defaults and required text-encoder/VAE assets. +/// +public enum DiffusionArchitecture +{ + SD1, + SDXL, + SD3, + Flux, + QwenImage +} + +/// +/// Interface for local GGUF diffusion models that generate images in-process via stable-diffusion.cpp. +/// +public interface ILocalDiffusionModel : IImageGenerationModel +{ + /// Diffusion model architecture (affects defaults and required encoder/VAE assets). 
+ DiffusionArchitecture Architecture { get; } + + /// Default output image width in pixels. + int Width { get; } + + /// Default output image height in pixels. + int Height { get; } + + /// Default number of sampling steps. + int Steps { get; } + + /// Default classifier-free guidance scale. + float CfgScale { get; } + + /// Optional separate VAE file. Null if the VAE is embedded in the main model file. + ModelAsset? Vae { get; } + + /// Optional CLIP-L text encoder file. + ModelAsset? ClipL { get; } + + /// Optional CLIP-G text encoder file (SDXL/SD3). + ModelAsset? ClipG { get; } + + /// Optional T5-XXL text encoder file (SD3/FLUX). + ModelAsset? T5Xxl { get; } + + /// Optional Qwen2.5-VL text encoder file (Qwen-Image). + ModelAsset? Qwen2VL { get; } +} diff --git a/src/MaIN.Domain/Models/Concrete/LocalModels.cs b/src/MaIN.Domain/Models/Concrete/LocalModels.cs index cffa60f3..ca6126c0 100644 --- a/src/MaIN.Domain/Models/Concrete/LocalModels.cs +++ b/src/MaIN.Domain/Models/Concrete/LocalModels.cs @@ -296,13 +296,46 @@ public sealed record Olmo2_7b() : LocalModel( // ===== Image Generation ===== -public sealed record Flux1Shnell() : LocalModel( +public sealed record StableDiffusion1_5() : LocalDiffusionModel( + Models.Local.StableDiffusion1_5, + "sd-v1-5-pruned-emaonly-Q8_0.gguf", + new Uri("https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF/resolve/main/stable-diffusion-v1-5-pruned-emaonly-Q8_0.gguf?download=true"), + "Stable Diffusion 1.5", + DiffusionArchitecture.SD1, + Width: 512, + Height: 512, + Steps: 25, + CfgScale: 7.0f, + Description: "Small, fast local image generation model - good for CPUs and low-VRAM machines"); + +public sealed record Flux1Shnell() : LocalDiffusionModel( Models.Local.Flux1Shnell, - "FLUX.1_Shnell", - null, + "flux1-schnell-Q4_0.gguf", + new Uri("https://huggingface.co/city96/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-Q4_0.gguf?download=true"), "FLUX.1 Schnell", - 4096, - "Fast local image generation model"), 
IImageGenerationModel; + DiffusionArchitecture.Flux, + Width: 768, + Height: 768, + Steps: 4, + CfgScale: 1.0f, + Vae: new ModelAsset("ae.safetensors", new Uri("https://huggingface.co/Kijai/flux-fp8/resolve/main/flux-vae-bf16.safetensors?download=true")), + ClipL: new ModelAsset("clip_l.safetensors", new Uri("https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors?download=true")), + T5Xxl: new ModelAsset("t5xxl_fp8_e4m3fn.safetensors", new Uri("https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors?download=true")), + Description: "Medium-sized local image generation model - good balance of speed and quality"); + +public sealed record QwenImage() : LocalDiffusionModel( + Models.Local.QwenImage, + "qwen-image-Q4_0.gguf", + new Uri("https://huggingface.co/city96/Qwen-Image-gguf/resolve/main/qwen-image-Q4_0.gguf?download=true"), + "Qwen Image", + DiffusionArchitecture.QwenImage, + Width: 1024, + Height: 1024, + Steps: 20, + CfgScale: 4.0f, + Vae: new ModelAsset("qwen_image_vae.safetensors", new Uri("https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors?download=true")), + Qwen2VL: new ModelAsset("Qwen2.5-VL-7B-Instruct-Q4_K_M.gguf", new Uri("https://huggingface.co/mradermacher/Qwen2.5-VL-7B-Instruct-GGUF/resolve/main/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf?download=true")), + Description: "Largest local image generation model - 20B parameter Qwen-Image model, highest quality, requires significant RAM/VRAM"); // ===== Embedding Model ===== diff --git a/src/MaIN.Domain/Models/Models.cs b/src/MaIN.Domain/Models/Models.cs index 55dfc21e..74eb1e82 100644 --- a/src/MaIN.Domain/Models/Models.cs +++ b/src/MaIN.Domain/Models/Models.cs @@ -186,6 +186,8 @@ public static class Local public const string Kokoro82m = "kokoro-82m"; // Image Generation + public const string StableDiffusion1_5 = "stable-diffusion-1.5"; public const string Flux1Shnell = 
"FLUX.1_Shnell"; + public const string QwenImage = "qwen-image"; } } diff --git a/src/MaIN.InferPage/Components/Pages/Home.razor b/src/MaIN.InferPage/Components/Pages/Home.razor index 729acc2c..8d867f65 100644 --- a/src/MaIN.InferPage/Components/Pages/Home.razor +++ b/src/MaIN.InferPage/Components/Pages/Home.razor @@ -363,6 +363,17 @@ => new GenericLocalReasoningModel( FileName: fullPath, ReasonFunction: rm.ReasonFunction, MaxContextWindowSize: localReg.MaxContextWindowSize), + // Diffusion models can need extra asset files (VAE/CLIP/T5/etc) alongside the + // main weights — point CustomPath at the resolved directory so every asset + // (not just the main file) is found there. + ILocalDiffusionModel dm + => new GenericLocalImageGenerationModel( + FileName: System.IO.Path.GetFileName(fullPath), + Architecture: dm.Architecture, + CustomPath: System.IO.Path.GetDirectoryName(fullPath), + Width: dm.Width, Height: dm.Height, + Steps: dm.Steps, CfgScale: dm.CfgScale, + Vae: dm.Vae, ClipL: dm.ClipL, ClipG: dm.ClipG, T5Xxl: dm.T5Xxl, Qwen2VL: dm.Qwen2VL), _ => new GenericLocalModel( FileName: fullPath, MaxContextWindowSize: localReg.MaxContextWindowSize) diff --git a/src/MaIN.InferPage/Components/Pages/Settings.razor b/src/MaIN.InferPage/Components/Pages/Settings.razor index c7c832f7..df4c3de4 100644 --- a/src/MaIN.InferPage/Components/Pages/Settings.razor +++ b/src/MaIN.InferPage/Components/Pages/Settings.razor @@ -264,10 +264,9 @@ [Parameter] public EventCallback OnSettingsApplied { get; set; } [Parameter] public EventCallback OnClose { get; set; } - // Chat-appropriate local models (excludes embedding, TTS, image-gen, ONNX-only models) + // Chat-appropriate local models (excludes embedding, TTS, ONNX-only models; includes image-gen diffusion models) private static readonly LocalModel[] _chatLocalModels = ModelRegistry.GetAllLocal() .Where(m => m.DownloadUrl != null && - m is not IImageGenerationModel && !m.FileName.EndsWith(".onnx", StringComparison.OrdinalIgnoreCase) && 
!m.Name.Contains("Embed", StringComparison.OrdinalIgnoreCase)) .OrderBy(m => m.Name) @@ -570,6 +569,7 @@ var caps = new List(); if (m is IVisionModel) caps.Add("Vision"); if (m is IReasoningModel) caps.Add("Reasoning"); + if (m is IImageGenerationModel) caps.Add("Image Gen"); return caps.Count > 0 ? $"{m.Name} [{string.Join(", ", caps)}]" : m.Name; } diff --git a/src/MaIN.Services/Bootstrapper.cs b/src/MaIN.Services/Bootstrapper.cs index 356fe664..6f878f68 100644 --- a/src/MaIN.Services/Bootstrapper.cs +++ b/src/MaIN.Services/Bootstrapper.cs @@ -46,7 +46,7 @@ public static IServiceCollection ConfigureMaIN( // Register all concrete implementations as transient serviceCollection.AddTransient(); serviceCollection.AddTransient(); - serviceCollection.AddTransient(); + serviceCollection.AddTransient(); serviceCollection.AddTransient(); serviceCollection.AddTransient(); @@ -148,10 +148,6 @@ private static IServiceCollection AddCommandHandlers(this IServiceCollection ser private static IServiceCollection AddHttpClients(this IServiceCollection services) { - services.AddHttpClient(ServiceConstants.HttpClients.ImageGenClient, client => - { - client.Timeout = TimeSpan.FromMinutes(5); - }); services.AddHttpClient(ServiceConstants.HttpClients.OpenAiClient); services.AddHttpClient(ServiceConstants.HttpClients.GeminiClient); services.AddHttpClient(ServiceConstants.HttpClients.DeepSeekClient); diff --git a/src/MaIN.Services/Constants/ServiceConstants.cs b/src/MaIN.Services/Constants/ServiceConstants.cs index 36077bbf..3c09538e 100644 --- a/src/MaIN.Services/Constants/ServiceConstants.cs +++ b/src/MaIN.Services/Constants/ServiceConstants.cs @@ -4,7 +4,6 @@ public static class ServiceConstants { public static class HttpClients { - public const string ImageGenClient = "ImageGenClient"; public const string OpenAiClient = "OpenAiClient"; public const string GeminiClient = "GeminiClient"; public const string DeepSeekClient = "DeepSeekClient"; @@ -75,7 +74,6 @@ public static class 
Properties public static class Defaults { public const string ImageSize = "1024x1024"; - public const int HttpImageModelTimeoutInMinutes = 5; } public static class Notifications diff --git a/src/MaIN.Services/MaIN.Services.csproj b/src/MaIN.Services/MaIN.Services.csproj index f1f3c9c2..760c8737 100644 --- a/src/MaIN.Services/MaIN.Services.csproj +++ b/src/MaIN.Services/MaIN.Services.csproj @@ -18,6 +18,10 @@ + + + + diff --git a/src/MaIN.Services/Services/ImageGenServices/ImageGenService.cs b/src/MaIN.Services/Services/ImageGenServices/ImageGenService.cs deleted file mode 100644 index 6143de9d..00000000 --- a/src/MaIN.Services/Services/ImageGenServices/ImageGenService.cs +++ /dev/null @@ -1,56 +0,0 @@ -using MaIN.Domain.Configuration; -using MaIN.Domain.Entities; -using MaIN.Services.Constants; -using MaIN.Services.Services.Abstract; -using MaIN.Services.Services.Models; -using ModelIds = MaIN.Domain.Models.Models; - -namespace MaIN.Services.Services.ImageGenServices; - -public class ImageGenService( - IHttpClientFactory httpClientFactory, - MaINSettings settings) - : IImageGenService -{ - private readonly IHttpClientFactory _httpClientFactory = httpClientFactory ?? throw new ArgumentNullException(nameof(httpClientFactory)); - private readonly MaINSettings _settings = settings ?? 
using HPPH.SkiaSharp;
using MaIN.Domain.Configuration;
using MaIN.Domain.Entities;
using MaIN.Domain.Exceptions.Models;
using MaIN.Domain.Models.Abstract;
using MaIN.Services.Constants;
using MaIN.Services.Services.Abstract;
using MaIN.Services.Services.ImageGenServices.Utils;
using MaIN.Services.Services.Models;
using StableDiffusion.NET;

namespace MaIN.Services.Services.ImageGenServices;

/// <summary>
/// <see cref="IImageGenService"/> implementation that generates images in-process with
/// StableDiffusion.NET, using a model obtained from <see cref="DiffusionModelLoader"/>.
/// </summary>
public class LocalImageGenService(MaINSettings settings) : IImageGenService
{
    private readonly MaINSettings _settings = settings ?? throw new ArgumentNullException(nameof(settings));

    /// <summary>
    /// Generates one image from the chat's messages with the diffusion model registered
    /// under <c>chat.ModelId</c> and returns it as a PNG-encoded assistant message.
    /// </summary>
    /// <param name="chat">Chat whose messages form the prompt and whose model id selects the model.</param>
    /// <returns>A completed task holding the result that carries the PNG bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="chat"/> is null.</exception>
    /// <exception cref="InvalidModelTypeException">
    /// The registered model is not a <see cref="LocalModel"/> that implements <see cref="ILocalDiffusionModel"/>.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// The chat has no messages, or the generator returned no image.
    /// </exception>
    /// <remarks>
    /// Generation runs synchronously on the calling thread; the returned task is already completed.
    /// </remarks>
    public Task<ChatResult?> Send(Chat chat)
    {
        ArgumentNullException.ThrowIfNull(chat);

        var model = ModelRegistry.GetById(chat.ModelId);

        if (model is not LocalModel localModel || model is not ILocalDiffusionModel capabilities)
        {
            throw new InvalidModelTypeException(nameof(ILocalDiffusionModel));
        }

        // Build (and thereby validate) the prompt first, so a chat without messages
        // fails before the model is loaded rather than after.
        var prompt = BuildPrompt(chat.Messages);

        var diffusionModel = DiffusionModelLoader.GetOrLoad(localModel, capabilities, _settings.ModelsPath);

        var parameters = ImageGenerationParameter
            .TextToImage(prompt)
            .WithSize(capabilities.Width, capabilities.Height)
            .WithCfg(capabilities.CfgScale)
            .WithSteps(capabilities.Steps);

        var image = diffusionModel.GenerateImage(parameters)
            ?? throw new InvalidOperationException("Image generation failed.");

        return Task.FromResult<ChatResult?>(CreateChatResult(image.ToPng(), chat.ModelId));
    }

    /// <summary>
    /// Joins the message contents into a single prompt: the first message verbatim, every
    /// later one prefixed with <c>"&amp;&amp; "</c>, separated by single spaces.
    /// </summary>
    /// <exception cref="InvalidOperationException"><paramref name="messages"/> is empty.</exception>
    private static string BuildPrompt(ICollection<Message> messages)
    {
        ArgumentNullException.ThrowIfNull(messages);

        if (messages.Count == 0)
        {
            // Same exception type a seedless Aggregate would raise here, with an actionable message.
            throw new InvalidOperationException(
                "Cannot build an image prompt: the chat contains no messages.");
        }

        return string.Join(
            ' ',
            messages.Select((msg, index) => index == 0 ? msg.Content : $"&& {msg.Content}"));
    }

    /// <summary>
    /// Wraps the encoded image bytes in a finished assistant <see cref="ChatResult"/>
    /// attributed to <paramref name="modelId"/>.
    /// </summary>
    private static ChatResult CreateChatResult(byte[] imageBytes, string modelId)
    {
        return new ChatResult
        {
            Done = true,
            Message = new Message
            {
                Content = ServiceConstants.Messages.GeneratedImageContent,
                Role = ServiceConstants.Roles.Assistant,
                Image = imageBytes,
                Type = MessageType.Image
            },
            Model = modelId,
            CreatedAt = DateTime.UtcNow
        };
    }
}
using System.Collections.Concurrent;
using MaIN.Domain.Models.Abstract;
using StableDiffusion.NET;

namespace MaIN.Services.Services.ImageGenServices.Utils;

/// <summary>
/// Static cache of loaded <see cref="DiffusionModel"/> instances, keyed by the full path of
/// the model file, so each model is constructed once and reused by later requests.
/// </summary>
public static class DiffusionModelLoader
{
    private const int MaxRecentLogLines = 50;

    private static readonly ConcurrentDictionary<string, DiffusionModel> ModelCache = new();
    private static readonly ConcurrentQueue<string> RecentLogLines = new();
    private static readonly object LoadLock = new();
    private static readonly object EventsInitLock = new();

    // volatile: the flag is read outside EventsInitLock on the fast path.
    private static volatile bool _eventsInitialized;

    /// <summary>
    /// Returns the cached model for <paramref name="model"/>, loading it on first use.
    /// </summary>
    /// <param name="model">Local model whose full path identifies the cache entry.</param>
    /// <param name="capabilities">Architecture and optional companion assets (VAE, CLIP, T5, ...).</param>
    /// <param name="basePath">Base directory passed through to the model's path resolution; may be null.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="model"/> or <paramref name="capabilities"/> is null.
    /// </exception>
    /// <exception cref="InvalidOperationException">
    /// Constructing the model failed; the message includes the captured native log lines, if any.
    /// </exception>
    public static DiffusionModel GetOrLoad(LocalModel model, ILocalDiffusionModel capabilities, string? basePath)
    {
        ArgumentNullException.ThrowIfNull(model);
        ArgumentNullException.ThrowIfNull(capabilities);

        var key = model.GetFullPath(basePath);

        if (ModelCache.TryGetValue(key, out var cached))
        {
            return cached;
        }

        // ConcurrentDictionary.GetOrAdd may run its value factory on several threads at once
        // and discard all but one result. For a model that would mean loading the weights
        // more than once and never disposing the discarded instances, so the
        // check/load/publish sequence is done under a lock instead.
        lock (LoadLock)
        {
            if (ModelCache.TryGetValue(key, out cached))
            {
                return cached;
            }

            var loaded = Load(model, capabilities, basePath, key);
            ModelCache[key] = loaded;
            return loaded;
        }
    }

    /// <summary>
    /// Removes the model cached under <paramref name="fullPath"/>, if present, and disposes it.
    /// </summary>
    public static void RemoveModel(string fullPath)
    {
        if (ModelCache.TryRemove(fullPath, out var model))
        {
            model.Dispose();
        }
    }

    /// <summary>
    /// Builds the load parameters for <paramref name="model"/> and constructs the model.
    /// Called only while <see cref="LoadLock"/> is held.
    /// </summary>
    private static DiffusionModel Load(
        LocalModel model,
        ILocalDiffusionModel capabilities,
        string? basePath,
        string modelPath)
    {
        EnsureLogCaptureInitialized();

        // Start from an empty buffer so a failure report contains lines from this load only.
        RecentLogLines.Clear();

        var parameters = DiffusionModelParameter.Create()
            .WithMultithreading()
            .WithFlashAttention();

        // SD1/SDXL GGUFs are full checkpoints (unet+vae+clip in one file); FLUX/SD3/Qwen-Image
        // GGUFs only contain the diffusion transformer and need separate vae/clip/t5xxl/etc.
        parameters = capabilities.Architecture switch
        {
            DiffusionArchitecture.SD1 or DiffusionArchitecture.SDXL
                => parameters.WithModelPath(modelPath),
            _ => parameters.WithDiffusionModelPath(modelPath)
        };

        if (capabilities.Vae is { } vae)
        {
            parameters = parameters.WithVae(model.GetAssetPath(vae, basePath));
        }

        if (capabilities.ClipL is { } clipL)
        {
            parameters = parameters.WithClipLPath(model.GetAssetPath(clipL, basePath));
        }

        if (capabilities.ClipG is { } clipG)
        {
            parameters = parameters.WithClipGPath(model.GetAssetPath(clipG, basePath));
        }

        if (capabilities.T5Xxl is { } t5Xxl)
        {
            parameters = parameters.WithT5xxlPath(model.GetAssetPath(t5Xxl, basePath));
        }

        if (capabilities.Qwen2VL is { } qwen2VL)
        {
            parameters = parameters.WithLLMVisionPath(model.GetAssetPath(qwen2VL, basePath));
        }

        try
        {
            return new DiffusionModel(parameters);
        }
        catch (Exception ex)
        {
            var log = string.Join(Environment.NewLine, RecentLogLines);
            var details = string.IsNullOrWhiteSpace(log)
                ? string.Empty
                : $"{Environment.NewLine}Native log:{Environment.NewLine}{log}";

            throw new InvalidOperationException(
                $"Failed to load diffusion model '{modelPath}'. {ex.Message}{details}", ex);
        }
    }

    /// <summary>
    /// Wires up stable-diffusion.cpp's native log callback so errors/warnings raised while
    /// loading or initializing a model (eg. out-of-memory, missing tensors) are captured and
    /// can be surfaced in thrown exceptions instead of the generic native error.
    /// The subscription is made once; the buffer keeps at most
    /// <see cref="MaxRecentLogLines"/> of the most recent lines.
    /// </summary>
    private static void EnsureLogCaptureInitialized()
    {
        if (_eventsInitialized)
        {
            return;
        }

        lock (EventsInitLock)
        {
            if (_eventsInitialized)
            {
                return;
            }

            StableDiffusionCpp.InitializeEvents();
            StableDiffusionCpp.Log += (_, e) =>
            {
                RecentLogLines.Enqueue($"[{e.Level}] {e.Text}");

                // Drop the oldest lines once the buffer exceeds its cap.
                while (RecentLogLines.Count > MaxRecentLogLines && RecentLogLines.TryDequeue(out _))
                {
                }
            };

            _eventsInitialized = true;
        }
    }
}