diff --git a/docs/backend.md b/docs/backend.md index 248133bc1..2e3122369 100644 --- a/docs/backend.md +++ b/docs/backend.md @@ -124,16 +124,16 @@ Runtime and parameter assignments also share the same backend cache. If `--backe ## Compatibility flags -The older CPU placement flags are still supported: +The example CLI/server still accepts these older CPU placement flags as compatibility aliases: - `--clip-on-cpu` - `--vae-on-cpu` - `--control-net-cpu` - `--offload-to-cpu` -`--clip-on-cpu`, `--vae-on-cpu`, and `--control-net-cpu` affect runtime backend assignment only when `--backend` is not set. They map to `te=cpu`, `vae=cpu`, and `controlnet=cpu`. +`--clip-on-cpu`, `--vae-on-cpu`, and `--control-net-cpu` are deprecated. The example argument layer prepends `te=cpu`, `vae=cpu`, and `controlnet=cpu` to `--backend` before creating the context. -`--offload-to-cpu` prepends a CPU default to the parameter assignment before parsing: +`--offload-to-cpu` prepends a CPU default to the parameter assignment in the caller before creating the context: ```shell --params-backend '*=cpu' @@ -141,4 +141,4 @@ The older CPU placement flags are still supported: Because this default is inserted first, later explicit `--params-backend` entries can still override it, for example `--offload-to-cpu --params-backend te=disk` keeps non-TE parameters on CPU and reloads TE parameters from disk. -Explicit `--backend` and `--params-backend` assignments are preferred for new commands. +Library callers should set `backend` and `params_backend` directly. The old CPU/offload fields are no longer part of the C API. Explicit `--backend` and `--params-backend` assignments are preferred for new commands. 
diff --git a/docs/performance.md b/docs/performance.md index 2f526057f..ed86a4f7c 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -31,7 +31,7 @@ Use CPU params to reduce VRAM usage: --backend cuda0 --params-backend cpu ``` -This keeps model weights in system RAM and moves them to the runtime backend when needed. `--offload-to-cpu` is a compatibility shortcut that prepends `*=cpu` to `--params-backend`, so explicit module assignments can still override it: +This keeps model weights in system RAM and moves them to the runtime backend when needed. In the example CLI/server, `--offload-to-cpu` is a compatibility shortcut that prepends `*=cpu` to `--params-backend` before creating the context, so explicit module assignments can still override it: ```shell --offload-to-cpu --params-backend te=disk diff --git a/examples/cli/README.md b/examples/cli/README.md index 1b7c2731c..3df91eebf 100644 --- a/examples/cli/README.md +++ b/examples/cli/README.md @@ -63,9 +63,9 @@ Context Options: --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed --mmap whether to memory-map model - --control-net-cpu keep controlnet in cpu (for low vram) - --clip-on-cpu keep clip in cpu (for low vram) - --vae-on-cpu keep vae in cpu (for low vram) + --control-net-cpu deprecated; use --backend controlnet=cpu + --clip-on-cpu deprecated; use --backend te=cpu + --vae-on-cpu deprecated; use --backend vae=cpu --fa use flash attention --diffusion-fa use flash attention in the diffusion model only --diffusion-conv-direct use ggml_conv2d_direct in the diffusion model diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index e2854158d..bb5d6862c 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -782,12 +782,11 @@ int main(int argc, const char* argv[]) { int upscale_factor = 4; // unused for RealESRGAN_x4plus_anime_6B.pth if (ctx_params.esrgan_path.size() > 0 && gen_params.upscale_repeats > 0) { UpscalerCtxPtr 
upscaler_ctx(new_upscaler_ctx(ctx_params.esrgan_path.c_str(), - ctx_params.offload_params_to_cpu, ctx_params.diffusion_conv_direct, ctx_params.n_threads, gen_params.upscale_tile_size, - ctx_params.backend.c_str(), - ctx_params.params_backend.c_str())); + sd_ctx_params.backend, + sd_ctx_params.params_backend)); if (upscaler_ctx == nullptr) { LOG_ERROR("new_upscaler_ctx failed"); diff --git a/examples/common/common.cpp b/examples/common/common.cpp index cb19331ea..f0742f62f 100644 --- a/examples/common/common.cpp +++ b/examples/common/common.cpp @@ -51,6 +51,10 @@ static sd_vae_format_t str_to_vae_format(const std::string& value) { return SD_VAE_FORMAT_COUNT; } +static void prepend_backend_assignment(std::string& spec, const char* assignment) { + spec = spec.empty() ? assignment : std::string(assignment) + "," + spec; +} + #if defined(_WIN32) static std::string utf16_to_utf8(const std::wstring& wstr) { if (wstr.empty()) @@ -463,15 +467,15 @@ ArgOptions SDContextParams::get_options() { true, &enable_mmap}, {"", "--control-net-cpu", - "keep controlnet in cpu (for low vram)", + "deprecated; use --backend controlnet=cpu", true, &control_net_cpu}, {"", "--clip-on-cpu", - "keep clip in cpu (for low vram)", + "deprecated; use --backend te=cpu", true, &clip_on_cpu}, {"", "--vae-on-cpu", - "keep vae in cpu (for low vram)", + "deprecated; use --backend vae=cpu", true, &vae_on_cpu}, {"", "--fa", @@ -688,6 +692,25 @@ bool SDContextParams::resolve_and_validate(SDMode mode) { return true; } +void SDContextParams::prepare_backend_assignments() { + effective_backend = backend; + effective_params_backend = params_backend; + + if (offload_params_to_cpu) { + prepend_backend_assignment(effective_params_backend, "*=cpu"); + } + + if (clip_on_cpu) { + prepend_backend_assignment(effective_backend, "te=cpu"); + } + if (vae_on_cpu) { + prepend_backend_assignment(effective_backend, "vae=cpu"); + } + if (control_net_cpu) { + prepend_backend_assignment(effective_backend, "controlnet=cpu"); + } 
+} + std::string SDContextParams::to_string() const { std::ostringstream emb_ss; emb_ss << "{\n"; @@ -758,6 +781,7 @@ std::string SDContextParams::to_string() const { } sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool taesd_preview) { + prepare_backend_assignments(); embedding_vec.clear(); embedding_vec.reserve(embedding_map.size()); for (const auto& kv : embedding_map) { @@ -767,55 +791,51 @@ sd_ctx_params_t SDContextParams::to_sd_ctx_params_t(bool taesd_preview) { embedding_vec.emplace_back(item); } - sd_ctx_params_t sd_ctx_params = { - model_path.c_str(), - clip_l_path.c_str(), - clip_g_path.c_str(), - clip_vision_path.c_str(), - t5xxl_path.c_str(), - llm_path.c_str(), - llm_vision_path.c_str(), - diffusion_model_path.c_str(), - high_noise_diffusion_model_path.c_str(), - uncond_diffusion_model_path.c_str(), - embeddings_connectors_path.c_str(), - vae_path.c_str(), - audio_vae_path.c_str(), - taesd_path.c_str(), - control_net_path.c_str(), - embedding_vec.data(), - static_cast<uint32_t>(embedding_vec.size()), - photo_maker_path.c_str(), - tensor_type_rules.c_str(), - n_threads, - wtype, - rng_type, - sampler_rng_type, - prediction, - lora_apply_mode, - offload_params_to_cpu, - enable_mmap, - clip_on_cpu, - control_net_cpu, - vae_on_cpu, - flash_attn, - diffusion_flash_attn, - taesd_preview, - diffusion_conv_direct, - vae_conv_direct, - circular || circular_x, - circular || circular_y, - force_sdxl_vae_conv_scale, - chroma_use_dit_mask, - chroma_use_t5_mask, - chroma_t5_mask_pad, - qwen_image_zero_cond_t, - str_to_vae_format(vae_format), - max_vram, - stream_layers, - backend.c_str(), - params_backend.c_str(), - }; + sd_ctx_params_t sd_ctx_params; + sd_ctx_params_init(&sd_ctx_params); + sd_ctx_params.model_path = model_path.c_str(); + sd_ctx_params.clip_l_path = clip_l_path.c_str(); + sd_ctx_params.clip_g_path = clip_g_path.c_str(); + sd_ctx_params.clip_vision_path = clip_vision_path.c_str(); + sd_ctx_params.t5xxl_path = t5xxl_path.c_str(); + sd_ctx_params.llm_path 
= llm_path.c_str(); + sd_ctx_params.llm_vision_path = llm_vision_path.c_str(); + sd_ctx_params.diffusion_model_path = diffusion_model_path.c_str(); + sd_ctx_params.high_noise_diffusion_model_path = high_noise_diffusion_model_path.c_str(); + sd_ctx_params.uncond_diffusion_model_path = uncond_diffusion_model_path.c_str(); + sd_ctx_params.embeddings_connectors_path = embeddings_connectors_path.c_str(); + sd_ctx_params.vae_path = vae_path.c_str(); + sd_ctx_params.audio_vae_path = audio_vae_path.c_str(); + sd_ctx_params.taesd_path = taesd_path.c_str(); + sd_ctx_params.control_net_path = control_net_path.c_str(); + sd_ctx_params.embeddings = embedding_vec.data(); + sd_ctx_params.embedding_count = static_cast<uint32_t>(embedding_vec.size()); + sd_ctx_params.photo_maker_path = photo_maker_path.c_str(); + sd_ctx_params.tensor_type_rules = tensor_type_rules.c_str(); + sd_ctx_params.n_threads = n_threads; + sd_ctx_params.wtype = wtype; + sd_ctx_params.rng_type = rng_type; + sd_ctx_params.sampler_rng_type = sampler_rng_type; + sd_ctx_params.prediction = prediction; + sd_ctx_params.lora_apply_mode = lora_apply_mode; + sd_ctx_params.enable_mmap = enable_mmap; + sd_ctx_params.flash_attn = flash_attn; + sd_ctx_params.diffusion_flash_attn = diffusion_flash_attn; + sd_ctx_params.tae_preview_only = taesd_preview; + sd_ctx_params.diffusion_conv_direct = diffusion_conv_direct; + sd_ctx_params.vae_conv_direct = vae_conv_direct; + sd_ctx_params.circular_x = circular || circular_x; + sd_ctx_params.circular_y = circular || circular_y; + sd_ctx_params.force_sdxl_vae_conv_scale = force_sdxl_vae_conv_scale; + sd_ctx_params.chroma_use_dit_mask = chroma_use_dit_mask; + sd_ctx_params.chroma_use_t5_mask = chroma_use_t5_mask; + sd_ctx_params.chroma_t5_mask_pad = chroma_t5_mask_pad; + sd_ctx_params.qwen_image_zero_cond_t = qwen_image_zero_cond_t; + sd_ctx_params.vae_format = str_to_vae_format(vae_format); + sd_ctx_params.max_vram = max_vram; + sd_ctx_params.stream_layers = stream_layers; + 
sd_ctx_params.backend = effective_backend.c_str(); + sd_ctx_params.params_backend = effective_params_backend.c_str(); return sd_ctx_params; } diff --git a/examples/common/common.h b/examples/common/common.h index 8f97ac95b..2ae54c2c7 100644 --- a/examples/common/common.h +++ b/examples/common/common.h @@ -148,6 +148,8 @@ struct SDContextParams { bool stream_layers = false; std::string backend; std::string params_backend; + std::string effective_backend; + std::string effective_params_backend; bool enable_mmap = false; bool control_net_cpu = false; bool clip_on_cpu = false; @@ -175,6 +177,7 @@ struct SDContextParams { float flow_shift = INFINITY; ArgOptions get_options(); void build_embedding_map(); + void prepare_backend_assignments(); bool resolve(SDMode mode); bool validate(SDMode mode); bool resolve_and_validate(SDMode mode); diff --git a/examples/server/README.md b/examples/server/README.md index 16fb393c6..63e38977a 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -165,9 +165,9 @@ Context Options: --offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed --mmap whether to memory-map model - --control-net-cpu keep controlnet in cpu (for low vram) - --clip-on-cpu keep clip in cpu (for low vram) - --vae-on-cpu keep vae in cpu (for low vram) + --control-net-cpu deprecated; use --backend controlnet=cpu + --clip-on-cpu deprecated; use --backend te=cpu + --vae-on-cpu deprecated; use --backend vae=cpu --fa use flash attention --diffusion-fa use flash attention in the diffusion model only --diffusion-conv-direct use ggml_conv2d_direct in the diffusion model diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h index 02e5b6175..ffefdaadf 100644 --- a/include/stable-diffusion.h +++ b/include/stable-diffusion.h @@ -202,11 +202,7 @@ typedef struct { enum rng_type_t sampler_rng_type; enum prediction_t prediction; enum lora_apply_mode_t lora_apply_mode; - bool offload_params_to_cpu; bool 
enable_mmap; - bool keep_clip_on_cpu; - bool keep_control_net_on_cpu; - bool keep_vae_on_cpu; bool flash_attn; bool diffusion_flash_attn; bool tae_preview_only; @@ -458,7 +454,6 @@ SD_API bool generate_video(sd_ctx_t* sd_ctx, typedef struct upscaler_ctx_t upscaler_ctx_t; SD_API upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path, - bool offload_params_to_cpu, bool direct, int n_threads, int tile_size, diff --git a/src/core/ggml_extend_backend.cpp b/src/core/ggml_extend_backend.cpp index 500e04e27..834a047e7 100644 --- a/src/core/ggml_extend_backend.cpp +++ b/src/core/ggml_extend_backend.cpp @@ -545,9 +545,6 @@ bool SDBackendManager::runtime_backend_supports_host_buffer(SDBackendModule modu bool SDBackendManager::init(const char* backend_spec, const char* params_backend_spec, - bool keep_clip_on_cpu, - bool keep_vae_on_cpu, - bool keep_control_net_on_cpu, std::string* error) { reset(); @@ -558,18 +555,6 @@ bool SDBackendManager::init(const char* backend_spec, return false; } - if (runtime_assignment_.empty()) { - if (keep_clip_on_cpu) { - runtime_assignment_.set_module(SDBackendModule::TE, "cpu"); - } - if (keep_vae_on_cpu) { - runtime_assignment_.set_module(SDBackendModule::VAE, "cpu"); - } - if (keep_control_net_on_cpu) { - runtime_assignment_.set_module(SDBackendModule::CONTROL_NET, "cpu"); - } - } - return validate(error); } diff --git a/src/core/ggml_extend_backend.h b/src/core/ggml_extend_backend.h index a604984f3..58d41ac44 100644 --- a/src/core/ggml_extend_backend.h +++ b/src/core/ggml_extend_backend.h @@ -51,9 +51,6 @@ class SDBackendManager { bool init(const char* backend_spec, const char* params_backend_spec, - bool keep_clip_on_cpu, - bool keep_vae_on_cpu, - bool keep_control_net_on_cpu, std::string* error); void reset(); diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp index cf44014bf..a5fb0e54d 100644 --- a/src/stable-diffusion.cpp +++ b/src/stable-diffusion.cpp @@ -187,7 +187,6 @@ class StableDiffusionGGML { std::string 
taesd_path; sd_tiling_params_t vae_tiling_params = {false, false, 0, 0, 0.5f, 0, 0, nullptr}; - bool offload_params_to_cpu = false; bool enable_mmap = false; float max_vram = 0.f; bool stream_layers = false; @@ -250,13 +249,10 @@ class StableDiffusionGGML { params_mem_size); } - bool init_backend(const sd_ctx_params_t* sd_ctx_params) { + bool init_backend() { std::string error; - if (!backend_manager.init(sd_ctx_params->backend, + if (!backend_manager.init(backend_spec.c_str(), params_backend_spec.c_str(), - sd_ctx_params->keep_clip_on_cpu, - sd_ctx_params->keep_vae_on_cpu, - sd_ctx_params->keep_control_net_on_cpu, &error)) { LOG_ERROR("backend config failed: %s", error.c_str()); return false; @@ -316,16 +312,12 @@ class StableDiffusionGGML { } bool init(const sd_ctx_params_t* sd_ctx_params) { - n_threads = sd_ctx_params->n_threads; - offload_params_to_cpu = sd_ctx_params->offload_params_to_cpu; - enable_mmap = sd_ctx_params->enable_mmap; - max_vram = sd_ctx_params->max_vram; - stream_layers = sd_ctx_params->stream_layers; - backend_spec = SAFE_STR(sd_ctx_params->backend); - params_backend_spec = SAFE_STR(sd_ctx_params->params_backend); - if (offload_params_to_cpu) { - params_backend_spec = params_backend_spec.empty() ? 
"*=cpu" : "*=cpu," + params_backend_spec; - } + n_threads = sd_ctx_params->n_threads; + enable_mmap = sd_ctx_params->enable_mmap; + max_vram = sd_ctx_params->max_vram; + stream_layers = sd_ctx_params->stream_layers; + backend_spec = SAFE_STR(sd_ctx_params->backend); + params_backend_spec = SAFE_STR(sd_ctx_params->params_backend); if (stream_layers && max_vram == 0.f) { LOG_WARN("--stream-layers has no effect without --max-vram set; ignoring"); stream_layers = false; @@ -344,7 +336,7 @@ class StableDiffusionGGML { ggml_log_set(ggml_log_callback_default, nullptr); - if (!init_backend(sd_ctx_params)) { + if (!init_backend()) { return false; } if (stream_layers && !backend_manager.params_backend_is_cpu(SDBackendModule::DIFFUSION)) { @@ -534,8 +526,8 @@ class StableDiffusionGGML { } } // Avoid full-model LoRA merge buffers on constrained setups. - const bool streaming_constrained = stream_layers || - sd_ctx_params->offload_params_to_cpu; + const bool params_offloaded = params_backend_for(SDBackendModule::DIFFUSION) != backend_for(SDBackendModule::DIFFUSION); + const bool streaming_constrained = stream_layers || params_offloaded; if (have_quantized_weight || streaming_constrained) { apply_lora_immediately = false; } else { @@ -2615,29 +2607,25 @@ void sd_hires_params_init(sd_hires_params_t* hires_params) { } void sd_ctx_params_init(sd_ctx_params_t* sd_ctx_params) { - *sd_ctx_params = {}; - sd_ctx_params->n_threads = sd_get_num_physical_cores(); - sd_ctx_params->wtype = SD_TYPE_COUNT; - sd_ctx_params->rng_type = CUDA_RNG; - sd_ctx_params->sampler_rng_type = RNG_TYPE_COUNT; - sd_ctx_params->prediction = PREDICTION_COUNT; - sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO; - sd_ctx_params->offload_params_to_cpu = false; - sd_ctx_params->max_vram = 0.f; - sd_ctx_params->stream_layers = false; - sd_ctx_params->enable_mmap = false; - sd_ctx_params->keep_clip_on_cpu = false; - sd_ctx_params->keep_control_net_on_cpu = false; - sd_ctx_params->keep_vae_on_cpu = false; - 
sd_ctx_params->diffusion_flash_attn = false; - sd_ctx_params->circular_x = false; - sd_ctx_params->circular_y = false; - sd_ctx_params->chroma_use_dit_mask = true; - sd_ctx_params->chroma_use_t5_mask = false; - sd_ctx_params->chroma_t5_mask_pad = 1; - sd_ctx_params->vae_format = SD_VAE_FORMAT_AUTO; - sd_ctx_params->backend = nullptr; - sd_ctx_params->params_backend = nullptr; + *sd_ctx_params = {}; + sd_ctx_params->n_threads = sd_get_num_physical_cores(); + sd_ctx_params->wtype = SD_TYPE_COUNT; + sd_ctx_params->rng_type = CUDA_RNG; + sd_ctx_params->sampler_rng_type = RNG_TYPE_COUNT; + sd_ctx_params->prediction = PREDICTION_COUNT; + sd_ctx_params->lora_apply_mode = LORA_APPLY_AUTO; + sd_ctx_params->max_vram = 0.f; + sd_ctx_params->stream_layers = false; + sd_ctx_params->enable_mmap = false; + sd_ctx_params->diffusion_flash_attn = false; + sd_ctx_params->circular_x = false; + sd_ctx_params->circular_y = false; + sd_ctx_params->chroma_use_dit_mask = true; + sd_ctx_params->chroma_use_t5_mask = false; + sd_ctx_params->chroma_t5_mask_pad = 1; + sd_ctx_params->vae_format = SD_VAE_FORMAT_AUTO; + sd_ctx_params->backend = nullptr; + sd_ctx_params->params_backend = nullptr; } char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { @@ -2669,14 +2657,10 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { "rng_type: %s\n" "sampler_rng_type: %s\n" "prediction: %s\n" - "offload_params_to_cpu: %s\n" "max_vram: %.3f\n" "stream_layers: %s\n" "backend: %s\n" "params_backend: %s\n" - "keep_clip_on_cpu: %s\n" - "keep_control_net_on_cpu: %s\n" - "keep_vae_on_cpu: %s\n" "flash_attn: %s\n" "diffusion_flash_attn: %s\n" "circular_x: %s\n" @@ -2707,14 +2691,10 @@ char* sd_ctx_params_to_str(const sd_ctx_params_t* sd_ctx_params) { sd_rng_type_name(sd_ctx_params->rng_type), sd_rng_type_name(sd_ctx_params->sampler_rng_type), sd_prediction_name(sd_ctx_params->prediction), - BOOL_STR(sd_ctx_params->offload_params_to_cpu), sd_ctx_params->max_vram, 
BOOL_STR(sd_ctx_params->stream_layers), SAFE_STR(sd_ctx_params->backend), SAFE_STR(sd_ctx_params->params_backend), - BOOL_STR(sd_ctx_params->keep_clip_on_cpu), - BOOL_STR(sd_ctx_params->keep_control_net_on_cpu), - BOOL_STR(sd_ctx_params->keep_vae_on_cpu), BOOL_STR(sd_ctx_params->flash_attn), BOOL_STR(sd_ctx_params->diffusion_flash_attn), BOOL_STR(sd_ctx_params->circular_x), @@ -4436,7 +4416,6 @@ SD_API sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* s const size_t max_graph_vram_bytes = sd::ggml_graph_cut::max_vram_gib_to_bytes(sd_ctx->sd->max_vram); hires_upscaler->set_max_graph_vram_bytes(max_graph_vram_bytes); if (!hires_upscaler->load_from_file(request.hires.model_path, - sd_ctx->sd->offload_params_to_cpu, sd_ctx->sd->n_threads)) { LOG_ERROR("load hires model upscaler failed"); return nullptr; diff --git a/src/upscaler.cpp b/src/upscaler.cpp index be1bb2f50..d02366ecb 100644 --- a/src/upscaler.cpp +++ b/src/upscaler.cpp @@ -39,20 +39,12 @@ void UpscalerGGML::set_stream_layers_enabled(bool enabled) { } bool UpscalerGGML::load_from_file(const std::string& esrgan_path, - bool offload_params_to_cpu, int n_threads) { ggml_log_set(ggml_log_callback_default, nullptr); - std::string effective_params_backend_spec = params_backend_spec; - if (offload_params_to_cpu) { - effective_params_backend_spec = effective_params_backend_spec.empty() ? 
"*=cpu" : "*=cpu," + effective_params_backend_spec; - } std::string error; if (!backend_manager.init(backend_spec.c_str(), - effective_params_backend_spec.c_str(), - false, - false, - false, + params_backend_spec.c_str(), &error)) { LOG_ERROR("upscaler backend config failed: %s", error.c_str()); return false; @@ -181,7 +173,6 @@ struct upscaler_ctx_t { }; upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, - bool offload_params_to_cpu, bool direct, int n_threads, int tile_size, @@ -198,7 +189,7 @@ upscaler_ctx_t* new_upscaler_ctx(const char* esrgan_path_c_str, return nullptr; } - if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, offload_params_to_cpu, n_threads)) { + if (!upscaler_ctx->upscaler->load_from_file(esrgan_path, n_threads)) { delete upscaler_ctx->upscaler; upscaler_ctx->upscaler = nullptr; free(upscaler_ctx); diff --git a/src/upscaler.h b/src/upscaler.h index 349e35318..38150f59f 100644 --- a/src/upscaler.h +++ b/src/upscaler.h @@ -32,7 +32,6 @@ struct UpscalerGGML { ~UpscalerGGML(); bool load_from_file(const std::string& esrgan_path, - bool offload_params_to_cpu, int n_threads); void set_max_graph_vram_bytes(size_t max_vram_bytes); void set_stream_layers_enabled(bool enabled);