I am adding AI-based image enhancement to my 3D rendering software. I found your wonderful framework and decided to integrate it. Here is my test image:
Here is the output of Stable-diffusion:
For comparison, here is the output of the Google Gemini API:
The difference is crazy. Here is my implementation code:
`
/// Runs the rendered image through stable-diffusion (image-to-image) and
/// writes the generated pixels into m_outputImage.
///
/// Steps: convert the RGB32F input image to RGB8, create a stable-diffusion
/// context, call generate_image() with the converted image as init image, copy
/// the first returned image into m_outputImage, then release the context and
/// the returned image.
///
/// m_outputImage is left untouched when the context cannot be created, when
/// generate_image() throws std::runtime_error, when no image is returned, or
/// when the returned image does not match the rendering size / 3 channels.
void OfflineRenderer::performRenderImageEnhancing()
{
    _ASSERT(m_inputImage->isFloatingPointImage());
    _ASSERT(m_inputImage->getNbChannels() == 3);

    // First build the RGB8 image from RGB32F to feed the enhancer.
    // imageSrc must stay alive until generate_image() returns because the
    // native input image below only borrows its raw buffer.
    const auto imageSrc = Graphics::Texture::Helper::create_RGB8FromRGB32F(*m_inputImage);

    // Build the stable diffusion Api native input image.
    sd_image_t nativeInputImage;
    nativeInputImage.width = getRenderingWidth();
    nativeInputImage.height = getRenderingHeight();
    nativeInputImage.channel = 3u;
    nativeInputImage.data = imageSrc->getRawData();

    // Init the context parameters.
    // NOTE(review): the struct is only zero-initialised here; if the library
    // version in use ships a default-initialiser for sd_ctx_params_t, calling
    // it first would be safer than relying on all-zero defaults - confirm.
    sd_ctx_params_t sd_ctx_params = {};
    sd_ctx_params.model_path = "D:\\Stable-diffusion\\Models\\v1-5-pruned-emaonly.safetensors";
    //sd_ctx_params.vae_path = "/path/to/your/vae.safetensors";
    sd_ctx_params.n_threads = Hardware::HardwareInformationSingleton::instance()->nbOfPhysicalCores();
    sd_ctx_params.wtype = sd_type_t::SD_TYPE_F16;
    sd_ctx_params.backend = "cpu";
    //sd_ctx_params.backend = "vulkan0";
    sd_ctx_t* ctx = new_sd_ctx(&sd_ctx_params);
    _ASSERT(ctx != nullptr);
    if (ctx == nullptr)
    {
        // Nothing has been allocated on the library side yet; without a
        // context there is nothing to generate with (e.g. model failed to load).
        return;
    }

    // Configure generation parameters.
    sd_img_gen_params_t sd_img_gen_params;
    sd_img_gen_params_init(&sd_img_gen_params);
    sd_img_gen_params.prompt = "Generate a beautiful daytime bathroom at 12pm.";
    sd_img_gen_params.negative_prompt = "";
    sd_img_gen_params.clip_skip = 1;
    sd_img_gen_params.init_image = nativeInputImage;
    sd_img_gen_params.sample_params.sample_method = EULER_A_SAMPLE_METHOD;
    sd_img_gen_params.width = getRenderingWidth();
    sd_img_gen_params.height = getRenderingHeight();
    // NOTE(review): presumably a high img2img strength lets the model repaint
    // most of the init image; if the goal is a light "enhancement" a lower
    // value is probably wanted - confirm against the library documentation.
    sd_img_gen_params.strength = 0.75f;
    sd_img_gen_params.seed = 9999;
    // NOTE(review): no control net / control image is configured in this
    // function, so this value presumably has no effect - confirm.
    sd_img_gen_params.control_strength = 0.9f;

    // Set the generation callback.
    sd_set_progress_callback(ProgressCallback, &m_stopRender);

    sd_image_t* result = nullptr;
    try
    {
        result = generate_image(ctx, &sd_img_gen_params);
        _ASSERT(result && result[0].data);
    }
    catch (const std::runtime_error&)
    {
        // Leave 'result' as it is (normally nullptr); the read-back below is
        // skipped and only the cleanup runs. This replaces the former
        // 'goto FreeMemory', which jumped over an initialised declaration.
    }

    const uint32_t renderWidth = static_cast<uint32_t>(getRenderingWidth());
    const uint32_t renderHeight = static_cast<uint32_t>(getRenderingHeight());

    // Runtime validation (asserts vanish in release builds): the read-back
    // indexes the buffer as renderWidth * renderHeight * 3 bytes.
    const bool hasImage = (result != nullptr) && (result[0].data != nullptr);
    const bool layoutMatches = hasImage
        && (result[0].width == renderWidth)
        && (result[0].height == renderHeight)
        && (result[0].channel == 3u);

    if (hasImage)
    {
        _ASSERT(result->width == imageSrc->getWidth());
        _ASSERT(result->height == imageSrc->getHeight());
        _ASSERT(result->channel == imageSrc->getNbChannels());
    }

    if (layoutMatches)
    {
        // Read back result.
        const size_t nbTasks = static_cast<size_t>(renderWidth) * static_cast<size_t>(renderHeight);
        const uint8_t* const generatedPixels = result[0].data;
        tbb::parallel_for(size_t(0), nbTasks, [&](size_t tbbIdx) {
            if (m_stopRender)
            {
                tbb::task::current_context()->cancel_group_execution();
                return;
            }
            const uint32_t pixelLinearIdx = static_cast<uint32_t>(tbbIdx);
            // Byte offset kept in size_t so very large images cannot wrap.
            const size_t pixelStartInArray = tbbIdx * 3u;
            const uint32_t pixelPosX = pixelLinearIdx % renderWidth;
            const uint32_t pixelPosY = pixelLinearIdx / renderWidth;
            const Math::Uvec2 pixelPos = Math::Uvec2(pixelPosX, pixelPosY);
            const uint8_t R = generatedPixels[pixelStartInArray];
            const uint8_t G = generatedPixels[pixelStartInArray + 1u];
            const uint8_t B = generatedPixels[pixelStartInArray + 2u];
            // OpenCV layout is BGR
            // NOTE(review): the init image above is fed as RGB8 and the name
            // RGBFColor suggests (R, G, B) argument order, so this B/R swap may
            // exchange red and blue in the output - confirm the channel order
            // m_outputImage expects.
            const RGBFColor outputColor = RGBFColor(
                static_cast<float>(B) / 255.0f,
                static_cast<float>(G) / 255.0f,
                static_cast<float>(R) / 255.0f);
            m_outputImage->setPixelFromPosition(outputColor, pixelPos);
        });
    }

    // Cleanup resources.
    free_sd_ctx(ctx);
    if (result)
    {
        // The image array and its pixel buffer are released with free(),
        // as in the original code.
        free(result[0].data);
        free(result);
    }
}`
I think I am doing something wrong, but what?
Thanks for helping!
I am adding AI-based image enhancement to my 3D rendering software. I found your wonderful framework and decided to integrate it. Here is my test image:
Here is the output of Stable-diffusion:
For comparison, here is the output of the Google Gemini API:
The difference is crazy. Here is my implementation code:
`
I think I am doing something wrong, but what?
Thanks for helping!