Skip to content
Open
260 changes: 254 additions & 6 deletions data/kernels/basecurve.cl
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
This file is part of darktable,
copyright (c) 2016-2025 darktable developers.
copyright (c) 2016-2026 darktable developers.

darktable is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
Expand All @@ -19,6 +19,28 @@
#include "color_conversion.h"
#include "rgb_norms.h"

inline float _aces_tone_map(const float x)
{
const float a = 2.51f;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe a ref to the coefs definition?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"These coefficients are the Narkowicz ACES approximation. They are widely used for their good balance between performance and visual accuracy. You can find the reference here: https://knarkowicz.wordpress.com/2016/01/06/aces-filmic-tone-mapping-curve/"

const float b = 0.03f;
const float c = 2.43f;
const float d = 0.59f;
const float e = 0.14f;

return clamp((x * (a * x + b)) / (x * (c * x + d) + e), 0.0f, 1.0f);
}

inline float _aces_20_tonemap(const float x)
{
const float a = 0.0245786f;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

likewise

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hese coefficients refer to the Fitted ACES (RRT+ODT) approximation, which is more precise than the basic Narkowicz fit. It is based on the curve fitting of the ACES 1.0/2.0 RRT/ODT transform.

Reference: https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl (or similar ACES fitted shaders used in cinematic rendering)."

const float b = 0.000090537f;
const float c = 0.983729f;
const float d = 0.4329510f;
const float e = 0.238081f;

return clamp((x * (x + a) - b) / (x * (c * x + d) + e), 0.0f, 1.0f);
}

/*
Primary LUT lookup. Measures the luminance of a given pixel using a selectable function, looks up that
luminance in the configured basecurve, and then scales each channel by the result.
Expand Down Expand Up @@ -86,9 +108,11 @@ basecurve_legacy_lut(read_only image2d_t in, write_only image2d_t out, const int
float4 pixel = read_imagef(in, sampleri, (int2)(x, y));

// apply ev multiplier and use lut or extrapolation:
pixel.x = lookup_unbounded(table, mul * pixel.x, a);
pixel.y = lookup_unbounded(table, mul * pixel.y, a);
pixel.z = lookup_unbounded(table, mul * pixel.z, a);
float3 f = pixel.xyz * mul;

pixel.x = lookup_unbounded(table, f.x, a);
pixel.y = lookup_unbounded(table, f.y, a);
pixel.z = lookup_unbounded(table, f.z, a);
pixel = fmax(pixel, 0.f);
write_imagef (out, (int2)(x, y), pixel);
}
Expand Down Expand Up @@ -298,14 +322,238 @@ basecurve_reconstruct(read_only image2d_t in, read_only image2d_t tmp, write_onl
}

kernel void
basecurve_finalize(read_only image2d_t in, read_only image2d_t comb, write_only image2d_t out, const int width, const int height)
basecurve_finalize(read_only image2d_t in, read_only image2d_t comb, write_only image2d_t out, const int width,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we are at it, the dt style is one def per line:

basecurve_finalize(read_only image2d_t in, 
                   read_only image2d_t comb,
                   ...)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll fix that. I'm learning... ;

const int height, const int workflow_mode, const float shadow_lift, const float highlight_gain,
const float ucs_saturation_balance, const float gamut_strength, const float highlight_corr, const int target_gamut, const float look_opacity, const float16 look_mat, const float alpha)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

float4 pixel = fmax(read_imagef(comb, sampleri, (int2)(x, y)), 0.f);
float4 pixel = read_imagef(comb, sampleri, (int2)(x, y));

// Sanitize to avoid Inf/NaN propagation
pixel.xyz = clamp(pixel.xyz, -1e6f, 1e6f);

if(workflow_mode > 0)
{
float3 pixel_in = pixel.xyz;
float3 look_transformed;
look_transformed.x = dot(pixel_in, (float3)(look_mat.s0, look_mat.s1, look_mat.s2));
look_transformed.y = dot(pixel_in, (float3)(look_mat.s3, look_mat.s4, look_mat.s5));
look_transformed.z = dot(pixel_in, (float3)(look_mat.s6, look_mat.s7, look_mat.s8));

// Mix between original and transformed
pixel.xyz = mix(pixel_in, look_transformed, look_opacity);
pixel.xyz = fmax(pixel.xyz, 0.0f); // Anti-black artifacts

if(highlight_gain != 1.0f)
pixel.xyz *= highlight_gain;

if(shadow_lift != 1.0f)
{
pixel.x = (pixel.x > 0.0f) ? native_powr(pixel.x, shadow_lift) : pixel.x;
pixel.y = (pixel.y > 0.0f) ? native_powr(pixel.y, shadow_lift) : pixel.y;
pixel.z = (pixel.z > 0.0f) ? native_powr(pixel.z, shadow_lift) : pixel.z;
}

const float r_coeff = 0.2627f;
const float g_coeff = 0.6780f;
const float b_coeff = 0.0593f;

float y_in = pixel.x * r_coeff + pixel.y * g_coeff + pixel.z * b_coeff;
float y_out = y_in;

/* Scene-referred: luminance-adaptive shoulder extension for ACES-like
tonemapping using perceptual luminance Jz. */
if(workflow_mode == 1 || workflow_mode == 2)
{
float3 xyz;
xyz.x = 0.636958f * pixel.x + 0.144617f * pixel.y + 0.168881f * pixel.z;
xyz.y = 0.262700f * pixel.x + 0.677998f * pixel.y + 0.059302f * pixel.z;
xyz.z = 0.000000f * pixel.x + 0.028073f * pixel.y + 1.060985f * pixel.z;

xyz = fmax(xyz, (float3)(0.0f));

float4 xyz_scaled = (float4)(xyz.x * 400.0f, xyz.y * 400.0f, xyz.z * 400.0f, 0.0f);
float4 jab = XYZ_to_JzAzBz(xyz_scaled);

const float L = clamp(jab.x, 0.0f, 1.0f);
const float k = 1.0f + alpha * L * L;

const float x_scaled = y_in / k;
if(workflow_mode == 1)
y_out = _aces_tone_map(x_scaled) * k;
else
y_out = _aces_20_tonemap(x_scaled * 1.257f) * k;
}

float gain = y_out / fmax(y_in, 1e-6f);
pixel.xyz *= gain;

const float threshold = 0.80f;
if(y_out > threshold)
{
float factor = (y_out - threshold) / (1.0f - threshold);
factor = clamp(factor, 0.0f, 1.0f);
pixel.xyz = mix(pixel.xyz, (float3)y_out, factor);
}

float4 jab = (float4)(0.0f);
if(ucs_saturation_balance != 0.0f || gamut_strength > 0.0f || highlight_corr != 0.0f)
{
// RGB Rec2020 to XYZ D65
float3 xyz;
xyz.x = 0.636958f * pixel.x + 0.144617f * pixel.y + 0.168881f * pixel.z;
xyz.y = 0.262700f * pixel.x + 0.677998f * pixel.y + 0.059302f * pixel.z;
xyz.z = 0.000000f * pixel.x + 0.028073f * pixel.y + 1.060985f * pixel.z;

xyz = fmax(xyz, 0.0f);

// XYZ to JzAzBz
float4 xyz_scaled = (float4)(xyz.x * 400.0f, xyz.y * 400.0f, xyz.z * 400.0f, 0.0f);
jab = XYZ_to_JzAzBz(xyz_scaled);

int modified = 0;

if(ucs_saturation_balance != 0.0f)
{
// Chroma-based modulation for saturation balance
const float chroma = fmax(fmax(pixel.x, pixel.y), pixel.z) - fmin(fmin(pixel.x, pixel.y), pixel.z);
const float effective_saturation = ucs_saturation_balance * fmin(chroma * 2.0f, 1.0f);

// Apply saturation balance
const float Y = xyz.y;
const float L = native_sqrt(fmax(Y, 0.0f));
const float fulcrum = 0.5f;
const float n = (L - fulcrum) / fulcrum;
const float mask_shadow = 1.0f / (1.0f + dtcl_exp(n * 4.0f));

float sat_adjust = effective_saturation * (2.0f * mask_shadow - 1.0f);
sat_adjust *= fmin(L * 4.0f, 1.0f);
const float sat_factor = 1.0f + sat_adjust;
jab.y *= sat_factor;
jab.z *= sat_factor;
modified = 1;
}

if(gamut_strength > 0.0f)
{
const float Y = xyz.y;
const float L = native_sqrt(fmax(Y, 0.0f));
const float chroma_factor = 1.0f - gamut_strength * (0.2f + 0.2f * L);
jab.y *= chroma_factor;
jab.z *= chroma_factor;
modified = 1;
}

// HIGH SENSITIVITY CORRECTION
// Start effect at 0.20 up to 0.90. Linear transition.
float hl_mask = clamp((jab.x - 0.20f) / 0.70f, 0.0f, 1.0f);

if(hl_mask > 0.0f && highlight_corr != 0.0f)
{
// 1. Soft symmetric desaturation (0.75 factor)
float desat = 1.0f - (fabs(highlight_corr) * hl_mask * 0.75f);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be honest, I don't have a specific mathematical paper for the 0.75f factor. It was established as an empirical damping factor during the development and testing phase.

jab.y *= desat;
jab.z *= desat;

// 2. Controlled Hue Rotation (2.0 factor)
float angle = highlight_corr * hl_mask * 2.0f;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All consts.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For more context on the visual development and the feedback from the community (including discussions with Boris Hajdukovic regarding the JzAzBz shoulder and hue/sat behavior), you can check the dedicated thread on Pixls.us https://discuss.pixls.us/t/base-curve-module-hybrid-aces-like-proof-of-concept/55796/32

float ca = native_cos(angle);
float sa = native_sin(angle);
float az = jab.y;
float bz = jab.z;

jab.y = az * ca - bz * sa;
jab.z = az * sa + bz * ca;
modified = 1;
}

if(jab.x > 0.95f)
{
const float desat = clamp((1.0f - jab.x) * 20.0f, 0.0f, 1.0f);
jab.y *= desat;
jab.z *= desat;
modified = 1;
}

if(modified)
{
// JzAzBz to XYZ
xyz = JzAzBz_2_XYZ(jab).xyz / 400.0f;

// XYZ D65 to RGB Rec2020
pixel.x = 1.716651f * xyz.x - 0.355671f * xyz.y - 0.253366f * xyz.z;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe worth sharing in color_convertion.h?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a beginner in darktable source code, it is difficult for me to judge the best architectural choice, but I have complete confidence in your expertise in this area.

pixel.y = -0.666684f * xyz.x + 1.616481f * xyz.y + 0.015768f * xyz.z;
pixel.z = 0.017640f * xyz.x - 0.042771f * xyz.y + 0.942103f * xyz.z;

float min_val = fmin(pixel.x, fmin(pixel.y, pixel.z));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are the standard matrix coefficients for XYZ to Linear Rec.2020 conversion.

if(min_val < 0.0f)
{
float lum = 0.2627f * pixel.x + 0.6780f * pixel.y + 0.0593f * pixel.z;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a hue-preserving gamut mapping step for negative values.

if(lum > 0.0f)
{
float factor = lum / (lum - min_val);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The coefficients 0.2627f, 0.6780f, and 0.0593f are the standard Rec.2020 Luma coefficients.
The if(lum > 0.0f) is there to prevent division by zero

pixel.xyz = lum + factor * (pixel.xyz - lum);
}
}
pixel.xyz = clamp(pixel.xyz, 0.0f, 1.0f);
}
}

if(gamut_strength > 0.0f)
{
float4 orig = pixel;

float Y = 0.2126f * pixel.x + 0.7152f * pixel.y + 0.0722f * pixel.z;
float lum_weight = clamp((Y - 0.3f) / (0.8f - 0.3f), 0.0f, 1.0f);
lum_weight = lum_weight * lum_weight * (3.0f - 2.0f * lum_weight);
float effective_strength = gamut_strength * lum_weight;

float limit = 0.90f;
if (target_gamut == 1) limit = 0.95f;
else if (target_gamut == 2) limit = 1.00f;

float threshold = limit * (1.0f - (effective_strength * 0.25f));
float max_val = fmax(pixel.x, fmax(pixel.y, pixel.z));

if (max_val > threshold)
{
float range = limit - threshold;
float delta = max_val - threshold;
Comment on lines +524 to +525
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

const

const float compressed = threshold + range * delta / (delta + range);
const float factor = compressed / max_val;

float range_blue = 1.1f * range;
const float compressed_blue = threshold + range * delta / (delta + range_blue);
const float factor_blue = compressed_blue / max_val;

pixel.x *= factor;
pixel.y *= factor;
pixel.z *= factor_blue;
}
pixel = mix(orig, pixel, effective_strength);
}

// Final gamut check to preserve hue
if(pixel.x < 0.0f || pixel.x > 1.0f || pixel.y < 0.0f || pixel.y > 1.0f || pixel.z < 0.0f || pixel.z > 1.0f)
{
const float luma = 0.2627f * pixel.x + 0.6780f * pixel.y + 0.0593f * pixel.z;
const float target_luma = clamp(luma, 0.0f, 1.0f);
float t = 1.0f;
if (pixel.x < 0.0f) t = fmin(t, target_luma / (target_luma - pixel.x));
if (pixel.y < 0.0f) t = fmin(t, target_luma / (target_luma - pixel.y));
if (pixel.z < 0.0f) t = fmin(t, target_luma / (target_luma - pixel.z));
if (pixel.x > 1.0f) t = fmin(t, (1.0f - target_luma) / (pixel.x - target_luma));
if (pixel.y > 1.0f) t = fmin(t, (1.0f - target_luma) / (pixel.y - target_luma));
if (pixel.z > 1.0f) t = fmin(t, (1.0f - target_luma) / (pixel.z - target_luma));
t = fmax(0.0f, t);
pixel.xyz = target_luma + t * (pixel.xyz - target_luma);
}
}

pixel.w = read_imagef(in, sampleri, (int2)(x, y)).w;

write_imagef (out, (int2)(x, y), pixel);
Expand Down
Loading
Loading