Skip to content

Commit 142f4a7

Browse files
committed
use slightly faster acos
1 parent 4d8056c commit 142f4a7

2 files changed

Lines changed: 14 additions & 1 deletion

File tree

Shaders/LTCGI.cginc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ void LTCGI_Contribution(
165165
}
166166

167167
// sample lookup tables
168-
float theta = acos(dot(worldNorm, viewDir));
168+
float theta = LTCGI_acos_fast(dot(worldNorm, viewDir));
169169
float2 uv = float2(roughness, theta/(0.5*UNITY_PI));
170170
uv = uv*LUT_SCALE + LUT_BIAS;
171171

Shaders/LTCGI_functions.cginc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,19 @@ float3 LTCGI_trilinear(float2 uv, float d, uint idx)
252252
GENERIC HELPERS
253253
*/
254254

255+
// Fast polynomial approximation of acos(inX).
// from: https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/
// max absolute error 9.0x10^-3
// Eberly's polynomial degree 1 - respect bounds
// 4 VGPR, 12 FR (8 FR, 1 QR), 1 scalar (figures quoted from the article, i.e. before the saturate guard below)
// input [-1, 1] and output [0, PI]
// Inputs whose magnitude slightly exceeds 1 (e.g. a dot product of two
// almost-normalized vectors) are handled as if they were +/-1 instead of
// producing NaN.
float LTCGI_acos_fast(float inX)
{
    // The approximation is evaluated on |inX| and mirrored afterwards.
    float x = abs(inX);

    // Degree-1 polynomial term.
    float res = -0.156583f * x + UNITY_HALF_PI;

    // x >= 0, so (1 - x) <= 1; saturate() only matters when x > 1, where it
    // keeps the sqrt argument from going negative (which would yield NaN).
    res *= sqrt(saturate(1.0f - x));

    // acos(-x) = PI - acos(x)
    return (inX >= 0) ? res : UNITY_PI - res;
}
267+
255268
bool LTCGI_tri_ray(float3 orig, float3 dir, float3 v0, float3 v1, float3 v2, out float2 bary) {
256269
float3 v0v1 = v1 - v0;
257270
float3 v0v2 = v2 - v0;

0 commit comments

Comments
 (0)