xref: /aosp_15_r20/external/mesa3d/src/compiler/glsl/bc4.glsl (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1*61046927SAndroid Build Coastguard Worker/*
2*61046927SAndroid Build Coastguard Worker * Copyright 2020-2022 Matias N. Goldberg
3*61046927SAndroid Build Coastguard Worker * Copyright 2022 Intel Corporation
4*61046927SAndroid Build Coastguard Worker *
5*61046927SAndroid Build Coastguard Worker * Permission is hereby granted, free of charge, to any person obtaining a
6*61046927SAndroid Build Coastguard Worker * copy of this software and associated documentation files (the "Software"),
7*61046927SAndroid Build Coastguard Worker * to deal in the Software without restriction, including without limitation
8*61046927SAndroid Build Coastguard Worker * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9*61046927SAndroid Build Coastguard Worker * and/or sell copies of the Software, and to permit persons to whom the
10*61046927SAndroid Build Coastguard Worker * Software is furnished to do so, subject to the following conditions:
11*61046927SAndroid Build Coastguard Worker *
12*61046927SAndroid Build Coastguard Worker * The above copyright notice and this permission notice shall be included in
13*61046927SAndroid Build Coastguard Worker * all copies or substantial portions of the Software.
14*61046927SAndroid Build Coastguard Worker *
15*61046927SAndroid Build Coastguard Worker * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16*61046927SAndroid Build Coastguard Worker * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17*61046927SAndroid Build Coastguard Worker * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18*61046927SAndroid Build Coastguard Worker * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19*61046927SAndroid Build Coastguard Worker * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20*61046927SAndroid Build Coastguard Worker * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21*61046927SAndroid Build Coastguard Worker * DEALINGS IN THE SOFTWARE.
22*61046927SAndroid Build Coastguard Worker */
23*61046927SAndroid Build Coastguard Worker
#version 310 es

// GLSL ES only accepts the const keyword on compile-time constants, while
// desktop GLSL also accepts it on values that are only known at run time.
// This shader uses it the second way, so on ES the keyword is defined away.
#if defined(GL_ES) && GL_ES == 1
	#define const
#endif

// Shared-memory barrier followed by an execution barrier.
#define __sharedOnlyBarrier memoryBarrierShared();barrier();

// Placeholder line; presumably replaced by the host code with the contents of
// the named file before the shader is compiled (verify against the caller).
%s // include "CrossPlatformSettings_piece_all.glsl"

// Work group of 4 x 4 x 4 invocations. main() uses x as the invocation index
// inside one 4x4 texel block and y/z as the block coordinate.
layout( local_size_x = 4, local_size_y = 4, local_size_z = 4 ) in;

// params.x: source channel selector (0 reads .x, 1 reads .y, anything else reads .w).
// params.y: non-zero packs the endpoints with packSnorm4x8 instead of packUnorm4x8.
layout( location = 0 ) uniform uint2 params;
#define p_channelIdx params.x
#define p_useSNorm params.y

// Source texels, fetched through OGRE_Load2D.
uniform sampler2D srcTex;

// Destination image: main() stores one uint4 texel per encoded 4x4 block.
layout( rgba16ui ) uniform restrict writeonly mediump uimage2D dstTexture;

// One (min, max) pair per invocation of the work group.
shared float2 g_minMaxValues[4u * 4u * 4u];
// One pair of 32-bit index masks per block handled by the work group.
shared uint2 g_mask[4u * 4u];
53*61046927SAndroid Build Coastguard Worker
/// Encodes one 4x4 texel block per group of 4 invocations.
///
/// Work distribution:
///  - gl_LocalInvocationID.x (0..3) is the invocation index inside a block; it is used as
///    the y offset of the 4 texels the invocation reads.
///  - gl_GlobalInvocationID.yz is the block coordinate and also the destination texel.
///  - Each invocation builds 2 partial 32-bit index masks, so a block produces 8 of them.
///    They are OR-ed into 2 masks in shared memory, and the first invocation of the block
///    writes the result.
///
/// Why 4 texels per invocation rather than 1, 2 or 16? It is a sweet spot:
///  - Very short threads cannot fill expensive GPUs with enough work (dispatch bound).
///  - Lots of threads means lots of synchronization overhead (evaluating min/max, merging
///    masks) and more LDS usage, which reduces occupancy.
///  - Long threads (e.g. 1 thread per block) miss parallelism opportunities.
void main()
{
	const uint lane = gl_LocalInvocationID.x;
	const uint2 blockOrigin = gl_GlobalInvocationID.yz << 2u;

	// Read this invocation's 4 texels, keep the selected channel, multiply by 255.
	float4 texels;
	for( uint x = 0u; x < 4u; ++x )
	{
		const uint2 texelCoord = blockOrigin + uint2( x, lane );
		const float4 texel = OGRE_Load2D( srcTex, int2( texelCoord ), 0 ).xyzw;

		float selected;
		if( p_channelIdx == 0u )
			selected = texel.x;
		else if( p_channelIdx == 1u )
			selected = texel.y;
		else
			selected = texel.w;

		texels[x] = selected * 255.0f;
	}

	// Extremes of the 4 texels owned by this invocation.
	float blockMin = min( min3( texels.x, texels.y, texels.z ), texels.w );
	float blockMax = max( max3( texels.x, texels.y, texels.z ), texels.w );

	// Shared-memory slots: 4 consecutive min/max entries and 1 mask entry per block.
	const uint minMaxSlot = ( gl_LocalInvocationID.z << 4u ) + ( gl_LocalInvocationID.y << 2u );
	const uint maskSlot = ( gl_LocalInvocationID.z << 2u ) + gl_LocalInvocationID.y;

	g_minMaxValues[minMaxSlot + lane] = float2( blockMin, blockMax );
	// All 4 invocations of the block store the same zero value here.
	g_mask[maskSlot] = uint2( 0u, 0u );

	__sharedOnlyBarrier;

	// Widen the extremes to the whole block using what the 4 invocations published.
	for( uint t = 0u; t < 4u; ++t )
	{
		const float2 published = g_minMaxValues[minMaxSlot + t];
		blockMin = min( published.x, blockMin );
		blockMax = max( published.y, blockMax );
	}

	// Determine the bias and emit the indices. Given the choice of max/min, these
	// indices are optimal:
	// http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
	const float span = blockMax - blockMin;
	const float span2 = span * 2.0f;
	const float span4 = span * 4.0f;

	float bias;
	if( span < 8.0f )
		bias = span - 1.0f;
	else
		bias = trunc( span * 0.5f ) + 2.0f;
	bias -= blockMin * 7.0f;

	uint maskLo = 0u;
	uint maskHi = 0u;

	for( uint x = 0u; x < 4u; ++x )
	{
		float scaled = texels[x] * 7.0f + bias;

		// Linear-scale level between 0 (value == min) and 7 (value == max),
		// built one bit at a time.
		int level = 0;
		if( scaled >= span4 )
		{
			level = 4;
			scaled -= span4;
		}
		if( scaled >= span2 )
		{
			level += 2;
			scaled -= span2;
		}
		if( scaled >= span )
		{
			level += 1;
		}

		// Turn the linear level into a DXT index (0 and 1 are the extremal points).
		int dxtIdx = -level & 7;
		if( dxtIdx < 2 )
			dxtIdx ^= 1;

		// Each index takes 3 bits, starting at bit 16 of the 64-bit (maskLo, maskHi) pair.
		const uint idxBits = uint( dxtIdx );
		const uint bitPos = 16u + ( ( lane << 2u ) + x ) * 3u;
		if( bitPos >= 32u )
		{
			maskHi |= idxBits << ( bitPos - 32u );
		}
		else
		{
			maskLo |= idxBits << bitPos;
			// The index straddles the two words: spill its upper bits into maskHi.
			if( bitPos + 3u > 32u )
				maskHi |= idxBits >> ( 32u - bitPos );
		}
	}

	// Merge this invocation's partial masks into the block's masks.
	if( maskLo != 0u )
		atomicOr( g_mask[maskSlot].x, maskLo );
	if( maskHi != 0u )
		atomicOr( g_mask[maskSlot].y, maskHi );

	__sharedOnlyBarrier;

	// A single invocation per block writes the encoded result.
	if( lane == 0u )
	{
		const float maxUnorm = blockMax * ( 1.0f / 255.0f );
		const float minUnorm = blockMin * ( 1.0f / 255.0f );

		// Endpoints: max goes first, min second.
		uint endpoints;
		if( p_useSNorm == 0u )
		{
			endpoints = packUnorm4x8( float4( maxUnorm, minUnorm, 0.0f, 0.0f ) );
		}
		else
		{
			endpoints = packSnorm4x8(
				float4( maxUnorm * 2.0f - 1.0f, minUnorm * 2.0f - 1.0f, 0.0f, 0.0f ) );
		}

		// The low 16 bits of the first mask word are never written by the loop above;
		// the index bits start at bit 16.
		const uint2 merged = g_mask[maskSlot];
		const uint4 blockWords =
			uint4( endpoints, merged.x >> 16u, merged.y & 0xFFFFu, merged.y >> 16u );

		imageStore( dstTexture, int2( gl_GlobalInvocationID.yz ), blockWords );
	}
}
188