xref: /aosp_15_r20/external/swiftshader/src/Device/QuadRasterizer.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "QuadRasterizer.hpp"
16 
17 #include "Primitive.hpp"
18 #include "Renderer.hpp"
19 #include "Pipeline/Constants.hpp"
20 #include "System/Debug.hpp"
21 #include "System/Math.hpp"
22 #include "Vulkan/VkDevice.hpp"
23 
24 namespace sw {
25 
QuadRasterizer(const PixelProcessor::State & state,const SpirvShader * spirvShader)26 QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const SpirvShader *spirvShader)
27     : state(state)
28     , spirvShader{ spirvShader }
29 {
30 }
31 
~QuadRasterizer()32 QuadRasterizer::~QuadRasterizer()
33 {
34 }
35 
generate()36 void QuadRasterizer::generate()
37 {
38 	constants = device + OFFSET(vk::Device, constants);
39 	occlusion = 0;
40 
41 	Do
42 	{
43 		Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive, yMin));
44 		Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive, yMax));
45 
46 		Int cluster2 = cluster + cluster;
47 		yMin += clusterCount * 2 - 2 - cluster2;
48 		yMin &= -clusterCount * 2;
49 		yMin += cluster2;
50 
51 		If(yMin < yMax)
52 		{
53 			rasterize(yMin, yMax);
54 		}
55 
56 		primitive += sizeof(Primitive) * state.multiSampleCount;
57 		count--;
58 	}
59 	Until(count == 0);
60 
61 	if(state.occlusionEnabled)
62 	{
63 		UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData, occlusion) + 4 * cluster);
64 		clusterOcclusion += occlusion;
65 		*Pointer<UInt>(data + OFFSET(DrawData, occlusion) + 4 * cluster) = clusterOcclusion;
66 	}
67 
68 	Return();
69 }
70 
rasterize(Int & yMin,Int & yMax)71 void QuadRasterizer::rasterize(Int &yMin, Int &yMax)
72 {
73 	Pointer<Byte> cBuffer[MAX_COLOR_BUFFERS];
74 	Pointer<Byte> zBuffer;
75 	Pointer<Byte> sBuffer;
76 
77 	Int clusterCountLog2 = 31 - Ctlz(UInt(clusterCount), false);
78 
79 	for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
80 	{
81 		if(state.colorWriteActive(index))
82 		{
83 			cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
84 		}
85 	}
86 
87 	if(state.depthTestActive || state.depthBoundsTestActive)
88 	{
89 		zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData, depthPitchB));
90 	}
91 
92 	if(state.stencilActive)
93 	{
94 		sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB));
95 	}
96 
97 	Int y = yMin;
98 
99 	Do
100 	{
101 		Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive, outline->left) + (y + 0) * sizeof(Primitive::Span)));
102 		Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive, outline->left) + (y + 1) * sizeof(Primitive::Span)));
103 		Int x0 = Min(x0a, x0b);
104 
105 		for(unsigned int q = 1; q < state.multiSampleCount; q++)
106 		{
107 			x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->left) + (y + 0) * sizeof(Primitive::Span)));
108 			x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->left) + (y + 1) * sizeof(Primitive::Span)));
109 			x0 = Min(x0, Min(x0a, x0b));
110 		}
111 
112 		x0 &= 0xFFFFFFFE;
113 
114 		Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive, outline->right) + (y + 0) * sizeof(Primitive::Span)));
115 		Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive, outline->right) + (y + 1) * sizeof(Primitive::Span)));
116 		Int x1 = Max(x1a, x1b);
117 
118 		for(unsigned int q = 1; q < state.multiSampleCount; q++)
119 		{
120 			x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->right) + (y + 0) * sizeof(Primitive::Span)));
121 			x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline->right) + (y + 1) * sizeof(Primitive::Span)));
122 			x1 = Max(x1, Max(x1a, x1b));
123 		}
124 
125 		// Compute the y coordinate of each fragment in the SIMD group.
126 		const auto yMorton = SIMD::Float([](int i) { return float(compactEvenBits(i >> 1)); });  // 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, ...
127 		yFragment = SIMD::Float(Float(y)) + yMorton - SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, y0)));
128 
129 		if(interpolateZ())
130 		{
131 			for(unsigned int q = 0; q < state.multiSampleCount; q++)
132 			{
133 				SIMD::Float y = yFragment;
134 
135 				if(state.enableMultiSampling)
136 				{
137 					y += SIMD::Float(*Pointer<Float>(constants + OFFSET(Constants, SampleLocationsY) + q * sizeof(float)));
138 				}
139 
140 				Dz[q] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, z.C))) + y * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, z.B)));
141 			}
142 		}
143 
144 		If(x0 < x1)
145 		{
146 			if(interpolateW())
147 			{
148 				Dw = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, w.C))) + yFragment * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, w.B)));
149 			}
150 
151 			if(spirvShader)
152 			{
153 				int packedInterpolant = 0;
154 				for(int interfaceInterpolant = 0; interfaceInterpolant < MAX_INTERFACE_COMPONENTS; interfaceInterpolant++)
155 				{
156 					if(spirvShader->inputs[interfaceInterpolant].Type != SpirvShader::ATTRIBTYPE_UNUSED)
157 					{
158 						Dv[interfaceInterpolant] = *Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].C));
159 						if(!spirvShader->inputs[interfaceInterpolant].Flat)
160 						{
161 							Dv[interfaceInterpolant] +=
162 							    yFragment * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, V[packedInterpolant].B)));
163 						}
164 						packedInterpolant++;
165 					}
166 				}
167 
168 				for(unsigned int i = 0; i < state.numClipDistances; i++)
169 				{
170 					DclipDistance[i] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].C))) +
171 					                   yFragment * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, clipDistance[i].B)));
172 				}
173 
174 				for(unsigned int i = 0; i < state.numCullDistances; i++)
175 				{
176 					DcullDistance[i] = SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].C))) +
177 					                   yFragment * SIMD::Float(*Pointer<Float>(primitive + OFFSET(Primitive, cullDistance[i].B)));
178 				}
179 			}
180 
181 			Short4 xLeft[4];
182 			Short4 xRight[4];
183 
184 			for(unsigned int q = 0; q < state.multiSampleCount; q++)
185 			{
186 				xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive, outline) + y * sizeof(Primitive::Span));
187 				xRight[q] = xLeft[q];
188 
189 				xLeft[q] = Swizzle(xLeft[q], 0x0022) - Short4(1, 2, 1, 2);
190 				xRight[q] = Swizzle(xRight[q], 0x1133) - Short4(0, 1, 0, 1);
191 			}
192 
193 			For(Int x = x0, x < x1, x += 2)
194 			{
195 				Short4 xxxx = Short4(x);
196 				Int cMask[4];
197 
198 				for(unsigned int q = 0; q < state.multiSampleCount; q++)
199 				{
200 					if(state.multiSampleMask & (1 << q))
201 					{
202 						unsigned int i = state.enableMultiSampling ? q : 0;
203 						Short4 mask = CmpGT(xxxx, xLeft[i]) & CmpGT(xRight[i], xxxx);
204 						cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F;
205 					}
206 				}
207 
208 				quad(cBuffer, zBuffer, sBuffer, cMask, x, y);
209 			}
210 		}
211 
212 		for(int index = 0; index < MAX_COLOR_BUFFERS; index++)
213 		{
214 			if(state.colorWriteActive(index))
215 			{
216 				cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index])) << (1 + clusterCountLog2);  // FIXME: Precompute
217 			}
218 		}
219 
220 		if(state.depthTestActive || state.depthBoundsTestActive)
221 		{
222 			zBuffer += *Pointer<Int>(data + OFFSET(DrawData, depthPitchB)) << (1 + clusterCountLog2);  // FIXME: Precompute
223 		}
224 
225 		if(state.stencilActive)
226 		{
227 			sBuffer += *Pointer<Int>(data + OFFSET(DrawData, stencilPitchB)) << (1 + clusterCountLog2);  // FIXME: Precompute
228 		}
229 
230 		y += 2 * clusterCount;
231 	}
232 	Until(y >= yMax);
233 }
234 
interpolate(SIMD::Float & x,SIMD::Float & D,SIMD::Float & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective)235 SIMD::Float QuadRasterizer::interpolate(SIMD::Float &x, SIMD::Float &D, SIMD::Float &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
236 {
237 	if(flat)
238 	{
239 		return D;
240 	}
241 
242 	SIMD::Float interpolant = mulAdd(x, SIMD::Float(*Pointer<Float>(planeEquation + OFFSET(PlaneEquation, A))), D);
243 
244 	if(perspective)
245 	{
246 		interpolant *= rhw;
247 	}
248 
249 	return interpolant;
250 }
251 
interpolateZ() const252 bool QuadRasterizer::interpolateZ() const
253 {
254 	return state.depthTestActive || (spirvShader && spirvShader->hasBuiltinInput(spv::BuiltInFragCoord));
255 }
256 
interpolateW() const257 bool QuadRasterizer::interpolateW() const
258 {
259 	// Note: could optimize cases where there is a fragment shader but it has no
260 	// perspective-correct inputs, but that's vanishingly rare.
261 	return spirvShader != nullptr;
262 }
263 
264 }  // namespace sw
265