1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #ifndef PVR_SHADER_FACTORY_H
25 #define PVR_SHADER_FACTORY_H
26
27 #include <stdint.h>
28 #include <stdbool.h>
29
30 #include "util/bitpack_helpers.h"
31 #include "util/bitscan.h"
32 #include "util/u_math.h"
33
/* Occlusion query availability writes.
 *
 * The trailing *_COUNT enumerator is not an entry itself; it evaluates to the
 * number of entries that precede it.
 */
enum pvr_query_availability_write_pool_const {
   PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT = 0,

   PVR_QUERY_AVAILABILITY_WRITE_COUNT,
};
39
/* Copy query pool results.
 *
 * Entries are numbered consecutively from 0. The trailing *_COUNT enumerator
 * is not an entry itself; it evaluates to the number of entries.
 */
enum pvr_copy_query_pool_const {
   PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT = 0,

   /* NOTE(review): presumably the two 32-bit halves of one 64-bit address. */
   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW,
   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH,

   PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE,

   PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG,
   PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG,
   PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG,

   PVR_COPY_QUERY_POOL_RESULTS_COUNT,
};
51
/* Reset query pool.
 *
 * The trailing *_COUNT enumerator is not an entry itself; it evaluates to the
 * number of entries that precede it.
 */
enum pvr_reset_query_pool_pool_const {
   PVR_RESET_QUERY_POOL_INDEX_COUNT = 0,

   PVR_RESET_QUERY_POOL_COUNT,
};
57
/* ClearAttachments. */
enum pvr_clear_attachment_const {
   /* Don't change the values of the four component entries: they are used to
    * index an array, so they must stay pinned to 0..3.
    */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1 = 1,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2 = 2,
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3 = 3,

   /* Numbered automatically, continuing after the component entries. */
   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER,
   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER,

   /* Number of entries above; not an entry itself. */
   PVR_CLEAR_ATTACHMENT_CONST_COUNT,
};
72
/* Sentinel for a clear attachment destination id that is not in use. */
#define PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED (~0U)

/* Number of clear attachment programs that actually exist.
 *
 * A clear of N dwords has (9 - N) valid start offsets, and every
 * (dword count, offset) pair exists twice: once targeting the output
 * registers and once targeting the tile buffers.
 *
 *    1 dword,  8 offsets: 8 + 8 = 16
 *    2 dwords, 7 offsets: 7 + 7 = 14
 *    3 dwords, 6 offsets: 6 + 6 = 12
 *    4 dwords, 5 offsets: 5 + 5 = 10
 */
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT (16 + 14 + 12 + 10)

/* Maximum theoretical number of clear attachment programs.
 *
 * This is the size of the index space in which every combination of
 * 4 dword counts (1..4), 8 offsets (0..7) and 2 destinations (on chip or in
 * memory) gets its own slot. Combinations whose dword count would run past
 * the last available target leave their slot unused, hence the "holes".
 */
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES (4 * 8 * 2)
89
/**
 * \brief Returns the index of the clear attachment USC program.
 *
 * For shaders which use output registers, "dword_count" is essentially the
 * count of output registers to use and "offset" is the first output reg to
 * use. E.g. dword_count 3, offset 1, will use o1, o2, o3.
 *
 * For shaders which use tile buffers as the destination, "dword_count" is the
 * amount of dwords to write to the tile buffer and "offset" is the offset at
 * which to start writing.
 *
 * The returned index is a packed bitfield:
 *    bit  0    - destination (set when uses_tile_buffer is true)
 *    bits 1..3 - offset, 0..7
 *    bits 4..5 - dword_count - 1, i.e. 1..4 dwords
 *
 * \param dword_count      Number of dwords to clear, 1..4.
 * \param offset           First output register / tile buffer offset, 0..7.
 * \param uses_tile_buffer True to select the tile buffer variant.
 * \return Index of the matching program.
 */
static inline uint32_t
pvr_get_clear_attachment_program_index(uint32_t dword_count,
                                       uint32_t offset,
                                       bool uses_tile_buffer)
{
   /* Inclusive bit ranges of each field within the index. */
   enum {
      DEST_FIRST_BIT = 0,
      DEST_LAST_BIT = 0,

      OFFSET_FIRST_BIT = 1,
      OFFSET_LAST_BIT = 3,

      DWORD_COUNT_FIRST_BIT = 4,
      DWORD_COUNT_LAST_BIT = 5,
   };

   /* dword_count is 1-based; store it 0-based so that 1..4 fits in 2 bits. */
   const uint32_t dword_count_minus_one = dword_count - 1;

   uint32_t program_idx =
      util_bitpack_uint(uses_tile_buffer, DEST_FIRST_BIT, DEST_LAST_BIT);

   program_idx |= util_bitpack_uint(offset, OFFSET_FIRST_BIT, OFFSET_LAST_BIT);
   program_idx |= util_bitpack_uint(dword_count_minus_one,
                                    DWORD_COUNT_FIRST_BIT,
                                    DWORD_COUNT_LAST_BIT);

   return program_idx;
}
129
/* SPM load constants: one UPPER/LOWER pair of entries per tile buffer,
 * numbered consecutively from 0.
 */
enum pvr_spm_load_const {
   SPM_LOAD_CONST_TILE_BUFFER_1_UPPER = 0,
   SPM_LOAD_CONST_TILE_BUFFER_1_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_2_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_2_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_3_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_3_LOWER,

   /* Tile buffers 4..7 are only available if the core does not have the
    * has_eight_output_registers feature, i.e. only if the device has 4
    * output regs.
    */
   SPM_LOAD_CONST_TILE_BUFFER_4_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_4_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_5_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_5_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_6_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_6_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
};
/* Number of entries in enum pvr_spm_load_const. */
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)

/* Sentinel for an unused SPM load destination. Kept as a signed all-ones
 * value; the expansion is parenthesized so it stays a single operand
 * wherever the macro is used.
 */
#define PVR_SPM_LOAD_DEST_UNUSED (~0)

/* Number of distinct sample counts that have their own set of programs. */
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U

#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */

/* If output_regs == 8
 *    reg_load_programs = 4             # 1, 2, 4, 8
 *    tile_buffer_load_programs = 3     # 1, 2, 3
 * else #output_regs == 4
 *    reg_load_programs = 3             # 1, 2, 4
 *    tile_buffer_load_programs = 7     # 1, 2, 3, 4, 5, 6, 7
 *
 * See PVR_SPM_LOAD_IN_BUFFERS_COUNT for where the amount of
 * tile_buffer_load_programs comes from.
 *
 * Tot = sample_count * (reg_load_programs + tile_buffer_load_programs)
 */
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
 * output regs so the count is different.
 */
#define PVR_SPM_LOAD_PROGRAM_COUNT \
   (PVR_SPM_LOAD_SAMPLES_COUNT *   \
    (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
176
pvr_get_spm_load_program_index(uint32_t sample_count,uint32_t num_tile_buffers,uint32_t num_output_regs)177 static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
178 uint32_t num_tile_buffers,
179 uint32_t num_output_regs)
180 {
181 uint32_t idx;
182
183 assert(util_is_power_of_two_nonzero(sample_count));
184 idx = util_logbase2(sample_count) *
185 (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
186
187 assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
188
189 if (num_output_regs > 0) {
190 assert(util_is_power_of_two_nonzero(num_output_regs));
191 assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
192 idx += util_logbase2(num_output_regs);
193 } else {
194 assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
195 idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
196 }
197
198 assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
199 return idx;
200 }
201
202 #endif /* PVR_SHADER_FACTORY_H */
203