xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/usc/programs/pvr_shader_factory.h (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef PVR_SHADER_FACTORY_H
25 #define PVR_SHADER_FACTORY_H
26 
27 #include <stdint.h>
28 #include <stdbool.h>
29 
30 #include "util/bitpack_helpers.h"
31 #include "util/bitscan.h"
32 #include "util/u_math.h"
33 
/* Occlusion query availability writes.
 *
 * Enumerators naming the entries used by this operation (presumably shader
 * constant slots, going by the "_const" suffix - confirm against the users of
 * this enum). The trailing *_COUNT enumerator evaluates to the number of
 * enumerators that precede it.
 */
enum pvr_query_availability_write_pool_const {
   PVR_QUERY_AVAILABILITY_WRITE_INDEX_COUNT = 0,
   PVR_QUERY_AVAILABILITY_WRITE_COUNT,
};
39 
/* Copy query pool results.
 *
 * Enumerators naming the entries used by this operation (presumably shader
 * constant slots, going by the "_const" suffix - confirm against the users of
 * this enum). The values are consecutive starting at 0, and the trailing
 * *_COUNT enumerator evaluates to the number of enumerators that precede it.
 */
enum pvr_copy_query_pool_const {
   PVR_COPY_QUERY_POOL_RESULTS_INDEX_COUNT = 0,

   /* Base address, split into two 32-bit halves. */
   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_LOW,
   PVR_COPY_QUERY_POOL_RESULTS_BASE_ADDRESS_HIGH,

   PVR_COPY_QUERY_POOL_RESULTS_DEST_STRIDE,

   /* Flags. */
   PVR_COPY_QUERY_POOL_RESULTS_PARTIAL_RESULT_FLAG,
   PVR_COPY_QUERY_POOL_RESULTS_64_BIT_FLAG,
   PVR_COPY_QUERY_POOL_RESULTS_WITH_AVAILABILITY_FLAG,

   PVR_COPY_QUERY_POOL_RESULTS_COUNT,
};
51 
/* Reset query pool.
 *
 * Enumerators naming the entries used by this operation (presumably shader
 * constant slots, going by the "_const" suffix - confirm against the users of
 * this enum). The trailing *_COUNT enumerator evaluates to the number of
 * enumerators that precede it.
 */
enum pvr_reset_query_pool_pool_const {
   PVR_RESET_QUERY_POOL_INDEX_COUNT = 0,
   PVR_RESET_QUERY_POOL_COUNT,
};
57 
/* ClearAttachments.
 *
 * The four COMPONENT_n enumerators have pinned values 0..3 because they are
 * used to index an array. The trailing *_COUNT enumerator evaluates to the
 * number of enumerators that precede it.
 */
enum pvr_clear_attachment_const {
   /* Don't change. Indexes array. */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0 = 0,
   /* Don't change. Indexes array. */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1 = 1,
   /* Don't change. Indexes array. */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2 = 2,
   /* Don't change. Indexes array. */
   PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3 = 3,

   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER,
   PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER,

   PVR_CLEAR_ATTACHMENT_CONST_COUNT,
};
72 
/* Value (all bits set) marking a clear attachment destination id as unused. */
#define PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED (~0U)

/* Number of clear attachment programs. Each term of the sum is the number of
 * programs for one dword count, covering both destinations:
 *
 *    8 + 8 = 16 <- 1 Dword, 8 offsets, to registers/tile buffers
 *    7 + 7 = 14 <- 2 Dwords, 7 offsets, to registers/tile buffers
 *    6 + 6 = 12 <- 3 Dwords, 6 offsets, to registers/tile buffers
 *    5 + 5 = 10 <- 4 Dwords, 5 offsets, to registers/tile buffers
 */
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT (16 + 14 + 12 + 10)

/* This defines the max theoretic number of clear attachment programs. In cases
 * where offset + dword count goes past the number of on-chip or tile buffer
 * targets, the corresponding elements are unused. There are 4 versions for
 * clearing 1..4 dwords, 8 versions for clearing offsets 0..7 and 2 versions
 * for clearing either on chip or in memory, giving 4 * 8 * 2 = 64.
 */
#define PVR_CLEAR_ATTACHMENT_PROGRAM_COUNT_WITH_HOLES (4 * 8 * 2)
89 
/**
 * \brief Returns the index of the clear attachment USC program.
 *
 * The index is a bitfield assembled from the three arguments:
 *
 *    bit  0     - uses_tile_buffer (clear on chip or in memory)
 *    bits 1..3  - offset (0..7)
 *    bits 4..5  - dword_count - 1 (dword_count is 1..4)
 *
 * For shaders which use output registers, "dword_count" is essentially the
 * count of output registers to use, and "offset" is the first output reg to
 * use. E.g. dword_count 3, offset 1, will use o1, o2, o3.
 *
 * For shaders which use tile buffers as the destination, "dword_count" is the
 * amount of dwords to write to the tile buffer and "offset" is the offset at
 * which to start writing.
 *
 * \param dword_count       Number of dwords to clear, 1..4.
 * \param offset            Offset to start clearing at, 0..7.
 * \param uses_tile_buffer  Whether the destination is a tile buffer.
 * \return The packed program index.
 */
static inline uint32_t
pvr_get_clear_attachment_program_index(uint32_t dword_count,
                                       uint32_t offset,
                                       bool uses_tile_buffer)
{
   /* Inclusive [lo, hi] bit range of each field within the index. */
   const uint32_t dest_lo = 0;
   const uint32_t dest_hi = 0;

   const uint32_t offset_lo = 1;
   const uint32_t offset_hi = 3;

   const uint32_t dwords_lo = 4;
   const uint32_t dwords_hi = 5;

   /* The dword count is stored zero-based so that 1..4 fits in two bits. */
   const uint32_t dwords_minus_one = dword_count - 1;

   uint32_t idx;

   idx = util_bitpack_uint(uses_tile_buffer, dest_lo, dest_hi);
   idx |= util_bitpack_uint(offset, offset_lo, offset_hi);
   idx |= util_bitpack_uint(dwords_minus_one, dwords_lo, dwords_hi);

   return idx;
}
129 
/* Enumerators for the SPM load tile buffer entries: one UPPER/LOWER pair per
 * tile buffer, numbered consecutively starting at 0.
 */
enum pvr_spm_load_const {
   SPM_LOAD_CONST_TILE_BUFFER_1_UPPER = 0,
   SPM_LOAD_CONST_TILE_BUFFER_1_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_2_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_2_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_3_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_3_LOWER,

   /* The following are only available if the core does not have the
    * has_eight_output_registers feature. I.e. only available if the device has
    * 4 output regs.
    */
   SPM_LOAD_CONST_TILE_BUFFER_4_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_4_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_5_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_5_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_6_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_6_LOWER,

   SPM_LOAD_CONST_TILE_BUFFER_7_UPPER,
   SPM_LOAD_CONST_TILE_BUFFER_7_LOWER,
};

/* Number of enumerators in enum pvr_spm_load_const. */
#define PVR_SPM_LOAD_CONST_COUNT (SPM_LOAD_CONST_TILE_BUFFER_7_LOWER + 1)
/* Value (all bits set) marking an SPM load destination as unused. The
 * expansion is parenthesized so it always behaves as a single operand.
 */
#define PVR_SPM_LOAD_DEST_UNUSED (~0)

/* Number of distinct sample counts that SPM load programs exist for
 * (presumably 1, 2, 4 and 8 samples - confirm against the program tables).
 */
#define PVR_SPM_LOAD_SAMPLES_COUNT 4U

#define PVR_SPM_LOAD_IN_REGS_COUNT 3 /* 1, 2, 4 */
#define PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT 7 /* 1, 2, 3, 4, 5, 6, 7 */

/* If output_regs == 8
 *    reg_load_programs = 4            # 1, 2, 4, 8
 *    tile_buffer_load_programs = 3    # 1, 2, 3
 * else                                # output_regs == 4
 *    reg_load_programs = 3            # 1, 2, 4
 *    tile_buffer_load_programs = 7    # 1, 2, 3, 4, 5, 6, 7
 *
 * See PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT for where the amount of
 * tile_buffer_load_programs comes from.
 *
 * Tot = sample_count * (reg_load_programs + tile_buffer_load_programs)
 */
/* FIXME: This is currently hard coded for the am62. The Chromebook has 8
 * output regs so the count is different.
 */
#define PVR_SPM_LOAD_PROGRAM_COUNT \
   (PVR_SPM_LOAD_SAMPLES_COUNT *   \
    (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT))
176 
pvr_get_spm_load_program_index(uint32_t sample_count,uint32_t num_tile_buffers,uint32_t num_output_regs)177 static inline uint32_t pvr_get_spm_load_program_index(uint32_t sample_count,
178                                                       uint32_t num_tile_buffers,
179                                                       uint32_t num_output_regs)
180 {
181    uint32_t idx;
182 
183    assert(util_is_power_of_two_nonzero(sample_count));
184    idx = util_logbase2(sample_count) *
185          (PVR_SPM_LOAD_IN_REGS_COUNT + PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
186 
187    assert((num_tile_buffers > 0) ^ (num_output_regs > 0));
188 
189    if (num_output_regs > 0) {
190       assert(util_is_power_of_two_nonzero(num_output_regs));
191       assert(util_logbase2(num_output_regs) < PVR_SPM_LOAD_IN_REGS_COUNT);
192       idx += util_logbase2(num_output_regs);
193    } else {
194       assert(num_tile_buffers <= PVR_SPM_LOAD_IN_TILE_BUFFERS_COUNT);
195       idx += PVR_SPM_LOAD_IN_REGS_COUNT + num_tile_buffers - 1;
196    }
197 
198    assert(idx < PVR_SPM_LOAD_PROGRAM_COUNT);
199    return idx;
200 }
201 
202 #endif /* PVR_SHADER_FACTORY_H */
203