xref: /aosp_15_r20/external/mesa3d/src/intel/executor/executor_genx.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2024 Intel Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "executor.h"
7 
8 #ifdef HAVE_VALGRIND
9 #include <valgrind.h>
10 #include <memcheck.h>
11 #define VG(x) x
12 #else
13 #define VG(x) ((void)0)
14 #endif
15 
16 #define __gen_address_type executor_address
17 #define __gen_combine_address executor_combine_address
18 #define __gen_user_data void
19 
20 #include "intel/genxml/gen_macros.h"
21 #include "intel/genxml/genX_pack.h"
22 
/* Token-pasting helpers: turn a command name into the matching
 * <cmd>_length, <cmd>_header and <cmd>_pack identifiers.
 */
#define __executor_cmd_length(cmd) cmd ## _length
#define __executor_cmd_header(cmd) cmd ## _header
#define __executor_cmd_pack(cmd) cmd ## _pack

/* Emit one command into the batch buffer.
 *
 * Usage:
 *
 *    executor_batch_emit(GENX(SOME_COMMAND), name) {
 *       name.Field = value;
 *    }
 *
 * The macro expands to a for-loop header, so the statement (or block) that
 * follows it becomes the loop body:
 *
 *  - init:      declares `struct cmd name`, initialized from the command's
 *               header, and `_dst`, pointing at
 *               __executor_cmd_length(cmd) * 4 bytes allocated from
 *               ec->bo.batch.
 *  - condition: the body runs only while `_dst` is non-NULL.
 *  - increment: after the body, packs `name` into `_dst`, (under Valgrind)
 *               checks that the packed bytes are defined, then sets `_dst`
 *               to NULL so the loop terminates after a single iteration.
 *
 * NOTE: the expansion references a variable named `ec` that must be in scope
 * at the point of use; it is not a macro parameter.
 */
#define executor_batch_emit(cmd, name)                                               \
   for (struct cmd name = { __executor_cmd_header(cmd) },                            \
        *_dst = executor_alloc_bytes(&ec->bo.batch, __executor_cmd_length(cmd) * 4); \
        __builtin_expect(_dst != NULL, 1);                                           \
        ({ __executor_cmd_pack(cmd)(0, _dst, &name);                                 \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __executor_cmd_length(cmd) * 4));  \
           _dst = NULL;                                                              \
         }))
35 
36 static void
emit_pipe_control(executor_context * ec)37 emit_pipe_control(executor_context *ec)
38 {
39    executor_batch_emit(GENX(PIPE_CONTROL), pc) {
40 #if GFX_VER >= 12
41       pc.HDCPipelineFlushEnable     = true;
42 #endif
43       pc.PipeControlFlushEnable     = true;
44       pc.CommandStreamerStallEnable = true;
45    }
46 }
47 
48 static void
emit_state_base_address(executor_context * ec,uint32_t mocs)49 emit_state_base_address(executor_context *ec, uint32_t mocs)
50 {
51    /* Use the full address for everything. */
52    const executor_address base_address = {0};
53    const uint32_t size                 = (1 << 20) - 1;
54 
55    executor_batch_emit(GENX(STATE_BASE_ADDRESS), sba) {
56       sba.GeneralStateBaseAddress               = base_address;
57       sba.GeneralStateBaseAddressModifyEnable   = true;
58       sba.GeneralStateBufferSize                = size;
59       sba.GeneralStateBufferSizeModifyEnable    = true;
60       sba.GeneralStateMOCS                      = mocs;
61 
62       sba.DynamicStateBaseAddress               = base_address;
63       sba.DynamicStateBaseAddressModifyEnable   = true;
64       sba.DynamicStateBufferSize                = size;
65       sba.DynamicStateBufferSizeModifyEnable    = true;
66       sba.DynamicStateMOCS                      = mocs;
67 
68       sba.InstructionBaseAddress                = base_address;
69       sba.InstructionBaseAddressModifyEnable    = true;
70       sba.InstructionBufferSize                 = size;
71       sba.InstructionBuffersizeModifyEnable     = true;
72       sba.InstructionMOCS                       = mocs;
73 
74       sba.IndirectObjectBaseAddress             = base_address;
75       sba.IndirectObjectBaseAddressModifyEnable = true;
76       sba.IndirectObjectBufferSize              = size;
77       sba.IndirectObjectBufferSizeModifyEnable  = true;
78       sba.IndirectObjectMOCS                    = mocs;
79 
80       sba.SurfaceStateMOCS            = mocs;
81       sba.StatelessDataPortAccessMOCS = mocs;
82 
83 #if GFX_VER >= 11
84       sba.BindlessSamplerStateMOCS    = mocs;
85 #endif
86       sba.BindlessSurfaceStateMOCS    = mocs;
87 
88 #if GFX_VERx10 >= 125
89       sba.L1CacheControl = L1CC_WB;
90 #endif
91    };
92 }
93 
94 void
genX(emit_execute)95 genX(emit_execute)(executor_context *ec, const executor_params *params)
96 {
97    uint32_t *kernel = executor_alloc_bytes(&ec->bo.extra, params->kernel_size);
98    memcpy(kernel, params->kernel_bin, params->kernel_size);
99    executor_address kernel_addr = executor_address_of_ptr(&ec->bo.extra, kernel);
100 
101    /* TODO: Let SIMD be a parameter. */
102 
103    struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
104       .KernelStartPointer = kernel_addr.offset,
105       .NumberofThreadsinGPGPUThreadGroup = 1,
106    };
107 
108    void *b = executor_alloc_bytes_aligned(&ec->bo.batch, 0, 256);
109    ec->batch_start = executor_address_of_ptr(&ec->bo.batch, b).offset;
110 
111    emit_pipe_control(ec);
112 
113 #if GFX_VERx10 < 200
114    executor_batch_emit(GENX(PIPELINE_SELECT), ps) {
115       ps.PipelineSelection = GPGPU;
116       ps.MaskBits = 0x3;
117    }
118    emit_pipe_control(ec);
119 #endif
120 
121    const uint32_t mocs = isl_mocs(ec->isl_dev, 0, false);
122 
123    emit_state_base_address(ec, mocs);
124 
125 #if GFX_VERx10 >= 125
126    executor_batch_emit(GENX(STATE_COMPUTE_MODE), cm) {
127       cm.Mask1 = 0xffff;
128 #if GFX_VERx10 >= 200
129       cm.Mask2 = 0xffff;
130 #endif
131    }
132 
133    executor_batch_emit(GENX(CFE_STATE), cfe) {
134       cfe.MaximumNumberofThreads = 64;
135    }
136 #else
137    executor_batch_emit(GENX(MEDIA_VFE_STATE), vfe) {
138       vfe.NumberofURBEntries = 2;
139       vfe.MaximumNumberofThreads = 64;
140    }
141 #endif
142 
143    emit_pipe_control(ec);
144 
145 #if GFX_VERx10 >= 125
146    executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
147 #if GFX_VERx10 >= 200
148       cw.SIMDSize                = 1;
149       cw.MessageSIMD             = 1;
150 #endif
151       cw.ThreadGroupIDXDimension = 1;
152       cw.ThreadGroupIDYDimension = 1;
153       cw.ThreadGroupIDZDimension = 1;
154       cw.ExecutionMask           = 0xFFFFFFFF;
155       cw.PostSync.MOCS           = mocs;
156       cw.InterfaceDescriptor     = desc;
157    };
158 #else
159    uint32_t *idd = executor_alloc_bytes_aligned(&ec->bo.extra, 8 * 4, 256);
160    GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, idd, &desc);
161 
162    executor_address idd_addr = executor_address_of_ptr(&ec->bo.extra, idd);
163 
164    executor_batch_emit(GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
165       load.InterfaceDescriptorDataStartAddress = idd_addr.offset,
166       load.InterfaceDescriptorTotalLength = 8 * 4;
167    }
168 
169    executor_batch_emit(GENX(GPGPU_WALKER), gw) {
170       gw.ThreadGroupIDXDimension = 1;
171       gw.ThreadGroupIDYDimension = 1;
172       gw.ThreadGroupIDZDimension = 1;
173       gw.RightExecutionMask      = 0xFFFFFFFF;
174       gw.BottomExecutionMask     = 0xFFFFFFFF;
175    }
176 
177    executor_batch_emit(GENX(MEDIA_STATE_FLUSH), msf);
178 #endif
179 
180    emit_pipe_control(ec);
181 
182    executor_batch_emit(GENX(MI_BATCH_BUFFER_END), end);
183 }
184