/*
 * Copyright © 2024 Intel Corporation
 * SPDX-License-Identifier: MIT
 */

6 #include "executor.h"
7
8 #ifdef HAVE_VALGRIND
9 #include <valgrind.h>
10 #include <memcheck.h>
11 #define VG(x) x
12 #else
13 #define VG(x) ((void)0)
14 #endif
15
16 #define __gen_address_type executor_address
17 #define __gen_combine_address executor_combine_address
18 #define __gen_user_data void
19
20 #include "intel/genxml/gen_macros.h"
21 #include "intel/genxml/genX_pack.h"
22
23 #define __executor_cmd_length(cmd) cmd ## _length
24 #define __executor_cmd_header(cmd) cmd ## _header
25 #define __executor_cmd_pack(cmd) cmd ## _pack
26
27 #define executor_batch_emit(cmd, name) \
28 for (struct cmd name = { __executor_cmd_header(cmd) }, \
29 *_dst = executor_alloc_bytes(&ec->bo.batch, __executor_cmd_length(cmd) * 4); \
30 __builtin_expect(_dst != NULL, 1); \
31 ({ __executor_cmd_pack(cmd)(0, _dst, &name); \
32 VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __executor_cmd_length(cmd) * 4)); \
33 _dst = NULL; \
34 }))
35
36 static void
emit_pipe_control(executor_context * ec)37 emit_pipe_control(executor_context *ec)
38 {
39 executor_batch_emit(GENX(PIPE_CONTROL), pc) {
40 #if GFX_VER >= 12
41 pc.HDCPipelineFlushEnable = true;
42 #endif
43 pc.PipeControlFlushEnable = true;
44 pc.CommandStreamerStallEnable = true;
45 }
46 }
47
48 static void
emit_state_base_address(executor_context * ec,uint32_t mocs)49 emit_state_base_address(executor_context *ec, uint32_t mocs)
50 {
51 /* Use the full address for everything. */
52 const executor_address base_address = {0};
53 const uint32_t size = (1 << 20) - 1;
54
55 executor_batch_emit(GENX(STATE_BASE_ADDRESS), sba) {
56 sba.GeneralStateBaseAddress = base_address;
57 sba.GeneralStateBaseAddressModifyEnable = true;
58 sba.GeneralStateBufferSize = size;
59 sba.GeneralStateBufferSizeModifyEnable = true;
60 sba.GeneralStateMOCS = mocs;
61
62 sba.DynamicStateBaseAddress = base_address;
63 sba.DynamicStateBaseAddressModifyEnable = true;
64 sba.DynamicStateBufferSize = size;
65 sba.DynamicStateBufferSizeModifyEnable = true;
66 sba.DynamicStateMOCS = mocs;
67
68 sba.InstructionBaseAddress = base_address;
69 sba.InstructionBaseAddressModifyEnable = true;
70 sba.InstructionBufferSize = size;
71 sba.InstructionBuffersizeModifyEnable = true;
72 sba.InstructionMOCS = mocs;
73
74 sba.IndirectObjectBaseAddress = base_address;
75 sba.IndirectObjectBaseAddressModifyEnable = true;
76 sba.IndirectObjectBufferSize = size;
77 sba.IndirectObjectBufferSizeModifyEnable = true;
78 sba.IndirectObjectMOCS = mocs;
79
80 sba.SurfaceStateMOCS = mocs;
81 sba.StatelessDataPortAccessMOCS = mocs;
82
83 #if GFX_VER >= 11
84 sba.BindlessSamplerStateMOCS = mocs;
85 #endif
86 sba.BindlessSurfaceStateMOCS = mocs;
87
88 #if GFX_VERx10 >= 125
89 sba.L1CacheControl = L1CC_WB;
90 #endif
91 };
92 }
93
94 void
genX(emit_execute)95 genX(emit_execute)(executor_context *ec, const executor_params *params)
96 {
97 uint32_t *kernel = executor_alloc_bytes(&ec->bo.extra, params->kernel_size);
98 memcpy(kernel, params->kernel_bin, params->kernel_size);
99 executor_address kernel_addr = executor_address_of_ptr(&ec->bo.extra, kernel);
100
101 /* TODO: Let SIMD be a parameter. */
102
103 struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
104 .KernelStartPointer = kernel_addr.offset,
105 .NumberofThreadsinGPGPUThreadGroup = 1,
106 };
107
108 void *b = executor_alloc_bytes_aligned(&ec->bo.batch, 0, 256);
109 ec->batch_start = executor_address_of_ptr(&ec->bo.batch, b).offset;
110
111 emit_pipe_control(ec);
112
113 #if GFX_VERx10 < 200
114 executor_batch_emit(GENX(PIPELINE_SELECT), ps) {
115 ps.PipelineSelection = GPGPU;
116 ps.MaskBits = 0x3;
117 }
118 emit_pipe_control(ec);
119 #endif
120
121 const uint32_t mocs = isl_mocs(ec->isl_dev, 0, false);
122
123 emit_state_base_address(ec, mocs);
124
125 #if GFX_VERx10 >= 125
126 executor_batch_emit(GENX(STATE_COMPUTE_MODE), cm) {
127 cm.Mask1 = 0xffff;
128 #if GFX_VERx10 >= 200
129 cm.Mask2 = 0xffff;
130 #endif
131 }
132
133 executor_batch_emit(GENX(CFE_STATE), cfe) {
134 cfe.MaximumNumberofThreads = 64;
135 }
136 #else
137 executor_batch_emit(GENX(MEDIA_VFE_STATE), vfe) {
138 vfe.NumberofURBEntries = 2;
139 vfe.MaximumNumberofThreads = 64;
140 }
141 #endif
142
143 emit_pipe_control(ec);
144
145 #if GFX_VERx10 >= 125
146 executor_batch_emit(GENX(COMPUTE_WALKER), cw) {
147 #if GFX_VERx10 >= 200
148 cw.SIMDSize = 1;
149 cw.MessageSIMD = 1;
150 #endif
151 cw.ThreadGroupIDXDimension = 1;
152 cw.ThreadGroupIDYDimension = 1;
153 cw.ThreadGroupIDZDimension = 1;
154 cw.ExecutionMask = 0xFFFFFFFF;
155 cw.PostSync.MOCS = mocs;
156 cw.InterfaceDescriptor = desc;
157 };
158 #else
159 uint32_t *idd = executor_alloc_bytes_aligned(&ec->bo.extra, 8 * 4, 256);
160 GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL, idd, &desc);
161
162 executor_address idd_addr = executor_address_of_ptr(&ec->bo.extra, idd);
163
164 executor_batch_emit(GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
165 load.InterfaceDescriptorDataStartAddress = idd_addr.offset,
166 load.InterfaceDescriptorTotalLength = 8 * 4;
167 }
168
169 executor_batch_emit(GENX(GPGPU_WALKER), gw) {
170 gw.ThreadGroupIDXDimension = 1;
171 gw.ThreadGroupIDYDimension = 1;
172 gw.ThreadGroupIDZDimension = 1;
173 gw.RightExecutionMask = 0xFFFFFFFF;
174 gw.BottomExecutionMask = 0xFFFFFFFF;
175 }
176
177 executor_batch_emit(GENX(MEDIA_STATE_FLUSH), msf);
178 #endif
179
180 emit_pipe_control(ec);
181
182 executor_batch_emit(GENX(MI_BATCH_BUFFER_END), end);
183 }
184