1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2022 Intel Corporation
4  */
5 
6 #include "xe_guc_ads.h"
7 
8 #include <linux/fault-inject.h>
9 
10 #include <drm/drm_managed.h>
11 
12 #include <generated/xe_wa_oob.h>
13 
14 #include "abi/guc_actions_abi.h"
15 #include "regs/xe_engine_regs.h"
16 #include "regs/xe_gt_regs.h"
17 #include "regs/xe_guc_regs.h"
18 #include "xe_bo.h"
19 #include "xe_gt.h"
20 #include "xe_gt_ccs_mode.h"
21 #include "xe_gt_printk.h"
22 #include "xe_guc.h"
23 #include "xe_guc_capture.h"
24 #include "xe_guc_ct.h"
25 #include "xe_hw_engine.h"
26 #include "xe_lrc.h"
27 #include "xe_map.h"
28 #include "xe_mmio.h"
29 #include "xe_platform_types.h"
30 #include "xe_uc_fw.h"
31 #include "xe_wa.h"
32 #include "xe_gt_mcr.h"
33 
34 /* Slack of a few additional entries per engine */
35 #define ADS_REGSET_EXTRA_MAX	8
36 
37 static struct xe_guc *
ads_to_guc(struct xe_guc_ads * ads)38 ads_to_guc(struct xe_guc_ads *ads)
39 {
40 	return container_of(ads, struct xe_guc, ads);
41 }
42 
43 static struct xe_gt *
ads_to_gt(struct xe_guc_ads * ads)44 ads_to_gt(struct xe_guc_ads *ads)
45 {
46 	return container_of(ads, struct xe_gt, uc.guc.ads);
47 }
48 
/* Resolve the xe_device that owns the GT embedding @ads. */
static struct xe_device *
ads_to_xe(struct xe_guc_ads *ads)
{
	struct xe_gt *gt = ads_to_gt(ads);

	return gt_to_xe(gt);
}
54 
55 static struct iosys_map *
ads_to_map(struct xe_guc_ads * ads)56 ads_to_map(struct xe_guc_ads *ads)
57 {
58 	return &ads->bo->vmap;
59 }
60 
61 /* UM Queue parameters: */
62 #define GUC_UM_QUEUE_SIZE       (SZ_64K)
63 #define GUC_PAGE_RES_TIMEOUT_US (-1)
64 
65 /*
66  * The Additional Data Struct (ADS) has pointers for different buffers used by
67  * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
68  * all the extra buffers indirectly linked via the ADS struct's entries.
69  *
70  * Layout of the ADS blob allocated for the GuC:
71  *
72  *      +---------------------------------------+ <== base
73  *      | guc_ads                               |
74  *      +---------------------------------------+
75  *      | guc_policies                          |
76  *      +---------------------------------------+
77  *      | guc_gt_system_info                    |
78  *      +---------------------------------------+
79  *      | guc_engine_usage                      |
80  *      +---------------------------------------+
81  *      | guc_um_init_params                    |
82  *      +---------------------------------------+ <== static
83  *      | guc_mmio_reg[countA] (engine 0.0)     |
84  *      | guc_mmio_reg[countB] (engine 0.1)     |
85  *      | guc_mmio_reg[countC] (engine 1.0)     |
86  *      |   ...                                 |
87  *      +---------------------------------------+ <== dynamic
88  *      | padding                               |
89  *      +---------------------------------------+ <== 4K aligned
90  *      | golden contexts                       |
91  *      +---------------------------------------+
92  *      | padding                               |
93  *      +---------------------------------------+ <== 4K aligned
94  *      | w/a KLVs                              |
95  *      +---------------------------------------+
96  *      | padding                               |
97  *      +---------------------------------------+ <== 4K aligned
98  *      | capture lists                         |
99  *      +---------------------------------------+
100  *      | padding                               |
101  *      +---------------------------------------+ <== 4K aligned
102  *      | UM queues                             |
103  *      +---------------------------------------+
104  *      | padding                               |
105  *      +---------------------------------------+ <== 4K aligned
106  *      | private data                          |
107  *      +---------------------------------------+
108  *      | padding                               |
109  *      +---------------------------------------+ <== 4K aligned
110  */
/*
 * Head of the ADS blob. The named members sit at fixed offsets from the
 * start of the buffer; regset[] is a flexible array member whose offset
 * marks where the register-set area starts. The offsets of the areas after
 * the register sets are computed at runtime by the guc_ads_*_offset()
 * helpers.
 */
struct __guc_ads_blob {
	struct guc_ads ads;
	struct guc_policies policies;
	struct guc_gt_system_info system_info;
	struct guc_engine_usage engine_usage;
	struct guc_um_init_params um_init_params;
	/* From here on, location is dynamic! Refer to above diagram. */
	struct guc_mmio_reg regset[];
} __packed;
120 
/* Read member @_field of struct __guc_ads_blob at offset 0 of the ADS mapping. */
#define ads_blob_read(_ads, _field)					\
	xe_map_rd_field(ads_to_xe(_ads), ads_to_map(_ads), 0,		\
			struct __guc_ads_blob, _field)

/* Write @_val to member @_field of struct __guc_ads_blob at offset 0 of the ADS mapping. */
#define ads_blob_write(_ads, _field, _val)				\
	xe_map_wr_field(ads_to_xe(_ads), ads_to_map(_ads), 0,		\
			struct __guc_ads_blob, _field, _val)

/*
 * Write @_val to member @_field of a struct guc_gt_system_info located at
 * offset 0 of @_map.
 */
#define info_map_write(_xe, _map, _field, _val)				\
	xe_map_wr_field(_xe, _map, 0, struct guc_gt_system_info, _field, _val)

/*
 * Read member @_field of a struct guc_gt_system_info located at offset 0 of
 * @_map.
 */
#define info_map_read(_xe, _map, _field)				\
	xe_map_rd_field(_xe, _map, 0, struct guc_gt_system_info, _field)
134 
guc_ads_regset_size(struct xe_guc_ads * ads)135 static size_t guc_ads_regset_size(struct xe_guc_ads *ads)
136 {
137 	struct xe_device *xe = ads_to_xe(ads);
138 
139 	xe_assert(xe, ads->regset_size);
140 
141 	return ads->regset_size;
142 }
143 
guc_ads_golden_lrc_size(struct xe_guc_ads * ads)144 static size_t guc_ads_golden_lrc_size(struct xe_guc_ads *ads)
145 {
146 	return PAGE_ALIGN(ads->golden_lrc_size);
147 }
148 
guc_ads_waklv_size(struct xe_guc_ads * ads)149 static u32 guc_ads_waklv_size(struct xe_guc_ads *ads)
150 {
151 	return PAGE_ALIGN(ads->ads_waklv_size);
152 }
153 
guc_ads_capture_size(struct xe_guc_ads * ads)154 static size_t guc_ads_capture_size(struct xe_guc_ads *ads)
155 {
156 	return PAGE_ALIGN(ads->capture_size);
157 }
158 
guc_ads_um_queues_size(struct xe_guc_ads * ads)159 static size_t guc_ads_um_queues_size(struct xe_guc_ads *ads)
160 {
161 	struct xe_device *xe = ads_to_xe(ads);
162 
163 	if (!xe->info.has_usm)
164 		return 0;
165 
166 	return GUC_UM_QUEUE_SIZE * GUC_UM_HW_QUEUE_MAX;
167 }
168 
guc_ads_private_data_size(struct xe_guc_ads * ads)169 static size_t guc_ads_private_data_size(struct xe_guc_ads *ads)
170 {
171 	return PAGE_ALIGN(ads_to_guc(ads)->fw.private_data_size);
172 }
173 
guc_ads_regset_offset(struct xe_guc_ads * ads)174 static size_t guc_ads_regset_offset(struct xe_guc_ads *ads)
175 {
176 	return offsetof(struct __guc_ads_blob, regset);
177 }
178 
guc_ads_golden_lrc_offset(struct xe_guc_ads * ads)179 static size_t guc_ads_golden_lrc_offset(struct xe_guc_ads *ads)
180 {
181 	size_t offset;
182 
183 	offset = guc_ads_regset_offset(ads) +
184 		guc_ads_regset_size(ads);
185 
186 	return PAGE_ALIGN(offset);
187 }
188 
guc_ads_waklv_offset(struct xe_guc_ads * ads)189 static size_t guc_ads_waklv_offset(struct xe_guc_ads *ads)
190 {
191 	u32 offset;
192 
193 	offset = guc_ads_golden_lrc_offset(ads) +
194 		 guc_ads_golden_lrc_size(ads);
195 
196 	return PAGE_ALIGN(offset);
197 }
198 
guc_ads_capture_offset(struct xe_guc_ads * ads)199 static size_t guc_ads_capture_offset(struct xe_guc_ads *ads)
200 {
201 	size_t offset;
202 
203 	offset = guc_ads_waklv_offset(ads) +
204 		 guc_ads_waklv_size(ads);
205 
206 	return PAGE_ALIGN(offset);
207 }
208 
guc_ads_um_queues_offset(struct xe_guc_ads * ads)209 static size_t guc_ads_um_queues_offset(struct xe_guc_ads *ads)
210 {
211 	u32 offset;
212 
213 	offset = guc_ads_capture_offset(ads) +
214 		 guc_ads_capture_size(ads);
215 
216 	return PAGE_ALIGN(offset);
217 }
218 
guc_ads_private_data_offset(struct xe_guc_ads * ads)219 static size_t guc_ads_private_data_offset(struct xe_guc_ads *ads)
220 {
221 	size_t offset;
222 
223 	offset = guc_ads_um_queues_offset(ads) +
224 		guc_ads_um_queues_size(ads);
225 
226 	return PAGE_ALIGN(offset);
227 }
228 
guc_ads_size(struct xe_guc_ads * ads)229 static size_t guc_ads_size(struct xe_guc_ads *ads)
230 {
231 	return guc_ads_private_data_offset(ads) +
232 		guc_ads_private_data_size(ads);
233 }
234 
calculate_regset_size(struct xe_gt * gt)235 static size_t calculate_regset_size(struct xe_gt *gt)
236 {
237 	struct xe_reg_sr_entry *sr_entry;
238 	unsigned long sr_idx;
239 	struct xe_hw_engine *hwe;
240 	enum xe_hw_engine_id id;
241 	unsigned int count = 0;
242 
243 	for_each_hw_engine(hwe, gt, id)
244 		xa_for_each(&hwe->reg_sr.xa, sr_idx, sr_entry)
245 			count++;
246 
247 	count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
248 
249 	if (XE_WA(gt, 1607983814))
250 		count += LNCFCMOCS_REG_COUNT;
251 
252 	return count * sizeof(struct guc_mmio_reg);
253 }
254 
engine_enable_mask(struct xe_gt * gt,enum xe_engine_class class)255 static u32 engine_enable_mask(struct xe_gt *gt, enum xe_engine_class class)
256 {
257 	struct xe_hw_engine *hwe;
258 	enum xe_hw_engine_id id;
259 	u32 mask = 0;
260 
261 	for_each_hw_engine(hwe, gt, id)
262 		if (hwe->class == class)
263 			mask |= BIT(hwe->instance);
264 
265 	return mask;
266 }
267 
calculate_golden_lrc_size(struct xe_guc_ads * ads)268 static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
269 {
270 	struct xe_gt *gt = ads_to_gt(ads);
271 	size_t total_size = 0, alloc_size, real_size;
272 	int class;
273 
274 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
275 		if (!engine_enable_mask(gt, class))
276 			continue;
277 
278 		real_size = xe_gt_lrc_size(gt, class);
279 		alloc_size = PAGE_ALIGN(real_size);
280 		total_size += alloc_size;
281 	}
282 
283 	return total_size;
284 }
285 
/*
 * Append a KLV carrying one dword of data to the w/a KLV area.
 *
 * @offset and @remain are the write cursor and the bytes still free in the
 * area; both are advanced only when the entry fits. If it does not fit, a
 * warning is logged and nothing is written.
 */
static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
				      enum xe_guc_klv_ids klv_id,
				      u32 value,
				      u32 *offset, u32 *remain)
{
	struct xe_device *xe = ads_to_xe(ads);
	u32 klv_entry[] = {
		/* dword 0: key and length (1 data dword) */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 1),
		/* dword 1: the data */
		value,
	};
	const u32 klv_bytes = sizeof(klv_entry);

	if (*remain < klv_bytes) {
		drm_warn(&xe->drm,
			 "w/a klv buffer too small to add klv id %d\n", klv_id);
		return;
	}

	xe_map_memcpy_to(xe, ads_to_map(ads), *offset, klv_entry, klv_bytes);
	*offset += klv_bytes;
	*remain -= klv_bytes;
}
312 
/*
 * Append a KLV with no data dwords (key only) to the w/a KLV area.
 *
 * @offset and @remain are the write cursor and the bytes still free in the
 * area; both are advanced only when the entry fits. If it does not fit, a
 * WARN is raised and nothing is written.
 */
static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
				    enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
{
	u32 klv_entry[] = {
		/* single dword: key and length (0 data dwords) */
		FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
		FIELD_PREP(GUC_KLV_0_LEN, 0),
	};
	u32 size = sizeof(klv_entry);

	if (!xe_gt_WARN(ads_to_gt(ads), *remain < size,
			"w/a klv buffer too small to add klv id %d\n", klv_id)) {
		xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
				 klv_entry, size);
		*offset += size;
		*remain -= size;
	}
}
335 
guc_waklv_init(struct xe_guc_ads * ads)336 static void guc_waklv_init(struct xe_guc_ads *ads)
337 {
338 	struct xe_gt *gt = ads_to_gt(ads);
339 	u64 addr_ggtt;
340 	u32 offset, remain, size;
341 
342 	offset = guc_ads_waklv_offset(ads);
343 	remain = guc_ads_waklv_size(ads);
344 
345 	if (XE_WA(gt, 14019882105))
346 		guc_waklv_enable_simple(ads,
347 					GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
348 					&offset, &remain);
349 	if (XE_WA(gt, 18024947630))
350 		guc_waklv_enable_simple(ads,
351 					GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
352 					&offset, &remain);
353 	if (XE_WA(gt, 16022287689))
354 		guc_waklv_enable_simple(ads,
355 					GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
356 					&offset, &remain);
357 
358 	if (XE_WA(gt, 14022866841))
359 		guc_waklv_enable_simple(ads,
360 					GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
361 					&offset, &remain);
362 
363 	/*
364 	 * On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
365 	 * the default value for this register is determined to be 0xC40. This could change in the
366 	 * future, so GuC depends on KMD to send it the correct value.
367 	 */
368 	if (XE_WA(gt, 13011645652))
369 		guc_waklv_enable_one_word(ads,
370 					  GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
371 					  0xC40,
372 					  &offset, &remain);
373 
374 	if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
375 		guc_waklv_enable_simple(ads,
376 					GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
377 					&offset, &remain);
378 
379 	size = guc_ads_waklv_size(ads) - remain;
380 	if (!size)
381 		return;
382 
383 	offset = guc_ads_waklv_offset(ads);
384 	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
385 
386 	ads_blob_write(ads, ads.wa_klv_addr_lo, lower_32_bits(addr_ggtt));
387 	ads_blob_write(ads, ads.wa_klv_addr_hi, upper_32_bits(addr_ggtt));
388 	ads_blob_write(ads, ads.wa_klv_size, size);
389 }
390 
calculate_waklv_size(struct xe_guc_ads * ads)391 static int calculate_waklv_size(struct xe_guc_ads *ads)
392 {
393 	/*
394 	 * A single page is both the minimum size possible and
395 	 * is sufficiently large enough for all current platforms.
396 	 */
397 	return SZ_4K;
398 }
399 
400 #define MAX_GOLDEN_LRC_SIZE	(SZ_4K * 64)
401 
xe_guc_ads_init(struct xe_guc_ads * ads)402 int xe_guc_ads_init(struct xe_guc_ads *ads)
403 {
404 	struct xe_device *xe = ads_to_xe(ads);
405 	struct xe_gt *gt = ads_to_gt(ads);
406 	struct xe_tile *tile = gt_to_tile(gt);
407 	struct xe_bo *bo;
408 
409 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
410 	ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
411 	ads->regset_size = calculate_regset_size(gt);
412 	ads->ads_waklv_size = calculate_waklv_size(ads);
413 
414 	bo = xe_managed_bo_create_pin_map(xe, tile, guc_ads_size(ads) + MAX_GOLDEN_LRC_SIZE,
415 					  XE_BO_FLAG_SYSTEM |
416 					  XE_BO_FLAG_GGTT |
417 					  XE_BO_FLAG_GGTT_INVALIDATE);
418 	if (IS_ERR(bo))
419 		return PTR_ERR(bo);
420 
421 	ads->bo = bo;
422 
423 	return 0;
424 }
425 ALLOW_ERROR_INJECTION(xe_guc_ads_init, ERRNO); /* See xe_pci_probe() */
426 
427 /**
428  * xe_guc_ads_init_post_hwconfig - initialize ADS post hwconfig load
429  * @ads: Additional data structures object
430  *
431  * Recalculate golden_lrc_size, capture_size and regset_size as the number
432  * hardware engines may have changed after the hwconfig was loaded. Also verify
433  * the new sizes fit in the already allocated ADS buffer object.
434  *
435  * Return: 0 on success, negative error code on error.
436  */
xe_guc_ads_init_post_hwconfig(struct xe_guc_ads * ads)437 int xe_guc_ads_init_post_hwconfig(struct xe_guc_ads *ads)
438 {
439 	struct xe_gt *gt = ads_to_gt(ads);
440 	u32 prev_regset_size = ads->regset_size;
441 
442 	xe_gt_assert(gt, ads->bo);
443 
444 	ads->golden_lrc_size = calculate_golden_lrc_size(ads);
445 	/* Calculate Capture size with worst size */
446 	ads->capture_size = xe_guc_capture_ads_input_worst_size(ads_to_guc(ads));
447 	ads->regset_size = calculate_regset_size(gt);
448 
449 	xe_gt_assert(gt, ads->golden_lrc_size +
450 		     (ads->regset_size - prev_regset_size) <=
451 		     MAX_GOLDEN_LRC_SIZE);
452 
453 	return 0;
454 }
455 
guc_policies_init(struct xe_guc_ads * ads)456 static void guc_policies_init(struct xe_guc_ads *ads)
457 {
458 	struct xe_device *xe = ads_to_xe(ads);
459 	u32 global_flags = 0;
460 
461 	ads_blob_write(ads, policies.dpc_promote_time,
462 		       GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US);
463 	ads_blob_write(ads, policies.max_num_work_items,
464 		       GLOBAL_POLICY_MAX_NUM_WI);
465 
466 	if (xe->wedged.mode == 2)
467 		global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
468 
469 	ads_blob_write(ads, policies.global_flags, global_flags);
470 	ads_blob_write(ads, policies.is_valid, 1);
471 }
472 
fill_engine_enable_masks(struct xe_gt * gt,struct iosys_map * info_map)473 static void fill_engine_enable_masks(struct xe_gt *gt,
474 				     struct iosys_map *info_map)
475 {
476 	struct xe_device *xe = gt_to_xe(gt);
477 
478 	info_map_write(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS],
479 		       engine_enable_mask(gt, XE_ENGINE_CLASS_RENDER));
480 	info_map_write(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS],
481 		       engine_enable_mask(gt, XE_ENGINE_CLASS_COPY));
482 	info_map_write(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS],
483 		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_DECODE));
484 	info_map_write(xe, info_map,
485 		       engine_enabled_masks[GUC_VIDEOENHANCE_CLASS],
486 		       engine_enable_mask(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE));
487 	info_map_write(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS],
488 		       engine_enable_mask(gt, XE_ENGINE_CLASS_COMPUTE));
489 	info_map_write(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS],
490 		       engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
491 }
492 
493 /*
494  * Write the offsets corresponding to the golden LRCs. The actual data is
495  * populated later by guc_golden_lrc_populate()
496  */
guc_golden_lrc_init(struct xe_guc_ads * ads)497 static void guc_golden_lrc_init(struct xe_guc_ads *ads)
498 {
499 	struct xe_device *xe = ads_to_xe(ads);
500 	struct xe_gt *gt = ads_to_gt(ads);
501 	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
502 			offsetof(struct __guc_ads_blob, system_info));
503 	size_t alloc_size, real_size;
504 	u32 addr_ggtt, offset;
505 	int class;
506 
507 	offset = guc_ads_golden_lrc_offset(ads);
508 	addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
509 
510 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
511 		u8 guc_class;
512 
513 		guc_class = xe_engine_class_to_guc_class(class);
514 
515 		if (!info_map_read(xe, &info_map,
516 				   engine_enabled_masks[guc_class]))
517 			continue;
518 
519 		real_size = xe_gt_lrc_size(gt, class);
520 		alloc_size = PAGE_ALIGN(real_size);
521 
522 		/*
523 		 * This interface is slightly confusing. We need to pass the
524 		 * base address of the full golden context and the size of just
525 		 * the engine state, which is the section of the context image
526 		 * that starts after the execlists LRC registers. This is
527 		 * required to allow the GuC to restore just the engine state
528 		 * when a watchdog reset occurs.
529 		 * We calculate the engine state size by removing the size of
530 		 * what comes before it in the context image (which is identical
531 		 * on all engines).
532 		 */
533 		ads_blob_write(ads, ads.eng_state_size[guc_class],
534 			       real_size - xe_lrc_skip_size(xe));
535 		ads_blob_write(ads, ads.golden_context_lrca[guc_class],
536 			       addr_ggtt);
537 
538 		addr_ggtt += alloc_size;
539 	}
540 }
541 
guc_mapping_table_init_invalid(struct xe_gt * gt,struct iosys_map * info_map)542 static void guc_mapping_table_init_invalid(struct xe_gt *gt,
543 					   struct iosys_map *info_map)
544 {
545 	struct xe_device *xe = gt_to_xe(gt);
546 	unsigned int i, j;
547 
548 	/* Table must be set to invalid values for entries not used */
549 	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
550 		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
551 			info_map_write(xe, info_map, mapping_table[i][j],
552 				       GUC_MAX_INSTANCES_PER_CLASS);
553 }
554 
guc_mapping_table_init(struct xe_gt * gt,struct iosys_map * info_map)555 static void guc_mapping_table_init(struct xe_gt *gt,
556 				   struct iosys_map *info_map)
557 {
558 	struct xe_device *xe = gt_to_xe(gt);
559 	struct xe_hw_engine *hwe;
560 	enum xe_hw_engine_id id;
561 
562 	guc_mapping_table_init_invalid(gt, info_map);
563 
564 	for_each_hw_engine(hwe, gt, id) {
565 		u8 guc_class;
566 
567 		guc_class = xe_engine_class_to_guc_class(hwe->class);
568 		info_map_write(xe, info_map,
569 			       mapping_table[guc_class][hwe->logical_instance],
570 			       hwe->instance);
571 	}
572 }
573 
guc_get_capture_engine_mask(struct xe_gt * gt,struct iosys_map * info_map,enum guc_capture_list_class_type capture_class)574 static u32 guc_get_capture_engine_mask(struct xe_gt *gt, struct iosys_map *info_map,
575 				       enum guc_capture_list_class_type capture_class)
576 {
577 	struct xe_device *xe = gt_to_xe(gt);
578 	u32 mask;
579 
580 	switch (capture_class) {
581 	case GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE:
582 		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_RENDER_CLASS]);
583 		mask |= info_map_read(xe, info_map, engine_enabled_masks[GUC_COMPUTE_CLASS]);
584 		break;
585 	case GUC_CAPTURE_LIST_CLASS_VIDEO:
586 		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEO_CLASS]);
587 		break;
588 	case GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE:
589 		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_VIDEOENHANCE_CLASS]);
590 		break;
591 	case GUC_CAPTURE_LIST_CLASS_BLITTER:
592 		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_BLITTER_CLASS]);
593 		break;
594 	case GUC_CAPTURE_LIST_CLASS_GSC_OTHER:
595 		mask = info_map_read(xe, info_map, engine_enabled_masks[GUC_GSC_OTHER_CLASS]);
596 		break;
597 	default:
598 		mask = 0;
599 	}
600 
601 	return mask;
602 }
603 
get_capture_list(struct xe_guc_ads * ads,struct xe_guc * guc,struct xe_gt * gt,int owner,int type,int class,u32 * total_size,size_t * size,void ** pptr)604 static inline bool get_capture_list(struct xe_guc_ads *ads, struct xe_guc *guc, struct xe_gt *gt,
605 				    int owner, int type, int class, u32 *total_size, size_t *size,
606 				    void **pptr)
607 {
608 	*size = 0;
609 
610 	if (!xe_guc_capture_getlistsize(guc, owner, type, class, size)) {
611 		if (*total_size + *size > ads->capture_size)
612 			xe_gt_dbg(gt, "Capture size overflow :%zu vs %d\n",
613 				  *total_size + *size, ads->capture_size);
614 		else if (!xe_guc_capture_getlist(guc, owner, type, class, pptr))
615 			return false;
616 	}
617 
618 	return true;
619 }
620 
guc_capture_prep_lists(struct xe_guc_ads * ads)621 static int guc_capture_prep_lists(struct xe_guc_ads *ads)
622 {
623 	struct xe_guc *guc = ads_to_guc(ads);
624 	struct xe_gt *gt = ads_to_gt(ads);
625 	u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
626 	struct iosys_map info_map;
627 	size_t size = 0;
628 	void *ptr;
629 	int i, j;
630 
631 	/*
632 	 * GuC Capture's steered reg-list needs to be allocated and initialized
633 	 * after the GuC-hwconfig is available which guaranteed from here.
634 	 */
635 	xe_guc_capture_steered_list_init(ads_to_guc(ads));
636 
637 	capture_offset = guc_ads_capture_offset(ads);
638 	ads_ggtt = xe_bo_ggtt_addr(ads->bo);
639 	info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
640 					 offsetof(struct __guc_ads_blob, system_info));
641 
642 	/* first, set aside the first page for a capture_list with zero descriptors */
643 	total_size = PAGE_SIZE;
644 	if (!xe_guc_capture_getnullheader(guc, &ptr, &size))
645 		xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr, size);
646 
647 	null_ggtt = ads_ggtt + capture_offset;
648 	capture_offset += PAGE_SIZE;
649 
650 	/*
651 	 * Populate capture list : at this point adps is already allocated and
652 	 * mapped to worst case size
653 	 */
654 	for (i = 0; i < GUC_CAPTURE_LIST_INDEX_MAX; i++) {
655 		bool write_empty_list;
656 
657 		for (j = 0; j < GUC_CAPTURE_LIST_CLASS_MAX; j++) {
658 			u32 engine_mask = guc_get_capture_engine_mask(gt, &info_map, j);
659 			/* null list if we dont have said engine or list */
660 			if (!engine_mask) {
661 				ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
662 				ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
663 				continue;
664 			}
665 
666 			/* engine exists: start with engine-class registers */
667 			write_empty_list = get_capture_list(ads, guc, gt, i,
668 							    GUC_STATE_CAPTURE_TYPE_ENGINE_CLASS,
669 							    j, &total_size, &size, &ptr);
670 			if (!write_empty_list) {
671 				ads_blob_write(ads, ads.capture_class[i][j],
672 					       ads_ggtt + capture_offset);
673 				xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
674 						 ptr, size);
675 				total_size += size;
676 				capture_offset += size;
677 			} else {
678 				ads_blob_write(ads, ads.capture_class[i][j], null_ggtt);
679 			}
680 
681 			/* engine exists: next, engine-instance registers   */
682 			write_empty_list = get_capture_list(ads, guc, gt, i,
683 							    GUC_STATE_CAPTURE_TYPE_ENGINE_INSTANCE,
684 							    j, &total_size, &size, &ptr);
685 			if (!write_empty_list) {
686 				ads_blob_write(ads, ads.capture_instance[i][j],
687 					       ads_ggtt + capture_offset);
688 				xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset,
689 						 ptr, size);
690 				total_size += size;
691 				capture_offset += size;
692 			} else {
693 				ads_blob_write(ads, ads.capture_instance[i][j], null_ggtt);
694 			}
695 		}
696 
697 		/* global registers is last in our PF/VF loops */
698 		write_empty_list = get_capture_list(ads, guc, gt, i,
699 						    GUC_STATE_CAPTURE_TYPE_GLOBAL,
700 						    0, &total_size, &size, &ptr);
701 		if (!write_empty_list) {
702 			ads_blob_write(ads, ads.capture_global[i], ads_ggtt + capture_offset);
703 			xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), capture_offset, ptr,
704 					 size);
705 			total_size += size;
706 			capture_offset += size;
707 		} else {
708 			ads_blob_write(ads, ads.capture_global[i], null_ggtt);
709 		}
710 	}
711 
712 	if (ads->capture_size != PAGE_ALIGN(total_size))
713 		xe_gt_dbg(gt, "ADS capture alloc size changed from %d to %d\n",
714 			  ads->capture_size, PAGE_ALIGN(total_size));
715 	return PAGE_ALIGN(total_size);
716 }
717 
guc_mmio_regset_write_one(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_reg reg,unsigned int n_entry)718 static void guc_mmio_regset_write_one(struct xe_guc_ads *ads,
719 				      struct iosys_map *regset_map,
720 				      struct xe_reg reg,
721 				      unsigned int n_entry)
722 {
723 	struct guc_mmio_reg entry = {
724 		.offset = reg.addr,
725 		.flags = reg.masked ? GUC_REGSET_MASKED : 0,
726 	};
727 
728 	if (reg.mcr) {
729 		struct xe_reg_mcr mcr_reg = XE_REG_MCR(reg.addr);
730 		u8 group, instance;
731 
732 		bool steer = xe_gt_mcr_get_nonterminated_steering(ads_to_gt(ads), mcr_reg,
733 								  &group, &instance);
734 
735 		if (steer) {
736 			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_GROUP, group);
737 			entry.flags |= FIELD_PREP(GUC_REGSET_STEERING_INSTANCE, instance);
738 			entry.flags |= GUC_REGSET_STEERING_NEEDED;
739 		}
740 	}
741 
742 	xe_map_memcpy_to(ads_to_xe(ads), regset_map, n_entry * sizeof(entry),
743 			 &entry, sizeof(entry));
744 }
745 
guc_mmio_regset_write(struct xe_guc_ads * ads,struct iosys_map * regset_map,struct xe_hw_engine * hwe)746 static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
747 					  struct iosys_map *regset_map,
748 					  struct xe_hw_engine *hwe)
749 {
750 	struct xe_hw_engine *hwe_rcs_reset_domain =
751 		xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER);
752 	struct xe_reg_sr_entry *entry;
753 	unsigned long idx;
754 	unsigned int count = 0;
755 	const struct {
756 		struct xe_reg reg;
757 		bool skip;
758 	} *e, extra_regs[] = {
759 		{ .reg = RING_MODE(hwe->mmio_base),			},
760 		{ .reg = RING_HWS_PGA(hwe->mmio_base),			},
761 		{ .reg = RING_IMR(hwe->mmio_base),			},
762 		{ .reg = RCU_MODE, .skip = hwe != hwe_rcs_reset_domain	},
763 		{ .reg = CCS_MODE,
764 		  .skip = hwe != hwe_rcs_reset_domain || !xe_gt_ccs_mode_enabled(hwe->gt) },
765 	};
766 	u32 i;
767 
768 	BUILD_BUG_ON(ARRAY_SIZE(extra_regs) > ADS_REGSET_EXTRA_MAX);
769 
770 	xa_for_each(&hwe->reg_sr.xa, idx, entry)
771 		guc_mmio_regset_write_one(ads, regset_map, entry->reg, count++);
772 
773 	for (e = extra_regs; e < extra_regs + ARRAY_SIZE(extra_regs); e++) {
774 		if (e->skip)
775 			continue;
776 
777 		guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
778 	}
779 
780 	if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
781 		for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
782 			guc_mmio_regset_write_one(ads, regset_map,
783 						  XELP_LNCFCMOCS(i), count++);
784 		}
785 	}
786 
787 	return count;
788 }
789 
guc_mmio_reg_state_init(struct xe_guc_ads * ads)790 static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
791 {
792 	size_t regset_offset = guc_ads_regset_offset(ads);
793 	struct xe_gt *gt = ads_to_gt(ads);
794 	struct xe_hw_engine *hwe;
795 	enum xe_hw_engine_id id;
796 	u32 addr = xe_bo_ggtt_addr(ads->bo) + regset_offset;
797 	struct iosys_map regset_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
798 							    regset_offset);
799 	unsigned int regset_used = 0;
800 
801 	for_each_hw_engine(hwe, gt, id) {
802 		unsigned int count;
803 		u8 gc;
804 
805 		/*
806 		 * 1. Write all MMIO entries for this exec queue to the table. No
807 		 * need to worry about fused-off engines and when there are
808 		 * entries in the regset: the reg_state_list has been zero'ed
809 		 * by xe_guc_ads_populate()
810 		 */
811 		count = guc_mmio_regset_write(ads, &regset_map, hwe);
812 		if (!count)
813 			continue;
814 
815 		/*
816 		 * 2. Record in the header (ads.reg_state_list) the address
817 		 * location and number of entries
818 		 */
819 		gc = xe_engine_class_to_guc_class(hwe->class);
820 		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].address, addr);
821 		ads_blob_write(ads, ads.reg_state_list[gc][hwe->instance].count, count);
822 
823 		addr += count * sizeof(struct guc_mmio_reg);
824 		iosys_map_incr(&regset_map, count * sizeof(struct guc_mmio_reg));
825 
826 		regset_used += count * sizeof(struct guc_mmio_reg);
827 	}
828 
829 	xe_gt_assert(gt, regset_used <= ads->regset_size);
830 }
831 
guc_um_init_params(struct xe_guc_ads * ads)832 static void guc_um_init_params(struct xe_guc_ads *ads)
833 {
834 	u32 um_queue_offset = guc_ads_um_queues_offset(ads);
835 	u64 base_dpa;
836 	u32 base_ggtt;
837 	int i;
838 
839 	base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
840 	base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
841 
842 	for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
843 		ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
844 			       base_dpa + (i * GUC_UM_QUEUE_SIZE));
845 		ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
846 			       base_ggtt + (i * GUC_UM_QUEUE_SIZE));
847 		ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
848 			       GUC_UM_QUEUE_SIZE);
849 	}
850 
851 	ads_blob_write(ads, um_init_params.page_response_timeout_in_us,
852 		       GUC_PAGE_RES_TIMEOUT_US);
853 }
854 
guc_doorbell_init(struct xe_guc_ads * ads)855 static void guc_doorbell_init(struct xe_guc_ads *ads)
856 {
857 	struct xe_device *xe = ads_to_xe(ads);
858 	struct xe_gt *gt = ads_to_gt(ads);
859 
860 	if (GRAPHICS_VER(xe) >= 12 && !IS_DGFX(xe)) {
861 		u32 distdbreg =
862 			xe_mmio_read32(&gt->mmio, DIST_DBS_POPULATED);
863 
864 		ads_blob_write(ads,
865 			       system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI],
866 			       REG_FIELD_GET(DOORBELLS_PER_SQIDI_MASK, distdbreg) + 1);
867 	}
868 }
869 
870 /**
871  * xe_guc_ads_populate_minimal - populate minimal ADS
872  * @ads: Additional data structures object
873  *
874  * This function populates a minimal ADS that does not support submissions but
875  * enough so the GuC can load and the hwconfig table can be read.
876  */
xe_guc_ads_populate_minimal(struct xe_guc_ads * ads)877 void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
878 {
879 	struct xe_gt *gt = ads_to_gt(ads);
880 	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
881 			offsetof(struct __guc_ads_blob, system_info));
882 	u32 base = xe_bo_ggtt_addr(ads->bo);
883 
884 	xe_gt_assert(gt, ads->bo);
885 
886 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
887 	guc_policies_init(ads);
888 	guc_golden_lrc_init(ads);
889 	guc_mapping_table_init_invalid(gt, &info_map);
890 	guc_doorbell_init(ads);
891 
892 	ads_blob_write(ads, ads.scheduler_policies, base +
893 		       offsetof(struct __guc_ads_blob, policies));
894 	ads_blob_write(ads, ads.gt_system_info, base +
895 		       offsetof(struct __guc_ads_blob, system_info));
896 	ads_blob_write(ads, ads.private_data, base +
897 		       guc_ads_private_data_offset(ads));
898 }
899 
xe_guc_ads_populate(struct xe_guc_ads * ads)900 void xe_guc_ads_populate(struct xe_guc_ads *ads)
901 {
902 	struct xe_device *xe = ads_to_xe(ads);
903 	struct xe_gt *gt = ads_to_gt(ads);
904 	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
905 			offsetof(struct __guc_ads_blob, system_info));
906 	u32 base = xe_bo_ggtt_addr(ads->bo);
907 
908 	xe_gt_assert(gt, ads->bo);
909 
910 	xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
911 	guc_policies_init(ads);
912 	fill_engine_enable_masks(gt, &info_map);
913 	guc_mmio_reg_state_init(ads);
914 	guc_golden_lrc_init(ads);
915 	guc_mapping_table_init(gt, &info_map);
916 	guc_capture_prep_lists(ads);
917 	guc_doorbell_init(ads);
918 	guc_waklv_init(ads);
919 
920 	if (xe->info.has_usm) {
921 		guc_um_init_params(ads);
922 		ads_blob_write(ads, ads.um_init_data, base +
923 			       offsetof(struct __guc_ads_blob, um_init_params));
924 	}
925 
926 	ads_blob_write(ads, ads.scheduler_policies, base +
927 		       offsetof(struct __guc_ads_blob, policies));
928 	ads_blob_write(ads, ads.gt_system_info, base +
929 		       offsetof(struct __guc_ads_blob, system_info));
930 	ads_blob_write(ads, ads.private_data, base +
931 		       guc_ads_private_data_offset(ads));
932 }
933 
934 /*
935  * After the golden LRC's are recorded for each engine class by the first
936  * submission, copy them to the ADS, as initialized earlier by
937  * guc_golden_lrc_init().
938  */
guc_golden_lrc_populate(struct xe_guc_ads * ads)939 static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
940 {
941 	struct xe_device *xe = ads_to_xe(ads);
942 	struct xe_gt *gt = ads_to_gt(ads);
943 	struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
944 			offsetof(struct __guc_ads_blob, system_info));
945 	size_t total_size = 0, alloc_size, real_size;
946 	u32 offset;
947 	int class;
948 
949 	offset = guc_ads_golden_lrc_offset(ads);
950 
951 	for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
952 		u8 guc_class;
953 
954 		guc_class = xe_engine_class_to_guc_class(class);
955 
956 		if (!info_map_read(xe, &info_map,
957 				   engine_enabled_masks[guc_class]))
958 			continue;
959 
960 		xe_gt_assert(gt, gt->default_lrc[class]);
961 
962 		real_size = xe_gt_lrc_size(gt, class);
963 		alloc_size = PAGE_ALIGN(real_size);
964 		total_size += alloc_size;
965 
966 		xe_map_memcpy_to(xe, ads_to_map(ads), offset,
967 				 gt->default_lrc[class], real_size);
968 
969 		offset += alloc_size;
970 	}
971 
972 	xe_gt_assert(gt, total_size == ads->golden_lrc_size);
973 }
974 
/**
 * xe_guc_ads_populate_post_load - populate the post-load parts of the ADS
 * @ads: Additional data structures object
 *
 * Currently this only copies the recorded golden LRCs into the ADS.
 */
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
	guc_golden_lrc_populate(ads);
}
979 
/*
 * Send an XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE request through the GuC's
 * CT, with @policy_offset as its single payload dword. The return value of
 * xe_guc_ct_send() is passed back unchanged.
 */
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)
{
	struct xe_guc *guc = ads_to_guc(ads);
	u32 request[] = {
		XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
		policy_offset,
	};

	return xe_guc_ct_send(&guc->ct, request, ARRAY_SIZE(request), 0, 0);
}
990 
991 /**
992  * xe_guc_ads_scheduler_policy_toggle_reset - Toggle reset policy
993  * @ads: Additional data structures object
994  *
995  * This function update the GuC's engine reset policy based on wedged.mode.
996  *
997  * Return: 0 on success, and negative error code otherwise.
998  */
xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads * ads)999 int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
1000 {
1001 	struct xe_device *xe = ads_to_xe(ads);
1002 	struct xe_gt *gt = ads_to_gt(ads);
1003 	struct xe_tile *tile = gt_to_tile(gt);
1004 	struct guc_policies *policies;
1005 	struct xe_bo *bo;
1006 	int ret = 0;
1007 
1008 	policies = kmalloc(sizeof(*policies), GFP_KERNEL);
1009 	if (!policies)
1010 		return -ENOMEM;
1011 
1012 	policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
1013 	policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
1014 	policies->is_valid = 1;
1015 	if (xe->wedged.mode == 2)
1016 		policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;
1017 	else
1018 		policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
1019 
1020 	bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
1021 					    XE_BO_FLAG_VRAM_IF_DGFX(tile) |
1022 					    XE_BO_FLAG_GGTT);
1023 	if (IS_ERR(bo)) {
1024 		ret = PTR_ERR(bo);
1025 		goto out;
1026 	}
1027 
1028 	ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
1029 out:
1030 	kfree(policies);
1031 	return ret;
1032 }
1033