/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_context.h"
#include "intel_workarounds.h"

/**
 * DOC: Hardware workarounds
 *
 * This file is intended as a central place to implement most [1]_ of the
 * required workarounds for hardware to work as originally intended. They fall
 * in five basic categories depending on how/when they are applied:
 *
 * - Workarounds that touch registers that are saved/restored to/from the HW
 *   context image. The list is emitted (via Load Register Immediate commands)
 *   every time a new context is created.
 * - GT workarounds. The list of these WAs is applied whenever these registers
 *   revert to default values (on GPU reset, suspend/resume [2]_, etc..).
 * - Display workarounds. The list is applied during display clock-gating
 *   initialization.
 * - Workarounds that whitelist a privileged register, so that UMDs can manage
 *   them directly. This is just a special case of an MMIO workaround (as we
 *   write the list of these to-be-whitelisted registers to some special HW
 *   registers).
 * - Workaround batchbuffers, that get executed automatically by the hardware
 *   on every HW context restore.
 *
 * .. [1] Please notice that there are other WAs that, due to their nature,
 *    cannot be applied from a central place. Those are peppered around the rest
 *    of the code, as needed.
 *
 * .. [2] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things. We can revisit this in the future.
 *
 * Layout
 * ~~~~~~
 *
 * Keep things in this file ordered by WA type, as per the above (context, GT,
 * display, register whitelist, batchbuffer). Then, inside each type, keep the
 * following order:
 *
 * - Infrastructure functions and macros
 * - WAs per platform in standard gen/chrono order
 * - Public functions to init or apply the given workaround type.
 */

static void wa_init_start(struct i915_wa_list *wal, const char *name)
{
	wal->name = name;
}

#define WA_LIST_CHUNK (1 << 4)

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup(wal->list,
					       wal->count * sizeof(*list),
					       GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	DRM_DEBUG_DRIVER("Initialized %u %s workarounds\n",
			 wal->wa_count, wal->name);
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*wa),
				     GFP_KERNEL);
		if (!list) {
			DRM_ERROR("No space for workaround init!\n");
			return;
		}

		if (wal->list)
			memcpy(list, wal->list, sizeof(*wa) * wal->count);

		wal->list = list;
	}
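
	/*
	 * Search for an existing entry covering the same register: repeated
	 * workarounds are merged (value, mask and read bits OR'ed together)
	 * rather than added to the list twice.
	 */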
	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->mask & ~wa_->mask) == 0) {
				DRM_ERROR("Discarding overwritten w/a for reg %04x (mask: %08x, value: %08x)\n",
					  i915_mmio_reg_offset(wa_->reg),
					  wa_->mask, wa_->val);

				wa_->val &= ~wa->mask;
			}

			wal->wa_count++;
			wa_->val |= wa->val;
			wa_->mask |= wa->mask;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;
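
	/*
	 * The new entry was appended at the end of the list; bubble it down
	 * into place so the list stays sorted by mmio offset for the binary
	 * search above.
	 */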
	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask,
		   u32 val)
{
	struct i915_wa wa = {
		.reg  = reg,
		.mask = mask,
		.val  = val,
		.read = mask,
	};

	_wa_add(wal, &wa);
}

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, _MASKED_BIT_ENABLE(val));
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, ~0, val);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_write_masked_or(wal, reg, val, val);
}

static void
ignore_wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 mask, u32 val)
{
	struct i915_wa wa = {
		.reg  = reg,
		.mask = mask,
		.val  = val,
		/* Bonkers HW, skip verifying */
	};

	_wa_add(wal, &wa);
}

#define WA_SET_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	wa_write_masked_or(wal, (addr), (mask), _MASKED_FIELD((mask), (value)))

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
			  HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 *  polygons in the same 8x4 pixel/sample area to be processed without
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
	 *  buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in broadwell_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
			  (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN9_PBE_COMPRESSED_HASH_SELECTION);
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
				  GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  FLOW_CONTROL_ENABLE |
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
			  GEN9_ENABLE_YV12_BUGFIX |
			  GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE |
			  GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
			  HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
				  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace is
	 * still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * ->    0 <= ss <= 3;
		 */
		ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KBL_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1,
			  GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaForceContextSaveRestoreNonCoherent:cnl */
	WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0,
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);

	/* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5);

	/* WaDisableReplayBufferBankArbitrationOptimization:cnl */
	WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
			  GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, 0, CNL_REVID_B0))
		WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2,
				  GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE);

	/* WaPushConstantDereferenceHoldDisable:cnl */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);

	/* FtrEnableFastAnisoL1BankingFix:cnl */
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);

	/* WaDisable3DMidCmdPreemption:cnl */
	WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:cnl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaDisableEarlyEOT:cnl */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaDisableBankHangMode:icl */
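	/* Keep the rest of L3CNTLREG intact and only set the ERRDETBCTRL bit. */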
	wa_write(wal,
		 GEN8_L3CNTLREG,
		 intel_uncore_read(engine->uncore, GEN8_L3CNTLREG) |
		 GEN8_ERRDETBCTRL);

	/* Wa_1604370585:icl (pre-prod)
	 * Formerly known as WaPushConstantDereferenceHoldDisable
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  PUSH_CONSTANT_DEREF_DISABLE);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* Wa_2006611047:icl (pre-prod)
	 * Formerly known as WaDisableImprovedTdlClkGating
	 */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
				  GEN11_TDL_CLOCK_GATING_FIX_DISABLE);

	/* Wa_2006665173:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
				  GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);

	/* WaEnableFloatBlendOptimization:icl */
	wa_write_masked_or(wal,
			   GEN10_CACHE_MODE_SS,
			   0, /* write-only, so skip validation */
			   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));

	/* WaDisableGPGPUMidThreadPreemption:icl */
	WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE,
			  GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class != RENDER_CLASS)
		return;

	wa_init_start(wal, name);
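
	/* Dispatch to the per-platform hooks, newest platforms first. */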
	if (IS_GEN(i915, 11))
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (INTEL_GEN(i915) < 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));

	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}

int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	cs = intel_ring_begin(rq, (wal->count * 2 + 2));
	if (IS_ERR(cs))
		return PTR_ERR(cs);
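
	/*
	 * Emit the whole list as a single MI_LOAD_REGISTER_IMM: one
	 * (offset, value) pair per workaround, padded with a final MI_NOOP
	 * so the dword count matches the even size reserved above.
	 */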

	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = wa->val;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKL_REVID(i915, SKL_REVID_H0, REVID_FOREVER))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
bxt_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaInPlaceDecompressionHang:bxt */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KBL_REVID(i915, 0, KBL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);
}

static void
cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(i915, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
	u32 mcr_slice_subslice_mask;

	/*
	 * WaProgramMgsrForL3BankSpecificMmioReads: cnl,icl
	 * L3Banks could be fused off in single slice scenario. If that is
	 * the case, we might need to program MCR select to a valid L3Bank
	 * by default, to make sure we correctly read certain registers
	 * later on (in the range 0xB100 - 0xB3FF).
	 * This might be incompatible with
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads.
	 * Fortunately, this should not happen in production hardware, so
	 * we only assert that this is the case (instead of implementing
	 * something more complex that requires checking the range of every
	 * MMIO read).
	 */
	if (INTEL_GEN(i915) >= 10 &&
	    is_power_of_2(sseu->slice_mask)) {
		/*
		 * read FUSE3 for enabled L3 Bank IDs, if L3 Bank matches
		 * enabled subslice, no need to redirect MCR packet
		 */
		u32 slice = fls(sseu->slice_mask);
		u32 fuse3 =
			intel_uncore_read(&i915->uncore, GEN10_MIRROR_FUSE3);
		u8 ss_mask = sseu->subslice_mask[slice];

		u8 enabled_mask = (ss_mask | ss_mask >>
				   GEN10_L3BANK_PAIR_COUNT) & GEN10_L3BANK_MASK;
		u8 disabled_mask = fuse3 & GEN10_L3BANK_MASK;

		/*
		 * Production silicon should have matched L3Bank and
		 * subslice enabled
		 */
		WARN_ON((enabled_mask & disabled_mask) != enabled_mask);
	}

	if (INTEL_GEN(i915) >= 11)
		mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK |
					  GEN11_MCR_SUBSLICE_MASK;
	else
		mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK |
					  GEN8_MCR_SUBSLICE_MASK;
	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:cnl,icl
	 * Before any MMIO read into slice/subslice specific registers, the MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	wa_write_masked_or(wal,
			   GEN8_MCR_SELECTOR,
			   mcr_slice_subslice_mask,
			   intel_calculate_mcr_s_ss_select(i915));
}

static void
cnl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */
	if (IS_CNL_REVID(i915, CNL_REVID_B0, CNL_REVID_B0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT);

	/* WaInPlaceDecompressionHang:cnl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	wa_init_mcr(i915, wal);

	/* WaInPlaceDecompressionHang:icl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_masked_or(wal,
			   GEN11_GACB_PERF_CTRL,
			   GEN11_HASH_CTRL_MASK,
			   GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1405779004:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
		wa_write_or(wal,
			    SLICE_UNIT_LEVEL_CLKGATE,
			    MSCUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl */
	wa_write_or(wal,
		    SUBSLICE_UNIT_LEVEL_CLKGATE,
		    GWUNIT_CLKGATE_DIS);

	/* Wa_1406838659:icl (pre-prod) */
	if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
		wa_write_or(wal,
			    INF_UNIT_LEVEL_CLKGATE,
			    CGPSF_CLKGATE_DIS);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);
}

static void
gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	if (IS_GEN(i915, 11))
		icl_gt_workarounds_init(i915, wal);
	else if (IS_CANNONLAKE(i915))
		cnl_gt_workarounds_init(i915, wal);
	else if (IS_COFFEELAKE(i915))
		cfl_gt_workarounds_init(i915, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(i915, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(i915, wal);
	else if (IS_BROXTON(i915))
		bxt_gt_workarounds_init(i915, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(i915, wal);
	else if (INTEL_GEN(i915) <= 8)
		return;
	else
		MISSING_CASE(INTEL_GEN(i915));
}

void intel_gt_init_workarounds(struct drm_i915_private *i915)
{
	struct i915_wa_list *wal = &i915->gt_wa_list;

	wa_init_start(wal, "GT");
	gt_init_workarounds(i915, wal);
	wa_init_finish(wal);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{
	if ((cur ^ wa->val) & wa->read) {
		DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x, mask=%x)\n",
			  name, from, i915_mmio_reg_offset(wa->reg),
			  cur, cur & wa->read,
			  wa->val, wa->mask);

		return false;
	}

	return true;
}

static void
wa_list_apply(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;
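
	/*
	 * Work out the union of forcewake domains needed by the registers in
	 * the list up front, so that the whole list can be applied under a
	 * single forcewake grab while holding the uncore lock.
	 */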

	fw = wal_get_fw_for_rmw(uncore, wal);

	spin_lock_irqsave(&uncore->lock, flags);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		intel_uncore_rmw_fw(uncore, wa->reg, wa->mask, wa->val);
		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
			wa_verify(wa,
				  intel_uncore_read_fw(uncore, wa->reg),
				  wal->name, "application");
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock_irqrestore(&uncore->lock, flags);
}

void intel_gt_apply_workarounds(struct drm_i915_private *i915)
{
	wa_list_apply(&i915->uncore, &i915->gt_wa_list);
}

static bool wa_list_verify(struct intel_uncore *uncore,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct i915_wa *wa;
	unsigned int i;
	bool ok = true;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wa,
				intel_uncore_read(uncore, wa->reg),
				wal->name, from);

	return ok;
}

bool intel_gt_verify_workarounds(struct drm_i915_private *i915,
				 const char *from)
{
	return wa_list_verify(&i915->uncore, &i915->gt_wa_list, from);
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;
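
	/* The access flags are encoded into the register offset itself. */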
	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another:
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}

static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	/* WaEnablePreemptionGranularityControlByUMD:cnl */
	whitelist_reg(w, GEN8_CS_CHICKEN1);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_reg(