/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/log2.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>

#include <asm/set_memory.h>
#include <asm/smp.h>

#include <drm/i915_drm.h>

#include "display/intel_frontbuffer.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"

#include "i915_drv.h"
#include "i915_scatterlist.h"
#include "i915_trace.h"
#include "i915_vgpu.h"

#define I915_GFP_ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)

#if IS_ENABLED(CONFIG_DRM_I915_TRACE_GTT)
#define DBG(...) trace_printk(__VA_ARGS__)
#else
#define DBG(...)
#endif

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * differ from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view,
 * a globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate on,
 * or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that the
 * passed-in struct i915_ggtt_view does not need to be persistent (left around
 * after calling the core API functions).
 *
 */
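/*
 * Illustrative sketch only (not part of the code below, names assumed from
 * the rest of the driver): a caller wanting the display engine to see a
 * rotated view instead of the normal one would describe the view and pin
 * with it, along the lines of:
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	// ... fill in view.rotated from the framebuffer layout ...
 *	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 *
 * Per the copy semantics above, the view descriptor may live on the
 * caller's stack.
 */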

#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;

	/*
	 * Note that as an uncached mmio write, this will flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
	struct drm_i915_private *i915 = ggtt->vm.i915;

	gen6_ggtt_invalidate(ggtt);

	if (INTEL_GEN(i915) >= 12)
		intel_uncore_write_fw(uncore, GEN12_GUC_TLB_INV_CR,
				      GEN12_GUC_TLB_INV_CR_INVALIDATE);
	else
		intel_uncore_write_fw(uncore, GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct i915_ggtt *ggtt)
{
	intel_gtt_chipset_flush();
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 flags)
{
	u32 pte_flags;
	int err;

	if (flags & I915_VMA_ALLOC) {
		err = vma->vm->allocate_va_range(vma->vm,
						 vma->node.start, vma->size);
		if (err)
			return err;

		set_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma));
	}

	/* Applicable to VLV, and gen8+ */
	pte_flags = 0;
	if (i915_gem_object_is_readonly(vma->obj))
		pte_flags |= PTE_READ_ONLY;

	GEM_BUG_ON(!test_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)));
	vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags);
	wmb();

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	if (test_and_clear_bit(I915_VMA_ALLOC_BIT, __i915_vma_flags(vma)))
		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static int ppgtt_set_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(vma->pages);

	vma->pages = vma->obj->mm.pages;

	vma->page_sizes = vma->obj->mm.page_sizes;

	return 0;
}

static void clear_pages(struct i915_vma *vma)
{
	GEM_BUG_ON(!vma->pages);

	if (vma->pages != vma->obj->mm.pages) {
		sg_free_table(vma->pages);
		kfree(vma->pages);
	}
	vma->pages = NULL;

	memset(&vma->page_sizes, 0, sizeof(vma->page_sizes));
}

static u64 gen8_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW;

	if (unlikely(flags & PTE_READ_ONLY))
		pte &= ~_PAGE_RW;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC;
		break;
	default:
		pte |= PPAT_CACHED;
		break;
	}

	return pte;
}

static u64 gen8_pde_encode(const dma_addr_t addr,
			   const enum i915_cache_level level)
{
	u64 pde = _PAGE_PRESENT | _PAGE_RW;

	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE;
	else
		pde |= PPAT_UNCACHED;

	return pde;
}

static u64 snb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 ivb_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static u64 byt_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static u64 hsw_pte_encode(dma_addr_t addr,
			  enum i915_cache_level level,
			  u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static u64 iris_pte_encode(dma_addr_t addr,
			   enum i915_cache_level level,
			   u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static void stash_init(struct pagestash *stash)
{
	pagevec_init(&stash->pvec);
	spin_lock_init(&stash->lock);
}

static struct page *stash_pop_page(struct pagestash *stash)
{
	struct page *page = NULL;

	spin_lock(&stash->lock);
	if (likely(stash->pvec.nr))
		page = stash->pvec.pages[--stash->pvec.nr];
	spin_unlock(&stash->lock);

	return page;
}

static void stash_push_pagevec(struct pagestash *stash, struct pagevec *pvec)
{
	unsigned int nr;

	spin_lock_nested(&stash->lock, SINGLE_DEPTH_NESTING);

	nr = min_t(typeof(nr), pvec->nr, pagevec_space(&stash->pvec));
	memcpy(stash->pvec.pages + stash->pvec.nr,
	       pvec->pages + pvec->nr - nr,
	       sizeof(pvec->pages[0]) * nr);
	stash->pvec.nr += nr;

	spin_unlock(&stash->lock);

	pvec->nr -= nr;
}

static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct pagevec stack;
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	page = stash_pop_page(&vm->free_pages);
	if (page)
		return page;

	if (!vm->pt_kmap_wc)
		return alloc_page(gfp);

	/* Look in our global stash of WC pages... */
	page = stash_pop_page(&vm->i915->mm.wc_stash);
	if (page)
		return page;

	/*
	 * Otherwise batch allocate pages to amortize cost of set_pages_wc.
	 *
	 * We have to be careful as page allocation may trigger the shrinker
	 * (via direct reclaim) which will fill up the WC stash underneath us.
	 * So we add our WB pages into a temporary pvec on the stack and merge
	 * them into the WC stash after all the allocations are complete.
	 */
	pagevec_init(&stack);
	do {
		struct page *page;

		page = alloc_page(gfp);
		if (unlikely(!page))
			break;

		stack.pages[stack.nr++] = page;
	} while (pagevec_space(&stack));

	if (stack.nr && !set_pages_array_wc(stack.pages, stack.nr)) {
		page = stack.pages[--stack.nr];

		/* Merge spare WC pages to the global stash */
		if (stack.nr)
			stash_push_pagevec(&vm->i915->mm.wc_stash, &stack);

		/* Push any surplus WC pages onto the local VM stash */
		if (stack.nr)
			stash_push_pagevec(&vm->free_pages, &stack);
	}

	/* Return unwanted leftovers */
	if (unlikely(stack.nr)) {
		WARN_ON_ONCE(set_pages_array_wb(stack.pages, stack.nr));
		__pagevec_release(&stack);
	}

	return page;
}

static void vm_free_pages_release(struct i915_address_space *vm,
				  bool immediate)
{
	struct pagevec *pvec = &vm->free_pages.pvec;
	struct pagevec stack;

	lockdep_assert_held(&vm->free_pages.lock);
	GEM_BUG_ON(!pagevec_count(pvec));

	if (vm->pt_kmap_wc) {
		/*
		 * When we use WC, first fill up the global stash and then
		 * only if full immediately free the overflow.
		 */
		stash_push_pagevec(&vm->i915->mm.wc_stash, pvec);

		/*
		 * As we have made some room in the VM's free_pages,
		 * we can wait for it to fill again. Unless we are
		 * inside i915_address_space_fini() and must
		 * immediately release the pages!
		 */
		if (pvec->nr <= (immediate ? 0 : PAGEVEC_SIZE - 1))
			return;

		/*
		 * We have to drop the lock to allow ourselves to sleep,
		 * so take a copy of the pvec and clear the stash for
		 * others to use it as we sleep.
		 */
		stack = *pvec;
		pagevec_reinit(pvec);
		spin_unlock(&vm->free_pages.lock);

		pvec = &stack;
		set_pages_array_wb(pvec->pages, pvec->nr);

		spin_lock(&vm->free_pages.lock);
	}

	__pagevec_release(pvec);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	/*
	 * On !llc, we need to change the pages back to WB. We only do so
	 * in bulk, so we rarely need to change the page attributes here,
	 * but doing so requires a stop_machine() from deep inside arch/x86/mm.
	 * To make detection of the possible sleep more likely, use an
	 * unconditional might_sleep() for everybody.
	 */
	might_sleep();
	spin_lock(&vm->free_pages.lock);
	while (!pagevec_space(&vm->free_pages.pvec))
		vm_free_pages_release(vm, false);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec) >= PAGEVEC_SIZE);
	pagevec_add(&vm->free_pages.pvec, page);
	spin_unlock(&vm->free_pages.lock);
}

static void i915_address_space_fini(struct i915_address_space *vm)
{
	spin_lock(&vm->free_pages.lock);
	if (pagevec_count(&vm->free_pages.pvec))
		vm_free_pages_release(vm, true);
	GEM_BUG_ON(pagevec_count(&vm->free_pages.pvec));
	spin_unlock(&vm->free_pages.lock);

	drm_mm_takedown(&vm->mm);

	mutex_destroy(&vm->mutex);
}

void __i915_vm_close(struct i915_address_space *vm)
{
	struct i915_vma *vma, *vn;

	mutex_lock(&vm->mutex);
	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
		struct drm_i915_gem_object *obj = vma->obj;

		/* Keep the obj (and hence the vma) alive as _we_ destroy it */
		if (!kref_get_unless_zero(&obj->base.refcount))
			continue;

		atomic_and(~I915_VMA_PIN_MASK, &vma->flags);
		WARN_ON(__i915_vma_unbind(vma));
		i915_vma_destroy(vma);

		i915_gem_object_put(obj);
	}
	GEM_BUG_ON(!list_empty(&vm->bound_list));
	mutex_unlock(&vm->mutex);
}

static void __i915_vm_release(struct work_struct *work)
{
	struct i915_address_space *vm =
		container_of(work, struct i915_address_space, rcu.work);

	vm->cleanup(vm);
	i915_address_space_fini(vm);

	kfree(vm);
}

void i915_vm_release(struct kref *kref)
{
	struct i915_address_space *vm =
		container_of(kref, struct i915_address_space, ref);

	GEM_BUG_ON(i915_is_ggtt(vm));
	trace_i915_ppgtt_release(vm);

	queue_rcu_work(vm->i915->wq, &vm->rcu);
}

static void i915_address_space_init(struct i915_address_space *vm, int subclass)
{
	kref_init(&vm->ref);
	INIT_RCU_WORK(&vm->rcu, __i915_vm_release);
	atomic_set(&vm->open, 1);

	/*
	 * The vm->mutex must be reclaim safe (for use in the shrinker).
	 * Do a dummy acquire now under fs_reclaim so that any allocation
	 * attempt holding the lock is immediately reported by lockdep.
	 */
	mutex_init(&vm->mutex);
	lockdep_set_subclass(&vm->mutex, subclass);
	i915_gem_shrinker_taints_mutex(vm->i915, &vm->mutex);

	GEM_BUG_ON(!vm->total);
	drm_mm_init(&vm->mm, 0, vm->total);
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

	stash_init(&vm->free_pages);

	INIT_LIST_HEAD(&vm->bound_list);
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | I915_GFP_ALLOW_FAIL);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page_attrs(vm->dma,
				      p->page, 0, PAGE_SIZE,
				      PCI_DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC |
				      DMA_ATTR_NO_WARN);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}

static int setup_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, __GFP_HIGHMEM);
}

static void cleanup_page_dma(struct i915_address_space *vm,
			     struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)

static void
fill_page_dma(const struct i915_page_dma *p, const u64 val, unsigned int count)
{
	kunmap_atomic(memset64(kmap_atomic(p->page), val, count));
}

#define fill_px(px, v) fill_page_dma(px_base(px), (v), PAGE_SIZE / sizeof(u64))
#define fill32_px(px, v) do {						\
	u64 v__ = lower_32_bits(v);					\
	fill_px((px), v__ << 32 | v__);					\
} while (0)
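/* e.g. fill32_px(pt, 0xdeadbeef) writes 0xdeadbeefdeadbeef into every u64 slot */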

static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	unsigned long size;

	/*
	 * In order to utilize 64K pages for an object with a size < 2M, we will
	 * need to support a 64K scratch page, given that every 16th entry for a
	 * page-table operating in 64K mode must point to a properly aligned 64K
	 * region, including any PTEs which happen to point to scratch.
	 *
	 * This is only relevant for the 48b PPGTT where we support
	 * huge-gtt-pages, see also i915_vma_insert(). However, as we share the
	 * scratch (read-only) between all vm, we create one 64k scratch page
	 * for all.
	 */
	size = I915_GTT_PAGE_SIZE_4K;
	if (i915_vm_is_4lvl(vm) &&
	    HAS_PAGE_SIZES(vm->i915, I915_GTT_PAGE_SIZE_64K)) {
		size = I915_GTT_PAGE_SIZE_64K;
		gfp |= __GFP_NOWARN;
	}
	gfp |= __GFP_ZERO | __GFP_RETRY_MAYFAIL;

	do {
		unsigned int order = get_order(size);
		struct page *page;
		dma_addr_t addr;

		page = alloc_pages(gfp, order);
		if (unlikely(!page))
			goto skip;

		addr = dma_map_page_attrs(vm->dma,
					  page, 0, size,
					  PCI_DMA_BIDIRECTIONAL,
					  DMA_ATTR_SKIP_CPU_SYNC |
					  DMA_ATTR_NO_WARN);
		if (unlikely(dma_mapping_error(vm->dma, addr)))
			goto free_page;

		if (unlikely(!IS_ALIGNED(addr, size)))
			goto unmap_page;

		vm->scratch[0].base.page = page;
		vm->scratch[0].base.daddr = addr;
		vm->scratch_order = order;
		return 0;

unmap_page:
		dma_unmap_page(vm->dma, addr, size, PCI_DMA_BIDIRECTIONAL);
free_page:
		__free_pages(page, order);
skip:
		if (size == I915_GTT_PAGE_SIZE_4K)
			return -ENOMEM;

		size = I915_GTT_PAGE_SIZE_4K;
		gfp &= ~__GFP_NOWARN;
	} while (1);
}

static void cleanup_scratch_page(struct i915_address_space *vm)
{
	struct i915_page_dma *p = px_base(&vm->scratch[0]);
	unsigned int order = vm->scratch_order;

	dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
		       PCI_DMA_BIDIRECTIONAL);
	__free_pages(p->page, order);
}

static void free_scratch(struct i915_address_space *vm)
{
	int i;

	if (!px_dma(&vm->scratch[0])) /* set to 0 on clones */
		return;

	for (i = 1; i <= vm->top; i++) {
		if (!px_dma(&vm->scratch[i]))
			break;
		cleanup_page_dma(vm, px_base(&vm->scratch[i]));
	}

	cleanup_scratch_page(vm);
}

static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
{
	struct i915_page_table *pt;

	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
	if (unlikely(!pt))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_page_dma(vm, &pt->base))) {
		kfree(pt);
		return ERR_PTR(-ENOMEM);
	}

	atomic_set(&pt->used, 0);
	return pt;
}

static struct i915_page_directory *__alloc_pd(size_t sz)
{
	struct i915_page_directory *pd;

	pd = kzalloc(sz, I915_GFP_ALLOW_FAIL);
	if (unlikely(!pd))
		return NULL;

	spin_lock_init(&pd->lock);
	return pd;
}

static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
	struct i915_page_directory *pd;

	pd = __alloc_pd(sizeof(*pd));
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_page_dma(vm, px_base(pd)))) {
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	return pd;
}

static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
{
	cleanup_page_dma(vm, pd);
	kfree(pd);
}

#define free_px(vm, px) free_pd(vm, px_base(px))

static inline void
write_dma_entry(struct i915_page_dma * const pdma,
		const unsigned short idx,
		const u64 encoded_entry)
{
	u64 * const vaddr = kmap_atomic(pdma->page);

	vaddr[idx] = encoded_entry;
	kunmap_atomic(vaddr);
}

static inline void
__set_pd_entry(struct i915_page_directory * const pd,
	       const unsigned short idx,
	       struct i915_page_dma * const to,
	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
	/* Each thread pre-pins the pd, and we may have a thread per pde. */
	GEM_BUG_ON(atomic_read(px_used(pd)) > 2 * ARRAY_SIZE(pd->entry));

	atomic_inc(px_used(pd));
	pd->entry[idx] = to;
	write_dma_entry(px_base(pd), idx, encode(to->daddr, I915_CACHE_LLC));
}

#define set_pd_entry(pd, idx, to) \
	__set_pd_entry((pd), (idx), px_base(to), gen8_pde_encode)

static inline void
clear_pd_entry(struct i915_page_directory * const pd,
	       const unsigned short idx,
	       const struct i915_page_scratch * const scratch)
{
	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);

	write_dma_entry(px_base(pd), idx, scratch->encode);
	pd->entry[idx] = NULL;
	atomic_dec(px_used(pd));
}

static bool
release_pd_entry(struct i915_page_directory * const pd,
		 const unsigned short idx,
		 struct i915_page_table * const pt,
		 const struct i915_page_scratch * const scratch)
{
	bool free = false;

	if (atomic_add_unless(&pt->used, -1, 1))
		return false;

	spin_lock(&pd->lock);
	if (atomic_dec_and_test(&pt->used)) {
		clear_pd_entry(pd, idx, scratch);
		free = true;
	}
	spin_unlock(&pd->lock);

	return free;
}

static void gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
{
	struct drm_i915_private *dev_priv = ppgtt->vm.i915;
	enum vgt_g2v_type msg;
	int i;

	if (create)
		atomic_inc(px_used(ppgtt->pd)); /* never remove */
	else
		atomic_dec(px_used(ppgtt->pd));

	mutex_lock(&dev_priv->vgpu.lock);

	if (i915_vm_is_4lvl(&ppgtt->vm)) {
		const u64 daddr = px_dma(ppgtt->pd);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
	} else {
		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
			const u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
		}

		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
	}

	/* g2v_notify atomically (via hv trap) consumes the message packet. */
	I915_WRITE(vgtif_reg(g2v_notify), msg);

	mutex_unlock(&dev_priv->vgpu.lock);
}

/* Index shifts into the pagetable are offset by GEN8_PTE_SHIFT [12] */
#define GEN8_PAGE_SIZE (SZ_4K) /* page and page-directory sizes are the same */
#define GEN8_PTE_SHIFT (ilog2(GEN8_PAGE_SIZE))
#define GEN8_PDES (GEN8_PAGE_SIZE / sizeof(u64))
#define gen8_pd_shift(lvl) ((lvl) * ilog2(GEN8_PDES))
#define gen8_pd_index(i, lvl) i915_pde_index((i), gen8_pd_shift(lvl))
#define __gen8_pte_shift(lvl) (GEN8_PTE_SHIFT + gen8_pd_shift(lvl))
#define __gen8_pte_index(a, lvl) i915_pde_index((a), __gen8_pte_shift(lvl))
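/*
 * Worked example (illustrative only, derived from the macros above): each
 * level resolves ilog2(GEN8_PDES) == 9 bits of address, so for a 4-level
 * (48b VA) ppgtt a byte address decomposes as
 *
 *	__gen8_pte_index(addr, 3) -> bits 47:39 (PML4 entry)
 *	__gen8_pte_index(addr, 2) -> bits 38:30 (PDP entry)
 *	__gen8_pte_index(addr, 1) -> bits 29:21 (PD entry)
 *	__gen8_pte_index(addr, 0) -> bits 20:12 (PT entry)
 *
 * gen8_pd_index() performs the same split on a PTE index, i.e. on
 * addr >> GEN8_PTE_SHIFT, which is the coordinate space the clear/alloc
 * walkers below operate in.
 */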

static inline unsigned int
gen8_pd_range(u64 start, u64 end, int lvl, unsigned int *idx)
{
	const int shift = gen8_pd_shift(lvl);
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	end += ~mask >> gen8_pd_shift(1);

	*idx = i915_pde_index(start, shift);
	if ((start ^ end) & mask)
		return GEN8_PDES - *idx;
	else
		return i915_pde_index(end, shift) - *idx;
}

static inline bool gen8_pd_contains(u64 start, u64 end, int lvl)
{
	const u64 mask = ~0ull << gen8_pd_shift(lvl + 1);

	GEM_BUG_ON(start >= end);
	return (start ^ end) & mask && (start & ~mask) == 0;
}

static inline unsigned int gen8_pt_count(u64 start, u64 end)
{
	GEM_BUG_ON(start >= end);
	if ((start ^ end) >> gen8_pd_shift(1))
		return GEN8_PDES - (start & (GEN8_PDES - 1));
	else
		return end - start;
}

static inline unsigned int gen8_pd_top_count(const struct i915_address_space *vm)
{
	unsigned int shift = __gen8_pte_shift(vm->top);
	return (vm->total + (1ull << shift) - 1) >> shift;
}

static inline struct i915_page_directory *
gen8_pdp_for_page_index(struct i915_address_space * const vm, const u64 idx)
{
	struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(vm);

	if (vm->top == 2)
		return ppgtt->pd;
	else
		return i915_pd_entry(ppgtt->pd, gen8_pd_index(idx, vm->top));
}

static inline struct i915_page_directory *
gen8_pdp_for_page_address(struct i915_address_space * const vm, const u64 addr)
{
	return gen8_pdp_for_page_index(vm, addr >> GEN8_PTE_SHIFT);
}

static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
				 struct i915_page_directory *pd,
				 int count, int lvl)
{
	if (lvl) {
		void **pde = pd->entry;

		do {
			if (!*pde)
				continue;

			__gen8_ppgtt_cleanup(vm, *pde, GEN8_PDES, lvl - 1);
		} while (pde++, --count);
	}

	free_px(vm, pd);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);

	if (intel_vgpu_active(vm->i915))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	__gen8_ppgtt_cleanup(vm, ppgtt->pd, gen8_pd_top_count(vm), vm->top);
	free_scratch(vm);
}

static u64 __gen8_ppgtt_clear(struct i915_address_space * const vm,
			      struct i915_page_directory * const pd,
			      u64 start, const u64 end, int lvl)
{
	const struct i915_page_scratch * const scratch = &vm->scratch[lvl];
	unsigned int idx, len;

	GEM_BUG_ON(end > vm->total >> GEN8_PTE_SHIFT);

	len = gen8_pd_range(start, end, lvl--, &idx);
	DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d }\n",
	    __func__, vm, lvl + 1, start, end,
	    idx, len, atomic_read(px_used(pd)));
	GEM_BUG_ON(!len || len >= atomic_read(px_used(pd)));

	do {
		struct i915_page_table *pt = pd->entry[idx];

		if (atomic_fetch_inc(&pt->used) >> gen8_pd_shift(1) &&
		    gen8_pd_contains(start, end, lvl)) {
			DBG("%s(%p):{ lvl:%d, idx:%d, start:%llx, end:%llx } removing pd\n",
			    __func__, vm, lvl + 1, idx, start, end);
			clear_pd_entry(pd, idx, scratch);
			__gen8_ppgtt_cleanup(vm, as_pd(pt), I915_PDES, lvl);
			start += (u64)I915_PDES << gen8_pd_shift(lvl);
			continue;
		}

		if (lvl) {
			start = __gen8_ppgtt_clear(vm, as_pd(pt),
						   start, end, lvl);
		} else {
			unsigned int count;
			u64 *vaddr;

			count = gen8_pt_count(start, end);
			DBG("%s(%p):{ lvl:%d, start:%llx, end:%llx, idx:%d, len:%d, used:%d } removing pte\n",
			    __func__, vm, lvl, start, end,
			    gen8_pd_index(start, 0), count,