/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

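/* Generate the amdgpu_vm_it_* interval tree helpers used to look up
 * struct amdgpu_bo_va_mapping nodes by their inclusive [start, last]
 * page range.
 */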
#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)

#undef START
#undef LAST

/* Local structure. Encapsulate some VM table update parameters to reduce
 * the number of function parameters
 */
struct amdgpu_pte_update_params {
	/* amdgpu device we do this update for */
	struct amdgpu_device *adev;

	/* optional amdgpu_vm we do this update for */
	struct amdgpu_vm *vm;

	/* address where to copy page table entries from */
	uint64_t src;

	/* indirect buffer to fill with commands */
	struct amdgpu_ib *ib;

	/* Function which actually does the update */
	void (*func)(struct amdgpu_pte_update_params *params,
		     struct amdgpu_bo *bo, uint64_t pe,
		     uint64_t addr, unsigned count, uint32_t incr,
		     uint64_t flags);

	/* The next two are used during VM update by CPU
	 *  DMA addresses to use for mapping
	 *  Kernel pointer of PD/PT BO that needs to be updated
	 */
	dma_addr_t *pages_addr;
	void *kptr;
};

/* Helper to disable partial resident texture feature from a fence callback */
struct amdgpu_prt_cb {
	struct amdgpu_device *adev;
	struct dma_fence_cb cb;
};

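/**
 * amdgpu_vm_bo_base_init - add a BO to the list of BOs associated with a VM
 *
 * @base: base structure for tracking BO usage in a VM
 * @vm: vm to which the BO is to be added
 * @bo: amdgpu buffer object
 *
 * Initialize the bo_base structure, link it into the BO's va list and,
 * if the per-VM BO is currently evicted, put it on the VM's evicted list.
 */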
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo *bo)
{
	base->vm = vm;
	base->bo = bo;
	INIT_LIST_HEAD(&base->bo_list);
	INIT_LIST_HEAD(&base->vm_status);

	if (!bo)
		return;
	list_add_tail(&base->bo_list, &bo->va);

	if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
		return;

	if (bo->preferred_domains &
	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
		return;

	/*
	 * we checked all the prerequisites, but it looks like this per vm bo
	 * is currently evicted. add the bo to the evicted list to make sure it
	 * is validated on next vm use to avoid fault.
	 */
	list_move_tail(&base->vm_status, &vm->evicted);
}

/**
 * amdgpu_vm_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns the number of bits the pfn needs to be right shifted for a level.
 */
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = 0xff;

	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		shift = 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
		break;
	case AMDGPU_VM_PTB:
		shift = 0;
		break;
	default:
		dev_err(adev->dev, "the level%d isn't supported.\n", level);
	}

	return shift;
}

/**
 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Calculate the number of entries in a page directory or page table.
 */
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = amdgpu_vm_level_shift(adev,
					       adev->vm_manager.root_level);

	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;
	else
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Calculate the size of the BO for a page directory or page table in bytes.
 */
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->robj = vm->root.base.bo;
	entry->priority = 0;
	entry->tv.bo = &entry->robj->tbo;
	entry->tv.shared = true;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 */
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*validate)(void *p, struct amdgpu_bo *bo),
			      void *param)
{
	struct ttm_bo_global *glob = adev->mman.bdev.glob;
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r = 0;

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (bo->parent) {
			r = validate(param, bo);
			if (r)
				break;

			spin_lock(&glob->lru_lock);
			ttm_bo_move_to_lru_tail(&bo->tbo);
			if (bo->shadow)
				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
			spin_unlock(&glob->lru_lock);
		}

		if (bo->tbo.type != ttm_bo_type_kernel) {
			spin_lock(&vm->moved_lock);
			list_move(&bo_base->vm_status, &vm->moved);
			spin_unlock(&vm->moved_lock);
		} else {
			list_move(&bo_base->vm_status, &vm->relocated);
		}
	}

	spin_lock(&glob->lru_lock);
	list_for_each_entry(bo_base, &vm->idle, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (!bo->parent)
			continue;

		ttm_bo_move_to_lru_tail(&bo->tbo);
		if (bo->shadow)
			ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
	}
	spin_unlock(&glob->lru_lock);

	return r;
}

/**
 * amdgpu_vm_ready - check VM is ready for updates
 *
 * @vm: VM to check
 *
 * Check if all VM PDs/PTs are ready for updates
 */
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
	return list_empty(&vm->evicted);
}

/**
 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @bo: BO to clear
 * @level: level this BO is at
 * @pte_support_ats: indicate ATS support from PTE
 *
 * Root PD needs to be reserved when calling this.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm, struct amdgpu_bo *bo,
			      unsigned level, bool pte_support_ats)
{
	struct ttm_operation_ctx ctx = { true, false };
	struct dma_fence *fence = NULL;
	unsigned entries, ats_entries;
	struct amdgpu_ring *ring;
	struct amdgpu_job *job;
	uint64_t addr;
	int r;

	addr = amdgpu_bo_gpu_offset(bo);
	entries = amdgpu_bo_size(bo) / 8;

	if (pte_support_ats) {
		if (level == adev->vm_manager.root_level) {
			ats_entries = amdgpu_vm_level_shift(adev, level);
			ats_entries += AMDGPU_GPU_PAGE_SHIFT;
			ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
			ats_entries = min(ats_entries, entries);
			entries -= ats_entries;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	} else {
		ats_entries = 0;
	}

	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		goto error;

	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
	if (r)
		goto error;

	if (ats_entries) {
		uint64_t ats_value;

		ats_value = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB)
			ats_value |= AMDGPU_PDE_PTE;

		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      ats_entries, 0, ats_value);
		addr += ats_entries * 8;
	}

	if (entries)
		amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
				      entries, 0, 0);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);

	WARN_ON(job->ibs[0].length_dw > 64);
	r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
			     AMDGPU_FENCE_OWNER_UNDEFINED, false);
	if (r)
		goto error_free;

	r = amdgpu_job_submit(job, ring, &vm->entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	amdgpu_bo_fence(bo, fence, true);
	dma_fence_put(fence);

	if (bo->shadow)
		return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
					  level, pte_support_ats);

	return 0;

error_free:
	amdgpu_job_free(job);

error:
	return r;
}

/**
 * amdgpu_vm_alloc_levels - allocate the PD/PT levels
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @parent: parent PT
 * @saddr: start of the address range
 * @eaddr: end of the address range
 * @level: VMPT level
 * @ats: indicate ATS support from PTE
 *
 * Make sure the page directories and page tables are allocated
 */
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
				  struct amdgpu_vm *vm,
				  struct amdgpu_vm_pt *parent,
				  uint64_t saddr, uint64_t eaddr,
				  unsigned level, bool ats)
{
	unsigned shift = amdgpu_vm_level_shift(adev, level);
	unsigned pt_idx, from, to;
	u64 flags;
	int r;

	if (!parent->entries) {
		unsigned num_entries = amdgpu_vm_num_entries(adev, level);

		parent->entries = kvmalloc_array(num_entries,
						 sizeof(struct amdgpu_vm_pt),
						 GFP_KERNEL | __GFP_ZERO);
		if (!parent->entries)
			return -ENOMEM;
		memset(parent->entries, 0, sizeof(struct amdgpu_vm_pt));
	}

	from = saddr >> shift;
	to = eaddr >> shift;
	if (from >= amdgpu_vm_num_entries(adev, level) ||
	    to >= amdgpu_vm_num_entries(adev, level))
		return -EINVAL;

	++level;
	saddr = saddr & ((1 << shift) - 1);
	eaddr = eaddr & ((1 << shift) - 1);

	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	if (vm->use_cpu_for_update)
		flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else
		flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
				AMDGPU_GEM_CREATE_SHADOW);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
		struct amdgpu_bo *pt;

		if (!entry->base.bo) {
			struct amdgpu_bo_param bp;

			memset(&bp, 0, sizeof(bp));
			bp.size = amdgpu_vm_bo_size(adev, level);
			bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
			bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
			bp.flags = flags;
			bp.type = ttm_bo_type_kernel;
			bp.resv = resv;
			r = amdgpu_bo_create(adev, &bp, &pt);
			if (r)
				return r;

			r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
			if (r) {
				amdgpu_bo_unref(&pt->shadow);
				amdgpu_bo_unref(&pt);
				return r;
			}

			if (vm->use_cpu_for_update) {
				r = amdgpu_bo_kmap(pt, NULL);
				if (r) {
					amdgpu_bo_unref(&pt->shadow);
					amdgpu_bo_unref(&pt);
					return r;
				}
			}

			/* Keep a reference to the root directory to avoid
			 * freeing them up in the wrong order.
			 */
			pt->parent = amdgpu_bo_ref(parent->base.bo);

			amdgpu_vm_bo_base_init(&entry->base, vm, pt);
			list_move(&entry->base.vm_status, &vm->relocated);
		}

		if (level < AMDGPU_VM_PTB) {
			uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
			uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
				((1 << shift) - 1);
			r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
						   sub_eaddr, level, ats);
			if (r)
				return r;
		}
	}

	return 0;
}

/**
 * amdgpu_vm_alloc_pts - Allocate page tables.
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @saddr: Start address which needs to be allocated
 * @size: Size from start address we need.
 *
 * Make sure the page tables are allocated.
 */
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			struct amdgpu_vm *vm,
			uint64_t saddr, uint64_t size)
{
	uint64_t eaddr;
	bool ats = false;

	/* validate the parameters */
	if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
		return -EINVAL;

	eaddr = saddr + size - 1;

	if (vm->pte_support_ats)
		ats = saddr < AMDGPU_VA_HOLE_START;

	saddr /= AMDGPU_GPU_PAGE_SIZE;
	eaddr /= AMDGPU_GPU_PAGE_SIZE;

	if (eaddr >= adev->vm_manager.max_pfn) {
		dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
			eaddr, adev->vm_manager.max_pfn);
		return -EINVAL;
	}

	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
				      adev->vm_manager.root_level, ats);
}

/**
 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 *
 * @adev: amdgpu_device pointer
 */
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
	const struct amdgpu_ip_block *ip_block;
	bool has_compute_vm_bug;
	struct amdgpu_ring *ring;
	int i;

	has_compute_vm_bug = false;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block) {
		/* Compute has a VM bug for GFX version < 7.
		 * Compute has a VM bug for GFX 8 MEC firmware version < 673.
		 */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}

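/**
 * amdgpu_vm_need_pipeline_sync - Check if pipe needs to be synced
 *
 * @ring: ring on which the job will be submitted
 * @job: job which will be submitted to the ring
 *
 * Returns true if a pipeline sync / VM flush is needed for this job.
 */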
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	if (job->vmid == 0)
		return false;
	id = &id_mgr->ids[job->vmid];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vmid_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}

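/* Returns true if all of VRAM is CPU visible (large BAR configuration) */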
static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
	return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job: related job
 * @need_pipe_sync: is pipe sync needed
 *
 * Emit a VM flush when it is necessary.
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);
	bool vm_flush_needed = job->vm_needs_flush;
	bool pasid_mapping_needed = id->pasid != job->pasid ||
		!id->pasid_mapping ||
		!dma_fence_is_signaled(id->pasid_mapping);
	struct dma_fence *fence = NULL;
	unsigned patch_offset = 0;
	int r;

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
	}

	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
	vm_flush_needed &= !!ring->funcs->emit_vm_flush;
	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
		ring->funcs->emit_wreg;

	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (vm_flush_needed) {
		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
	}

	if (pasid_mapping_needed)
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (vm_flush_needed || pasid_mapping_needed) {
		r = amdgpu_fence_emit(ring, &fence, 0);
		if (r)
			return r;
	}

	if (vm_flush_needed) {
		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = dma_fence_get(fence);
		id->current_gpu_reset_count =
			atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	if (pasid_mapping_needed) {
		id->pasid = job->pasid;
		dma_fence_put(id->pasid_mapping);
		id->pasid_mapping = dma_fence_get(fence);
	}
	dma_fence_put(fence);

	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
		id->gds_base = job->gds_base;
		id->gds_size = job->gds_size;
		id->gws_base = job->gws_base;
		id->gws_size = job->gws_size;
		id->oa_base = job->oa_base;
		id->oa_size = job->oa_size;
		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	return 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's vm list for the requested vm
 * Returns the found bo_va or NULL if none is found
 *
 * Object has to be reserved!
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, base.bo_list) {
		if (bo_va->base.vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * amdgpu_vm_do_set_ptes - helper to call the right asic function
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
				  struct amdgpu_bo *bo,
				  uint64_t pe, uint64_t addr,
				  unsigned count, uint32_t incr,
				  uint64_t flags)
{
	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	if (count < 3) {
		amdgpu_vm_write_pte(params->adev, params->ib, pe,
				    addr | flags, count, incr);

	} else {
		amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr,
				      count, incr, flags);
	}
}

/**
 * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the DMA function to copy the PTEs.
 */
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	uint64_t src = (params->src + (addr >> 12) * 8);

	pe += amdgpu_bo_gpu_offset(bo);
	trace_amdgpu_vm_copy_ptes(pe, src, count);

	amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to and return the pointer for the page table entry.
 */
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

/**
 * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
 *
 * @params: see amdgpu_pte_update_params definition
 * @bo: PD/PT to update
 * @pe: kmap addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Write count number of PT/PD entries directly.
 */
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
				   struct amdgpu_bo *bo,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	unsigned int i;
	uint64_t value;

	pe += (unsigned long)amdgpu_bo_kptr(bo);

	trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);

	for (i = 0; i < count; i++) {
		value = params->pages_addr ?
			amdgpu_vm_map_gart(params->pages_addr, addr) :
			addr;
		amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
				       i, value, flags);
		addr += incr;
	}
}

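/**
 * amdgpu_vm_wait_pd - wait for the root PD to be idle
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @owner: fence owner
 *
 * Synchronize to the fences of the root PD's reservation object with the
 * given owner semantics, so that CPU based page table updates are safe.
 */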
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			     void *owner)
{
	struct amdgpu_sync sync;
	int r;

	amdgpu_sync_create(&sync);
	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
	r = amdgpu_sync_wait(&sync, true);
	amdgpu_sync_free(&sync);

	return r;
}

/*
 * amdgpu_vm_update_pde - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @vm: requested vm
 * @parent: parent directory
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
				 struct amdgpu_vm *vm,
				 struct amdgpu_vm_pt *parent,
				 struct amdgpu_vm_pt *entry)
{
	struct amdgpu_bo *bo = parent->base.bo, *pbo;
	uint64_t pde, pt, flags;
	unsigned level;

	/* Don't update huge pages here */
	if (entry->huge)
		return;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	pt = amdgpu_bo_gpu_offset(entry->base.bo);
	flags = AMDGPU_PTE_VALID;
	amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
	pde = (entry - parent->entries) * 8;
	if (bo->shadow)
		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
	params->func(params, bo, pde, pt, 1, 0, flags);
}

/*
 * amdgpu_vm_invalidate_level - mark all PD levels as invalid
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 * @parent: parent PD
 * @level: VMPT level
 *
 * Mark all PD level as invalid after an error.
 */
static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm,
				       struct amdgpu_vm_pt *parent,
				       unsigned level)
{
	unsigned pt_idx, num_entries;

	/*
	 * Recurse into the subdirectories. This recursion is harmless because
	 * we only have a maximum of 5 layers.
	 */
	num_entries = amdgpu_vm_num_entries(adev, level);
	for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];

		if (!entry->base.bo)
			continue;

		if (!entry->base.moved)
			list_move(&entry->base.vm_status, &vm->relocated);
		amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
	}
}

/*
 * amdgpu_vm_update_directories - make sure that all directories are valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 *
 * Makes sure all directories are up to date.
 * Returns 0 for success, error for failure.
 */
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
				 struct amdgpu_vm *vm)
{
	struct amdgpu_pte_update_params params;
	struct amdgpu_job *job;
	unsigned ndw = 0;
	int r = 0;

	if (list_empty(&vm->relocated))
		return 0;

restart:
	memset(&params, 0, sizeof(params));
	params.adev = adev;

	if (vm->use_cpu_for_update) {
		struct amdgpu_vm_bo_base *bo_base;

		list_for_each_entry(bo_base, &vm->relocated, vm_status) {
			r = amdgpu_bo_kmap(bo_base->bo, NULL);
			if (unlikely(r))
				return r;
		}

		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
		if (unlikely(r))
			return r;

		params.func = amdgpu_vm_cpu_set_ptes;
	} else {
		ndw = 512 * 8;
		r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
		if (r)
			return r;

		params.ib = &job->ibs[0];
		params.func = amdgpu_vm_do_set_ptes;
	}

	while (!list_empty(&vm->relocated)) {
		struct amdgpu_vm_bo_base *bo_base, *parent;
		struct amdgpu_vm_pt *pt, *entry;
		struct amdgpu_bo *bo;

		bo_base = list_first_entry(&vm->relocated,
					   struct amdgpu_vm_bo_base,
					   vm_status);
		bo_base->moved = false;
		list_move(&bo_base->vm_status, &vm->idle);

		bo = bo_base->bo->parent;
		if (!bo)
			continue;

		parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
					  bo_list);
		pt = container_of(parent, struct amdgpu_vm_pt, base);
		entry = container_of(bo_base, struct amdgpu_vm_pt, base);

		amdgpu_vm_update_pde(&params, vm, pt, entry);

		if (!vm->use_cpu_for_update &&
		    (ndw - params.ib->length_dw) < 32)
			break;
	}

	if (vm->use_cpu_for_update) {
		/* Flush HDP */
		mb();
		amdgpu_asic_flush_hdp(adev, NULL);
	} else if (params.ib->length_dw == 0) {
		amdgpu_job_free(job);
	} else {
		struct amdgpu_bo *root = vm->root.base.bo;
		struct amdgpu_ring *ring;
		struct dma_fence *fence;

		ring = container_of(vm->entity.sched, struct amdgpu_ring,
				    sched);

		amdgpu_ring_pad_ib(ring, params.ib);
		amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
				 AMDGPU_FENCE_OWNER_VM, false);
		WARN_ON(params.ib->length_dw > ndw);
		r = amdgpu_job_submit(job, ring, &vm->entity,
				      AMDGPU_FENCE_OWNER_VM, &fence);
		if (r)
			goto error;

		amdgpu_bo_fence(root, fence, true);
		dma_fence_put(vm->last_update);
		vm->last_update = fence;
	}

	if (!list_empty(&vm->relocated))
		goto restart;

	return 0;

error:
	amdgpu_vm_invalidate_level(adev, vm, &vm->root,
				   adev->vm_manager.root_level);
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_vm_get_entry - find the entry for an address
 *
 * @p: see amdgpu_pte_update_params definition
 * @addr: virtual address in question
 * @entry: resulting entry or NULL
 * @parent: parent entry
 *
 * Find the vm_pt entry and its parent for the given address.
 */
void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
			 struct amdgpu_vm_pt **entry,
			 struct amdgpu_vm_pt **parent)
{
	unsigned level = p->adev->vm_manager.root_level;

	*parent = NULL;
	*entry = &p->vm->root;
	while ((*entry)->entries) {
		unsigned shift = amdgpu_vm_level_shift(p->adev, level++);

		*parent = *entry;
		*entry = &(*entry)->entries[addr >> shift];
		addr &= (1ULL << shift) - 1;