/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

static bool amdgpu_need_backup(struct amdgpu_device *adev)
{
	if (adev->flags & AMD_IS_APU)
		return false;

	if (amdgpu_gpu_recovery == 0 ||
	    (amdgpu_gpu_recovery == -1  && !amdgpu_sriov_vf(adev)))
		return false;

	return true;
}

static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);

	if (bo->kfd_bo)
		amdgpu_amdkfd_unreserve_system_memory_limit(bo);

	amdgpu_bo_kunmap(bo);

	if (bo->gem_base.import_attach)
		drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
	drm_gem_object_release(&bo->gem_base);
	amdgpu_bo_unref(&bo->parent);
	if (!list_empty(&bo->shadow_list)) {
		mutex_lock(&adev->shadow_list_lock);
		list_del_init(&bo->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}
	kfree(bo->metadata);
	kfree(bo);
}

bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &amdgpu_ttm_bo_destroy)
		return true;
	return false;
}

void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
	struct ttm_placement *placement = &abo->placement;
	struct ttm_place *places = abo->placements;
	u64 flags = abo->flags;
	u32 c = 0;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;

		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;

		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			places[c].lpfn = visible_pfn;
		else
			places[c].flags |= TTM_PL_FLAG_TOPDOWN;

		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		places[c].fpfn = 0;
		if (flags & AMDGPU_GEM_CREATE_SHADOW)
			places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
		else
			places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_TT;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_SYSTEM;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
		c++;
	}

	if (!c) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
		c++;
	}

	placement->num_placement = c;
	placement->placement = places;

	placement->num_busy_placement = c;
	placement->busy_placement = places;
}

/**
 * amdgpu_bo_create_reserved - create reserved BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: used to initialize BOs in structures
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use, and returns it still
 * reserved.
 *
 * Note: a new BO is created only if *bo_ptr is NULL; otherwise the BO it
 * points to is reused.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
			      unsigned long size, int align,
			      u32 domain, struct amdgpu_bo **bo_ptr,
			      u64 *gpu_addr, void **cpu_addr)
{
	struct amdgpu_bo_param bp;
	bool free = false;
	int r;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = align;
	bp.domain = domain;
	bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
		AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;

	if (!*bo_ptr) {
		r = amdgpu_bo_create(adev, &bp, bo_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
				r);
			return r;
		}
		free = true;
	}

	r = amdgpu_bo_reserve(*bo_ptr, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
		goto error_free;
	}

	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
		goto error_unreserve;
	}

	if (cpu_addr) {
		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
			goto error_unreserve;
		}
	}

	return 0;

error_unreserve:
	amdgpu_bo_unreserve(*bo_ptr);

error_free:
	if (free)
		amdgpu_bo_unref(bo_ptr);

	return r;
}
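
/*
 * Illustrative usage sketch (not part of this file): a typical caller of
 * amdgpu_bo_create_reserved() allocates a small pinned buffer, fills it while
 * it is still reserved, and only then drops the reservation. The names
 * "my_fw_size" and "fw_data" below are hypothetical placeholders.
 *
 *	struct amdgpu_bo *bo = NULL;
 *	u64 gpu_addr;
 *	void *cpu_ptr;
 *	int r;
 *
 *	r = amdgpu_bo_create_reserved(adev, my_fw_size, PAGE_SIZE,
 *				      AMDGPU_GEM_DOMAIN_VRAM, &bo,
 *				      &gpu_addr, &cpu_ptr);
 *	if (r)
 *		return r;
 *	memcpy(cpu_ptr, fw_data, my_fw_size);
 *	amdgpu_bo_unreserve(bo);
 */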

/**
 * amdgpu_bo_create_kernel - create BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: used to initialize BOs in structures
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use.
 *
 * Note: a new BO is created only if *bo_ptr is NULL; otherwise the BO it
 * points to is reused.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
			    unsigned long size, int align,
			    u32 domain, struct amdgpu_bo **bo_ptr,
			    u64 *gpu_addr, void **cpu_addr)
{
	int r;

	r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
				      gpu_addr, cpu_addr);

	if (r)
		return r;

	amdgpu_bo_unreserve(*bo_ptr);

	return 0;
}

/**
 * amdgpu_bo_free_kernel - free BO for kernel use
 *
 * @bo: amdgpu BO to free
 * @gpu_addr: pointer holding the BO's GPU address, set to 0 on free (may be NULL)
 * @cpu_addr: pointer holding the BO's CPU mapping, set to NULL on free (may be NULL)
 *
 * Unmaps, unpins and frees a BO that was allocated for kernel internal use.
 */
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
			   void **cpu_addr)
{
	if (*bo == NULL)
		return;

	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
		if (cpu_addr)
			amdgpu_bo_kunmap(*bo);

		amdgpu_bo_unpin(*bo);
		amdgpu_bo_unreserve(*bo);
	}
	amdgpu_bo_unref(bo);

	if (gpu_addr)
		*gpu_addr = 0;

	if (cpu_addr)
		*cpu_addr = NULL;
}
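
/*
 * Illustrative usage sketch (not part of this file): amdgpu_bo_create_kernel()
 * and amdgpu_bo_free_kernel() are intended to bracket the lifetime of a
 * driver-internal buffer. The "my_bo", "my_gpu_addr" and "my_cpu_ptr"
 * variables below are hypothetical placeholders for wherever the caller
 * stores the results.
 *
 *	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
 *				    AMDGPU_GEM_DOMAIN_GTT, &my_bo,
 *				    &my_gpu_addr, &my_cpu_ptr);
 *	if (r)
 *		return r;
 *
 *	... use my_cpu_ptr / my_gpu_addr ...
 *
 *	amdgpu_bo_free_kernel(&my_bo, &my_gpu_addr, &my_cpu_ptr);
 */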

/* Validate that the BO size fits within the total memory of the requested domain */
static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
					  unsigned long size, u32 domain)
{
	struct ttm_mem_type_manager *man = NULL;

	/*
	 * If GTT is part of requested domains the check must succeed to
	 * allow fall back to GTT
	 */
	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		man = &adev->mman.bdev.man[TTM_PL_TT];

		if (size < (man->size << PAGE_SHIFT))
			return true;
		else
			goto fail;
	}

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		man = &adev->mman.bdev.man[TTM_PL_VRAM];

		if (size < (man->size << PAGE_SHIFT))
			return true;
		else
			goto fail;
	}


	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
	return true;

fail:
	DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
		  man->size << PAGE_SHIFT);
	return false;
}

static int amdgpu_bo_do_create(struct amdgpu_device *adev,
			       struct amdgpu_bo_param *bp,
			       struct amdgpu_bo **bo_ptr)
{
	struct ttm_operation_ctx ctx = {
		.interruptible = (bp->type != ttm_bo_type_kernel),
		.no_wait_gpu = false,
		.resv = bp->resv,
		.flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
	};
	struct amdgpu_bo *bo;
	unsigned long page_align, size = bp->size;
	size_t acc_size;
	int r;

	page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size = ALIGN(size, PAGE_SIZE);

	if (!amdgpu_bo_validate_size(adev, size, bp->domain))
		return -ENOMEM;

	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
	INIT_LIST_HEAD(&bo->shadow_list);
	INIT_LIST_HEAD(&bo->va);
	bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain :
		bp->domain;
	bo->allowed_domains = bo->preferred_domains;
	if (bp->type != ttm_bo_type_kernel &&
	    bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

	bo->flags = bp->flags;

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */

#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#endif

	bo->tbo.bdev = &adev->mman.bdev;
	amdgpu_ttm_placement_from_domain(bo, bp->domain);
	if (bp->type == ttm_bo_type_kernel)
		bo->tbo.priority = 1;

	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
				 &bo->placement, page_align, &ctx, acc_size,
				 NULL, bp->resv, &amdgpu_ttm_bo_destroy);
	if (unlikely(r != 0))
		return r;

	if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
	    bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
					     ctx.bytes_moved);
	else
		amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);

	if (bp->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
		struct dma_fence *fence;

		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
		if (unlikely(r))
			goto fail_unreserve;

		amdgpu_bo_fence(bo, fence, false);
		dma_fence_put(bo->tbo.moving);
		bo->tbo.moving = dma_fence_get(fence);
		dma_fence_put(fence);
	}
	if (!bp->resv)
		amdgpu_bo_unreserve(bo);
	*bo_ptr = bo;

	trace_amdgpu_bo_create(bo);

	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
	if (bp->type == ttm_bo_type_device)
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	return 0;

fail_unreserve:
	if (!bp->resv)
		ww_mutex_unlock(&bo->tbo.resv->lock);
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
				   unsigned long size, int byte_align,
				   struct amdgpu_bo *bo)
{
	struct amdgpu_bo_param bp;
	int r;

	if (bo->shadow)
		return 0;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.byte_align = byte_align;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
		AMDGPU_GEM_CREATE_SHADOW;
	bp.type = ttm_bo_type_kernel;
	bp.resv = bo->tbo.resv;

	r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
	if (!r) {
		bo->shadow->parent = amdgpu_bo_ref(bo);
		mutex_lock(&adev->shadow_list_lock);
		list_add_tail(&bo->shadow_list, &adev->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}

	return r;
}

int amdgpu_bo_create(struct amdgpu_device *adev,
		     struct amdgpu_bo_param *bp,
		     struct amdgpu_bo **bo_ptr)
{
	u64 flags = bp->flags;
	int r;

	bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
	r = amdgpu_bo_do_create(adev, bp, bo_ptr);
	if (r)
		return r;

	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
		if (!bp->resv)
			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
							NULL));

		r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr));

		if (!bp->resv)
			reservation_object_unlock((*bo_ptr)->tbo.resv);

		if (r)
			amdgpu_bo_unref(bo_ptr);
	}

	return r;
}

int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct reservation_object *resv,
			       struct dma_fence **fence,
			       bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
	struct ttm_operation_ctx ctx = { false, false };
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	domain = bo->preferred_domains;

retry:
	amdgpu_ttm_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_bo *bo,
				  struct reservation_object *resv,
				  struct dma_fence **fence,
				  bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	void *kptr;
	long r;

	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		return -EPERM;

	kptr = amdgpu_bo_kptr(bo);
	if (kptr) {
		if (ptr)
			*ptr = kptr;
		return 0;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r)
		return r;

	if (ptr)
		*ptr = amdgpu_bo_kptr(bo);

	return 0;
}

void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
	bool is_iomem;

	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}

void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
	if (bo->kmap.bo)
		ttm_bo_kunmap(&bo->kmap);
}

struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;

	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	/* A shared bo cannot be migrated to VRAM */
	if (bo->prime_shared_count) {
		if (domain & AMDGPU_GEM_DOMAIN_GTT)
			domain = AMDGPU_GEM_DOMAIN_GTT;
		else
			return -EINVAL;
	}

	/* This assumes only APU display buffers are pinned with (VRAM|GTT).
	 * See function amdgpu_display_supported_domains()
	 */
	domain = amdgpu_bo_get_preferred_pin_domain(adev, domain);

	if (bo->pin_count) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (!(domain & amdgpu_mem_type_to_domain(mem_type)))
			return -EINVAL;

		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
			WARN_ON_ONCE(max_offset <
				     (amdgpu_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}

	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	/* force to pin into visible video ram */
	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
		bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	amdgpu_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		unsigned fpfn, lpfn;

		fpfn = min_offset >> PAGE_SHIFT;
		lpfn = max_offset >> PAGE_SHIFT;

		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (!bo->placements[i].lpfn ||
		    (lpfn && lpfn < bo->placements[i].lpfn))
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p pin failed\n", bo);
		goto error;
	}

	r = amdgpu_ttm_alloc_gart(&bo->tbo);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto error;
	}

	bo->pin_count = 1;
	if (gpu_addr != NULL)
		*gpu_addr = amdgpu_bo_gpu_offset(bo);

	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
		adev->vram_pin_size += amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size += amdgpu_bo_size(bo);
	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		adev->gart_pin_size += amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}

int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { false, false };
	int r, i;

	if (!bo->pin_count) {
		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
		goto error;
	}

	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
		adev->vram_pin_size -= amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size -= amdgpu_bo_size(bo);
	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
		adev->gart_pin_size -= amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (adev->flags & AMD_IS_APU)) {
		/* Useless to evict on IGP chips */
		return 0;
	}
	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}

static const char *amdgpu_vram_names[] = {
	"UNKNOWN",
	"GDDR1",
	"DDR2",
	"GDDR3",
	"GDDR4",
	"GDDR5",
	"HBM",
	"DDR3",
	"DDR4",
};

int amdgpu_bo_init(struct amdgpu_device *adev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(adev->gmc.aper_base,
				   adev->gmc.aper_size);

	/* Add an MTRR for the VRAM */
	adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
					      adev->gmc.aper_size);
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 adev->gmc.mc_vram_size >> 20,
		 (unsigned long long)adev->gmc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %s\n",
		 adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
	return amdgpu_ttm_init(adev);
}

int amdgpu_bo_late_init(struct amdgpu_device *adev)
{
	amdgpu_ttm_late_init(adev);

	return 0;
}

void amdgpu_bo_fini(struct amdgpu_device *adev)
{
	amdgpu_ttm_fini(adev);
	arch_phys_wc_del(adev->gmc.vram_mtrr);
	arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}

int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
			     struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	if (adev->family <= AMDGPU_FAMILY_CZ &&
	    AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
		return -EINVAL;

	bo->tiling_flags = tiling_flags;
	return 0;
}

void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
}

int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
			    uint32_t metadata_size, uint64_t flags)
{
	void *buffer;

	if (!metadata_size) {
		if (bo->metadata_size) {
			kfree(bo->metadata);
			bo->metadata = NULL;
			bo->metadata_size = 0;
		}
		return 0;
	}

	if (metadata == NULL)
		return -EINVAL;

	buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	kfree(bo->metadata);
	bo->metadata_flags = flags;
	bo->metadata = buffer;
	bo->metadata_size = metadata_size;

	return 0;
}

int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
			   size_t buffer_size, uint32_t *metadata_size,
			   uint64_t *flags)
{
	if (!buffer && !metadata_size)
		return -EINVAL;

	if (buffer) {
		if (buffer_size < bo->metadata_size)
			return -EINVAL;

		if (bo->metadata_size)
			memcpy(buffer, bo->metadata, bo->metadata_size);
	}

	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (flags)
		*flags = bo->metadata_flags;

	return 0;
}

void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
			   bool evict,
			   struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return;

	abo = ttm_to_amdgpu_bo(bo);
	amdgpu_vm_bo_invalidate(adev, abo, evict);

	amdgpu_bo_kunmap(abo);

	/* remember the eviction */
	if (evict)
		atomic64_inc(&adev->num_evictions);

	/* update statistics */
	if (!new_mem)
		return;

	/* move_notify is called before move happens */
	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}

int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct ttm_operation_ctx ctx = { false, false };
	struct amdgpu_bo *abo;
	unsigned long offset, size;
	int r;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return 0;

	abo = ttm_to_amdgpu_bo(bo);

	/* Remember that this BO was accessed by the CPU */
	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= adev->gmc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (abo->pin_count > 0)
		return -EINVAL;

	/* hurrah the memory is not visible ! */
	atomic64_inc(&adev->num_vram_cpu_page_faults);
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT);

	/* Avoid costly evictions; only set GTT as a busy placement */
	abo->placement.num_busy_placement = 1;
	abo->placement.busy_placement = &abo->placements[1];

	r = ttm_bo_validate(bo, &abo->placement, &ctx);
	if (unlikely(r != 0))
		return r;

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if (bo->mem.mem_type == TTM_PL_VRAM &&
	    (offset + size) > adev->gmc.visible_vram_size)
		return -EINVAL;

	return 0;
}

/**
 * amdgpu_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
		     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, fence);
	else
		reservation_object_add_excl_fence(resv, fence);
}
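
/*
 * Illustrative usage sketch (not part of this file), mirroring the VRAM-clear
 * path in amdgpu_bo_do_create() above: after submitting a GPU job that touches
 * a reserved BO, the resulting fence is attached to the BO's reservation
 * object so later users wait for it before reusing the memory.
 *
 *	struct dma_fence *fence = NULL;
 *
 *	r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
 *	if (!r) {
 *		amdgpu_bo_fence(bo, fence, false);
 *		dma_fence_put(fence);
 *	}
 */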

/**
 * amdgpu_bo_gpu_offset - return GPU offset of bo
 * @bo:	amdgpu object for which we query the offset
 *
 * Returns current GPU offset of the object.
 *
 * Note: object should either be pinned or reserved when calling this
 * function; it might be useful to add a check for this for debugging.
 */
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
		     !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem));
	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
		     !bo->pin_count);
	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

	return bo->tbo.offset;
}
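
/*
 * Illustrative usage sketch (not part of this file): per the note above,
 * amdgpu_bo_gpu_offset() is only safe on a pinned or reserved BO, so an
 * unpinned caller typically brackets it with reserve/unreserve. "addr" is a
 * hypothetical local.
 *
 *	u64 addr;
 *
 *	r = amdgpu_bo_reserve(bo, false);
 *	if (r)
 *		return r;
 *	addr = amdgpu_bo_gpu_offset(bo);
 *	amdgpu_bo_unreserve(bo);
 */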

uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
					    uint32_t domain)
{
	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
		domain = AMDGPU_GEM_DOMAIN_VRAM;
		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
			domain = AMDGPU_GEM_DOMAIN_GTT;
	}
	return domain;
}