Commit 4d2af08e authored by Linus Torvalds
Browse files

Merge tag 'iommu-fixes-5.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu fixes from Joerg Roedel:
 "A couple of fixes for the AMD IOMMU driver have piled up:

   - Some fixes for the reworked IO page-table which caused memory leaks
     or did not allow mappings to be downgraded under some conditions.

   - Locking fixes to fix a couple of possible races around accessing
     'struct protection_domain'. The races got introduced when the
     dma-ops path became lock-less in the fast-path"

* tag 'iommu-fixes-5.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu:
  iommu/amd: Lock code paths traversing protection_domain->dev_list
  iommu/amd: Lock dev_data in attach/detach code paths
  iommu/amd: Check for busy devices earlier in attach_device()
  iommu/amd: Take domain->lock for complete attach/detach path
  iommu/amd: Remove amd_iommu_devtable_lock
  iommu/amd: Remove domain->updated
  iommu/amd: Wait for completion of IOTLB flush in attach_device
  iommu/amd: Unmap all L7 PTEs when downgrading page-sizes
  iommu/amd: Introduce first_pte_l7() helper
  iommu/amd: Fix downgrading default page-sizes in alloc_pte()
  iommu/amd: Fix pages leak in free_pagetable()
parents 02dc96ef 2a78f996
...@@ -70,7 +70,6 @@ ...@@ -70,7 +70,6 @@
*/ */
#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
static DEFINE_SPINLOCK(amd_iommu_devtable_lock);
static DEFINE_SPINLOCK(pd_bitmap_lock); static DEFINE_SPINLOCK(pd_bitmap_lock);
/* List of all available dev_data structures */ /* List of all available dev_data structures */
...@@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) ...@@ -202,6 +201,7 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
if (!dev_data) if (!dev_data)
return NULL; return NULL;
spin_lock_init(&dev_data->lock);
dev_data->devid = devid; dev_data->devid = devid;
ratelimit_default_init(&dev_data->rs); ratelimit_default_init(&dev_data->rs);
...@@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev) ...@@ -501,6 +501,29 @@ static void iommu_uninit_device(struct device *dev)
*/ */
} }
/*
 * Find the first entry of the series of large PTEs that @pte is part of.
 *
 * The page size is decoded from *pte via PTE_PAGE_SIZE() and the number of
 * entries in the series via PAGE_SIZE_PTE_COUNT(). The address of @pte is
 * then masked down to a multiple of (entries << 3) bytes.
 *
 * @pte:       pointer to one PTE of the series; it is dereferenced here
 * @page_size: if non-NULL, receives the decoded page size
 * @count:     if non-NULL, receives the number of PTEs in the series
 *
 * Returns the masked-down PTE pointer.
 */
static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
			 unsigned long *count)
{
	unsigned long size = PTE_PAGE_SIZE(*pte);
	unsigned long nr_ptes = PAGE_SIZE_PTE_COUNT(size);
	/* nr_ptes << 3 is the byte span of the series (8 bytes per u64 entry) */
	unsigned long align_mask = ~((nr_ptes << 3) - 1);

	if (page_size)
		*page_size = size;

	if (count)
		*count = nr_ptes;

	return (u64 *)((unsigned long)pte & align_mask);
}
/**************************************************************************** /****************************************************************************
* *
* Interrupt handling functions * Interrupt handling functions
...@@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain, ...@@ -1311,8 +1334,12 @@ static void domain_flush_np_cache(struct protection_domain *domain,
dma_addr_t iova, size_t size) dma_addr_t iova, size_t size)
{ {
if (unlikely(amd_iommu_np_cache)) { if (unlikely(amd_iommu_np_cache)) {
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size); domain_flush_pages(domain, iova, size);
domain_flush_complete(domain); domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
} }
} }
...@@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain) ...@@ -1425,7 +1452,7 @@ static void free_pagetable(struct protection_domain *domain)
BUG_ON(domain->mode < PAGE_MODE_NONE || BUG_ON(domain->mode < PAGE_MODE_NONE ||
domain->mode > PAGE_MODE_6_LEVEL); domain->mode > PAGE_MODE_6_LEVEL);
free_sub_pt(root, domain->mode, freelist); freelist = free_sub_pt(root, domain->mode, freelist);
free_page_list(freelist); free_page_list(freelist);
} }
...@@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain) ...@@ -1435,10 +1462,11 @@ static void free_pagetable(struct protection_domain *domain)
* another level increases the size of the address space by 9 bits to a size up * another level increases the size of the address space by 9 bits to a size up
* to 64 bits. * to 64 bits.
*/ */
static void increase_address_space(struct protection_domain *domain, static bool increase_address_space(struct protection_domain *domain,
gfp_t gfp) gfp_t gfp)
{ {
unsigned long flags; unsigned long flags;
bool ret = false;
u64 *pte; u64 *pte;
spin_lock_irqsave(&domain->lock, flags); spin_lock_irqsave(&domain->lock, flags);
...@@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain, ...@@ -1455,19 +1483,21 @@ static void increase_address_space(struct protection_domain *domain,
iommu_virt_to_phys(domain->pt_root)); iommu_virt_to_phys(domain->pt_root));
domain->pt_root = pte; domain->pt_root = pte;
domain->mode += 1; domain->mode += 1;
domain->updated = true;
ret = true;
out: out:
spin_unlock_irqrestore(&domain->lock, flags); spin_unlock_irqrestore(&domain->lock, flags);
return; return ret;
} }
static u64 *alloc_pte(struct protection_domain *domain, static u64 *alloc_pte(struct protection_domain *domain,
unsigned long address, unsigned long address,
unsigned long page_size, unsigned long page_size,
u64 **pte_page, u64 **pte_page,
gfp_t gfp) gfp_t gfp,
bool *updated)
{ {
int level, end_lvl; int level, end_lvl;
u64 *pte, *page; u64 *pte, *page;
...@@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain, ...@@ -1475,7 +1505,7 @@ static u64 *alloc_pte(struct protection_domain *domain,
BUG_ON(!is_power_of_2(page_size)); BUG_ON(!is_power_of_2(page_size));
while (address > PM_LEVEL_SIZE(domain->mode)) while (address > PM_LEVEL_SIZE(domain->mode))
increase_address_space(domain, gfp); *updated = increase_address_space(domain, gfp) || *updated;
level = domain->mode - 1; level = domain->mode - 1;
pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)];
...@@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain, ...@@ -1489,9 +1519,32 @@ static u64 *alloc_pte(struct protection_domain *domain,
__pte = *pte; __pte = *pte;
pte_level = PM_PTE_LEVEL(__pte); pte_level = PM_PTE_LEVEL(__pte);
if (!IOMMU_PTE_PRESENT(__pte) || /*
* If we replace a series of large PTEs, we need
* to tear down all of them.
*/
if (IOMMU_PTE_PRESENT(__pte) &&
pte_level == PAGE_MODE_7_LEVEL) { pte_level == PAGE_MODE_7_LEVEL) {
unsigned long count, i;
u64 *lpte;
lpte = first_pte_l7(pte, NULL, &count);
/*
* Unmap the replicated PTEs that still match the
* original large mapping
*/
for (i = 0; i < count; ++i)
cmpxchg64(&lpte[i], __pte, 0ULL);
*updated = true;
continue;
}
if (!IOMMU_PTE_PRESENT(__pte) ||
pte_level == PAGE_MODE_NONE) {
page = (u64 *)get_zeroed_page(gfp); page = (u64 *)get_zeroed_page(gfp);
if (!page) if (!page)
return NULL; return NULL;
...@@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain, ...@@ -1500,8 +1553,8 @@ static u64 *alloc_pte(struct protection_domain *domain,
/* pte could have been changed somewhere. */ /* pte could have been changed somewhere. */
if (cmpxchg64(pte, __pte, __npte) != __pte) if (cmpxchg64(pte, __pte, __npte) != __pte)
free_page((unsigned long)page); free_page((unsigned long)page);
else if (pte_level == PAGE_MODE_7_LEVEL) else if (IOMMU_PTE_PRESENT(__pte))
domain->updated = true; *updated = true;
continue; continue;
} }
...@@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain, ...@@ -1566,17 +1619,12 @@ static u64 *fetch_pte(struct protection_domain *domain,
*page_size = PTE_LEVEL_PAGE_SIZE(level); *page_size = PTE_LEVEL_PAGE_SIZE(level);
} }
if (PM_PTE_LEVEL(*pte) == 0x07) { /*
unsigned long pte_mask; * If we have a series of large PTEs, make
* sure to return a pointer to the first one.
/* */
* If we have a series of large PTEs, make if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
* sure to return a pointer to the first one. pte = first_pte_l7(pte, page_size, NULL);
*/
*page_size = pte_mask = PTE_PAGE_SIZE(*pte);
pte_mask = ~((PAGE_SIZE_PTE_COUNT(pte_mask) << 3) - 1);
pte = (u64 *)(((unsigned long)pte) & pte_mask);
}
return pte; return pte;
} }
...@@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom, ...@@ -1615,26 +1663,29 @@ static int iommu_map_page(struct protection_domain *dom,
gfp_t gfp) gfp_t gfp)
{ {
struct page *freelist = NULL; struct page *freelist = NULL;
bool updated = false;
u64 __pte, *pte; u64 __pte, *pte;
int i, count; int ret, i, count;
BUG_ON(!IS_ALIGNED(bus_addr, page_size)); BUG_ON(!IS_ALIGNED(bus_addr, page_size));
BUG_ON(!IS_ALIGNED(phys_addr, page_size)); BUG_ON(!IS_ALIGNED(phys_addr, page_size));
ret = -EINVAL;
if (!(prot & IOMMU_PROT_MASK)) if (!(prot & IOMMU_PROT_MASK))
return -EINVAL; goto out;
count = PAGE_SIZE_PTE_COUNT(page_size); count = PAGE_SIZE_PTE_COUNT(page_size);
pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp); pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
ret = -ENOMEM;
if (!pte) if (!pte)
return -ENOMEM; goto out;
for (i = 0; i < count; ++i) for (i = 0; i < count; ++i)
freelist = free_clear_pte(&pte[i], pte[i], freelist); freelist = free_clear_pte(&pte[i], pte[i], freelist);
if (freelist != NULL) if (freelist != NULL)
dom->updated = true; updated = true;
if (count > 1) { if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
...@@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom, ...@@ -1650,12 +1701,21 @@ static int iommu_map_page(struct protection_domain *dom,
for (i = 0; i < count; ++i) for (i = 0; i < count; ++i)
pte[i] = __pte; pte[i] = __pte;
update_domain(dom); ret = 0;
out:
if (updated) {
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
update_domain(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
/* Everything flushed out, free pages now */ /* Everything flushed out, free pages now */
free_page_list(freelist); free_page_list(freelist);
return 0; return ret;
} }
static unsigned long iommu_unmap_page(struct protection_domain *dom, static unsigned long iommu_unmap_page(struct protection_domain *dom,
...@@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain) ...@@ -1806,8 +1866,12 @@ static void free_gcr3_table(struct protection_domain *domain)
static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom) static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{ {
unsigned long flags;
spin_lock_irqsave(&dom->domain.lock, flags);
domain_flush_tlb(&dom->domain); domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain); domain_flush_complete(&dom->domain);
spin_unlock_irqrestore(&dom->domain.lock, flags);
} }
static void iova_domain_flush_tlb(struct iova_domain *iovad) static void iova_domain_flush_tlb(struct iova_domain *iovad)
...@@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data) ...@@ -2022,36 +2086,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1; domain->dev_cnt -= 1;
} }
/*
 * Make a device visible in a domain, provided the device is not yet
 * associated with any domain.
 *
 * domain->lock is held across both the check of dev_data->domain and the
 * call to do_attach().
 *
 * Returns 0 if do_attach() was called, or -EBUSY if dev_data->domain was
 * already set.
 */
static int __attach_device(struct iommu_dev_data *dev_data,
			   struct protection_domain *domain)
{
	int err = -EBUSY;

	/* lock domain */
	spin_lock(&domain->lock);

	if (dev_data->domain == NULL) {
		/* Attach alias group root */
		do_attach(dev_data, domain);
		err = 0;
	}

	/* ready */
	spin_unlock(&domain->lock);

	return err;
}
static void pdev_iommuv2_disable(struct pci_dev *pdev) static void pdev_iommuv2_disable(struct pci_dev *pdev)
{ {
pci_disable_ats(pdev); pci_disable_ats(pdev);
...@@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev, ...@@ -2133,19 +2167,28 @@ static int attach_device(struct device *dev,
unsigned long flags; unsigned long flags;
int ret; int ret;
spin_lock_irqsave(&domain->lock, flags);
dev_data = get_dev_data(dev); dev_data = get_dev_data(dev);
spin_lock(&dev_data->lock);
ret = -EBUSY;
if (dev_data->domain != NULL)
goto out;
if (!dev_is_pci(dev)) if (!dev_is_pci(dev))
goto skip_ats_check; goto skip_ats_check;
pdev = to_pci_dev(dev); pdev = to_pci_dev(dev);
if (domain->flags & PD_IOMMUV2_MASK) { if (domain->flags & PD_IOMMUV2_MASK) {
ret = -EINVAL;
if (!dev_data->passthrough) if (!dev_data->passthrough)
return -EINVAL; goto out;
if (dev_data->iommu_v2) { if (dev_data->iommu_v2) {
if (pdev_iommuv2_enable(pdev) != 0) if (pdev_iommuv2_enable(pdev) != 0)
return -EINVAL; goto out;
dev_data->ats.enabled = true; dev_data->ats.enabled = true;
dev_data->ats.qdep = pci_ats_queue_depth(pdev); dev_data->ats.qdep = pci_ats_queue_depth(pdev);
...@@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev, ...@@ -2158,9 +2201,9 @@ static int attach_device(struct device *dev,
} }
skip_ats_check: skip_ats_check:
spin_lock_irqsave(&amd_iommu_devtable_lock, flags); ret = 0;
ret = __attach_device(dev_data, domain);
spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags); do_attach(dev_data, domain);
/* /*
* We might boot into a crash-kernel here. The crashed kernel * We might boot into a crash-kernel here. The crashed kernel
...@@ -2169,23 +2212,14 @@ skip_ats_check: ...@@ -2169,23 +2212,14 @@ skip_ats_check:
*/ */
domain_flush_tlb_pde(domain); domain_flush_tlb_pde(domain);
return ret; domain_flush_complete(domain);
}
/*
* Removes a device from a protection domain (unlocked)
*/
static void __detach_device(struct iommu_dev_data *dev_data)
{
struct protection_domain *domain;
domain = dev_data->domain;
spin_lock(&domain->lock); out:
spin_unlock(&dev_data->lock);
do_detach(dev_data); spin_unlock_irqrestore(&domain->lock, flags);
spin_unlock(&domain->lock); return ret;
} }
/* /*
...@@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev) ...@@ -2200,6 +2234,10 @@ static void detach_device(struct device *dev)
dev_data = get_dev_data(dev); dev_data = get_dev_data(dev);
domain = dev_data->domain; domain = dev_data->domain;
spin_lock_irqsave(&domain->lock, flags);
spin_lock(&dev_data->lock);
/* /*
* First check if the device is still attached. It might already * First check if the device is still attached. It might already
* be detached from its domain because the generic * be detached from its domain because the generic
...@@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev) ...@@ -2207,15 +2245,12 @@ static void detach_device(struct device *dev)
* our alias handling. * our alias handling.
*/ */
if (WARN_ON(!dev_data->domain)) if (WARN_ON(!dev_data->domain))
return; goto out;
/* lock device table */ do_detach(dev_data);
spin_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(dev_data);
spin_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
if (!dev_is_pci(dev)) if (!dev_is_pci(dev))
return; goto out;
if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
pdev_iommuv2_disable(to_pci_dev(dev)); pdev_iommuv2_disable(to_pci_dev(dev));
...@@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev) ...@@ -2223,6 +2258,11 @@ static void detach_device(struct device *dev)
pci_disable_ats(to_pci_dev(dev)); pci_disable_ats(to_pci_dev(dev));
dev_data->ats.enabled = false; dev_data->ats.enabled = false;
out:
spin_unlock(&dev_data->lock);
spin_unlock_irqrestore(&domain->lock, flags);
} }
static int amd_iommu_add_device(struct device *dev) static int amd_iommu_add_device(struct device *dev)
...@@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain) ...@@ -2354,15 +2394,10 @@ static void update_device_table(struct protection_domain *domain)
static void update_domain(struct protection_domain *domain) static void update_domain(struct protection_domain *domain)
{ {
if (!domain->updated)
return;
update_device_table(domain); update_device_table(domain);
domain_flush_devices(domain); domain_flush_devices(domain);
domain_flush_tlb_pde(domain); domain_flush_tlb_pde(domain);
domain->updated = false;
} }
static int dir2prot(enum dma_data_direction direction) static int dir2prot(enum dma_data_direction direction)
...@@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev, ...@@ -2392,6 +2427,7 @@ static dma_addr_t __map_single(struct device *dev,
{ {
dma_addr_t offset = paddr & ~PAGE_MASK; dma_addr_t offset = paddr & ~PAGE_MASK;
dma_addr_t address, start, ret; dma_addr_t address, start, ret;
unsigned long flags;
unsigned int pages; unsigned int pages;
int prot = 0; int prot = 0;
int i; int i;
...@@ -2429,8 +2465,10 @@ out_unmap: ...@@ -2429,8 +2465,10 @@ out_unmap:
iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE); iommu_unmap_page(&dma_dom->domain, start, PAGE_SIZE);
} }
spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain); domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain); domain_flush_complete(&dma_dom->domain);
spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, address, pages); dma_ops_free_iova(dma_dom, address, pages);
...@@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, ...@@ -2459,8 +2497,12 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
} }
if (amd_iommu_unmap_flush) { if (amd_iommu_unmap_flush) {
unsigned long flags;
spin_lock_irqsave(&dma_dom->domain.lock, flags);
domain_flush_tlb(&dma_dom->domain); domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain); domain_flush_complete(&dma_dom->domain);
spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
dma_ops_free_iova(dma_dom, dma_addr, pages); dma_ops_free_iova(dma_dom, dma_addr, pages);
} else { } else {
pages = __roundup_pow_of_two(pages); pages = __roundup_pow_of_two(pages);
...@@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain) ...@@ -2866,16 +2908,16 @@ static void cleanup_domain(struct protection_domain *domain)
struct iommu_dev_data *entry; struct iommu_dev_data *entry;
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&amd_iommu_devtable_lock, flags); spin_lock_irqsave(&domain->lock, flags);
while (!list_empty(&domain->dev_list)) { while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list, entry = list_first_entry(&domain<