Commit 69a5c49a authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'iommu-updates-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull iommu updates from Joerg Roedel:

 - New DART IOMMU driver for Apple Silicon M1 chips

 - Optimizations for iommu_[map/unmap] performance

 - Selective TLB flush support for the AMD IOMMU driver to make it more
   efficient on emulated IOMMUs

 - Rework IOVA setup and default domain type setting to move more code
   out of IOMMU drivers and to support runtime switching between certain
   types of default domains

 - VT-d Updates from Lu Baolu:
      - Update the virtual command related registers
      - Enable Intel IOMMU scalable mode by default
      - Preset A/D bits for user space DMA usage
      - Allow devices to have more than 32 outstanding PRs
      - Various cleanups

 - ARM SMMU Updates from Will Deacon:
      SMMUv3:
       - Minor optimisation to avoid zeroing struct members on CMD submission
       - Increased use of batched commands to reduce submission latency
       - Refactoring in preparation for ECMDQ support
      SMMUv2:
       - Fix races when probing devices with identical StreamIDs
       - Optimise walk cache flushing for Qualcomm implementations
       - Allow deep sleep states for some Qualcomm SoCs with shared clocks

 - Various smaller optimizations, cleanups, and fixes

* tag 'iommu-updates-v5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (85 commits)
  iommu/io-pgtable: Abstract iommu_iotlb_gather access
  iommu/arm-smmu: Fix missing unlock on error in arm_smmu_device_group()
  iommu/vt-d: Add present bit check in pasid entry setup helpers
  iommu/vt-d: Use pasid_pte_is_present() helper function
  iommu/vt-d: Drop the kernel doc annotation
  iommu/vt-d: Allow devices to have more than 32 outstanding PRs
  iommu/vt-d: Preset A/D bits for user space DMA usage
  iommu/vt-d: Enable Intel IOMMU scalable mode by default
  iommu/vt-d: Refactor Kconfig a bit
  iommu/vt-d: Remove unnecessary oom message
  iommu/vt-d: Update the virtual command related registers
  iommu: Allow enabling non-strict mode dynamically
  iommu: Merge strictness and domain type configs
  iommu: Only log strictness for DMA domains
  iommu: Expose DMA domain strictness via sysfs
  iommu: Express DMA strictness via the domain type
  iommu/vt-d: Prepare for multiple DMA domain types
  iommu/arm-smmu: Prepare for multiple DMA domain types
  iommu/amd: Prepare for multiple DMA domain types
  iommu: Introduce explicit type for non-strict DMA domains
  ...
parents 3de18c86 d8768d7e
......@@ -42,8 +42,12 @@ Description: /sys/kernel/iommu_groups/<grp_id>/type shows the type of default
======== ======================================================
DMA All the DMA transactions from the device in this group
are translated by the iommu.
DMA-FQ As above, but using batched invalidation to lazily
remove translations after use. This may offer reduced
overhead at the cost of reduced memory protection.
identity All the DMA transactions from the device in this group
are not translated by the iommu.
are not translated by the iommu. Maximum performance
but zero protection.
auto Change to the type the device was booted with.
======== ======================================================
......
......@@ -301,10 +301,7 @@
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
fullflush - enable flushing of IO/TLB entries when
they are unmapped. Otherwise they are
flushed before they will be reused, which
is a lot of faster
fullflush - Deprecated, equivalent to iommu.strict=1
off - do not initialize any AMD IOMMU found in
the system
force_isolation - Force device isolation for all
......@@ -1958,18 +1955,17 @@
this case, gfx device will use physical address for
DMA.
strict [Default Off]
With this option on every unmap_single operation will
result in a hardware IOTLB flush operation as opposed
to batching them for performance.
Deprecated, equivalent to iommu.strict=1.
sp_off [Default Off]
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
not be supported.
sm_on [Default Off]
By default, scalable mode will be disabled even if the
hardware advertises that it has support for the scalable
mode translation. With this option set, scalable mode
will be used on hardware which claims to support it.
sm_on
Enable the Intel IOMMU scalable mode if the hardware
advertises that it has support for the scalable mode
translation.
sm_off
Disallow use of the Intel IOMMU scalable mode.
tboot_noforce [Default Off]
Do not force the Intel IOMMU enabled under tboot.
By default, tboot will force Intel IOMMU on, which
......@@ -2061,13 +2057,12 @@
throughput at the cost of reduced device isolation.
Will fall back to strict mode if not supported by
the relevant IOMMU driver.
1 - Strict mode (default).
1 - Strict mode.
DMA unmap operations invalidate IOMMU hardware TLBs
synchronously.
Note: on x86, the default behaviour depends on the
equivalent driver-specific parameters, but a strict
mode explicitly specified by either method takes
precedence.
unset - Use value of CONFIG_IOMMU_DEFAULT_DMA_{LAZY,STRICT}.
Note: on x86, strict mode specified via one of the
legacy driver-specific options takes precedence.
iommu.passthrough=
[ARM64, X86] Configure DMA to bypass the IOMMU by default.
......
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/iommu/apple,dart.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Apple DART IOMMU
maintainers:
- Sven Peter <sven@svenpeter.dev>
description: |+
Apple SoCs may contain an implementation of their Device Address
Resolution Table which provides a mandatory layer of address
translations for various masters.
Each DART instance is capable of handling up to 16 different streams
with individual pagetables and page-level read/write protection flags.
This DART IOMMU also raises interrupts in response to various
fault conditions.
properties:
compatible:
const: apple,t8103-dart
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
description:
Reference to the gate clock phandle if required for this IOMMU.
Optional since not all IOMMUs are attached to a clock gate.
'#iommu-cells':
const: 1
description:
Has to be one. The single cell describes the stream id emitted by
a master to the IOMMU.
required:
- compatible
- reg
- '#iommu-cells'
- interrupts
additionalProperties: false
examples:
- |+
dart1: iommu@82f80000 {
compatible = "apple,t8103-dart";
reg = <0x82f80000 0x4000>;
interrupts = <1 781 4>;
#iommu-cells = <1>;
};
master1 {
iommus = <&dart1 0>;
};
- |+
dart2a: iommu@82f00000 {
compatible = "apple,t8103-dart";
reg = <0x82f00000 0x4000>;
interrupts = <1 781 4>;
#iommu-cells = <1>;
};
dart2b: iommu@82f80000 {
compatible = "apple,t8103-dart";
reg = <0x82f80000 0x4000>;
interrupts = <1 781 4>;
#iommu-cells = <1>;
};
master2 {
iommus = <&dart2a 0>, <&dart2b 1>;
};
......@@ -1268,6 +1268,13 @@ L: linux-input@vger.kernel.org
S: Odd fixes
F: drivers/input/mouse/bcm5974.c
 
APPLE DART IOMMU DRIVER
M: Sven Peter <sven@svenpeter.dev>
L: iommu@lists.linux-foundation.org
S: Maintained
F: Documentation/devicetree/bindings/iommu/apple,dart.yaml
F: drivers/iommu/apple-dart.c
APPLE SMC DRIVER
M: Henrik Rydberg <rydberg@bitmath.org>
L: linux-hwmon@vger.kernel.org
......
......@@ -79,16 +79,57 @@ config IOMMU_DEBUGFS
debug/iommu directory, and then populate a subdirectory with
entries as required.
config IOMMU_DEFAULT_PASSTHROUGH
bool "IOMMU passthrough by default"
choice
prompt "IOMMU default domain type"
depends on IOMMU_API
default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU
default IOMMU_DEFAULT_DMA_STRICT
help
Enable passthrough by default, removing the need to pass in
iommu.passthrough=on or iommu=pt through command line. If this
is enabled, you can still disable with iommu.passthrough=off
or iommu=nopt depending on the architecture.
Choose the type of IOMMU domain used to manage DMA API usage by
device drivers. The options here typically represent different
levels of tradeoff between robustness/security and performance,
depending on the IOMMU driver. Not all IOMMUs support all options.
This choice can be overridden at boot via the command line, and for
some devices also at runtime via sysfs.
If unsure, say N here.
If unsure, keep the default.
config IOMMU_DEFAULT_DMA_STRICT
bool "Translated - Strict"
help
Trusted devices use translation to restrict their access to only
DMA-mapped pages, with strict TLB invalidation on unmap. Equivalent
to passing "iommu.passthrough=0 iommu.strict=1" on the command line.
Untrusted devices always use this mode, with an additional layer of
bounce-buffering such that they cannot gain access to any unrelated
data within a mapped page.
config IOMMU_DEFAULT_DMA_LAZY
bool "Translated - Lazy"
help
Trusted devices use translation to restrict their access to only
DMA-mapped pages, but with "lazy" batched TLB invalidation. This
mode allows higher performance with some IOMMUs due to reduced TLB
flushing, but at the cost of reduced isolation since devices may be
able to access memory for some time after it has been unmapped.
Equivalent to passing "iommu.passthrough=0 iommu.strict=0" on the
command line.
If this mode is not supported by the IOMMU driver, the effective
runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
config IOMMU_DEFAULT_PASSTHROUGH
bool "Passthrough"
help
Trusted devices are identity-mapped, giving them unrestricted access
to memory with minimal performance overhead. Equivalent to passing
"iommu.passthrough=1" (historically "iommu=pt") on the command line.
If this mode is not supported by the IOMMU driver, the effective
runtime default will fall back to IOMMU_DEFAULT_DMA_STRICT.
endchoice
config OF_IOMMU
def_bool y
......@@ -249,6 +290,20 @@ config SPAPR_TCE_IOMMU
Enables bits of IOMMU API required by VFIO. The iommu_ops
is not implemented as it is not necessary for VFIO.
config APPLE_DART
tristate "Apple DART IOMMU Support"
depends on ARCH_APPLE || (COMPILE_TEST && !GENERIC_ATOMIC64)
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
default ARCH_APPLE
help
Support for Apple DART (Device Address Resolution Table) IOMMUs
found in Apple ARM SoCs like the M1.
This IOMMU is required for most peripherals using DMA to access
the main memory.
Say Y here if you are using an Apple SoC.
# ARM IOMMU support
config ARM_SMMU
tristate "ARM Ltd. System MMU (SMMU) Support"
......
......@@ -29,3 +29,4 @@ obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
......@@ -779,12 +779,6 @@ extern u16 amd_iommu_last_bdf;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
/*
* If true, the addresses will be flushed on unmap time, not when
* they are reused
*/
extern bool amd_iommu_unmap_flush;
/* Smallest max PASID supported by any IOMMU in the system */
extern u32 amd_iommu_max_pasid;
......
......@@ -161,7 +161,6 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
to handle */
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
system */
......@@ -1850,8 +1849,11 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (ret)
return ret;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
pr_info("Using strict mode due to virtualization\n");
iommu_set_dma_strict();
amd_iommu_np_cache = true;
}
init_iommu_perf_ctr(iommu);
......@@ -3098,8 +3100,10 @@ static int __init parse_amd_iommu_intr(char *str)
static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
if (strncmp(str, "fullflush", 9) == 0) {
pr_warn("amd_iommu=fullflush deprecated; use iommu.strict=1 instead\n");
iommu_set_dma_strict();
}
if (strncmp(str, "force_enable", 12) == 0)
amd_iommu_force_enable = true;
if (strncmp(str, "off", 3) == 0)
......
......@@ -493,9 +493,6 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
unsigned long offset_mask, pte_pgsize;
u64 *pte, __pte;
if (pgtable->mode == PAGE_MODE_NONE)
return iova;
pte = fetch_pte(pgtable, iova, &pte_pgsize);
if (!pte || !IOMMU_PTE_PRESENT(*pte))
......
......@@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
vmg_tag, spa, flags);
if (dev_data) {
if (__ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
vmg_tag, spa, flags);
}
} else {
pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
......@@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event)
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
vmg_tag, gpa, flags_rmp, flags);
if (dev_data) {
if (__ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
vmg_tag, gpa, flags_rmp, flags);
}
} else {
pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
......@@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
if (pdev)
dev_data = dev_iommu_priv_get(&pdev->dev);
if (dev_data && __ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
domain_id, address, flags);
} else if (printk_ratelimit()) {
pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
if (dev_data) {
if (__ratelimit(&dev_data->rs)) {
pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n",
domain_id, address, flags);
}
} else {
pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n",
PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
domain_id, address, flags);
}
......@@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain,
}
static void domain_flush_pages(struct protection_domain *domain,
u64 address, size_t size)
u64 address, size_t size, int pde)
{
__domain_flush_pages(domain, address, size, 0);
if (likely(!amd_iommu_np_cache)) {
__domain_flush_pages(domain, address, size, pde);
return;
}
/*
* When NpCache is on, we infer that we run in a VM and use a vIOMMU.
* In such setups it is best to avoid flushes of ranges which are not
* naturally aligned, since it would lead to flushes of unmodified
* PTEs. Such flushes would require the hypervisor to do more work than
* necessary. Therefore, perform repeated flushes of aligned ranges
* until you cover the range. Each iteration flushes the smaller
* between the natural alignment of the address that we flush and the
* greatest naturally aligned region that fits in the range.
*/
while (size != 0) {
int addr_alignment = __ffs(address);
int size_alignment = __fls(size);
int min_alignment;
size_t flush_size;
/*
* size is always non-zero, but address might be zero, causing
* addr_alignment to be negative. As the casting of the
* argument in __ffs(address) to long might trim the high bits
* of the address on x86-32, cast to long when doing the check.
*/
if (likely((unsigned long)address != 0))
min_alignment = min(addr_alignment, size_alignment);
else
min_alignment = size_alignment;
flush_size = 1ul << min_alignment;
__domain_flush_pages(domain, address, flush_size, pde);
address += flush_size;
size -= flush_size;
}
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
void amd_iommu_domain_flush_complete(struct protection_domain *domain)
......@@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
domain_flush_pages(domain, iova, size, 1);
amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
......@@ -1707,14 +1750,9 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
static void amd_iommu_probe_finalize(struct device *dev)
{
struct iommu_domain *domain;
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
if (domain->type == IOMMU_DOMAIN_DMA)
iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
set_dma_ops(dev, NULL);
iommu_setup_dma_ops(dev, 0, U64_MAX);
}
static void amd_iommu_release_device(struct device *dev)
......@@ -1775,12 +1813,6 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void __init amd_iommu_init_dma_ops(void)
{
swiotlb = (iommu_default_passthrough() || sme_me_mask) ? 1 : 0;
if (amd_iommu_unmap_flush)
pr_info("IO/TLB flush on unmap enabled\n");
else
pr_info("Lazy IO/TLB flushing enabled\n");
iommu_set_dma_strict(amd_iommu_unmap_flush);
}
int __init amd_iommu_init_api(void)
......@@ -1924,16 +1956,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
domain->domain.geometry.aperture_end = ~0ULL;
domain->domain.geometry.force_aperture = true;
if (type == IOMMU_DOMAIN_DMA &&
iommu_get_dma_cookie(&domain->domain) == -ENOMEM)
goto free_domain;
return &domain->domain;
free_domain:
protection_domain_free(domain);
return NULL;
}
static void amd_iommu_domain_free(struct iommu_domain *dom)
......@@ -1950,9 +1973,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
if (!dom)
return;
if (dom->type == IOMMU_DOMAIN_DMA)
iommu_put_dma_cookie(&domain->domain);
if (domain->flags & PD_IOMMUV2_MASK)
free_gcr3_table(domain);
......@@ -2022,6 +2042,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
unsigned long iova, size_t size)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
if (ops->map)
domain_flush_np_cache(domain, iova, size);
}
static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
phys_addr_t paddr, size_t page_size, int iommu_prot,
gfp_t gfp)
......@@ -2040,26 +2070,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
if (ops->map) {
if (ops->map)
ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
domain_flush_np_cache(domain, iova, page_size);
}
return ret;
}
static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather,
unsigned long iova, size_t size)
{
/*
* AMD's IOMMU can flush as many pages as necessary in a single flush.
* Unless we run in a virtual machine, which can be inferred according
* to whether "non-present cache" is on, it is probably best to prefer
* (potentially) too extensive TLB flushing (i.e., more misses) over
* mutliple TLB flushes (i.e., more flushes). For virtual machines the
* hypervisor needs to synchronize the host IOMMU PTEs with those of
* the guest, and the trade-off is different: unnecessary TLB flushes
* should be avoided.
*/
if (amd_iommu_np_cache &&
iommu_iotlb_gather_is_disjoint(gather, iova, size))
iommu_iotlb_sync(domain, gather);
iommu_iotlb_gather_add_range(gather, iova, size);
}
static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
size_t page_size,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
size_t r;
if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
(domain->iop.mode == PAGE_MODE_NONE))
return 0;
return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
return r;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
......@@ -2162,7 +2216,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
static void amd_iommu_iotlb_sync(struct iommu_domain *domain,
struct iommu_iotlb_gather *gather)
{
amd_iommu_flush_iotlb_all(domain);
struct protection_domain *dom = to_pdomain(domain);
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
domain_flush_pages(dom, gather->start, gather->end - gather->start, 1);
amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
static int amd_iommu_def_domain_type(struct device *dev)
......@@ -2191,6 +2251,7 @@ const struct iommu_ops amd_iommu_ops = {
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map,
.iotlb_sync_map = amd_iommu_iotlb_sync_map,
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.probe_device = amd_iommu_probe_device,
......
......@@ -6,6 +6,7 @@
#define pr_fmt(fmt) "AMD-Vi: " fmt
#include <linux/refcount.h>
#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
......@@ -33,7 +34,7 @@ struct pri_queue {
struct pasid_state {
struct list_head list; /* For global state-list */
atomic_t count; /* Reference count */
refcount_t count; /* Reference count */
unsigned mmu_notifier_count; /* Counting nested mmu_notifier
calls */
struct mm_struct *mm; /* mm_struct for the faults */
......@@ -242,7 +243,7 @@ static struct pasid_state *get_pasid_state(struct device_state *dev_state,
ret = *ptr;
if (ret)
atomic_inc(&ret->count);
refcount_inc(&ret->count);
out_unlock:
spin_unlock_irqrestore(&dev_state->lock, flags);
......@@ -257,14 +258,14 @@ static void free_pasid_state(struct pasid_state *pasid_state)
static void put_pasid_state(struct pasid_state *pasid_state)
{
if (atomic_dec_and_test(&pasid_state->count))
if (refcount_dec_and_test(&pasid_state->count))
wake_up(&pasid_state->wq);
}
static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
atomic_dec(&pasid_state->count);
wait_event(pasid_state->wq, !atomic_read(&pasid_state->count));
refcount_dec(&pasid_state->count);