Merge tag 'iommu-updates-v4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:

 - support for interrupt virtualization in the AMD IOMMU driver. These
   patches were shared with the KVM tree and are already merged through
   that tree.

 - generic DT-binding support for the ARM-SMMU driver. With this the
   driver now makes use of the generic DMA-API code. This also required
   some changes outside of the IOMMU code, but these are acked by the
   respective maintainers.

 - more cleanups and fixes all over the place.

* tag 'iommu-updates-v4.9' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (40 commits)
  iommu/amd: No need to wait iommu completion if no dte irq entry change
  iommu/amd: Free domain id when free a domain of struct dma_ops_domain
  iommu/amd: Use standard bitmap operation to set bitmap
  iommu/amd: Clean up the cmpxchg64 invocation
  iommu/io-pgtable-arm: Check for v7s-incapable systems
  iommu/dma: Avoid PCI host bridge windows
  iommu/dma: Add support for mapping MSIs
  iommu/arm-smmu: Set domain geometry
  iommu/arm-smmu: Wire up generic configuration support
  Docs: dt: document ARM SMMU generic binding usage
  iommu/arm-smmu: Convert to iommu_fwspec
  iommu/arm-smmu: Intelligent SMR allocation
  iommu/arm-smmu: Add a stream map entry iterator
  iommu/arm-smmu: Streamline SMMU data lookups
  iommu/arm-smmu: Refactor mmu-masters handling
  iommu/arm-smmu: Keep track of S2CR state
  iommu/arm-smmu: Consolidate stream map entry state
  iommu/arm-smmu: Handle stream IDs more dynamically
  iommu/arm-smmu: Set PRIVCFG in stage 1 STEs
  iommu/arm-smmu: Support non-PCI devices with SMMUv3
  ...
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index 7b94c88..be57550 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -27,6 +27,12 @@
                       * "cmdq-sync" - CMD_SYNC complete
                       * "gerror"    - Global Error activated
 
+- #iommu-cells      : See the generic IOMMU binding described in
+                        devicetree/bindings/pci/pci-iommu.txt
+                      for details. For SMMUv3, must be 1, with each cell
+                      describing a single stream ID. All possible stream
+                      IDs which a device may emit must be described.
+
 ** SMMUv3 optional properties:
 
 - dma-coherent      : Present if DMA operations made by the SMMU (page
@@ -54,6 +60,6 @@
                              <GIC_SPI 79 IRQ_TYPE_EDGE_RISING>;
                 interrupt-names = "eventq", "priq", "cmdq-sync", "gerror";
                 dma-coherent;
-                #iommu-cells = <0>;
+                #iommu-cells = <1>;
                 msi-parent = <&its 0xff0000>;
         };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
index 19fe6f2..e862d148 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -35,12 +35,16 @@
                   interrupt per context bank. In the case of a single,
                   combined interrupt, it must be listed multiple times.
 
-- mmu-masters   : A list of phandles to device nodes representing bus
-                  masters for which the SMMU can provide a translation
-                  and their corresponding StreamIDs (see example below).
-                  Each device node linked from this list must have a
-                  "#stream-id-cells" property, indicating the number of
-                  StreamIDs associated with it.
+- #iommu-cells  : See Documentation/devicetree/bindings/iommu/iommu.txt
+                  for details. With a value of 1, each "iommus" entry
+                  represents a distinct stream ID emitted by that device
+                  into the relevant SMMU.
+
+                  SMMUs with stream matching support and complex masters
+                  may use a value of 2, where the second cell represents
+                  an SMR mask to combine with the ID in the first cell.
+                  Care must be taken to ensure the set of matched IDs
+                  does not result in conflicts.
 
 ** System MMU optional properties:
 
@@ -56,9 +60,20 @@
                   aliases of secure registers have to be used during
                   SMMU configuration.
 
-Example:
+** Deprecated properties:
 
-        smmu {
+- mmu-masters (deprecated in favour of the generic "iommus" binding) :
+                  A list of phandles to device nodes representing bus
+                  masters for which the SMMU can provide a translation
+                  and their corresponding Stream IDs. Each device node
+                  linked from this list must have a "#stream-id-cells"
+                  property, indicating the number of Stream ID
+                  arguments associated with its phandle.
+
+** Examples:
+
+        /* SMMU with stream matching or stream indexing */
+        smmu1: iommu {
                 compatible = "arm,smmu-v1";
                 reg = <0xba5e0000 0x10000>;
                 #global-interrupts = <2>;
@@ -68,11 +83,29 @@
                              <0 35 4>,
                              <0 36 4>,
                              <0 37 4>;
+                #iommu-cells = <1>;
+        };
 
-                /*
-                 * Two DMA controllers, the first with two StreamIDs (0xd01d
-                 * and 0xd01e) and the second with only one (0xd11c).
-                 */
-                mmu-masters = <&dma0 0xd01d 0xd01e>,
-                              <&dma1 0xd11c>;
+        /* device with two stream IDs, 0 and 7 */
+        master1 {
+                iommus = <&smmu1 0>,
+                         <&smmu1 7>;
+        };
+
+
+        /* SMMU with stream matching */
+        smmu2: iommu {
+                ...
+                #iommu-cells = <2>;
+        };
+
+        /* device with stream IDs 0 and 7 */
+        master2 {
+                iommus = <&smmu2 0 0>,
+                         <&smmu2 7 0>;
+        };
+
+        /* device with stream IDs 1, 17, 33 and 49 */
+        master3 {
+                iommus = <&smmu2 1 0x30>;
         };
diff --git a/Documentation/devicetree/bindings/pci/pci-iommu.txt b/Documentation/devicetree/bindings/pci/pci-iommu.txt
new file mode 100644
index 0000000..56c8296
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci-iommu.txt
@@ -0,0 +1,171 @@
+This document describes the generic device tree binding for describing the
+relationship between PCI(e) devices and IOMMU(s).
+
+Each PCI(e) device under a root complex is uniquely identified by its Requester
+ID (AKA RID). A Requester ID is a triplet of a Bus number, Device number, and
+Function number.
+
+For the purpose of this document, when treated as a numeric value, a RID is
+formatted such that:
+
+* Bits [15:8] are the Bus number.
+* Bits [7:3] are the Device number.
+* Bits [2:0] are the Function number.
+* Any other bits required for padding must be zero.
+
+IOMMUs may distinguish PCI devices through sideband data derived from the
+Requester ID. While a given PCI device can only master through one IOMMU, a
+root complex may split masters across a set of IOMMUs (e.g. with one IOMMU per
+bus).
+
+The generic 'iommus' property is insufficient to describe this relationship,
+and a mechanism is required to map from a PCI device to its IOMMU and sideband
+data.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+
+PCI root complex
+================
+
+Optional properties
+-------------------
+
+- iommu-map: Maps a Requester ID to an IOMMU and associated iommu-specifier
+  data.
+
+  The property is an arbitrary number of tuples of
+  (rid-base,iommu,iommu-base,length).
+
+  Any RID r in the interval [rid-base, rid-base + length) is associated with
+  the listed IOMMU, with the iommu-specifier (r - rid-base + iommu-base).
+
+- iommu-map-mask: A mask to be applied to each Requester ID prior to being
+  mapped to an iommu-specifier per the iommu-map property.
+
+
+Example (1)
+===========
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	iommu: iommu@a {
+		reg = <0xa 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	pci: pci@f {
+		reg = <0xf 0x1>;
+		compatible = "vendor,pcie-root-complex";
+		device_type = "pci";
+
+		/*
+		 * The sideband data provided to the IOMMU is the RID,
+		 * identity-mapped.
+		 */
+		iommu-map = <0x0 &iommu 0x0 0x10000>;
+	};
+};
+
+
+Example (2)
+===========
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	iommu: iommu@a {
+		reg = <0xa 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	pci: pci@f {
+		reg = <0xf 0x1>;
+		compatible = "vendor,pcie-root-complex";
+		device_type = "pci";
+
+		/*
+		 * The sideband data provided to the IOMMU is the RID with the
+		 * function bits masked out.
+		 */
+		iommu-map = <0x0 &iommu 0x0 0x10000>;
+		iommu-map-mask = <0xfff8>;
+	};
+};
+
+
+Example (3)
+===========
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	iommu: iommu@a {
+		reg = <0xa 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	pci: pci@f {
+		reg = <0xf 0x1>;
+		compatible = "vendor,pcie-root-complex";
+		device_type = "pci";
+
+		/*
+		 * The sideband data provided to the IOMMU is the RID,
+		 * but the high bits of the bus number are flipped.
+		 */
+		iommu-map = <0x0000 &iommu 0x8000 0x8000>,
+			    <0x8000 &iommu 0x0000 0x8000>;
+	};
+};
+
+
+Example (4)
+===========
+
+/ {
+	#address-cells = <1>;
+	#size-cells = <1>;
+
+	iommu_a: iommu@a {
+		reg = <0xa 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	iommu_b: iommu@b {
+		reg = <0xb 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	iommu_c: iommu@c {
+		reg = <0xc 0x1>;
+		compatible = "vendor,some-iommu";
+		#iommu-cells = <1>;
+	};
+
+	pci: pci@f {
+		reg = <0xf 0x1>;
+		compatible = "vendor,pcie-root-complex";
+		device_type = "pci";
+
+		/*
+		 * Devices with bus number 0-127 are mastered via IOMMU
+		 * a, with sideband data being RID[14:0].
+		 * Devices with bus number 128-255 are mastered via
+		 * IOMMU b, with sideband data being RID[14:0].
+		 * No devices master via IOMMU c.
+		 */
+		iommu-map = <0x0000 &iommu_a 0x0000 0x8000>,
+			    <0x8000 &iommu_b 0x0000 0x8000>;
+	};
+};
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index bdacead..3f74d0d 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -828,7 +828,7 @@
 	 * then the IOMMU core will have already configured a group for this
 	 * device, and allocated the default domain for that group.
 	 */
-	if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
+	if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
 		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
 			dev_name(dev));
 		return false;
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index d28bdab..7ef4a09 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -255,7 +255,6 @@
 CONFIG_DMADEVICES=y
 CONFIG_EEEPC_LAPTOP=y
 CONFIG_AMD_IOMMU=y
-CONFIG_AMD_IOMMU_STATS=y
 CONFIG_INTEL_IOMMU=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_EFI_VARS=y
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index c8de491..87f6b56 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -66,7 +66,7 @@
 	if (ret)
 		goto free_domain;
 
-	ret = iommu_dma_init_domain(domain, start, size);
+	ret = iommu_dma_init_domain(domain, start, size, NULL);
 	if (ret)
 		goto put_cookie;
 
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d432ca8..8ee54d7 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -309,7 +309,7 @@
 
 config ARM_SMMU_V3
 	bool "ARM Ltd. System MMU Version 3 (SMMUv3) Support"
-	depends on ARM64 && PCI
+	depends on ARM64
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
 	select GENERIC_MSI_IRQ_DOMAIN
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 58fa8cc..754595e 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -103,7 +103,7 @@
 	struct flush_queue_entry *entries;
 };
 
-DEFINE_PER_CPU(struct flush_queue, flush_queue);
+static DEFINE_PER_CPU(struct flush_queue, flush_queue);
 
 static atomic_t queue_timer_on;
 static struct timer_list queue_timer;
@@ -1361,7 +1361,8 @@
 
 			__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
 
-			if (cmpxchg64(pte, __pte, __npte)) {
+			/* pte could have been changed somewhere. */
+			if (cmpxchg64(pte, __pte, __npte) != __pte) {
 				free_page((unsigned long)page);
 				continue;
 			}
@@ -1741,6 +1742,9 @@
 
 	free_pagetable(&dom->domain);
 
+	if (dom->domain.id)
+		domain_id_free(dom->domain.id);
+
 	kfree(dom);
 }
 
@@ -3649,7 +3653,7 @@
 
 	table = irq_lookup_table[devid];
 	if (table)
-		goto out;
+		goto out_unlock;
 
 	alias = amd_iommu_alias_table[devid];
 	table = irq_lookup_table[alias];
@@ -3663,7 +3667,7 @@
 	/* Nothing there yet, allocate new irq remapping table */
 	table = kzalloc(sizeof(*table), GFP_ATOMIC);
 	if (!table)
-		goto out;
+		goto out_unlock;
 
 	/* Initialize table spin-lock */
 	spin_lock_init(&table->lock);
@@ -3676,7 +3680,7 @@
 	if (!table->table) {
 		kfree(table);
 		table = NULL;
-		goto out;
+		goto out_unlock;
 	}
 
 	if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
@@ -4153,6 +4157,7 @@
 	}
 	if (index < 0) {
 		pr_warn("Failed to allocate IRTE\n");
+		ret = index;
 		goto out_free_parent;
 	}
 
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index cd17136..157e934 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -20,6 +20,7 @@
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/list.h>
+#include <linux/bitmap.h>
 #include <linux/slab.h>
 #include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
@@ -2285,7 +2286,7 @@
 	 * never allocate domain 0 because its used as the non-allocated and
 	 * error value placeholder
 	 */
-	amd_iommu_pd_alloc_bitmap[0] = 1;
+	__set_bit(0, amd_iommu_pd_alloc_bitmap);
 
 	spin_lock_init(&amd_iommu_pd_lock);
 
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index faa3b48..7eb60c1 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -79,12 +79,6 @@
 extern int amd_iommu_complete_ppr(struct pci_dev *pdev, int pasid,
 				  int status, int tag);
 
-#ifndef CONFIG_AMD_IOMMU_STATS
-
-static inline void amd_iommu_stats_init(void) { }
-
-#endif /* !CONFIG_AMD_IOMMU_STATS */
-
 static inline bool is_rd890_iommu(struct pci_dev *pdev)
 {
 	return (pdev->vendor == PCI_VENDOR_ID_ATI) &&
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 641e887..15c01c3 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -30,10 +30,13 @@
 #include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_iommu.h>
 #include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 
+#include <linux/amba/bus.h>
+
 #include "io-pgtable.h"
 
 /* MMIO registers */
@@ -123,6 +126,10 @@
 #define CR2_RECINVSID			(1 << 1)
 #define CR2_E2H				(1 << 0)
 
+#define ARM_SMMU_GBPA			0x44
+#define GBPA_ABORT			(1 << 20)
+#define GBPA_UPDATE			(1 << 31)
+
 #define ARM_SMMU_IRQ_CTRL		0x50
 #define IRQ_CTRL_EVTQ_IRQEN		(1 << 2)
 #define IRQ_CTRL_PRIQ_IRQEN		(1 << 1)
@@ -260,6 +267,9 @@
 #define STRTAB_STE_1_SHCFG_INCOMING	1UL
 #define STRTAB_STE_1_SHCFG_SHIFT	44
 
+#define STRTAB_STE_1_PRIVCFG_UNPRIV	2UL
+#define STRTAB_STE_1_PRIVCFG_SHIFT	48
+
 #define STRTAB_STE_2_S2VMID_SHIFT	0
 #define STRTAB_STE_2_S2VMID_MASK	0xffffUL
 #define STRTAB_STE_2_VTCR_SHIFT		32
@@ -606,12 +616,9 @@
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 };
 
-/* SMMU private data for an IOMMU group */
-struct arm_smmu_group {
+/* SMMU private data for each master */
+struct arm_smmu_master_data {
 	struct arm_smmu_device		*smmu;
-	struct arm_smmu_domain		*domain;
-	int				num_sids;
-	u32				*sids;
 	struct arm_smmu_strtab_ent	ste;
 };
 
@@ -713,19 +720,15 @@
 	writel(q->prod, q->prod_reg);
 }
 
-static bool __queue_cons_before(struct arm_smmu_queue *q, u32 until)
-{
-	if (Q_WRP(q, q->cons) == Q_WRP(q, until))
-		return Q_IDX(q, q->cons) < Q_IDX(q, until);
-
-	return Q_IDX(q, q->cons) >= Q_IDX(q, until);
-}
-
-static int queue_poll_cons(struct arm_smmu_queue *q, u32 until, bool wfe)
+/*
+ * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * is empty. Otherwise, wait until there is at least one free slot.
+ */
+static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
 {
 	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
 
-	while (queue_sync_cons(q), __queue_cons_before(q, until)) {
+	while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
 		if (ktime_compare(ktime_get(), timeout) > 0)
 			return -ETIMEDOUT;
 
@@ -896,8 +899,8 @@
 static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 				    struct arm_smmu_cmdq_ent *ent)
 {
-	u32 until;
 	u64 cmd[CMDQ_ENT_DWORDS];
+	unsigned long flags;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
 	struct arm_smmu_queue *q = &smmu->cmdq.q;
 
@@ -907,20 +910,15 @@
 		return;
 	}
 
-	spin_lock(&smmu->cmdq.lock);
-	while (until = q->prod + 1, queue_insert_raw(q, cmd) == -ENOSPC) {
-		/*
-		 * Keep the queue locked, otherwise the producer could wrap
-		 * twice and we could see a future consumer pointer that looks
-		 * like it's behind us.
-		 */
-		if (queue_poll_cons(q, until, wfe))
+	spin_lock_irqsave(&smmu->cmdq.lock, flags);
+	while (queue_insert_raw(q, cmd) == -ENOSPC) {
+		if (queue_poll_cons(q, false, wfe))
 			dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
 	}
 
-	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, until, wfe))
+	if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
-	spin_unlock(&smmu->cmdq.lock);
+	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
 /* Context descriptor manipulation functions */
@@ -1073,7 +1071,9 @@
 #ifdef CONFIG_PCI_ATS
 			 STRTAB_STE_1_EATS_TRANS << STRTAB_STE_1_EATS_SHIFT |
 #endif
-			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
+			 STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT |
+			 STRTAB_STE_1_PRIVCFG_UNPRIV <<
+			 STRTAB_STE_1_PRIVCFG_SHIFT);
 
 		if (smmu->features & ARM_SMMU_FEAT_STALLS)
 			dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
@@ -1161,36 +1161,66 @@
 	struct arm_smmu_queue *q = &smmu->evtq.q;
 	u64 evt[EVTQ_ENT_DWORDS];
 
-	while (!queue_remove_raw(q, evt)) {
-		u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
+	do {
+		while (!queue_remove_raw(q, evt)) {
+			u8 id = evt[0] >> EVTQ_0_ID_SHIFT & EVTQ_0_ID_MASK;
 
-		dev_info(smmu->dev, "event 0x%02x received:\n", id);
-		for (i = 0; i < ARRAY_SIZE(evt); ++i)
-			dev_info(smmu->dev, "\t0x%016llx\n",
-				 (unsigned long long)evt[i]);
-	}
+			dev_info(smmu->dev, "event 0x%02x received:\n", id);
+			for (i = 0; i < ARRAY_SIZE(evt); ++i)
+				dev_info(smmu->dev, "\t0x%016llx\n",
+					 (unsigned long long)evt[i]);
+
+		}
+
+		/*
+		 * Not much we can do on overflow, so scream and pretend we're
+		 * trying harder.
+		 */
+		if (queue_sync_prod(q) == -EOVERFLOW)
+			dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
+	} while (!queue_empty(q));
 
 	/* Sync our overflow flag, as we believe we're up to speed */
 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_evtq_handler(int irq, void *dev)
+static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt)
 {
-	irqreturn_t ret = IRQ_WAKE_THREAD;
-	struct arm_smmu_device *smmu = dev;
-	struct arm_smmu_queue *q = &smmu->evtq.q;
+	u32 sid, ssid;
+	u16 grpid;
+	bool ssv, last;
 
-	/*
-	 * Not much we can do on overflow, so scream and pretend we're
-	 * trying harder.
-	 */
-	if (queue_sync_prod(q) == -EOVERFLOW)
-		dev_err(smmu->dev, "EVTQ overflow detected -- events lost\n");
-	else if (queue_empty(q))
-		ret = IRQ_NONE;
+	sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
+	ssv = evt[0] & PRIQ_0_SSID_V;
+	ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
+	last = evt[0] & PRIQ_0_PRG_LAST;
+	grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
 
-	return ret;
+	dev_info(smmu->dev, "unexpected PRI request received:\n");
+	dev_info(smmu->dev,
+		 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
+		 sid, ssid, grpid, last ? "L" : "",
+		 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
+		 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
+		 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
+		 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
+		 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
+
+	if (last) {
+		struct arm_smmu_cmdq_ent cmd = {
+			.opcode			= CMDQ_OP_PRI_RESP,
+			.substream_valid	= ssv,
+			.pri			= {
+				.sid	= sid,
+				.ssid	= ssid,
+				.grpid	= grpid,
+				.resp	= PRI_RESP_DENY,
+			},
+		};
+
+		arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+	}
 }
 
 static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
@@ -1199,63 +1229,19 @@
 	struct arm_smmu_queue *q = &smmu->priq.q;
 	u64 evt[PRIQ_ENT_DWORDS];
 
-	while (!queue_remove_raw(q, evt)) {
-		u32 sid, ssid;
-		u16 grpid;
-		bool ssv, last;
+	do {
+		while (!queue_remove_raw(q, evt))
+			arm_smmu_handle_ppr(smmu, evt);
 
-		sid = evt[0] >> PRIQ_0_SID_SHIFT & PRIQ_0_SID_MASK;
-		ssv = evt[0] & PRIQ_0_SSID_V;
-		ssid = ssv ? evt[0] >> PRIQ_0_SSID_SHIFT & PRIQ_0_SSID_MASK : 0;
-		last = evt[0] & PRIQ_0_PRG_LAST;
-		grpid = evt[1] >> PRIQ_1_PRG_IDX_SHIFT & PRIQ_1_PRG_IDX_MASK;
-
-		dev_info(smmu->dev, "unexpected PRI request received:\n");
-		dev_info(smmu->dev,
-			 "\tsid 0x%08x.0x%05x: [%u%s] %sprivileged %s%s%s access at iova 0x%016llx\n",
-			 sid, ssid, grpid, last ? "L" : "",
-			 evt[0] & PRIQ_0_PERM_PRIV ? "" : "un",
-			 evt[0] & PRIQ_0_PERM_READ ? "R" : "",
-			 evt[0] & PRIQ_0_PERM_WRITE ? "W" : "",
-			 evt[0] & PRIQ_0_PERM_EXEC ? "X" : "",
-			 evt[1] & PRIQ_1_ADDR_MASK << PRIQ_1_ADDR_SHIFT);
-
-		if (last) {
-			struct arm_smmu_cmdq_ent cmd = {
-				.opcode			= CMDQ_OP_PRI_RESP,
-				.substream_valid	= ssv,
-				.pri			= {
-					.sid	= sid,
-					.ssid	= ssid,
-					.grpid	= grpid,
-					.resp	= PRI_RESP_DENY,
-				},
-			};
-
-			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
-		}
-	}
+		if (queue_sync_prod(q) == -EOVERFLOW)
+			dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
+	} while (!queue_empty(q));
 
 	/* Sync our overflow flag, as we believe we're up to speed */
 	q->cons = Q_OVF(q, q->prod) | Q_WRP(q, q->cons) | Q_IDX(q, q->cons);
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t arm_smmu_priq_handler(int irq, void *dev)
-{
-	irqreturn_t ret = IRQ_WAKE_THREAD;
-	struct arm_smmu_device *smmu = dev;
-	struct arm_smmu_queue *q = &smmu->priq.q;
-
-	/* PRIQ overflow indicates a programming error */
-	if (queue_sync_prod(q) == -EOVERFLOW)
-		dev_err(smmu->dev, "PRIQ overflow detected -- requests lost\n");
-	else if (queue_empty(q))
-		ret = IRQ_NONE;
-
-	return ret;
-}
-
 static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
 {
 	/* We don't actually use CMD_SYNC interrupts for anything */
@@ -1288,15 +1274,11 @@
 	if (active & GERROR_MSI_GERROR_ABT_ERR)
 		dev_warn(smmu->dev, "GERROR MSI write aborted\n");
 
-	if (active & GERROR_MSI_PRIQ_ABT_ERR) {
+	if (active & GERROR_MSI_PRIQ_ABT_ERR)
 		dev_warn(smmu->dev, "PRIQ MSI write aborted\n");
-		arm_smmu_priq_handler(irq, smmu->dev);
-	}
 
-	if (active & GERROR_MSI_EVTQ_ABT_ERR) {
+	if (active & GERROR_MSI_EVTQ_ABT_ERR)
 		dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
-		arm_smmu_evtq_handler(irq, smmu->dev);
-	}
 
 	if (active & GERROR_MSI_CMDQ_ABT_ERR) {
 		dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
@@ -1569,6 +1551,8 @@
 		return -ENOMEM;
 
 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+	domain->geometry.aperture_end = (1UL << ias) - 1;
+	domain->geometry.force_aperture = true;
 	smmu_domain->pgtbl_ops = pgtbl_ops;
 
 	ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
@@ -1578,20 +1562,6 @@
 	return ret;
 }
 
-static struct arm_smmu_group *arm_smmu_group_get(struct device *dev)
-{
-	struct iommu_group *group;
-	struct arm_smmu_group *smmu_group;
-
-	group = iommu_group_get(dev);
-	if (!group)
-		return NULL;
-
-	smmu_group = iommu_group_get_iommudata(group);
-	iommu_group_put(group);
-	return smmu_group;
-}
-
 static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
 {
 	__le64 *step;
@@ -1614,27 +1584,17 @@
 	return step;
 }
 
-static int arm_smmu_install_ste_for_group(struct arm_smmu_group *smmu_group)
+static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
 {
 	int i;
-	struct arm_smmu_domain *smmu_domain = smmu_group->domain;
-	struct arm_smmu_strtab_ent *ste = &smmu_group->ste;
-	struct arm_smmu_device *smmu = smmu_group->smmu;
+	struct arm_smmu_master_data *master = fwspec->iommu_priv;
+	struct arm_smmu_device *smmu = master->smmu;
 
-	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
-		ste->s1_cfg = &smmu_domain->s1_cfg;
-		ste->s2_cfg = NULL;
-		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
-	} else {
-		ste->s1_cfg = NULL;
-		ste->s2_cfg = &smmu_domain->s2_cfg;
-	}
-
-	for (i = 0; i < smmu_group->num_sids; ++i) {
-		u32 sid = smmu_group->sids[i];
+	for (i = 0; i < fwspec->num_ids; ++i) {
+		u32 sid = fwspec->ids[i];
 		__le64 *step = arm_smmu_get_step_for_sid(smmu, sid);
 
-		arm_smmu_write_strtab_ent(smmu, sid, step, ste);
+		arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
 	}
 
 	return 0;
@@ -1642,13 +1602,11 @@
 
 static void arm_smmu_detach_dev(struct device *dev)
 {
-	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+	struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
 
-	smmu_group->ste.bypass = true;
-	if (arm_smmu_install_ste_for_group(smmu_group) < 0)
+	master->ste.bypass = true;
+	if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
 		dev_warn(dev, "failed to install bypass STE\n");
-
-	smmu_group->domain = NULL;
 }
 
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -1656,16 +1614,20 @@
 	int ret = 0;
 	struct arm_smmu_device *smmu;
 	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-	struct arm_smmu_group *smmu_group = arm_smmu_group_get(dev);
+	struct arm_smmu_master_data *master;
+	struct arm_smmu_strtab_ent *ste;
 
-	if (!smmu_group)
+	if (!dev->iommu_fwspec)
 		return -ENOENT;
 
+	master = dev->iommu_fwspec->iommu_priv;
+	smmu = master->smmu;
+	ste = &master->ste;
+
 	/* Already attached to a different domain? */
-	if (smmu_group->domain && smmu_group->domain != smmu_domain)
+	if (!ste->bypass)
 		arm_smmu_detach_dev(dev);
 
-	smmu = smmu_group->smmu;
 	mutex_lock(&smmu_domain->init_mutex);
 
 	if (!smmu_domain->smmu) {
@@ -1684,21 +1646,21 @@
 		goto out_unlock;
 	}
 
-	/* Group already attached to this domain? */
-	if (smmu_group->domain)
-		goto out_unlock;
+	ste->bypass = false;
+	ste->valid = true;
 
-	smmu_group->domain	= smmu_domain;
+	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+		ste->s1_cfg = &smmu_domain->s1_cfg;
+		ste->s2_cfg = NULL;
+		arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
+	} else {
+		ste->s1_cfg = NULL;
+		ste->s2_cfg = &smmu_domain->s2_cfg;
+	}
 
-	/*
-	 * FIXME: This should always be "false" once we have IOMMU-backed
-	 * DMA ops for all devices behind the SMMU.
-	 */
-	smmu_group->ste.bypass	= domain->type == IOMMU_DOMAIN_DMA;
-
-	ret = arm_smmu_install_ste_for_group(smmu_group);
+	ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
 	if (ret < 0)
-		smmu_group->domain = NULL;
+		ste->valid = false;
 
 out_unlock:
 	mutex_unlock(&smmu_domain->init_mutex);
@@ -1757,40 +1719,19 @@
 	return ret;
 }
 
-static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *sidp)
+static struct platform_driver arm_smmu_driver;
+
+static int arm_smmu_match_node(struct device *dev, void *data)
 {
-	*(u32 *)sidp = alias;
-	return 0; /* Continue walking */
+	return dev->of_node == data;
 }
 
-static void __arm_smmu_release_pci_iommudata(void *data)
+static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
 {
-	kfree(data);
-}
-
-static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
-{
-	struct device_node *of_node;
-	struct platform_device *smmu_pdev;
-	struct arm_smmu_device *smmu = NULL;
-	struct pci_bus *bus = pdev->bus;
-
-	/* Walk up to the root bus */
-	while (!pci_is_root_bus(bus))
-		bus = bus->parent;
-
-	/* Follow the "iommus" phandle from the host controller */
-	of_node = of_parse_phandle(bus->bridge->parent->of_node, "iommus", 0);
-	if (!of_node)
-		return NULL;
-
-	/* See if we can find an SMMU corresponding to the phandle */
-	smmu_pdev = of_find_device_by_node(of_node);
-	if (smmu_pdev)
-		smmu = platform_get_drvdata(smmu_pdev);
-
-	of_node_put(of_node);
-	return smmu;
+	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
+						np, arm_smmu_match_node);
+	put_device(dev);
+	return dev ? dev_get_drvdata(dev) : NULL;
 }
 
 static bool arm_smmu_sid_in_range(struct arm_smmu_device *smmu, u32 sid)
@@ -1803,94 +1744,91 @@
 	return sid < limit;
 }
 
+static struct iommu_ops arm_smmu_ops;
+
 static int arm_smmu_add_device(struct device *dev)
 {
 	int i, ret;
-	u32 sid, *sids;
-	struct pci_dev *pdev;
-	struct iommu_group *group;
-	struct arm_smmu_group *smmu_group;
 	struct arm_smmu_device *smmu;
+	struct arm_smmu_master_data *master;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct iommu_group *group;
 
-	/* We only support PCI, for now */
-	if (!dev_is_pci(dev))
+	if (!fwspec || fwspec->ops != &arm_smmu_ops)
 		return -ENODEV;
-
-	pdev = to_pci_dev(dev);
-	group = iommu_group_get_for_dev(dev);
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-
-	smmu_group = iommu_group_get_iommudata(group);
-	if (!smmu_group) {
-		smmu = arm_smmu_get_for_pci_dev(pdev);
-		if (!smmu) {
-			ret = -ENOENT;
-			goto out_remove_dev;
-		}
-
-		smmu_group = kzalloc(sizeof(*smmu_group), GFP_KERNEL);
-		if (!smmu_group) {
-			ret = -ENOMEM;
-			goto out_remove_dev;
-		}
-
-		smmu_group->ste.valid	= true;
-		smmu_group->smmu	= smmu;
-		iommu_group_set_iommudata(group, smmu_group,
-					  __arm_smmu_release_pci_iommudata);
+	/*
+	 * We _can_ actually withstand dodgy bus code re-calling add_device()
+	 * without an intervening remove_device()/of_xlate() sequence, but
+	 * we're not going to do so quietly...
+	 */
+	if (WARN_ON_ONCE(fwspec->iommu_priv)) {
+		master = fwspec->iommu_priv;
+		smmu = master->smmu;
 	} else {
-		smmu = smmu_group->smmu;
+		smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
+		if (!smmu)
+			return -ENODEV;
+		master = kzalloc(sizeof(*master), GFP_KERNEL);
+		if (!master)
+			return -ENOMEM;
+
+		master->smmu = smmu;
+		fwspec->iommu_priv = master;
 	}
 
-	/* Assume SID == RID until firmware tells us otherwise */
-	pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
-	for (i = 0; i < smmu_group->num_sids; ++i) {
-		/* If we already know about this SID, then we're done */
-		if (smmu_group->sids[i] == sid)
-			goto out_put_group;
+	/* Check the SIDs are in range of the SMMU and our stream table */
+	for (i = 0; i < fwspec->num_ids; i++) {
+		u32 sid = fwspec->ids[i];
+
+		if (!arm_smmu_sid_in_range(smmu, sid))
+			return -ERANGE;
+
+		/* Ensure l2 strtab is initialised */
+		if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
+			ret = arm_smmu_init_l2_strtab(smmu, sid);
+			if (ret)
+				return ret;
+		}
 	}
 
-	/* Check the SID is in range of the SMMU and our stream table */
-	if (!arm_smmu_sid_in_range(smmu, sid)) {
-		ret = -ERANGE;
-		goto out_remove_dev;
-	}
+	group = iommu_group_get_for_dev(dev);
+	if (!IS_ERR(group))
+		iommu_group_put(group);
 
-	/* Ensure l2 strtab is initialised */
-	if (smmu->features & ARM_SMMU_FEAT_2_LVL_STRTAB) {
-		ret = arm_smmu_init_l2_strtab(smmu, sid);
-		if (ret)
-			goto out_remove_dev;
-	}
-
-	/* Resize the SID array for the group */
-	smmu_group->num_sids++;
-	sids = krealloc(smmu_group->sids, smmu_group->num_sids * sizeof(*sids),
-			GFP_KERNEL);
-	if (!sids) {
-		smmu_group->num_sids--;
-		ret = -ENOMEM;
-		goto out_remove_dev;
-	}
-
-	/* Add the new SID */
-	sids[smmu_group->num_sids - 1] = sid;
-	smmu_group->sids = sids;
-
-out_put_group:
-	iommu_group_put(group);
-	return 0;
-
-out_remove_dev:
-	iommu_group_remove_device(dev);
-	iommu_group_put(group);
-	return ret;
+	return PTR_ERR_OR_ZERO(group);
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct arm_smmu_master_data *master;
+
+	if (!fwspec || fwspec->ops != &arm_smmu_ops)
+		return;
+
+	master = fwspec->iommu_priv;
+	if (master && master->ste.valid)
+		arm_smmu_detach_dev(dev);
 	iommu_group_remove_device(dev);
+	kfree(master);
+	iommu_fwspec_free(dev);
+}
+
+static struct iommu_group *arm_smmu_device_group(struct device *dev)
+{
+	struct iommu_group *group;
+
+	/*
+	 * We don't support devices sharing stream IDs other than PCI RID
+	 * aliases, since the necessary ID-to-device lookup becomes rather
+	 * impractical given a potential sparse 32-bit stream ID space.
+	 */
+	if (dev_is_pci(dev))
+		group = pci_device_group(dev);
+	else
+		group = generic_device_group(dev);
+
+	return group;
 }
 
 static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
@@ -1937,6 +1875,11 @@
 	return ret;
 }
 
+static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	return iommu_fwspec_add_ids(dev, args->args, 1);
+}
+
 static struct iommu_ops arm_smmu_ops = {
 	.capable		= arm_smmu_capable,
 	.domain_alloc		= arm_smmu_domain_alloc,
@@ -1948,9 +1891,10 @@
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
-	.device_group		= pci_device_group,
+	.device_group		= arm_smmu_device_group,
 	.domain_get_attr	= arm_smmu_domain_get_attr,
 	.domain_set_attr	= arm_smmu_domain_set_attr,
+	.of_xlate		= arm_smmu_of_xlate,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };
 
@@ -2151,6 +2095,24 @@
 					  1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+/* GBPA is "special" */
+static int arm_smmu_update_gbpa(struct arm_smmu_device *smmu, u32 set, u32 clr)
+{
+	int ret;
+	u32 reg, __iomem *gbpa = smmu->base + ARM_SMMU_GBPA;
+
+	ret = readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+					 1, ARM_SMMU_POLL_TIMEOUT_US);
+	if (ret)
+		return ret;
+
+	reg &= ~clr;
+	reg |= set;
+	writel_relaxed(reg | GBPA_UPDATE, gbpa);
+	return readl_relaxed_poll_timeout(gbpa, reg, !(reg & GBPA_UPDATE),
+					  1, ARM_SMMU_POLL_TIMEOUT_US);
+}
+
 static void arm_smmu_free_msis(void *data)
 {
 	struct device *dev = data;
@@ -2235,10 +2197,10 @@
 	/* Request interrupt lines */
 	irq = smmu->evtq.q.irq;
 	if (irq) {
-		ret = devm_request_threaded_irq(smmu->dev, irq,
-						arm_smmu_evtq_handler,
+		ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
 						arm_smmu_evtq_thread,
-						0, "arm-smmu-v3-evtq", smmu);
+						IRQF_ONESHOT,
+						"arm-smmu-v3-evtq", smmu);
 		if (ret < 0)
 			dev_warn(smmu->dev, "failed to enable evtq irq\n");
 	}
@@ -2263,10 +2225,10 @@
 	if (smmu->features & ARM_SMMU_FEAT_PRI) {
 		irq = smmu->priq.q.irq;
 		if (irq) {
-			ret = devm_request_threaded_irq(smmu->dev, irq,
-							arm_smmu_priq_handler,
+			ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
 							arm_smmu_priq_thread,
-							0, "arm-smmu-v3-priq",
+							IRQF_ONESHOT,
+							"arm-smmu-v3-priq",
 							smmu);
 			if (ret < 0)
 				dev_warn(smmu->dev,
@@ -2296,7 +2258,7 @@
 	return ret;
 }
 
-static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
+static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
 {
 	int ret;
 	u32 reg, enables;
@@ -2397,8 +2359,17 @@
 		return ret;
 	}
 
-	/* Enable the SMMU interface */
-	enables |= CR0_SMMUEN;
+
+	/* Enable the SMMU interface, or ensure bypass */
+	if (!bypass || disable_bypass) {
+		enables |= CR0_SMMUEN;
+	} else {
+		ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
+		if (ret) {
+			dev_err(smmu->dev, "GBPA not responding to update\n");
+			return ret;
+		}
+	}
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
 				      ARM_SMMU_CR0ACK);
 	if (ret) {
@@ -2597,6 +2568,15 @@
 	struct resource *res;
 	struct arm_smmu_device *smmu;
 	struct device *dev = &pdev->dev;
+	bool bypass = true;
+	u32 cells;
+
+	if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
+		dev_err(dev, "missing #iommu-cells property\n");
+	else if (cells != 1)
+		dev_err(dev, "invalid #iommu-cells value (%d)\n", cells);
+	else
+		bypass = false;
 
 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
 	if (!smmu) {
@@ -2649,7 +2629,24 @@
 	platform_set_drvdata(pdev, smmu);
 
 	/* Reset the device */
-	return arm_smmu_device_reset(smmu);
+	ret = arm_smmu_device_reset(smmu, bypass);
+	if (ret)
+		return ret;
+
+	/* And we're up. Go go go! */
+	of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
+#ifdef CONFIG_PCI
+	pci_request_acs();
+	ret = bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+	if (ret)
+		return ret;
+#endif
+#ifdef CONFIG_ARM_AMBA
+	ret = bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+	if (ret)
+		return ret;
+#endif
+	return bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
@@ -2677,22 +2674,14 @@
 
 static int __init arm_smmu_init(void)
 {
-	struct device_node *np;
-	int ret;
+	static bool registered;
+	int ret = 0;
 
-	np = of_find_matching_node(NULL, arm_smmu_of_match);
-	if (!np)
-		return 0;
-
-	of_node_put(np);
-
-	ret = platform_driver_register(&arm_smmu_driver);
-	if (ret)
-		return ret;
-
-	pci_request_acs();
-
-	return bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+	if (!registered) {
+		ret = platform_driver_register(&arm_smmu_driver);
+		registered = !ret;
+	}
+	return ret;
 }
 
 static void __exit arm_smmu_exit(void)
@@ -2703,6 +2692,20 @@
 subsys_initcall(arm_smmu_init);
 module_exit(arm_smmu_exit);
 
+static int __init arm_smmu_of_init(struct device_node *np)
+{
+	int ret = arm_smmu_init();
+
+	if (ret)
+		return ret;
+
+	if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
+		return -ENODEV;
+
+	return 0;
+}
+IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init);
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 2db74eb..c841eb7 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -28,6 +28,7 @@
 
 #define pr_fmt(fmt) "arm-smmu: " fmt
 
+#include <linux/atomic.h>
 #include <linux/delay.h>
 #include <linux/dma-iommu.h>
 #include <linux/dma-mapping.h>
@@ -40,6 +41,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/of_iommu.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
@@ -49,15 +52,9 @@
 
 #include "io-pgtable.h"
 
-/* Maximum number of stream IDs assigned to a single device */
-#define MAX_MASTER_STREAMIDS		128
-
 /* Maximum number of context banks per SMMU */
 #define ARM_SMMU_MAX_CBS		128
 
-/* Maximum number of mapping groups per SMMU */
-#define ARM_SMMU_MAX_SMRS		128
-
 /* SMMU global address space */
 #define ARM_SMMU_GR0(smmu)		((smmu)->base)
 #define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
@@ -165,21 +162,27 @@
 #define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
 #define SMR_VALID			(1 << 31)
 #define SMR_MASK_SHIFT			16
-#define SMR_MASK_MASK			0x7fff
 #define SMR_ID_SHIFT			0
-#define SMR_ID_MASK			0x7fff
 
 #define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))
 #define S2CR_CBNDX_SHIFT		0
 #define S2CR_CBNDX_MASK			0xff
 #define S2CR_TYPE_SHIFT			16
 #define S2CR_TYPE_MASK			0x3
-#define S2CR_TYPE_TRANS			(0 << S2CR_TYPE_SHIFT)
-#define S2CR_TYPE_BYPASS		(1 << S2CR_TYPE_SHIFT)
-#define S2CR_TYPE_FAULT			(2 << S2CR_TYPE_SHIFT)
+enum arm_smmu_s2cr_type {
+	S2CR_TYPE_TRANS,
+	S2CR_TYPE_BYPASS,
+	S2CR_TYPE_FAULT,
+};
 
 #define S2CR_PRIVCFG_SHIFT		24
-#define S2CR_PRIVCFG_UNPRIV		(2 << S2CR_PRIVCFG_SHIFT)
+#define S2CR_PRIVCFG_MASK		0x3
+enum arm_smmu_s2cr_privcfg {
+	S2CR_PRIVCFG_DEFAULT,
+	S2CR_PRIVCFG_DIPAN,
+	S2CR_PRIVCFG_UNPRIV,
+	S2CR_PRIVCFG_PRIV,
+};
 
 /* Context bank attribute registers */
 #define ARM_SMMU_GR1_CBAR(n)		(0x0 + ((n) << 2))
@@ -217,6 +220,7 @@
 #define ARM_SMMU_CB_TTBR0		0x20
 #define ARM_SMMU_CB_TTBR1		0x28
 #define ARM_SMMU_CB_TTBCR		0x30
+#define ARM_SMMU_CB_CONTEXTIDR		0x34
 #define ARM_SMMU_CB_S1_MAIR0		0x38
 #define ARM_SMMU_CB_S1_MAIR1		0x3c
 #define ARM_SMMU_CB_PAR			0x50
@@ -239,7 +243,6 @@
 #define SCTLR_AFE			(1 << 2)
 #define SCTLR_TRE			(1 << 1)
 #define SCTLR_M				(1 << 0)
-#define SCTLR_EAE_SBOP			(SCTLR_AFE | SCTLR_TRE)
 
 #define ARM_MMU500_ACTLR_CPRE		(1 << 1)
 
@@ -296,23 +299,33 @@
 	CAVIUM_SMMUV2,
 };
 
+struct arm_smmu_s2cr {
+	struct iommu_group		*group;
+	int				count;
+	enum arm_smmu_s2cr_type		type;
+	enum arm_smmu_s2cr_privcfg	privcfg;
+	u8				cbndx;
+};
+
+#define s2cr_init_val (struct arm_smmu_s2cr){				\
+	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
+}
+
 struct arm_smmu_smr {
-	u8				idx;
 	u16				mask;
 	u16				id;
+	bool				valid;
 };
 
 struct arm_smmu_master_cfg {
-	int				num_streamids;
-	u16				streamids[MAX_MASTER_STREAMIDS];
-	struct arm_smmu_smr		*smrs;
+	struct arm_smmu_device		*smmu;
+	s16				smendx[];
 };
-
-struct arm_smmu_master {
-	struct device_node		*of_node;
-	struct rb_node			node;
-	struct arm_smmu_master_cfg	cfg;
-};
+#define INVALID_SMENDX			-1
+#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
+#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
+#define for_each_cfg_sme(fw, i, idx) \
+	for (i = 0; idx = __fwspec_cfg(fw)->smendx[i], i < fw->num_ids; ++i)
 
 struct arm_smmu_device {
 	struct device			*dev;
@@ -346,7 +359,11 @@
 	atomic_t			irptndx;
 
 	u32				num_mapping_groups;
-	DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS);
+	u16				streamid_mask;
+	u16				smr_mask_mask;
+	struct arm_smmu_smr		*smrs;
+	struct arm_smmu_s2cr		*s2crs;
+	struct mutex			stream_map_mutex;
 
 	unsigned long			va_size;
 	unsigned long			ipa_size;
@@ -357,9 +374,6 @@
 	u32				num_context_irqs;
 	unsigned int			*irqs;
 
-	struct list_head		list;
-	struct rb_root			masters;
-
 	u32				cavium_id_base; /* Specific to Cavium */
 };
 
@@ -397,15 +411,6 @@
 	struct iommu_domain		domain;
 };
 
-struct arm_smmu_phandle_args {
-	struct device_node *np;
-	int args_count;
-	uint32_t args[MAX_MASTER_STREAMIDS];
-};
-
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
 struct arm_smmu_option_prop {
 	u32 opt;
 	const char *prop;
@@ -413,6 +418,8 @@
 
 static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);
 
+static bool using_legacy_binding, using_generic_binding;
+
 static struct arm_smmu_option_prop arm_smmu_options[] = {
 	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
 	{ 0, NULL},
@@ -444,131 +451,86 @@
 
 		while (!pci_is_root_bus(bus))
 			bus = bus->parent;
-		return bus->bridge->parent->of_node;
+		return of_node_get(bus->bridge->parent->of_node);
 	}
 
-	return dev->of_node;
+	return of_node_get(dev->of_node);
 }
 
-static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu,
-						struct device_node *dev_node)
+static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
 {
-	struct rb_node *node = smmu->masters.rb_node;
-
-	while (node) {
-		struct arm_smmu_master *master;
-
-		master = container_of(node, struct arm_smmu_master, node);
-
-		if (dev_node < master->of_node)
-			node = node->rb_left;
-		else if (dev_node > master->of_node)
-			node = node->rb_right;
-		else
-			return master;
-	}
-
-	return NULL;
+	*((__be32 *)data) = cpu_to_be32(alias);
+	return 0; /* Continue walking */
 }
 
-static struct arm_smmu_master_cfg *
-find_smmu_master_cfg(struct device *dev)
+static int __find_legacy_master_phandle(struct device *dev, void *data)
 {
-	struct arm_smmu_master_cfg *cfg = NULL;
-	struct iommu_group *group = iommu_group_get(dev);
+	struct of_phandle_iterator *it = *(void **)data;
+	struct device_node *np = it->node;
+	int err;
 
-	if (group) {
-		cfg = iommu_group_get_iommudata(group);
-		iommu_group_put(group);
-	}
-
-	return cfg;
+	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
+			    "#stream-id-cells", 0)
+		if (it->node == np) {
+			*(void **)data = dev;
+			return 1;
+		}
+	it->node = np;
+	return err == -ENOENT ? 0 : err;
 }
 
-static int insert_smmu_master(struct arm_smmu_device *smmu,
-			      struct arm_smmu_master *master)
+static struct platform_driver arm_smmu_driver;
+static struct iommu_ops arm_smmu_ops;
+
+static int arm_smmu_register_legacy_master(struct device *dev,
+					   struct arm_smmu_device **smmu)
 {
-	struct rb_node **new, *parent;
+	struct device *smmu_dev;
+	struct device_node *np;
+	struct of_phandle_iterator it;
+	void *data = &it;
+	u32 *sids;
+	__be32 pci_sid;
+	int err;
 
-	new = &smmu->masters.rb_node;
-	parent = NULL;
-	while (*new) {
-		struct arm_smmu_master *this
-			= container_of(*new, struct arm_smmu_master, node);
-
-		parent = *new;
-		if (master->of_node < this->of_node)
-			new = &((*new)->rb_left);
-		else if (master->of_node > this->of_node)
-			new = &((*new)->rb_right);
-		else
-			return -EEXIST;
+	np = dev_get_dev_node(dev);
+	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
+		of_node_put(np);
+		return -ENODEV;
 	}
 
-	rb_link_node(&master->node, parent, new);
-	rb_insert_color(&master->node, &smmu->masters);
-	return 0;
-}
+	it.node = np;
+	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
+				     __find_legacy_master_phandle);
+	smmu_dev = data;
+	of_node_put(np);
+	if (err == 0)
+		return -ENODEV;
+	if (err < 0)
+		return err;
 
-static int register_smmu_master(struct arm_smmu_device *smmu,
-				struct device *dev,
-				struct arm_smmu_phandle_args *masterspec)
-{
-	int i;
-	struct arm_smmu_master *master;
-
-	master = find_smmu_master(smmu, masterspec->np);
-	if (master) {
-		dev_err(dev,
-			"rejecting multiple registrations for master device %s\n",
-			masterspec->np->name);
-		return -EBUSY;
+	if (dev_is_pci(dev)) {
+		/* "mmu-masters" assumes Stream ID == Requester ID */
+		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
+				       &pci_sid);
+		it.cur = &pci_sid;
+		it.cur_count = 1;
 	}
 
-	if (masterspec->args_count > MAX_MASTER_STREAMIDS) {
-		dev_err(dev,
-			"reached maximum number (%d) of stream IDs for master device %s\n",
-			MAX_MASTER_STREAMIDS, masterspec->np->name);
-		return -ENOSPC;
-	}
+	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
+				&arm_smmu_ops);
+	if (err)
+		return err;
 
-	master = devm_kzalloc(dev, sizeof(*master), GFP_KERNEL);
-	if (!master)
+	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
+	if (!sids)
 		return -ENOMEM;
 
-	master->of_node			= masterspec->np;
-	master->cfg.num_streamids	= masterspec->args_count;
-
-	for (i = 0; i < master->cfg.num_streamids; ++i) {
-		u16 streamid = masterspec->args[i];
-
-		if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) &&
-		     (streamid >= smmu->num_mapping_groups)) {
-			dev_err(dev,
-				"stream ID for master device %s greater than maximum allowed (%d)\n",
-				masterspec->np->name, smmu->num_mapping_groups);
-			return -ERANGE;
-		}
-		master->cfg.streamids[i] = streamid;
-	}
-	return insert_smmu_master(smmu, master);
-}
-
-static struct arm_smmu_device *find_smmu_for_device(struct device *dev)
-{
-	struct arm_smmu_device *smmu;
-	struct arm_smmu_master *master = NULL;
-	struct device_node *dev_node = dev_get_dev_node(dev);
-
-	spin_lock(&arm_smmu_devices_lock);
-	list_for_each_entry(smmu, &arm_smmu_devices, list) {
-		master = find_smmu_master(smmu, dev_node);
-		if (master)
-			break;
-	}
-	spin_unlock(&arm_smmu_devices_lock);
-
-	return master ? smmu : NULL;
+	*smmu = dev_get_drvdata(smmu_dev);
+	of_phandle_iterator_args(&it, sids, it.cur_count);
+	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
+	kfree(sids);
+	return err;
 }
 
 static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
@@ -738,7 +700,7 @@
 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
-	u32 reg;
+	u32 reg, reg2;
 	u64 reg64;
 	bool stage1;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
@@ -781,14 +743,22 @@
 
 	/* TTBRs */
 	if (stage1) {
-		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
 
-		reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
-		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
-
-		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-		reg64 |= ((u64)ARM_SMMU_CB_ASID(smmu, cfg)) << TTBRn_ASID_SHIFT;
-		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
+			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
+			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
+			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
+			writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
+		} else {
+			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
+			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
+		}
 	} else {
 		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
 		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
@@ -796,28 +766,36 @@
 
 	/* TTBCR */
 	if (stage1) {
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
-		if (smmu->version > ARM_SMMU_V1) {
-			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-			reg |= TTBCR2_SEP_UPSTREAM;
-			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR2);
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
+			reg2 = 0;
+		} else {
+			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+			reg2 |= TTBCR2_SEP_UPSTREAM;
 		}
+		if (smmu->version > ARM_SMMU_V1)
+			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
 	} else {
 		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
 	}
+	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
 
 	/* MAIRs (stage-1 only) */
 	if (stage1) {
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+			reg = pgtbl_cfg->arm_v7s_cfg.prrr;
+			reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
+		} else {
+			reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
+			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
+		}
 		writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR1);
+		writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
 	}
 
 	/* SCTLR */
-	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
+	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
 	if (stage1)
 		reg |= SCTLR_S1_ASIDPNE;
 #ifdef __BIG_ENDIAN
@@ -841,12 +819,6 @@
 	if (smmu_domain->smmu)
 		goto out_unlock;
 
-	/* We're bypassing these SIDs, so don't allocate an actual context */
-	if (domain->type == IOMMU_DOMAIN_DMA) {
-		smmu_domain->smmu = smmu;
-		goto out_unlock;
-	}
-
 	/*
 	 * Mapping the requested stage onto what we support is surprisingly
 	 * complicated, mainly because the spec allows S1+S2 SMMUs without
@@ -880,6 +852,11 @@
 	 */
 	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
 		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
+	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
+	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
+	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
+	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
+		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
 	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
 	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
 			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
@@ -899,10 +876,14 @@
 		oas = smmu->ipa_size;
 		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
 			fmt = ARM_64_LPAE_S1;
-		} else {
+		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
 			fmt = ARM_32_LPAE_S1;
 			ias = min(ias, 32UL);
 			oas = min(oas, 40UL);
+		} else {
+			fmt = ARM_V7S;
+			ias = min(ias, 32UL);
+			oas = min(oas, 32UL);
 		}
 		break;
 	case ARM_SMMU_DOMAIN_NESTED:
@@ -958,6 +939,8 @@
 
 	/* Update the domain's page sizes to reflect the page table format */
 	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+	domain->geometry.aperture_end = (1UL << ias) - 1;
+	domain->geometry.force_aperture = true;
 
 	/* Initialise the context bank with our page table cfg */
 	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
@@ -996,7 +979,7 @@
 	void __iomem *cb_base;
 	int irq;
 
-	if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
+	if (!smmu)
 		return;
 
 	/*
@@ -1030,8 +1013,8 @@
 	if (!smmu_domain)
 		return NULL;
 
-	if (type == IOMMU_DOMAIN_DMA &&
-	    iommu_get_dma_cookie(&smmu_domain->domain)) {
+	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
+	    iommu_get_dma_cookie(&smmu_domain->domain))) {
 		kfree(smmu_domain);
 		return NULL;
 	}
@@ -1055,162 +1038,197 @@
 	kfree(smmu_domain);
 }
 
-static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
-					  struct arm_smmu_master_cfg *cfg)
+static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
 {
-	int i;
-	struct arm_smmu_smr *smrs;
-	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+	struct arm_smmu_smr *smr = smmu->smrs + idx;
+	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
 
-	if (!(smmu->features & ARM_SMMU_FEAT_STREAM_MATCH))
-		return 0;
-
-	if (cfg->smrs)
-		return -EEXIST;
-
-	smrs = kmalloc_array(cfg->num_streamids, sizeof(*smrs), GFP_KERNEL);
-	if (!smrs) {
-		dev_err(smmu->dev, "failed to allocate %d SMRs\n",
-			cfg->num_streamids);
-		return -ENOMEM;
-	}
-
-	/* Allocate the SMRs on the SMMU */
-	for (i = 0; i < cfg->num_streamids; ++i) {
-		int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
-						  smmu->num_mapping_groups);
-		if (idx < 0) {
-			dev_err(smmu->dev, "failed to allocate free SMR\n");
-			goto err_free_smrs;
-		}
-
-		smrs[i] = (struct arm_smmu_smr) {
-			.idx	= idx,
-			.mask	= 0, /* We don't currently share SMRs */
-			.id	= cfg->streamids[i],
-		};
-	}
-
-	/* It worked! Now, poke the actual hardware */
-	for (i = 0; i < cfg->num_streamids; ++i) {
-		u32 reg = SMR_VALID | smrs[i].id << SMR_ID_SHIFT |
-			  smrs[i].mask << SMR_MASK_SHIFT;
-		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_SMR(smrs[i].idx));
-	}
-
-	cfg->smrs = smrs;
-	return 0;
-
-err_free_smrs:
-	while (--i >= 0)
-		__arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
-	kfree(smrs);
-	return -ENOSPC;
+	if (smr->valid)
+		reg |= SMR_VALID;
+	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
 }
 
-static void arm_smmu_master_free_smrs(struct arm_smmu_device *smmu,
-				      struct arm_smmu_master_cfg *cfg)
+static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
 {
-	int i;
-	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
-	struct arm_smmu_smr *smrs = cfg->smrs;
+	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
+	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
+		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
+		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;
 
+	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
+}
+
+static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
+{
+	arm_smmu_write_s2cr(smmu, idx);
+	if (smmu->smrs)
+		arm_smmu_write_smr(smmu, idx);
+}
+
+static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
+{
+	struct arm_smmu_smr *smrs = smmu->smrs;
+	int i, free_idx = -ENOSPC;
+
+	/* Stream indexing is blissfully easy */
 	if (!smrs)
-		return;
+		return id;
 
-	/* Invalidate the SMRs before freeing back to the allocator */
-	for (i = 0; i < cfg->num_streamids; ++i) {
-		u8 idx = smrs[i].idx;
-
-		writel_relaxed(~SMR_VALID, gr0_base + ARM_SMMU_GR0_SMR(idx));
-		__arm_smmu_free_bitmap(smmu->smr_map, idx);
+	/* Validating SMRs is... less so */
+	for (i = 0; i < smmu->num_mapping_groups; ++i) {
+		if (!smrs[i].valid) {
+			/*
+			 * Note the first free entry we come across, which
+			 * we'll claim in the end if nothing else matches.
+			 */
+			if (free_idx < 0)
+				free_idx = i;
+			continue;
+		}
+		/*
+		 * If the new entry is _entirely_ matched by an existing entry,
+		 * then reuse that, with the guarantee that there also cannot
+		 * be any subsequent conflicting entries. In normal use we'd
+		 * expect simply identical entries for this case, but there's
+		 * no harm in accommodating the generalisation.
+		 */
+		if ((mask & smrs[i].mask) == mask &&
+		    !((id ^ smrs[i].id) & ~smrs[i].mask))
+			return i;
+		/*
+		 * If the new entry has any other overlap with an existing one,
+		 * though, then there always exists at least one stream ID
+		 * which would cause a conflict, and we can't allow that risk.
+		 */
+		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
+			return -EINVAL;
 	}
 
-	cfg->smrs = NULL;
-	kfree(smrs);
+	return free_idx;
+}
+
+static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
+{
+	if (--smmu->s2crs[idx].count)
+		return false;
+
+	smmu->s2crs[idx] = s2cr_init_val;
+	if (smmu->smrs)
+		smmu->smrs[idx].valid = false;
+
+	return true;
+}
+
+static int arm_smmu_master_alloc_smes(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+	struct arm_smmu_device *smmu = cfg->smmu;
+	struct arm_smmu_smr *smrs = smmu->smrs;
+	struct iommu_group *group;
+	int i, idx, ret;
+
+	mutex_lock(&smmu->stream_map_mutex);
+	/* Figure out a viable stream map entry allocation */
+	for_each_cfg_sme(fwspec, i, idx) {
+		u16 sid = fwspec->ids[i];
+		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+		if (idx != INVALID_SMENDX) {
+			ret = -EEXIST;
+			goto out_err;
+		}
+
+		ret = arm_smmu_find_sme(smmu, sid, mask);
+		if (ret < 0)
+			goto out_err;
+
+		idx = ret;
+		if (smrs && smmu->s2crs[idx].count == 0) {
+			smrs[idx].id = sid;
+			smrs[idx].mask = mask;
+			smrs[idx].valid = true;
+		}
+		smmu->s2crs[idx].count++;
+		cfg->smendx[i] = (s16)idx;
+	}
+
+	group = iommu_group_get_for_dev(dev);
+	if (!group)
+		group = ERR_PTR(-ENOMEM);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
+		goto out_err;
+	}
+	iommu_group_put(group);
+
+	/* It worked! Now, poke the actual hardware */
+	for_each_cfg_sme(fwspec, i, idx) {
+		arm_smmu_write_sme(smmu, idx);
+		smmu->s2crs[idx].group = group;
+	}
+
+	mutex_unlock(&smmu->stream_map_mutex);
+	return 0;
+
+out_err:
+	while (i--) {
+		arm_smmu_free_sme(smmu, cfg->smendx[i]);
+		cfg->smendx[i] = INVALID_SMENDX;
+	}
+	mutex_unlock(&smmu->stream_map_mutex);
+	return ret;
+}
+
+static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
+{
+	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
+	int i, idx;
+
+	mutex_lock(&smmu->stream_map_mutex);
+	for_each_cfg_sme(fwspec, i, idx) {
+		if (arm_smmu_free_sme(smmu, idx))
+			arm_smmu_write_sme(smmu, idx);
+		cfg->smendx[i] = INVALID_SMENDX;
+	}
+	mutex_unlock(&smmu->stream_map_mutex);
 }
 
 static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
-				      struct arm_smmu_master_cfg *cfg)
+				      struct iommu_fwspec *fwspec)
 {
-	int i, ret;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
+	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
+	u8 cbndx = smmu_domain->cfg.cbndx;
+	int i, idx;
 
-	/*
-	 * FIXME: This won't be needed once we have IOMMU-backed DMA ops
-	 * for all devices behind the SMMU. Note that we need to take
-	 * care configuring SMRs for devices both a platform_device and
-	 * and a PCI device (i.e. a PCI host controller)
-	 */
-	if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
-		return 0;
+	for_each_cfg_sme(fwspec, i, idx) {
+		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
+			continue;
 
-	/* Devices in an IOMMU group may already be configured */
-	ret = arm_smmu_master_configure_smrs(smmu, cfg);
-	if (ret)
-		return ret == -EEXIST ? 0 : ret;
-
-	for (i = 0; i < cfg->num_streamids; ++i) {
-		u32 idx, s2cr;
-
-		idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-		s2cr = S2CR_TYPE_TRANS | S2CR_PRIVCFG_UNPRIV |
-		       (smmu_domain->cfg.cbndx << S2CR_CBNDX_SHIFT);
-		writel_relaxed(s2cr, gr0_base + ARM_SMMU_GR0_S2CR(idx));
+		s2cr[idx].type = type;
+		s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
+		s2cr[idx].cbndx = cbndx;
+		arm_smmu_write_s2cr(smmu, idx);
 	}
-
 	return 0;
 }
 
-static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain,
-					  struct arm_smmu_master_cfg *cfg)
-{
-	int i;
-	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
-
-	/* An IOMMU group is torn down by the first device to be removed */
-	if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs)
-		return;
-
-	/*
-	 * We *must* clear the S2CR first, because freeing the SMR means
-	 * that it can be re-allocated immediately.
-	 */
-	for (i = 0; i < cfg->num_streamids; ++i) {
-		u32 idx = cfg->smrs ? cfg->smrs[i].idx : cfg->streamids[i];
-		u32 reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
-
-		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(idx));
-	}
-
-	arm_smmu_master_free_smrs(smmu, cfg);
-}
-
-static void arm_smmu_detach_dev(struct device *dev,
-				struct arm_smmu_master_cfg *cfg)
-{
-	struct iommu_domain *domain = dev->archdata.iommu;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-
-	dev->archdata.iommu = NULL;
-	arm_smmu_domain_remove_master(smmu_domain, cfg);
-}
-
 static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 {
 	int ret;
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
 	struct arm_smmu_device *smmu;
-	struct arm_smmu_master_cfg *cfg;
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
 
-	smmu = find_smmu_for_device(dev);
-	if (!smmu) {
+	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
 		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
 		return -ENXIO;
 	}
 
+	smmu = fwspec_smmu(fwspec);
 	/* Ensure that the domain is finalised */
 	ret = arm_smmu_init_domain_context(domain, smmu);
 	if (ret < 0)
@@ -1228,18 +1246,7 @@
 	}
 
 	/* Looks ok, so add the device to the domain */
-	cfg = find_smmu_master_cfg(dev);
-	if (!cfg)
-		return -ENODEV;
-
-	/* Detach the dev from its current domain */
-	if (dev->archdata.iommu)
-		arm_smmu_detach_dev(dev, cfg);
-
-	ret = arm_smmu_domain_add_master(smmu_domain, cfg);
-	if (!ret)
-		dev->archdata.iommu = domain;
-	return ret;
+	return arm_smmu_domain_add_master(smmu_domain, fwspec);
 }
 
 static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
@@ -1358,111 +1365,114 @@
 	}
 }
 
-static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
+static int arm_smmu_match_node(struct device *dev, void *data)
 {
-	*((u16 *)data) = alias;
-	return 0; /* Continue walking */
+	return dev->of_node == data;
 }
 
-static void __arm_smmu_release_pci_iommudata(void *data)
+static struct arm_smmu_device *arm_smmu_get_by_node(struct device_node *np)
 {
-	kfree(data);
-}
-
-static int arm_smmu_init_pci_device(struct pci_dev *pdev,
-				    struct iommu_group *group)
-{
-	struct arm_smmu_master_cfg *cfg;
-	u16 sid;
-	int i;
-
-	cfg = iommu_group_get_iommudata(group);
-	if (!cfg) {
-		cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
-		if (!cfg)
-			return -ENOMEM;
-
-		iommu_group_set_iommudata(group, cfg,
-					  __arm_smmu_release_pci_iommudata);
-	}
-
-	if (cfg->num_streamids >= MAX_MASTER_STREAMIDS)
-		return -ENOSPC;
-
-	/*
-	 * Assume Stream ID == Requester ID for now.
-	 * We need a way to describe the ID mappings in FDT.
-	 */
-	pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &sid);
-	for (i = 0; i < cfg->num_streamids; ++i)
-		if (cfg->streamids[i] == sid)
-			break;
-
-	/* Avoid duplicate SIDs, as this can lead to SMR conflicts */
-	if (i == cfg->num_streamids)
-		cfg->streamids[cfg->num_streamids++] = sid;
-
-	return 0;
-}
-
-static int arm_smmu_init_platform_device(struct device *dev,
-					 struct iommu_group *group)
-{
-	struct arm_smmu_device *smmu = find_smmu_for_device(dev);
-	struct arm_smmu_master *master;
-
-	if (!smmu)
-		return -ENODEV;
-
-	master = find_smmu_master(smmu, dev->of_node);
-	if (!master)
-		return -ENODEV;
-
-	iommu_group_set_iommudata(group, &master->cfg, NULL);
-
-	return 0;
+	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
+						np, arm_smmu_match_node);
+	put_device(dev);
+	return dev ? dev_get_drvdata(dev) : NULL;
 }
 
 static int arm_smmu_add_device(struct device *dev)
 {
-	struct iommu_group *group;
+	struct arm_smmu_device *smmu;
+	struct arm_smmu_master_cfg *cfg;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	int i, ret;
 
-	group = iommu_group_get_for_dev(dev);
-	if (IS_ERR(group))
-		return PTR_ERR(group);
+	if (using_legacy_binding) {
+		ret = arm_smmu_register_legacy_master(dev, &smmu);
+		fwspec = dev->iommu_fwspec;
+		if (ret)
+			goto out_free;
+	} else if (fwspec) {
+		smmu = arm_smmu_get_by_node(to_of_node(fwspec->iommu_fwnode));
+	} else {
+		return -ENODEV;
+	}
 
-	iommu_group_put(group);
+	ret = -EINVAL;
+	for (i = 0; i < fwspec->num_ids; i++) {
+		u16 sid = fwspec->ids[i];
+		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
+
+		if (sid & ~smmu->streamid_mask) {
+			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
+				sid, smmu->streamid_mask);
+			goto out_free;
+		}
+		if (mask & ~smmu->smr_mask_mask) {
+			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
+				sid, smmu->smr_mask_mask);
+			goto out_free;
+		}
+	}
+
+	ret = -ENOMEM;
+	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
+		      GFP_KERNEL);
+	if (!cfg)
+		goto out_free;
+
+	cfg->smmu = smmu;
+	fwspec->iommu_priv = cfg;
+	while (i--)
+		cfg->smendx[i] = INVALID_SMENDX;
+
+	ret = arm_smmu_master_alloc_smes(dev);
+	if (ret)
+		goto out_free;
+
 	return 0;
+
+out_free:
+	if (fwspec)
+		kfree(fwspec->iommu_priv);
+	iommu_fwspec_free(dev);
+	return ret;
 }
 
 static void arm_smmu_remove_device(struct device *dev)
 {
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	if (!fwspec || fwspec->ops != &arm_smmu_ops)
+		return;
+
+	arm_smmu_master_free_smes(fwspec);
 	iommu_group_remove_device(dev);
+	kfree(fwspec->iommu_priv);
+	iommu_fwspec_free(dev);
 }
 
 static struct iommu_group *arm_smmu_device_group(struct device *dev)
 {
-	struct iommu_group *group;
-	int ret;
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
+	struct iommu_group *group = NULL;
+	int i, idx;
+
+	for_each_cfg_sme(fwspec, i, idx) {
+		if (group && smmu->s2crs[idx].group &&
+		    group != smmu->s2crs[idx].group)
+			return ERR_PTR(-EINVAL);
+
+		group = smmu->s2crs[idx].group;
+	}
+
+	if (group)
+		return group;
 
 	if (dev_is_pci(dev))
 		group = pci_device_group(dev);
 	else
 		group = generic_device_group(dev);
 
-	if (IS_ERR(group))
-		return group;
-
-	if (dev_is_pci(dev))
-		ret = arm_smmu_init_pci_device(to_pci_dev(dev), group);
-	else
-		ret = arm_smmu_init_platform_device(dev, group);
-
-	if (ret) {
-		iommu_group_put(group);
-		group = ERR_PTR(ret);
-	}
-
 	return group;
 }
 
@@ -1510,6 +1520,19 @@
 	return ret;
 }
 
+static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	u32 fwid = 0;
+
+	if (args->args_count > 0)
+		fwid |= (u16)args->args[0];
+
+	if (args->args_count > 1)
+		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
+
+	return iommu_fwspec_add_ids(dev, &fwid, 1);
+}
+
 static struct iommu_ops arm_smmu_ops = {
 	.capable		= arm_smmu_capable,
 	.domain_alloc		= arm_smmu_domain_alloc,
@@ -1524,6 +1547,7 @@
 	.device_group		= arm_smmu_device_group,
 	.domain_get_attr	= arm_smmu_domain_get_attr,
 	.domain_set_attr	= arm_smmu_domain_set_attr,
+	.of_xlate		= arm_smmu_of_xlate,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
 };
 
@@ -1531,19 +1555,19 @@
 {
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 	void __iomem *cb_base;
-	int i = 0;
+	int i;
 	u32 reg, major;
 
 	/* clear global FSR */
 	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
 	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
 
-	/* Mark all SMRn as invalid and all S2CRn as bypass unless overridden */
-	reg = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS;
-	for (i = 0; i < smmu->num_mapping_groups; ++i) {
-		writel_relaxed(0, gr0_base + ARM_SMMU_GR0_SMR(i));
-		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_S2CR(i));
-	}
+	/*
+	 * Reset stream mapping groups: Initial values mark all SMRn as
+	 * invalid and all S2CRn as bypass unless overridden.
+	 */
+	for (i = 0; i < smmu->num_mapping_groups; ++i)
+		arm_smmu_write_sme(smmu, i);
 
 	/*
 	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
@@ -1632,6 +1656,7 @@
 	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 	u32 id;
 	bool cttw_dt, cttw_reg;
+	int i;
 
 	dev_notice(smmu->dev, "probing hardware configuration...\n");
 	dev_notice(smmu->dev, "SMMUv%d with:\n",
@@ -1690,39 +1715,55 @@
 		dev_notice(smmu->dev,
 			   "\t(IDR0.CTTW overridden by dma-coherent property)\n");
 
+	/* Max. number of entries we have for stream matching/indexing */
+	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
+	smmu->streamid_mask = size - 1;
 	if (id & ID0_SMS) {
-		u32 smr, sid, mask;
+		u32 smr;
 
 		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
-		smmu->num_mapping_groups = (id >> ID0_NUMSMRG_SHIFT) &
-					   ID0_NUMSMRG_MASK;
-		if (smmu->num_mapping_groups == 0) {
+		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
+		if (size == 0) {
 			dev_err(smmu->dev,
 				"stream-matching supported, but no SMRs present!\n");
 			return -ENODEV;
 		}
 
-		smr = SMR_MASK_MASK << SMR_MASK_SHIFT;
-		smr |= (SMR_ID_MASK << SMR_ID_SHIFT);
+		/*
+		 * SMR.ID bits may not be preserved if the corresponding MASK
+		 * bits are set, so check each one separately. We can reject
+		 * masters later if they try to claim IDs outside these masks.
+		 */
+		smr = smmu->streamid_mask << SMR_ID_SHIFT;
 		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
 		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+		smmu->streamid_mask = smr >> SMR_ID_SHIFT;
 
-		mask = (smr >> SMR_MASK_SHIFT) & SMR_MASK_MASK;
-		sid = (smr >> SMR_ID_SHIFT) & SMR_ID_MASK;
-		if ((mask & sid) != sid) {
-			dev_err(smmu->dev,
-				"SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
-				mask, sid);
-			return -ENODEV;
-		}
+		smr = smmu->streamid_mask << SMR_MASK_SHIFT;
+		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
+		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
+		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
+
+		/* Zero-initialised to mark as invalid */
+		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
+					  GFP_KERNEL);
+		if (!smmu->smrs)
+			return -ENOMEM;
 
 		dev_notice(smmu->dev,
-			   "\tstream matching with %u register groups, mask 0x%x",
-			   smmu->num_mapping_groups, mask);
-	} else {
-		smmu->num_mapping_groups = (id >> ID0_NUMSIDB_SHIFT) &
-					   ID0_NUMSIDB_MASK;
+			   "\tstream matching with %lu register groups, mask 0x%x",
+			   size, smmu->smr_mask_mask);
 	}
+	/* s2cr->type == 0 means translation, so initialise explicitly */
+	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
+					 GFP_KERNEL);
+	if (!smmu->s2crs)
+		return -ENOMEM;
+	for (i = 0; i < size; i++)
+		smmu->s2crs[i] = s2cr_init_val;
+
+	smmu->num_mapping_groups = size;
+	mutex_init(&smmu->stream_map_mutex);
 
 	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
@@ -1855,15 +1896,24 @@
 
 static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 {
-	const struct of_device_id *of_id;
 	const struct arm_smmu_match_data *data;
 	struct resource *res;
 	struct arm_smmu_device *smmu;
 	struct device *dev = &pdev->dev;
-	struct rb_node *node;
-	struct of_phandle_iterator it;
-	struct arm_smmu_phandle_args *masterspec;
 	int num_irqs, i, err;
+	bool legacy_binding;
+
+	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
+	if (legacy_binding && !using_generic_binding) {
+		if (!using_legacy_binding)
+			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
+		using_legacy_binding = true;
+	} else if (!legacy_binding && !using_legacy_binding) {
+		using_generic_binding = true;
+	} else {
+		dev_err(dev, "not probing due to mismatched DT properties\n");
+		return -ENODEV;
+	}
 
 	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
 	if (!smmu) {
@@ -1872,8 +1922,7 @@
 	}
 	smmu->dev = dev;
 
-	of_id = of_match_node(arm_smmu_of_match, dev->of_node);
-	data = of_id->data;
+	data = of_device_get_match_data(dev);
 	smmu->version = data->version;
 	smmu->model = data->model;
 
@@ -1923,37 +1972,6 @@
 	if (err)
 		return err;
 
-	i = 0;
-	smmu->masters = RB_ROOT;
-
-	err = -ENOMEM;
-	/* No need to zero the memory for masterspec */
-	masterspec = kmalloc(sizeof(*masterspec), GFP_KERNEL);
-	if (!masterspec)
-		goto out_put_masters;
-
-	of_for_each_phandle(&it, err, dev->of_node,
-			    "mmu-masters", "#stream-id-cells", 0) {
-		int count = of_phandle_iterator_args(&it, masterspec->args,
-						     MAX_MASTER_STREAMIDS);
-		masterspec->np		= of_node_get(it.node);
-		masterspec->args_count	= count;
-
-		err = register_smmu_master(smmu, dev, masterspec);
-		if (err) {
-			dev_err(dev, "failed to add master %s\n",
-				masterspec->np->name);
-			kfree(masterspec);
-			goto out_put_masters;
-		}
-
-		i++;
-	}
-
-	dev_notice(dev, "registered %d master devices\n", i);
-
-	kfree(masterspec);
-
 	parse_driver_options(smmu);
 
 	if (smmu->version == ARM_SMMU_V2 &&
@@ -1961,8 +1979,7 @@
 		dev_err(dev,
 			"found only %d context interrupt(s) but %d required\n",
 			smmu->num_context_irqs, smmu->num_context_banks);
-		err = -ENODEV;
-		goto out_put_masters;
+		return -ENODEV;
 	}
 
 	for (i = 0; i < smmu->num_global_irqs; ++i) {
@@ -1974,59 +1991,39 @@
 		if (err) {
 			dev_err(dev, "failed to request global IRQ %d (%u)\n",
 				i, smmu->irqs[i]);
-			goto out_put_masters;
+			return err;
 		}
 	}
 
-	INIT_LIST_HEAD(&smmu->list);
-	spin_lock(&arm_smmu_devices_lock);
-	list_add(&smmu->list, &arm_smmu_devices);
-	spin_unlock(&arm_smmu_devices_lock);
-
+	of_iommu_set_ops(dev->of_node, &arm_smmu_ops);
+	platform_set_drvdata(pdev, smmu);
 	arm_smmu_device_reset(smmu);
-	return 0;
 
-out_put_masters:
-	for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
-		struct arm_smmu_master *master
-			= container_of(node, struct arm_smmu_master, node);
-		of_node_put(master->of_node);
+	/* Oh, for a proper bus abstraction */
+	if (!iommu_present(&platform_bus_type))
+		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
+#ifdef CONFIG_ARM_AMBA
+	if (!iommu_present(&amba_bustype))
+		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
+#endif
+#ifdef CONFIG_PCI
+	if (!iommu_present(&pci_bus_type)) {
+		pci_request_acs();
+		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
 	}
-
-	return err;
+#endif
+	return 0;
 }
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
-	int i;
-	struct device *dev = &pdev->dev;
-	struct arm_smmu_device *curr, *smmu = NULL;
-	struct rb_node *node;
-
-	spin_lock(&arm_smmu_devices_lock);
-	list_for_each_entry(curr, &arm_smmu_devices, list) {
-		if (curr->dev == dev) {
-			smmu = curr;
-			list_del(&smmu->list);
-			break;
-		}
-	}
-	spin_unlock(&arm_smmu_devices_lock);
+	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
 	if (!smmu)
 		return -ENODEV;
 
-	for (node = rb_first(&smmu->masters); node; node = rb_next(node)) {
-		struct arm_smmu_master *master
-			= container_of(node, struct arm_smmu_master, node);
-		of_node_put(master->of_node);
-	}
-
 	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
-		dev_err(dev, "removing device with active domains!\n");
-
-	for (i = 0; i < smmu->num_global_irqs; ++i)
-		devm_free_irq(smmu->dev, smmu->irqs[i], smmu);
+		dev_err(&pdev->dev, "removing device with active domains!\n");
 
 	/* Turn the thing off */
 	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
@@ -2044,41 +2041,14 @@
 
 static int __init arm_smmu_init(void)
 {
-	struct device_node *np;
-	int ret;
+	static bool registered;
+	int ret = 0;
 
-	/*
-	 * Play nice with systems that don't have an ARM SMMU by checking that
-	 * an ARM SMMU exists in the system before proceeding with the driver
-	 * and IOMMU bus operation registration.
-	 */
-	np = of_find_matching_node(NULL, arm_smmu_of_match);
-	if (!np)
-		return 0;
-
-	of_node_put(np);
-
-	ret = platform_driver_register(&arm_smmu_driver);
-	if (ret)
-		return ret;
-
-	/* Oh, for a proper bus abstraction */
-	if (!iommu_present(&platform_bus_type))
-		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);
-
-#ifdef CONFIG_ARM_AMBA
-	if (!iommu_present(&amba_bustype))
-		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
-#endif
-
-#ifdef CONFIG_PCI
-	if (!iommu_present(&pci_bus_type)) {
-		pci_request_acs();
-		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
+	if (!registered) {
+		ret = platform_driver_register(&arm_smmu_driver);
+		registered = !ret;
 	}
-#endif
-
-	return 0;
+	return ret;
 }
 
 static void __exit arm_smmu_exit(void)
@@ -2089,6 +2059,25 @@
 subsys_initcall(arm_smmu_init);
 module_exit(arm_smmu_exit);
 
+static int __init arm_smmu_of_init(struct device_node *np)
+{
+	int ret = arm_smmu_init();
+
+	if (ret)
+		return ret;
+
+	if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root))
+		return -ENODEV;
+
+	return 0;
+}
+IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init);
+IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init);
+IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init);
+
 MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
 MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 00c8a08..c5ab866 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -25,10 +25,29 @@
 #include <linux/huge_mm.h>
 #include <linux/iommu.h>
 #include <linux/iova.h>
+#include <linux/irq.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 #include <linux/scatterlist.h>
 #include <linux/vmalloc.h>
 
+struct iommu_dma_msi_page {
+	struct list_head	list;
+	dma_addr_t		iova;
+	phys_addr_t		phys;
+};
+
+struct iommu_dma_cookie {
+	struct iova_domain	iovad;
+	struct list_head	msi_page_list;
+	spinlock_t		msi_lock;
+};
+
+static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain)
+{
+	return &((struct iommu_dma_cookie *)domain->iova_cookie)->iovad;
+}
+
 int iommu_dma_init(void)
 {
 	return iova_cache_get();
@@ -43,15 +62,19 @@
  */
 int iommu_get_dma_cookie(struct iommu_domain *domain)
 {
-	struct iova_domain *iovad;
+	struct iommu_dma_cookie *cookie;
 
 	if (domain->iova_cookie)
 		return -EEXIST;
 
-	iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
-	domain->iova_cookie = iovad;
+	cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
+	if (!cookie)
+		return -ENOMEM;
 
-	return iovad ? 0 : -ENOMEM;
+	spin_lock_init(&cookie->msi_lock);
+	INIT_LIST_HEAD(&cookie->msi_page_list);
+	domain->iova_cookie = cookie;
+	return 0;
 }
 EXPORT_SYMBOL(iommu_get_dma_cookie);
 
@@ -63,32 +86,58 @@
  */
 void iommu_put_dma_cookie(struct iommu_domain *domain)
 {
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iommu_dma_msi_page *msi, *tmp;
 
-	if (!iovad)
+	if (!cookie)
 		return;
 
-	if (iovad->granule)
-		put_iova_domain(iovad);
-	kfree(iovad);
+	if (cookie->iovad.granule)
+		put_iova_domain(&cookie->iovad);
+
+	list_for_each_entry_safe(msi, tmp, &cookie->msi_page_list, list) {
+		list_del(&msi->list);
+		kfree(msi);
+	}
+	kfree(cookie);
 	domain->iova_cookie = NULL;
 }
 EXPORT_SYMBOL(iommu_put_dma_cookie);
 
+static void iova_reserve_pci_windows(struct pci_dev *dev,
+		struct iova_domain *iovad)
+{
+	struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus);
+	struct resource_entry *window;
+	unsigned long lo, hi;
+
+	resource_list_for_each_entry(window, &bridge->windows) {
+		if (resource_type(window->res) != IORESOURCE_MEM &&
+		    resource_type(window->res) != IORESOURCE_IO)
+			continue;
+
+		lo = iova_pfn(iovad, window->res->start - window->offset);
+		hi = iova_pfn(iovad, window->res->end - window->offset);
+		reserve_iova(iovad, lo, hi);
+	}
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
  * @base: IOVA at which the mappable address space starts
  * @size: Size of IOVA space
+ * @dev: Device the domain is being initialised for
  *
  * @base and @size should be exact multiples of IOMMU page granularity to
  * avoid rounding surprises. If necessary, we reserve the page at address 0
  * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
  * any change which could make prior IOVAs invalid will fail.
  */
-int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
+		u64 size, struct device *dev)
 {
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	unsigned long order, base_pfn, end_pfn;
 
 	if (!iovad)
@@ -124,6 +173,8 @@
 		iovad->dma_32bit_pfn = end_pfn;
 	} else {
 		init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+		if (dev && dev_is_pci(dev))
+			iova_reserve_pci_windows(to_pci_dev(dev), iovad);
 	}
 	return 0;
 }
@@ -155,7 +206,7 @@
 static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size,
 		dma_addr_t dma_limit)
 {
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	unsigned long shift = iova_shift(iovad);
 	unsigned long length = iova_align(iovad, size) >> shift;
 
@@ -171,7 +222,7 @@
 /* The IOVA allocator knows what we mapped, so just unmap whatever that was */
 static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
 {
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	unsigned long shift = iova_shift(iovad);
 	unsigned long pfn = dma_addr >> shift;
 	struct iova *iova = find_iova(iovad, pfn);
@@ -294,7 +345,7 @@
 		void (*flush_page)(struct device *, const void *, phys_addr_t))
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	struct iova *iova;
 	struct page **pages;
 	struct sg_table sgt;
@@ -386,7 +437,7 @@
 {
 	dma_addr_t dma_addr;
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	phys_addr_t phys = page_to_phys(page) + offset;
 	size_t iova_off = iova_offset(iovad, phys);
 	size_t len = iova_align(iovad, size + iova_off);
@@ -495,7 +546,7 @@
 		int nents, int prot)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova_domain *iovad = cookie_iovad(domain);
 	struct iova *iova;
 	struct scatterlist *s, *prev = NULL;
 	dma_addr_t dma_addr;
@@ -587,3 +638,81 @@
 {
 	return dma_addr == DMA_ERROR_CODE;
 }
+
+static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
+		phys_addr_t msi_addr, struct iommu_domain *domain)
+{
+	struct iommu_dma_cookie *cookie = domain->iova_cookie;
+	struct iommu_dma_msi_page *msi_page;
+	struct iova_domain *iovad = &cookie->iovad;
+	struct iova *iova;
+	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;
+
+	msi_addr &= ~(phys_addr_t)iova_mask(iovad);
+	list_for_each_entry(msi_page, &cookie->msi_page_list, list)
+		if (msi_page->phys == msi_addr)
+			return msi_page;
+
+	msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
+	if (!msi_page)
+		return NULL;
+
+	iova = __alloc_iova(domain, iovad->granule, dma_get_mask(dev));
+	if (!iova)
+		goto out_free_page;
+
+	msi_page->phys = msi_addr;
+	msi_page->iova = iova_dma_addr(iovad, iova);
+	if (iommu_map(domain, msi_page->iova, msi_addr, iovad->granule, prot))
+		goto out_free_iova;
+
+	INIT_LIST_HEAD(&msi_page->list);
+	list_add(&msi_page->list, &cookie->msi_page_list);
+	return msi_page;
+
+out_free_iova:
+	__free_iova(iovad, iova);
+out_free_page:
+	kfree(msi_page);
+	return NULL;
+}
+
+void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
+{
+	struct device *dev = msi_desc_to_dev(irq_get_msi_desc(irq));
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iommu_dma_cookie *cookie;
+	struct iommu_dma_msi_page *msi_page;
+	phys_addr_t msi_addr = (u64)msg->address_hi << 32 | msg->address_lo;
+	unsigned long flags;
+
+	if (!domain || !domain->iova_cookie)
+		return;
+
+	cookie = domain->iova_cookie;
+
+	/*
+	 * We disable IRQs to rule out a possible inversion against
+	 * irq_desc_lock if, say, someone tries to retarget the affinity
+	 * of an MSI from within an IPI handler.
+	 */
+	spin_lock_irqsave(&cookie->msi_lock, flags);
+	msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
+	spin_unlock_irqrestore(&cookie->msi_lock, flags);
+
+	if (WARN_ON(!msi_page)) {
+		/*
+		 * We're called from a void callback, so the best we can do is
+		 * 'fail' by filling the message with obviously bogus values.
+		 * Since we got this far due to an IOMMU being present, it's
+		 * not like the existing address would have worked anyway...
+		 */
+		msg->address_hi = ~0U;
+		msg->address_lo = ~0U;
+		msg->data = ~0U;
+	} else {
+		msg->address_hi = upper_32_bits(msi_page->iova);
+		msg->address_lo &= iova_mask(&cookie->iovad);
+		msg->address_lo += lower_32_bits(msi_page->iova);
+	}
+}
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 33dcc29..30808e9 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1345,8 +1345,8 @@
 		exynos_iommu_init();
 
 	pdev = of_platform_device_create(np, NULL, platform_bus_type.dev_root);
-	if (IS_ERR(pdev))
-		return PTR_ERR(pdev);
+	if (!pdev)
+		return -ENODEV;
 
 	/*
 	 * use the first registered sysmmu device for performing
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index ebb5bf3..a4407ea 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2452,20 +2452,15 @@
 	return 0;
 }
 
-/* domain is initialized */
-static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
+static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
 {
 	struct device_domain_info *info = NULL;
-	struct dmar_domain *domain, *tmp;
+	struct dmar_domain *domain = NULL;
 	struct intel_iommu *iommu;
 	u16 req_id, dma_alias;
 	unsigned long flags;
 	u8 bus, devfn;
 
-	domain = find_domain(dev);
-	if (domain)
-		return domain;
-
 	iommu = device_to_iommu(dev, &bus, &devfn);
 	if (!iommu)
 		return NULL;
@@ -2487,9 +2482,9 @@
 		}
 		spin_unlock_irqrestore(&device_domain_lock, flags);
 
-		/* DMA alias already has a domain, uses it */
+		/* DMA alias already has a domain, use it */
 		if (info)
-			goto found_domain;
+			goto out;
 	}
 
 	/* Allocate and initialize new domain for the device */
@@ -2501,28 +2496,67 @@
 		return NULL;
 	}
 
-	/* register PCI DMA alias device */
-	if (dev_is_pci(dev) && req_id != dma_alias) {
-		tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
-					       dma_alias & 0xff, NULL, domain);
+out:
 
-		if (!tmp || tmp != domain) {
-			domain_exit(domain);
-			domain = tmp;
+	return domain;
+}
+
+static struct dmar_domain *set_domain_for_dev(struct device *dev,
+					      struct dmar_domain *domain)
+{
+	struct intel_iommu *iommu;
+	struct dmar_domain *tmp;
+	u16 req_id, dma_alias;
+	u8 bus, devfn;
+
+	iommu = device_to_iommu(dev, &bus, &devfn);
+	if (!iommu)
+		return NULL;
+
+	req_id = ((u16)bus << 8) | devfn;
+
+	if (dev_is_pci(dev)) {
+		struct pci_dev *pdev = to_pci_dev(dev);
+
+		pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
+
+		/* register PCI DMA alias device */
+		if (req_id != dma_alias) {
+			tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
+					dma_alias & 0xff, NULL, domain);
+
+			if (!tmp || tmp != domain)
+				return tmp;
 		}
-
-		if (!domain)
-			return NULL;
 	}
 
-found_domain:
 	tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
+	if (!tmp || tmp != domain)
+		return tmp;
 
-	if (!tmp || tmp != domain) {
+	return domain;
+}
+
+static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
+{
+	struct dmar_domain *domain, *tmp;
+
+	domain = find_domain(dev);
+	if (domain)
+		goto out;
+
+	domain = find_or_alloc_domain(dev, gaw);
+	if (!domain)
+		goto out;
+
+	tmp = set_domain_for_dev(dev, domain);
+	if (!tmp || domain != tmp) {
 		domain_exit(domain);
 		domain = tmp;
 	}
 
+out:
+
 	return domain;
 }
 
@@ -3394,17 +3428,18 @@
 
 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 {
+	struct dmar_domain *domain, *tmp;
 	struct dmar_rmrr_unit *rmrr;
-	struct dmar_domain *domain;
 	struct device *i_dev;
 	int i, ret;
 
-	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-	if (!domain) {
-		pr_err("Allocating domain for %s failed\n",
-		       dev_name(dev));
-		return NULL;
-	}
+	domain = find_domain(dev);
+	if (domain)
+		goto out;
+
+	domain = find_or_alloc_domain(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	if (!domain)
+		goto out;
 
 	/* We have a new domain - setup possible RMRRs for the device */
 	rcu_read_lock();
@@ -3423,6 +3458,18 @@
 	}
 	rcu_read_unlock();
 
+	tmp = set_domain_for_dev(dev, domain);
+	if (!tmp || domain != tmp) {
+		domain_exit(domain);
+		domain = tmp;
+	}
+
+out:
+
+	if (!domain)
+		pr_err("Allocating domain for %s failed\n", dev_name(dev));
+
+
 	return domain;
 }
 
diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index def8ca1..f50e51c 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -633,6 +633,10 @@
 {
 	struct arm_v7s_io_pgtable *data;
 
+#ifdef PHYS_OFFSET
+	if (upper_32_bits(PHYS_OFFSET))
+		return NULL;
+#endif
 	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
 		return NULL;
 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b06d935..9a2f196 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -31,6 +31,7 @@
 #include <linux/err.h>
 #include <linux/pci.h>
 #include <linux/bitops.h>
+#include <linux/property.h>
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
@@ -1613,3 +1614,60 @@
 
 	return ret;
 }
+
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
+		      const struct iommu_ops *ops)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	if (fwspec)
+		return ops == fwspec->ops ? 0 : -EINVAL;
+
+	fwspec = kzalloc(sizeof(*fwspec), GFP_KERNEL);
+	if (!fwspec)
+		return -ENOMEM;
+
+	of_node_get(to_of_node(iommu_fwnode));
+	fwspec->iommu_fwnode = iommu_fwnode;
+	fwspec->ops = ops;
+	dev->iommu_fwspec = fwspec;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_init);
+
+void iommu_fwspec_free(struct device *dev)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+
+	if (fwspec) {
+		fwnode_handle_put(fwspec->iommu_fwnode);
+		kfree(fwspec);
+		dev->iommu_fwspec = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_free);
+
+int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids)
+{
+	struct iommu_fwspec *fwspec = dev->iommu_fwspec;
+	size_t size;
+	int i;
+
+	if (!fwspec)
+		return -EINVAL;
+
+	size = offsetof(struct iommu_fwspec, ids[fwspec->num_ids + num_ids]);
+	if (size > sizeof(*fwspec)) {
+		fwspec = krealloc(dev->iommu_fwspec, size, GFP_KERNEL);
+		if (!fwspec)
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < num_ids; i++)
+		fwspec->ids[fwspec->num_ids + i] = ids[i];
+
+	fwspec->num_ids += num_ids;
+	dev->iommu_fwspec = fwspec;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_fwspec_add_ids);
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 2fdbac6..ace331d 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -636,7 +636,7 @@
 	spin_unlock(&ipmmu_devices_lock);
 
 	if (ret < 0)
-		return -ENODEV;
+		goto error;
 
 	for (i = 0; i < num_utlbs; ++i) {
 		if (utlbs[i] >= mmu->num_utlbs) {
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index 57f23ea..5b82862 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -22,6 +22,7 @@
 #include <linux/limits.h>
 #include <linux/of.h>
 #include <linux/of_iommu.h>
+#include <linux/of_pci.h>
 #include <linux/slab.h>
 
 static const struct of_device_id __iommu_of_table_sentinel
@@ -134,6 +135,47 @@
 	return ops;
 }
 
+static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data)
+{
+	struct of_phandle_args *iommu_spec = data;
+
+	iommu_spec->args[0] = alias;
+	return iommu_spec->np == pdev->bus->dev.of_node;
+}
+
+static const struct iommu_ops
+*of_pci_iommu_configure(struct pci_dev *pdev, struct device_node *bridge_np)
+{
+	const struct iommu_ops *ops;
+	struct of_phandle_args iommu_spec;
+
+	/*
+	 * Start by tracing the RID alias down the PCI topology as
+	 * far as the host bridge whose OF node we have...
+	 * (we're not even attempting to handle multi-alias devices yet)
+	 */
+	iommu_spec.args_count = 1;
+	iommu_spec.np = bridge_np;
+	pci_for_each_dma_alias(pdev, __get_pci_rid, &iommu_spec);
+	/*
+	 * ...then find out what that becomes once it escapes the PCI
+	 * bus into the system beyond, and which IOMMU it ends up at.
+	 */
+	iommu_spec.np = NULL;
+	if (of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map",
+			   "iommu-map-mask", &iommu_spec.np, iommu_spec.args))
+		return NULL;
+
+	ops = of_iommu_get_ops(iommu_spec.np);
+	if (!ops || !ops->of_xlate ||
+	    iommu_fwspec_init(&pdev->dev, &iommu_spec.np->fwnode, ops) ||
+	    ops->of_xlate(&pdev->dev, &iommu_spec))
+		ops = NULL;
+
+	of_node_put(iommu_spec.np);
+	return ops;
+}
+
 const struct iommu_ops *of_iommu_configure(struct device *dev,
 					   struct device_node *master_np)
 {
@@ -142,12 +184,8 @@
 	const struct iommu_ops *ops = NULL;
 	int idx = 0;
 
-	/*
-	 * We can't do much for PCI devices without knowing how
-	 * device IDs are wired up from the PCI bus to the IOMMU.
-	 */
 	if (dev_is_pci(dev))
-		return NULL;
+		return of_pci_iommu_configure(to_pci_dev(dev), master_np);
 
 	/*
 	 * We don't currently walk up the tree looking for a parent IOMMU.
@@ -160,7 +198,9 @@
 		np = iommu_spec.np;
 		ops = of_iommu_get_ops(np);
 
-		if (!ops || !ops->of_xlate || ops->of_xlate(dev, &iommu_spec))
+		if (!ops || !ops->of_xlate ||
+		    iommu_fwspec_init(dev, &np->fwnode, ops) ||
+		    ops->of_xlate(dev, &iommu_spec))
 			goto err_put_node;
 
 		of_node_put(np);
diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
index 35eb7ac..863e073 100644
--- a/drivers/irqchip/irq-gic-v2m.c
+++ b/drivers/irqchip/irq-gic-v2m.c
@@ -16,6 +16,7 @@
 #define pr_fmt(fmt) "GICv2m: " fmt
 
 #include <linux/acpi.h>
+#include <linux/dma-iommu.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
@@ -108,6 +109,8 @@
 
 	if (v2m->flags & GICV2M_NEEDS_SPI_OFFSET)
 		msg->data -= v2m->spi_offset;
+
+	iommu_dma_map_msi_msg(data->irq, msg);
 }
 
 static struct irq_chip gicv2m_irq_chip = {
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 35c851c..003495d 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -19,6 +19,7 @@
 #include <linux/bitmap.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
+#include <linux/dma-iommu.h>
 #include <linux/interrupt.h>
 #include <linux/irqdomain.h>
 #include <linux/acpi_iort.h>
@@ -659,6 +660,8 @@
 	msg->address_lo		= addr & ((1UL << 32) - 1);
 	msg->address_hi		= addr >> 32;
 	msg->data		= its_get_event_id(d);
+
+	iommu_dma_map_msi_msg(d->irq, msg);
 }
 
 static struct irq_chip its_irq_chip = {
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index a2e68f7..393fea8 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -26,6 +26,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/of_pci.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -592,87 +593,16 @@
 			    u32 rid_in)
 {
 	struct device *parent_dev;
-	struct device_node *msi_controller_node;
-	struct device_node *msi_np = *np;
-	u32 map_mask, masked_rid, rid_base, msi_base, rid_len, phandle;
-	int msi_map_len;
-	bool matched;
 	u32 rid_out = rid_in;
-	const __be32 *msi_map = NULL;
 
 	/*
 	 * Walk up the device parent links looking for one with a
 	 * "msi-map" property.
 	 */
-	for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent) {
-		if (!parent_dev->of_node)
-			continue;
-
-		msi_map = of_get_property(parent_dev->of_node,
-					  "msi-map", &msi_map_len);
-		if (!msi_map)
-			continue;
-
-		if (msi_map_len % (4 * sizeof(__be32))) {
-			dev_err(parent_dev, "Error: Bad msi-map length: %d\n",
-				msi_map_len);
-			return rid_out;
-		}
-		/* We have a good parent_dev and msi_map, let's use them. */
-		break;
-	}
-	if (!msi_map)
-		return rid_out;
-
-	/* The default is to select all bits. */
-	map_mask = 0xffffffff;
-
-	/*
-	 * Can be overridden by "msi-map-mask" property.  If
-	 * of_property_read_u32() fails, the default is used.
-	 */
-	of_property_read_u32(parent_dev->of_node, "msi-map-mask", &map_mask);
-
-	masked_rid = map_mask & rid_in;
-	matched = false;
-	while (!matched && msi_map_len >= 4 * sizeof(__be32)) {
-		rid_base = be32_to_cpup(msi_map + 0);
-		phandle = be32_to_cpup(msi_map + 1);
-		msi_base = be32_to_cpup(msi_map + 2);
-		rid_len = be32_to_cpup(msi_map + 3);
-
-		if (rid_base & ~map_mask) {
-			dev_err(parent_dev,
-				"Invalid msi-map translation - msi-map-mask (0x%x) ignores rid-base (0x%x)\n",
-				map_mask, rid_base);
-			return rid_out;
-		}
-
-		msi_controller_node = of_find_node_by_phandle(phandle);
-
-		matched = (masked_rid >= rid_base &&
-			   masked_rid < rid_base + rid_len);
-		if (msi_np)
-			matched &= msi_np == msi_controller_node;
-
-		if (matched && !msi_np) {
-			*np = msi_np = msi_controller_node;
+	for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
+		if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
+				    "msi-map-mask", np, &rid_out))
 			break;
-		}
-
-		of_node_put(msi_controller_node);
-		msi_map_len -= 4 * sizeof(__be32);
-		msi_map += 4;
-	}
-	if (!matched)
-		return rid_out;
-
-	rid_out = masked_rid - rid_base + msi_base;
-	dev_dbg(dev,
-		"msi-map at: %s, using mask %08x, rid-base: %08x, msi-base: %08x, length: %08x, rid: %08x -> %08x\n",
-		dev_name(parent_dev), map_mask, rid_base, msi_base,
-		rid_len, rid_in, rid_out);
-
 	return rid_out;
 }
 
diff --git a/drivers/of/of_pci.c b/drivers/of/of_pci.c
index 589b30c..b58be12 100644
--- a/drivers/of/of_pci.c
+++ b/drivers/of/of_pci.c
@@ -308,3 +308,105 @@
 EXPORT_SYMBOL_GPL(of_pci_find_msi_chip_by_node);
 
 #endif /* CONFIG_PCI_MSI */
+
+/**
+ * of_pci_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: PCI requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a PCI requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_pci_map_rid(struct device_node *np, u32 rid,
+		   const char *map_name, const char *map_mask_name,
+		   struct device_node **target, u32 *id_out)
+{
+	u32 map_mask, masked_rid;
+	int map_len;
+	const __be32 *map = NULL;
+
+	if (!np || !map_name || (!target && !id_out))
+		return -EINVAL;
+
+	map = of_get_property(np, map_name, &map_len);
+	if (!map) {
+		if (target)
+			return -ENODEV;
+		/* Otherwise, no map implies no translation */
+		*id_out = rid;
+		return 0;
+	}
+
+	if (!map_len || map_len % (4 * sizeof(*map))) {
+		pr_err("%s: Error: Bad %s length: %d\n", np->full_name,
+			map_name, map_len);
+		return -EINVAL;
+	}
+
+	/* The default is to select all bits. */
+	map_mask = 0xffffffff;
+
+	/*
+	 * Can be overridden by "{iommu,msi}-map-mask" property.
+	 * If of_property_read_u32() fails, the default is used.
+	 */
+	if (map_mask_name)
+		of_property_read_u32(np, map_mask_name, &map_mask);
+
+	masked_rid = map_mask & rid;
+	for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+		struct device_node *phandle_node;
+		u32 rid_base = be32_to_cpup(map + 0);
+		u32 phandle = be32_to_cpup(map + 1);
+		u32 out_base = be32_to_cpup(map + 2);
+		u32 rid_len = be32_to_cpup(map + 3);
+
+		if (rid_base & ~map_mask) {
+			pr_err("%s: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+				np->full_name, map_name, map_name,
+				map_mask, rid_base);
+			return -EFAULT;
+		}
+
+		if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+			continue;
+
+		phandle_node = of_find_node_by_phandle(phandle);
+		if (!phandle_node)
+			return -ENODEV;
+
+		if (target) {
+			if (*target)
+				of_node_put(phandle_node);
+			else
+				*target = phandle_node;
+
+			if (*target != phandle_node)
+				continue;
+		}
+
+		if (id_out)
+			*id_out = masked_rid - rid_base + out_base;
+
+		pr_debug("%s: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+			np->full_name, map_name, map_mask, rid_base, out_base,
+			rid_len, rid, *id_out);
+		return 0;
+	}
+
+	pr_err("%s: Invalid %s translation - no match for rid 0x%x on %s\n",
+		np->full_name, map_name, rid,
+		target && *target ? (*target)->full_name : "any target");
+	return -EFAULT;
+}
diff --git a/include/dt-bindings/memory/mt2701-larb-port.h b/include/dt-bindings/memory/mt2701-larb-port.h
index 78f6678..6764d74 100644
--- a/include/dt-bindings/memory/mt2701-larb-port.h
+++ b/include/dt-bindings/memory/mt2701-larb-port.h
@@ -26,7 +26,7 @@
 #define LARB0_PORT_OFFSET		0
 #define LARB1_PORT_OFFSET		11
 #define LARB2_PORT_OFFSET		21
-#define LARB3_PORT_OFFSET		43
+#define LARB3_PORT_OFFSET		44
 
 #define MT2701_M4U_ID_LARB0(port)	((port) + LARB0_PORT_OFFSET)
 #define MT2701_M4U_ID_LARB1(port)	((port) + LARB1_PORT_OFFSET)
diff --git a/include/linux/device.h b/include/linux/device.h
index 38f0281..bc41e87 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -41,6 +41,7 @@
 struct fwnode_handle;
 struct iommu_ops;
 struct iommu_group;
+struct iommu_fwspec;
 
 struct bus_attribute {
 	struct attribute	attr;
@@ -765,6 +766,7 @@
  * 		gone away. This should be set by the allocator of the
  * 		device (i.e. the bus driver that discovered the device).
  * @iommu_group: IOMMU group the device belongs to.
+ * @iommu_fwspec: IOMMU-specific properties supplied by firmware.
  *
  * @offline_disabled: If set, the device is permanently online.
  * @offline:	Set after successful invocation of bus type's .offline().
@@ -849,6 +851,7 @@
 
 	void	(*release)(struct device *dev);
 	struct iommu_group	*iommu_group;
+	struct iommu_fwspec	*iommu_fwspec;
 
 	bool			offline_disabled:1;
 	bool			offline:1;
diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h
index 81c5c8d1..32c5890 100644
--- a/include/linux/dma-iommu.h
+++ b/include/linux/dma-iommu.h
@@ -21,6 +21,7 @@
 
 #ifdef CONFIG_IOMMU_DMA
 #include <linux/iommu.h>
+#include <linux/msi.h>
 
 int iommu_dma_init(void);
 
@@ -29,7 +30,8 @@
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
 /* Setup call for arch DMA mapping code */
-int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size);
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
+		u64 size, struct device *dev);
 
 /* General helpers for DMA-API <-> IOMMU-API interaction */
 int dma_direction_to_prot(enum dma_data_direction dir, bool coherent);
@@ -62,9 +64,13 @@
 int iommu_dma_supported(struct device *dev, u64 mask);
 int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr);
 
+/* The DMA API isn't _quite_ the whole story, though... */
+void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg);
+
 #else
 
 struct iommu_domain;
+struct msi_msg;
 
 static inline int iommu_dma_init(void)
 {
@@ -80,6 +86,10 @@
 {
 }
 
+static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg)
+{
+}
+
 #endif	/* CONFIG_IOMMU_DMA */
 #endif	/* __KERNEL__ */
 #endif	/* __DMA_IOMMU_H */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a35fb8b..436dc21 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -331,10 +331,32 @@
 /* Generic device grouping function */
 extern struct iommu_group *generic_device_group(struct device *dev);
 
+/**
+ * struct iommu_fwspec - per-device IOMMU instance data
+ * @ops: ops for this device's IOMMU
+ * @iommu_fwnode: firmware handle for this device's IOMMU
+ * @iommu_priv: IOMMU driver private data for this device
+ * @num_ids: number of associated device IDs
+ * @ids: IDs which this device may present to the IOMMU
+ */
+struct iommu_fwspec {
+	const struct iommu_ops	*ops;
+	struct fwnode_handle	*iommu_fwnode;
+	void			*iommu_priv;
+	unsigned int		num_ids;
+	u32			ids[1];
+};
+
+int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
+		      const struct iommu_ops *ops);
+void iommu_fwspec_free(struct device *dev);
+int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids);
+
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
 struct iommu_group {};
+struct iommu_fwspec {};
 
 static inline bool iommu_present(struct bus_type *bus)
 {
@@ -541,6 +563,23 @@
 {
 }
 
+static inline int iommu_fwspec_init(struct device *dev,
+				    struct fwnode_handle *iommu_fwnode,
+				    const struct iommu_ops *ops)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_fwspec_free(struct device *dev)
+{
+}
+
+static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids,
+				       int num_ids)
+{
+	return -ENODEV;
+}
+
 #endif /* CONFIG_IOMMU_API */
 
 #endif /* __LINUX_IOMMU_H */
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index b969e94..7fd5cfc 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -17,6 +17,9 @@
 int of_pci_parse_bus_range(struct device_node *node, struct resource *res);
 int of_get_pci_domain_nr(struct device_node *node);
 void of_pci_check_probe_only(void);
+int of_pci_map_rid(struct device_node *np, u32 rid,
+		   const char *map_name, const char *map_mask_name,
+		   struct device_node **target, u32 *id_out);
 #else
 static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq)
 {
@@ -52,6 +55,13 @@
 	return -1;
 }
 
+static inline int of_pci_map_rid(struct device_node *np, u32 rid,
+			const char *map_name, const char *map_mask_name,
+			struct device_node **target, u32 *id_out)
+{
+	return -EINVAL;
+}
+
 static inline void of_pci_check_probe_only(void) { }
 #endif