Merge branches 'arm/omap', 'arm/msm', 'arm/rockchip', 'arm/renesas', 'arm/smmu', 'x86/vt-d', 'x86/amd' and 'core' into next

Conflicts:
	drivers/iommu/arm-smmu.c
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 1d54996..22f15f9 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -152,6 +152,19 @@
 
 	  Say N unless you know you need this.
 
+config ROCKCHIP_IOMMU
+	bool "Rockchip IOMMU Support"
+	depends on ARM
+	depends on ARCH_ROCKCHIP || COMPILE_TEST
+	select IOMMU_API
+	select ARM_DMA_USE_IOMMU
+	help
+	  Support for IOMMUs found on Rockchip rk32xx SOCs.
+	  These IOMMUs allow virtualization of the address space used by most
+	  cores within the multimedia subsystem.
+	  Say Y here if you are using a Rockchip SoC that includes an IOMMU
+	  device.
+
 config TEGRA_IOMMU_GART
 	bool "Tegra GART IOMMU Support"
 	depends on ARCH_TEGRA_2x_SOC
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 18fa446a..7b976f2 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -12,6 +12,7 @@
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
 obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
 obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
+obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
 obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
 obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 505a9ad..b205f76 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3411,6 +3411,8 @@
 		return true;
 	case IOMMU_CAP_INTR_REMAP:
 		return (irq_remapping_enabled == 1);
+	case IOMMU_CAP_NOEXEC:
+		return false;
 	}
 
 	return false;
@@ -3424,6 +3426,7 @@
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
 	.unmap = amd_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.pgsize_bitmap	= AMD_IOMMU_PGSIZES,
 };
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 90d734b..a2d87a6 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -279,10 +279,8 @@
 
 static void put_pasid_state(struct pasid_state *pasid_state)
 {
-	if (atomic_dec_and_test(&pasid_state->count)) {
-		put_device_state(pasid_state->device_state);
+	if (atomic_dec_and_test(&pasid_state->count))
 		wake_up(&pasid_state->wq);
-	}
 }
 
 static void put_pasid_state_wait(struct pasid_state *pasid_state)
@@ -291,9 +289,7 @@
 
 	prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);
 
-	if (atomic_dec_and_test(&pasid_state->count))
-		put_device_state(pasid_state->device_state);
-	else
+	if (!atomic_dec_and_test(&pasid_state->count))
 		schedule();
 
 	finish_wait(&pasid_state->wq, &wait);
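
put_pasid_state() and put_pasid_state_wait() now follow a plain decrement-and-wake shape: the dropper wakes the wait queue when the count reaches zero, and the waiting variant sleeps until that happens; the device_state reference is no longer dropped from these paths. For readers unfamiliar with the pattern, a minimal user-space model using pthreads (names and primitives are illustrative, not the kernel's atomics or wait queues):

/* Hypothetical user-space model of the decrement-and-wake pattern above.
 * Build with: cc -pthread model.c */
#include <pthread.h>
#include <stdio.h>

struct state {
	int count;			/* models atomic_t count */
	pthread_mutex_t lock;
	pthread_cond_t wq;		/* models the wait queue */
};

static void put_state(struct state *s)		/* like put_pasid_state() */
{
	pthread_mutex_lock(&s->lock);
	if (--s->count == 0)
		pthread_cond_broadcast(&s->wq);	/* wake_up() */
	pthread_mutex_unlock(&s->lock);
}

static void put_state_wait(struct state *s)	/* like put_pasid_state_wait() */
{
	pthread_mutex_lock(&s->lock);
	if (--s->count != 0)
		while (s->count != 0)
			pthread_cond_wait(&s->wq, &s->lock);	/* schedule() */
	pthread_mutex_unlock(&s->lock);
}

static void *worker(void *arg)
{
	put_state(arg);
	return NULL;
}

int main(void)
{
	struct state s = { .count = 2,
			   .lock = PTHREAD_MUTEX_INITIALIZER,
			   .wq = PTHREAD_COND_INITIALIZER };
	pthread_t t;

	pthread_create(&t, NULL, worker, &s);
	put_state_wait(&s);		/* returns once the worker dropped its ref */
	pthread_join(t, NULL);
	printf("count=%d\n", s.count);	/* 0 */
	return 0;
}
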
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 60558f7..b8aac13 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -404,9 +404,16 @@
 #define ARM_SMMU_CB_ASID(cfg)		((cfg)->cbndx)
 #define ARM_SMMU_CB_VMID(cfg)		((cfg)->cbndx + 1)
 
+enum arm_smmu_domain_stage {
+	ARM_SMMU_DOMAIN_S1 = 0,
+	ARM_SMMU_DOMAIN_S2,
+	ARM_SMMU_DOMAIN_NESTED,
+};
+
 struct arm_smmu_domain {
 	struct arm_smmu_device		*smmu;
 	struct arm_smmu_cfg		cfg;
+	enum arm_smmu_domain_stage	stage;
 	spinlock_t			lock;
 };
 
@@ -906,19 +913,46 @@
 	if (smmu_domain->smmu)
 		goto out_unlock;
 
-	if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
+	/*
+	 * Mapping the requested stage onto what we support is surprisingly
+	 * complicated, mainly because the spec allows S1+S2 SMMUs without
+	 * support for nested translation. That means we end up with the
+	 * following table:
+	 *
+	 * Requested        Supported        Actual
+	 *     S1               N              S1
+	 *     S1             S1+S2            S1
+	 *     S1               S2             S2
+	 *     S1               S1             S1
+	 *     N                N              N
+	 *     N              S1+S2            S2
+	 *     N                S2             S2
+	 *     N                S1             S1
+	 *
+	 * Note that you can't actually request stage-2 mappings.
+	 */
+	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
+		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+	switch (smmu_domain->stage) {
+	case ARM_SMMU_DOMAIN_S1:
+		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
+		start = smmu->num_s2_context_banks;
+		break;
+	case ARM_SMMU_DOMAIN_NESTED:
 		/*
 		 * We will likely want to change this if/when KVM gets
 		 * involved.
 		 */
-		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
-		start = smmu->num_s2_context_banks;
-	} else if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) {
-		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
-		start = smmu->num_s2_context_banks;
-	} else {
+	case ARM_SMMU_DOMAIN_S2:
 		cfg->cbar = CBAR_TYPE_S2_TRANS;
 		start = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		goto out_unlock;
 	}
 
 	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
@@ -1281,7 +1315,7 @@
 				   unsigned long pfn, int prot, int stage)
 {
 	pte_t *pte, *start;
-	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN;
+	pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF;
 
 	if (pmd_none(*pmd)) {
 		/* Allocate a new set of tables */
@@ -1315,10 +1349,11 @@
 			pteval |= ARM_SMMU_PTE_MEMATTR_NC;
 	}
 
+	if (prot & IOMMU_NOEXEC)
+		pteval |= ARM_SMMU_PTE_XN;
+
 	/* If no access, create a faulting entry to avoid TLB fills */
-	if (prot & IOMMU_EXEC)
-		pteval &= ~ARM_SMMU_PTE_XN;
-	else if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
+	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
 		pteval &= ~ARM_SMMU_PTE_PAGE;
 
 	pteval |= ARM_SMMU_PTE_SH_IS;
@@ -1568,6 +1603,8 @@
 		return true;
 	case IOMMU_CAP_INTR_REMAP:
 		return true; /* MSIs are just memory writes */
+	case IOMMU_CAP_NOEXEC:
+		return true;
 	default:
 		return false;
 	}
@@ -1644,20 +1681,57 @@
 	iommu_group_remove_device(dev);
 }
 
+static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+				    enum iommu_attr attr, void *data)
+{
+	struct arm_smmu_domain *smmu_domain = domain->priv;
+
+	switch (attr) {
+	case DOMAIN_ATTR_NESTING:
+		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+		return 0;
+	default:
+		return -ENODEV;
+	}
+}
+
+static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+				    enum iommu_attr attr, void *data)
+{
+	struct arm_smmu_domain *smmu_domain = domain->priv;
+
+	switch (attr) {
+	case DOMAIN_ATTR_NESTING:
+		if (smmu_domain->smmu)
+			return -EPERM;
+		if (*(int *)data)
+			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+		else
+			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+
+		return 0;
+	default:
+		return -ENODEV;
+	}
+}
+
 static const struct iommu_ops arm_smmu_ops = {
-	.capable	= arm_smmu_capable,
-	.domain_init	= arm_smmu_domain_init,
-	.domain_destroy	= arm_smmu_domain_destroy,
-	.attach_dev	= arm_smmu_attach_dev,
-	.detach_dev	= arm_smmu_detach_dev,
-	.map		= arm_smmu_map,
-	.unmap		= arm_smmu_unmap,
-	.iova_to_phys	= arm_smmu_iova_to_phys,
-	.add_device	= arm_smmu_add_device,
-	.remove_device	= arm_smmu_remove_device,
-	.pgsize_bitmap	= (SECTION_SIZE |
-			   ARM_SMMU_PTE_CONT_SIZE |
-			   PAGE_SIZE),
+	.capable		= arm_smmu_capable,
+	.domain_init		= arm_smmu_domain_init,
+	.domain_destroy		= arm_smmu_domain_destroy,
+	.attach_dev		= arm_smmu_attach_dev,
+	.detach_dev		= arm_smmu_detach_dev,
+	.map			= arm_smmu_map,
+	.unmap			= arm_smmu_unmap,
+	.map_sg			= default_iommu_map_sg,
+	.iova_to_phys		= arm_smmu_iova_to_phys,
+	.add_device		= arm_smmu_add_device,
+	.remove_device		= arm_smmu_remove_device,
+	.domain_get_attr	= arm_smmu_domain_get_attr,
+	.domain_set_attr	= arm_smmu_domain_set_attr,
+	.pgsize_bitmap		= (SECTION_SIZE |
+				   ARM_SMMU_PTE_CONT_SIZE |
+				   PAGE_SIZE),
 };
 
 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
@@ -2072,8 +2146,20 @@
 
 static int __init arm_smmu_init(void)
 {
+	struct device_node *np;
 	int ret;
 
+	/*
+	 * Play nice with systems that don't have an ARM SMMU by checking that
+	 * an ARM SMMU exists in the system before proceeding with the driver
+	 * and IOMMU bus operation registration.
+	 */
+	np = of_find_matching_node(NULL, arm_smmu_of_match);
+	if (!np)
+		return 0;
+
+	of_node_put(np);
+
 	ret = platform_driver_register(&arm_smmu_driver);
 	if (ret)
 		return ret;
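
The two feature checks in arm_smmu_init_domain_context() above clamp the requested stage to what the SMMU hardware actually implements; the switch statement then picks the context bank type, with nested domains currently configured like stage-2. A minimal user-space sketch of just the clamping step (enum and feature names mirror the driver but are otherwise illustrative):

#include <stdio.h>

enum stage { STAGE_S1, STAGE_S2, STAGE_NESTED };

#define FEAT_TRANS_S1 (1u << 0)
#define FEAT_TRANS_S2 (1u << 1)

/* Clamp the requested stage to the translation stages the SMMU implements,
 * mirroring the two feature checks in arm_smmu_init_domain_context(). */
static enum stage clamp_stage(enum stage requested, unsigned int features)
{
	enum stage stage = requested;

	if (!(features & FEAT_TRANS_S1))
		stage = STAGE_S2;
	if (!(features & FEAT_TRANS_S2))
		stage = STAGE_S1;

	return stage;
}

int main(void)
{
	static const char * const name[] = { "S1", "S2", "NESTED" };

	/* Stage-1 requested on a stage-2-only SMMU ends up as stage-2 ... */
	printf("%s\n", name[clamp_stage(STAGE_S1, FEAT_TRANS_S2)]);
	/* ... and a nested request on a stage-1-only SMMU falls back to S1. */
	printf("%s\n", name[clamp_stage(STAGE_NESTED, FEAT_TRANS_S1)]);
	return 0;
}
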
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index c5c61ca..9847613 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -44,6 +44,14 @@
 
 #include "irq_remapping.h"
 
+typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
+struct dmar_res_callback {
+	dmar_res_handler_t	cb[ACPI_DMAR_TYPE_RESERVED];
+	void			*arg[ACPI_DMAR_TYPE_RESERVED];
+	bool			ignore_unhandled;
+	bool			print_entry;
+};
+
 /*
  * Assumptions:
  * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
@@ -62,11 +70,12 @@
 struct acpi_table_header * __initdata dmar_tbl;
 static acpi_size dmar_tbl_size;
 static int dmar_dev_scope_status = 1;
+static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
 
 static int alloc_iommu(struct dmar_drhd_unit *drhd);
 static void free_iommu(struct intel_iommu *iommu);
 
-static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
+static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
 {
 	/*
 	 * add INCLUDE_ALL at the tail, so scan the list will find it at
@@ -344,24 +353,45 @@
 	.priority = INT_MIN,
 };
 
+static struct dmar_drhd_unit *
+dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
+{
+	struct dmar_drhd_unit *dmaru;
+
+	list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list)
+		if (dmaru->segment == drhd->segment &&
+		    dmaru->reg_base_addr == drhd->address)
+			return dmaru;
+
+	return NULL;
+}
+
 /**
  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  * structure which uniquely represent one DMA remapping hardware unit
  * present in the platform
  */
-static int __init
-dmar_parse_one_drhd(struct acpi_dmar_header *header)
+static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_hardware_unit *drhd;
 	struct dmar_drhd_unit *dmaru;
 	int ret = 0;
 
 	drhd = (struct acpi_dmar_hardware_unit *)header;
-	dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
+	dmaru = dmar_find_dmaru(drhd);
+	if (dmaru)
+		goto out;
+
+	dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
 	if (!dmaru)
 		return -ENOMEM;
 
-	dmaru->hdr = header;
+	/*
+	 * If header is allocated from slab by ACPI _DSM method, we need to
+	 * copy the content because the memory buffer will be freed on return.
+	 */
+	dmaru->hdr = (void *)(dmaru + 1);
+	memcpy(dmaru->hdr, header, header->length);
 	dmaru->reg_base_addr = drhd->address;
 	dmaru->segment = drhd->segment;
 	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
@@ -381,6 +411,11 @@
 		return ret;
 	}
 	dmar_register_drhd_unit(dmaru);
+
+out:
+	if (arg)
+		(*(int *)arg)++;
+
 	return 0;
 }
 
@@ -393,7 +428,8 @@
 	kfree(dmaru);
 }
 
-static int __init dmar_parse_one_andd(struct acpi_dmar_header *header)
+static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
+				      void *arg)
 {
 	struct acpi_dmar_andd *andd = (void *)header;
 
@@ -414,8 +450,7 @@
 }
 
 #ifdef CONFIG_ACPI_NUMA
-static int __init
-dmar_parse_one_rhsa(struct acpi_dmar_header *header)
+static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_rhsa *rhsa;
 	struct dmar_drhd_unit *drhd;
@@ -442,6 +477,8 @@
 
 	return 0;
 }
+#else
+#define	dmar_parse_one_rhsa		dmar_res_noop
 #endif
 
 static void __init
@@ -503,6 +540,52 @@
 	return (ACPI_SUCCESS(status) ? 1 : 0);
 }
 
+static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
+				       size_t len, struct dmar_res_callback *cb)
+{
+	int ret = 0;
+	struct acpi_dmar_header *iter, *next;
+	struct acpi_dmar_header *end = ((void *)start) + len;
+
+	for (iter = start; iter < end && ret == 0; iter = next) {
+		next = (void *)iter + iter->length;
+		if (iter->length == 0) {
+			/* Avoid looping forever on bad ACPI tables */
+			pr_debug(FW_BUG "Invalid 0-length structure\n");
+			break;
+		} else if (next > end) {
+			/* Avoid passing table end */
+			pr_warn(FW_BUG "record passes table end\n");
+			ret = -EINVAL;
+			break;
+		}
+
+		if (cb->print_entry)
+			dmar_table_print_dmar_entry(iter);
+
+		if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
+			/* continue for forward compatibility */
+			pr_debug("Unknown DMAR structure type %d\n",
+				 iter->type);
+		} else if (cb->cb[iter->type]) {
+			ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
+		} else if (!cb->ignore_unhandled) {
+			pr_warn("No handler for DMAR structure type %d\n",
+				iter->type);
+			ret = -EINVAL;
+		}
+	}
+
+	return ret;
+}
+
+static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
+				       struct dmar_res_callback *cb)
+{
+	return dmar_walk_remapping_entries((void *)(dmar + 1),
+			dmar->header.length - sizeof(*dmar), cb);
+}
+
 /**
  * parse_dmar_table - parses the DMA reporting table
  */
@@ -510,9 +593,18 @@
 parse_dmar_table(void)
 {
 	struct acpi_table_dmar *dmar;
-	struct acpi_dmar_header *entry_header;
 	int ret = 0;
 	int drhd_count = 0;
+	struct dmar_res_callback cb = {
+		.print_entry = true,
+		.ignore_unhandled = true,
+		.arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
+		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
+		.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
+		.cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
+		.cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
+		.cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
+	};
 
 	/*
 	 * Do it again, earlier dmar_tbl mapping could be mapped with
@@ -536,51 +628,10 @@
 	}
 
 	pr_info("Host address width %d\n", dmar->width + 1);
-
-	entry_header = (struct acpi_dmar_header *)(dmar + 1);
-	while (((unsigned long)entry_header) <
-			(((unsigned long)dmar) + dmar_tbl->length)) {
-		/* Avoid looping forever on bad ACPI tables */
-		if (entry_header->length == 0) {
-			pr_warn("Invalid 0-length structure\n");
-			ret = -EINVAL;
-			break;
-		}
-
-		dmar_table_print_dmar_entry(entry_header);
-
-		switch (entry_header->type) {
-		case ACPI_DMAR_TYPE_HARDWARE_UNIT:
-			drhd_count++;
-			ret = dmar_parse_one_drhd(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
-			ret = dmar_parse_one_rmrr(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_ROOT_ATS:
-			ret = dmar_parse_one_atsr(entry_header);
-			break;
-		case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
-#ifdef CONFIG_ACPI_NUMA
-			ret = dmar_parse_one_rhsa(entry_header);
-#endif
-			break;
-		case ACPI_DMAR_TYPE_NAMESPACE:
-			ret = dmar_parse_one_andd(entry_header);
-			break;
-		default:
-			pr_warn("Unknown DMAR structure type %d\n",
-				entry_header->type);
-			ret = 0; /* for forward compatibility */
-			break;
-		}
-		if (ret)
-			break;
-
-		entry_header = ((void *)entry_header + entry_header->length);
-	}
-	if (drhd_count == 0)
+	ret = dmar_walk_dmar_table(dmar, &cb);
+	if (ret == 0 && drhd_count == 0)
 		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
+
 	return ret;
 }
 
@@ -778,76 +829,68 @@
 		dmi_get_system_info(DMI_PRODUCT_VERSION));
 }
 
-static int __init check_zero_address(void)
+static int __ref
+dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
 {
-	struct acpi_table_dmar *dmar;
-	struct acpi_dmar_header *entry_header;
 	struct acpi_dmar_hardware_unit *drhd;
+	void __iomem *addr;
+	u64 cap, ecap;
 
-	dmar = (struct acpi_table_dmar *)dmar_tbl;
-	entry_header = (struct acpi_dmar_header *)(dmar + 1);
-
-	while (((unsigned long)entry_header) <
-			(((unsigned long)dmar) + dmar_tbl->length)) {
-		/* Avoid looping forever on bad ACPI tables */
-		if (entry_header->length == 0) {
-			pr_warn("Invalid 0-length structure\n");
-			return 0;
-		}
-
-		if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
-			void __iomem *addr;
-			u64 cap, ecap;
-
-			drhd = (void *)entry_header;
-			if (!drhd->address) {
-				warn_invalid_dmar(0, "");
-				goto failed;
-			}
-
-			addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
-			if (!addr ) {
-				printk("IOMMU: can't validate: %llx\n", drhd->address);
-				goto failed;
-			}
-			cap = dmar_readq(addr + DMAR_CAP_REG);
-			ecap = dmar_readq(addr + DMAR_ECAP_REG);
-			early_iounmap(addr, VTD_PAGE_SIZE);
-			if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
-				warn_invalid_dmar(drhd->address,
-						  " returns all ones");
-				goto failed;
-			}
-		}
-
-		entry_header = ((void *)entry_header + entry_header->length);
+	drhd = (void *)entry;
+	if (!drhd->address) {
+		warn_invalid_dmar(0, "");
+		return -EINVAL;
 	}
-	return 1;
 
-failed:
+	if (arg)
+		addr = ioremap(drhd->address, VTD_PAGE_SIZE);
+	else
+		addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
+	if (!addr) {
+		pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
+		return -EINVAL;
+	}
+
+	cap = dmar_readq(addr + DMAR_CAP_REG);
+	ecap = dmar_readq(addr + DMAR_ECAP_REG);
+
+	if (arg)
+		iounmap(addr);
+	else
+		early_iounmap(addr, VTD_PAGE_SIZE);
+
+	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
+		warn_invalid_dmar(drhd->address, " returns all ones");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
 int __init detect_intel_iommu(void)
 {
 	int ret;
+	struct dmar_res_callback validate_drhd_cb = {
+		.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
+		.ignore_unhandled = true,
+	};
 
 	down_write(&dmar_global_lock);
 	ret = dmar_table_detect();
 	if (ret)
-		ret = check_zero_address();
-	{
-		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
-			iommu_detected = 1;
-			/* Make sure ACS will be enabled */
-			pci_request_acs();
-		}
+		ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
+					    &validate_drhd_cb);
+	if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
+		iommu_detected = 1;
+		/* Make sure ACS will be enabled */
+		pci_request_acs();
+	}
 
 #ifdef CONFIG_X86
-		if (ret)
-			x86_init.iommu.iommu_init = intel_iommu_init;
+	if (ret)
+		x86_init.iommu.iommu_init = intel_iommu_init;
 #endif
-	}
+
 	early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
 	dmar_tbl = NULL;
 	up_write(&dmar_global_lock);
@@ -931,11 +974,32 @@
 	return err;
 }
 
+static int dmar_alloc_seq_id(struct intel_iommu *iommu)
+{
+	iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
+					    DMAR_UNITS_SUPPORTED);
+	if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
+		iommu->seq_id = -1;
+	} else {
+		set_bit(iommu->seq_id, dmar_seq_ids);
+		sprintf(iommu->name, "dmar%d", iommu->seq_id);
+	}
+
+	return iommu->seq_id;
+}
+
+static void dmar_free_seq_id(struct intel_iommu *iommu)
+{
+	if (iommu->seq_id >= 0) {
+		clear_bit(iommu->seq_id, dmar_seq_ids);
+		iommu->seq_id = -1;
+	}
+}
+
 static int alloc_iommu(struct dmar_drhd_unit *drhd)
 {
 	struct intel_iommu *iommu;
 	u32 ver, sts;
-	static int iommu_allocated = 0;
 	int agaw = 0;
 	int msagaw = 0;
 	int err;
@@ -949,13 +1013,16 @@
 	if (!iommu)
 		return -ENOMEM;
 
-	iommu->seq_id = iommu_allocated++;
-	sprintf (iommu->name, "dmar%d", iommu->seq_id);
+	if (dmar_alloc_seq_id(iommu) < 0) {
+		pr_err("IOMMU: failed to allocate seq_id\n");
+		err = -ENOSPC;
+		goto error;
+	}
 
 	err = map_iommu(iommu, drhd->reg_base_addr);
 	if (err) {
 		pr_err("IOMMU: failed to map %s\n", iommu->name);
-		goto error;
+		goto error_free_seq_id;
 	}
 
 	err = -EINVAL;
@@ -1005,9 +1072,11 @@
 
 	return 0;
 
- err_unmap:
+err_unmap:
 	unmap_iommu(iommu);
- error:
+error_free_seq_id:
+	dmar_free_seq_id(iommu);
+error:
 	kfree(iommu);
 	return err;
 }
@@ -1031,6 +1100,7 @@
 	if (iommu->reg)
 		unmap_iommu(iommu);
 
+	dmar_free_seq_id(iommu);
 	kfree(iommu);
 }
 
@@ -1661,12 +1731,17 @@
 	return dmar->flags & 0x1;
 }
 
+/* Check whether DMAR units are in use */
+static inline bool dmar_in_use(void)
+{
+	return irq_remapping_enabled || intel_iommu_enabled;
+}
+
 static int __init dmar_free_unused_resources(void)
 {
 	struct dmar_drhd_unit *dmaru, *dmaru_n;
 
-	/* DMAR units are in use */
-	if (irq_remapping_enabled || intel_iommu_enabled)
+	if (dmar_in_use())
 		return 0;
 
 	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
@@ -1684,3 +1759,242 @@
 
 late_initcall(dmar_free_unused_resources);
 IOMMU_INIT_POST(detect_intel_iommu);
+
+/*
+ * DMAR Hotplug Support
+ * For more details, please refer to Intel(R) Virtualization Technology
+ * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
+ * "Remapping Hardware Unit Hot Plug".
+ */
+static u8 dmar_hp_uuid[] = {
+	/* 0000 */    0xA6, 0xA3, 0xC1, 0xD8, 0x9B, 0xBE, 0x9B, 0x4C,
+	/* 0008 */    0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF
+};
+
+/*
+ * Currently there's only one revision and BIOS will not check the revision id,
+ * so use 0 for safety.
+ */
+#define	DMAR_DSM_REV_ID			0
+#define	DMAR_DSM_FUNC_DRHD		1
+#define	DMAR_DSM_FUNC_ATSR		2
+#define	DMAR_DSM_FUNC_RHSA		3
+
+static inline bool dmar_detect_dsm(acpi_handle handle, int func)
+{
+	return acpi_check_dsm(handle, dmar_hp_uuid, DMAR_DSM_REV_ID, 1 << func);
+}
+
+static int dmar_walk_dsm_resource(acpi_handle handle, int func,
+				  dmar_res_handler_t handler, void *arg)
+{
+	int ret = -ENODEV;
+	union acpi_object *obj;
+	struct acpi_dmar_header *start;
+	struct dmar_res_callback callback;
+	static int res_type[] = {
+		[DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
+		[DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
+		[DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
+	};
+
+	if (!dmar_detect_dsm(handle, func))
+		return 0;
+
+	obj = acpi_evaluate_dsm_typed(handle, dmar_hp_uuid, DMAR_DSM_REV_ID,
+				      func, NULL, ACPI_TYPE_BUFFER);
+	if (!obj)
+		return -ENODEV;
+
+	memset(&callback, 0, sizeof(callback));
+	callback.cb[res_type[func]] = handler;
+	callback.arg[res_type[func]] = arg;
+	start = (struct acpi_dmar_header *)obj->buffer.pointer;
+	ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
+
+	ACPI_FREE(obj);
+
+	return ret;
+}
+
+static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	int ret;
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (!dmaru)
+		return -ENODEV;
+
+	ret = dmar_ir_hotplug(dmaru, true);
+	if (ret == 0)
+		ret = dmar_iommu_hotplug(dmaru, true);
+
+	return ret;
+}
+
+static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	int i, ret;
+	struct device *dev;
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (!dmaru)
+		return 0;
+
+	/*
+	 * All PCI devices managed by this unit should have been destroyed.
+	 */
+	if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt)
+		for_each_active_dev_scope(dmaru->devices,
+					  dmaru->devices_cnt, i, dev)
+			return -EBUSY;
+
+	ret = dmar_ir_hotplug(dmaru, false);
+	if (ret == 0)
+		ret = dmar_iommu_hotplug(dmaru, false);
+
+	return ret;
+}
+
+static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
+{
+	struct dmar_drhd_unit *dmaru;
+
+	dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
+	if (dmaru) {
+		list_del_rcu(&dmaru->list);
+		synchronize_rcu();
+		dmar_free_drhd(dmaru);
+	}
+
+	return 0;
+}
+
+static int dmar_hotplug_insert(acpi_handle handle)
+{
+	int ret;
+	int drhd_count = 0;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_validate_one_drhd, (void *)1);
+	if (ret)
+		goto out;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_parse_one_drhd, (void *)&drhd_count);
+	if (ret == 0 && drhd_count == 0) {
+		pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
+		goto out;
+	} else if (ret) {
+		goto release_drhd;
+	}
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
+				     &dmar_parse_one_rhsa, NULL);
+	if (ret)
+		goto release_drhd;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+				     &dmar_parse_one_atsr, NULL);
+	if (ret)
+		goto release_atsr;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_hp_add_drhd, NULL);
+	if (!ret)
+		return 0;
+
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+			       &dmar_hp_remove_drhd, NULL);
+release_atsr:
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+			       &dmar_release_one_atsr, NULL);
+release_drhd:
+	dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+			       &dmar_hp_release_drhd, NULL);
+out:
+	return ret;
+}
+
+static int dmar_hotplug_remove(acpi_handle handle)
+{
+	int ret;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+				     &dmar_check_one_atsr, NULL);
+	if (ret)
+		return ret;
+
+	ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				     &dmar_hp_remove_drhd, NULL);
+	if (ret == 0) {
+		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
+					       &dmar_release_one_atsr, NULL));
+		WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+					       &dmar_hp_release_drhd, NULL));
+	} else {
+		dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
+				       &dmar_hp_add_drhd, NULL);
+	}
+
+	return ret;
+}
+
+static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
+				       void *context, void **retval)
+{
+	acpi_handle *phdl = retval;
+
+	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
+		*phdl = handle;
+		return AE_CTRL_TERMINATE;
+	}
+
+	return AE_OK;
+}
+
+static int dmar_device_hotplug(acpi_handle handle, bool insert)
+{
+	int ret;
+	acpi_handle tmp = NULL;
+	acpi_status status;
+
+	if (!dmar_in_use())
+		return 0;
+
+	if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
+		tmp = handle;
+	} else {
+		status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
+					     ACPI_UINT32_MAX,
+					     dmar_get_dsm_handle,
+					     NULL, NULL, &tmp);
+		if (ACPI_FAILURE(status)) {
+			pr_warn("Failed to locate _DSM method.\n");
+			return -ENXIO;
+		}
+	}
+	if (tmp == NULL)
+		return 0;
+
+	down_write(&dmar_global_lock);
+	if (insert)
+		ret = dmar_hotplug_insert(tmp);
+	else
+		ret = dmar_hotplug_remove(tmp);
+	up_write(&dmar_global_lock);
+
+	return ret;
+}
+
+int dmar_device_add(acpi_handle handle)
+{
+	return dmar_device_hotplug(handle, true);
+}
+
+int dmar_device_remove(acpi_handle handle)
+{
+	return dmar_device_hotplug(handle, false);
+}
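
dmar_walk_remapping_entries() centralises the loop that both parse_dmar_table() and the new _DSM-based hotplug path need: iterate variable-length sub-structures, stop on a zero-length record to avoid spinning forever, and refuse records that run past the end of the buffer. A stripped-down user-space model of that walk, assuming a simple two-field record header rather than the real ACPI definitions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct rec_header {			/* stand-in for struct acpi_dmar_header */
	uint16_t type;
	uint16_t length;		/* total record length, header included */
};

typedef int (*rec_handler_t)(const struct rec_header *rec);

static int walk_records(const void *buf, size_t len, rec_handler_t handler)
{
	const void *end = (const char *)buf + len;
	const struct rec_header *rec = buf;

	while ((const void *)rec < end) {
		const void *next = (const char *)rec + rec->length;

		if (rec->length == 0) {
			fprintf(stderr, "invalid 0-length record\n");
			break;			/* avoid looping forever */
		}
		if (next > end) {
			fprintf(stderr, "record passes table end\n");
			return -1;
		}

		if (handler(rec))
			return -1;
		rec = next;
	}
	return 0;
}

static int print_rec(const struct rec_header *rec)
{
	printf("type %u, length %u\n", rec->type, rec->length);
	return 0;
}

int main(void)
{
	unsigned char table[8];
	struct rec_header a = { .type = 0, .length = 4 };
	struct rec_header b = { .type = 2, .length = 4 };

	memcpy(table, &a, sizeof(a));
	memcpy(table + 4, &b, sizeof(b));
	return walk_records(table, sizeof(table), print_rec);
}
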
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 7423318..28372b8 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -1178,6 +1178,7 @@
 	.detach_dev = exynos_iommu_detach_device,
 	.map = exynos_iommu_map,
 	.unmap = exynos_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = exynos_iommu_iova_to_phys,
 	.add_device = exynos_iommu_add_device,
 	.remove_device = exynos_iommu_remove_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a27d6cb..1232336 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -195,6 +195,7 @@
 }
 static inline void set_root_value(struct root_entry *root, unsigned long value)
 {
+	root->val &= ~VTD_PAGE_MASK;
 	root->val |= value & VTD_PAGE_MASK;
 }
 
@@ -247,6 +248,7 @@
 static inline void context_set_address_root(struct context_entry *context,
 					    unsigned long value)
 {
+	context->lo &= ~VTD_PAGE_MASK;
 	context->lo |= value & VTD_PAGE_MASK;
 }
 
@@ -328,17 +330,10 @@
 /* si_domain contains multiple devices */
 #define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)
 
-/* define the limit of IOMMUs supported in each domain */
-#ifdef	CONFIG_X86
-# define	IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
-#else
-# define	IOMMU_UNITS_SUPPORTED	64
-#endif
-
 struct dmar_domain {
 	int	id;			/* domain id */
 	int	nid;			/* node id */
-	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
+	DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
 					/* bitmap of iommus this domain uses*/
 
 	struct list_head devices; 	/* all devices' list */
@@ -1132,8 +1127,11 @@
 	unsigned long flags;
 
 	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
-	if (!root)
+	if (!root) {
+		pr_err("IOMMU: allocating root entry for %s failed\n",
+			iommu->name);
 		return -ENOMEM;
+	}
 
 	__iommu_flush_cache(iommu, root, ROOT_SIZE);
 
@@ -1473,7 +1471,7 @@
 	return 0;
 }
 
-static void free_dmar_iommu(struct intel_iommu *iommu)
+static void disable_dmar_iommu(struct intel_iommu *iommu)
 {
 	struct dmar_domain *domain;
 	int i;
@@ -1497,11 +1495,16 @@
 
 	if (iommu->gcmd & DMA_GCMD_TE)
 		iommu_disable_translation(iommu);
+}
 
-	kfree(iommu->domains);
-	kfree(iommu->domain_ids);
-	iommu->domains = NULL;
-	iommu->domain_ids = NULL;
+static void free_dmar_iommu(struct intel_iommu *iommu)
+{
+	if ((iommu->domains) && (iommu->domain_ids)) {
+		kfree(iommu->domains);
+		kfree(iommu->domain_ids);
+		iommu->domains = NULL;
+		iommu->domain_ids = NULL;
+	}
 
 	g_iommus[iommu->seq_id] = NULL;
 
@@ -1983,7 +1986,7 @@
 {
 	struct dma_pte *first_pte = NULL, *pte = NULL;
 	phys_addr_t uninitialized_var(pteval);
-	unsigned long sg_res;
+	unsigned long sg_res = 0;
 	unsigned int largepage_lvl = 0;
 	unsigned long lvl_pages = 0;
 
@@ -1994,10 +1997,8 @@
 
 	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
 
-	if (sg)
-		sg_res = 0;
-	else {
-		sg_res = nr_pages + 1;
+	if (!sg) {
+		sg_res = nr_pages;
 		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
 	}
 
@@ -2708,6 +2709,41 @@
 	return 0;
 }
 
+static void intel_iommu_init_qi(struct intel_iommu *iommu)
+{
+	/*
+	 * Start from the sane iommu hardware state.
+	 * If the queued invalidation is already initialized by us
+	 * (for example, while enabling interrupt-remapping) then
+	 * we got the things already rolling from a sane state.
+	 */
+	if (!iommu->qi) {
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(-1, iommu);
+		/*
+		 * Disable queued invalidation if supported and already enabled
+		 * before OS handover.
+		 */
+		dmar_disable_qi(iommu);
+	}
+
+	if (dmar_enable_qi(iommu)) {
+		/*
+		 * Queued Invalidate not enabled, use Register Based Invalidate
+		 */
+		iommu->flush.flush_context = __iommu_flush_context;
+		iommu->flush.flush_iotlb = __iommu_flush_iotlb;
+		pr_info("IOMMU: %s using Register based invalidation\n",
+			iommu->name);
+	} else {
+		iommu->flush.flush_context = qi_flush_context;
+		iommu->flush.flush_iotlb = qi_flush_iotlb;
+		pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
+	}
+}
+
 static int __init init_dmars(void)
 {
 	struct dmar_drhd_unit *drhd;
@@ -2728,14 +2764,18 @@
 		 * threaded kernel __init code path all other access are read
 		 * only
 		 */
-		if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
+		if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
 			g_num_of_iommus++;
 			continue;
 		}
 		printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
-			  IOMMU_UNITS_SUPPORTED);
+			  DMAR_UNITS_SUPPORTED);
 	}
 
+	/* Preallocate enough resources for IOMMU hot-addition */
+	if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
+		g_num_of_iommus = DMAR_UNITS_SUPPORTED;
+
 	g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
 			GFP_KERNEL);
 	if (!g_iommus) {
@@ -2764,58 +2804,14 @@
 		 * among all IOMMU's. Need to Split it later.
 		 */
 		ret = iommu_alloc_root_entry(iommu);
-		if (ret) {
-			printk(KERN_ERR "IOMMU: allocate root entry failed\n");
+		if (ret)
 			goto free_iommu;
-		}
 		if (!ecap_pass_through(iommu->ecap))
 			hw_pass_through = 0;
 	}
 
-	/*
-	 * Start from the sane iommu hardware state.
-	 */
-	for_each_active_iommu(iommu, drhd) {
-		/*
-		 * If the queued invalidation is already initialized by us
-		 * (for example, while enabling interrupt-remapping) then
-		 * we got the things already rolling from a sane state.
-		 */
-		if (iommu->qi)
-			continue;
-
-		/*
-		 * Clear any previous faults.
-		 */
-		dmar_fault(-1, iommu);
-		/*
-		 * Disable queued invalidation if supported and already enabled
-		 * before OS handover.
-		 */
-		dmar_disable_qi(iommu);
-	}
-
-	for_each_active_iommu(iommu, drhd) {
-		if (dmar_enable_qi(iommu)) {
-			/*
-			 * Queued Invalidate not enabled, use Register Based
-			 * Invalidate
-			 */
-			iommu->flush.flush_context = __iommu_flush_context;
-			iommu->flush.flush_iotlb = __iommu_flush_iotlb;
-			printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
-			       "invalidation\n",
-				iommu->seq_id,
-			       (unsigned long long)drhd->reg_base_addr);
-		} else {
-			iommu->flush.flush_context = qi_flush_context;
-			iommu->flush.flush_iotlb = qi_flush_iotlb;
-			printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
-			       "invalidation\n",
-				iommu->seq_id,
-			       (unsigned long long)drhd->reg_base_addr);
-		}
-	}
+	for_each_active_iommu(iommu, drhd)
+		intel_iommu_init_qi(iommu);
 
 	if (iommu_pass_through)
 		iommu_identity_mapping |= IDENTMAP_ALL;
@@ -2901,8 +2897,10 @@
 	return 0;
 
 free_iommu:
-	for_each_active_iommu(iommu, drhd)
+	for_each_active_iommu(iommu, drhd) {
+		disable_dmar_iommu(iommu);
 		free_dmar_iommu(iommu);
+	}
 	kfree(deferred_flush);
 free_g_iommus:
 	kfree(g_iommus);
@@ -3682,7 +3680,7 @@
 #endif	/* CONFIG_PM */
 
 
-int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
+int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
 {
 	struct acpi_dmar_reserved_memory *rmrr;
 	struct dmar_rmrr_unit *rmrru;
@@ -3708,17 +3706,48 @@
 	return 0;
 }
 
-int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
+static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
+{
+	struct dmar_atsr_unit *atsru;
+	struct acpi_dmar_atsr *tmp;
+
+	list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
+		tmp = (struct acpi_dmar_atsr *)atsru->hdr;
+		if (atsr->segment != tmp->segment)
+			continue;
+		if (atsr->header.length != tmp->header.length)
+			continue;
+		if (memcmp(atsr, tmp, atsr->header.length) == 0)
+			return atsru;
+	}
+
+	return NULL;
+}
+
+int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
 {
 	struct acpi_dmar_atsr *atsr;
 	struct dmar_atsr_unit *atsru;
 
+	if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
+		return 0;
+
 	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
-	atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
+	atsru = dmar_find_atsr(atsr);
+	if (atsru)
+		return 0;
+
+	atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
 	if (!atsru)
 		return -ENOMEM;
 
-	atsru->hdr = hdr;
+	/*
+	 * If memory is allocated from slab by ACPI _DSM method, we need to
+	 * copy the memory content because the memory buffer will be freed
+	 * on return.
+	 */
+	atsru->hdr = (void *)(atsru + 1);
+	memcpy(atsru->hdr, hdr, hdr->length);
 	atsru->include_all = atsr->flags & 0x1;
 	if (!atsru->include_all) {
 		atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
@@ -3741,6 +3770,138 @@
 	kfree(atsru);
 }
 
+int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
+{
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+	atsru = dmar_find_atsr(atsr);
+	if (atsru) {
+		list_del_rcu(&atsru->list);
+		synchronize_rcu();
+		intel_iommu_free_atsr(atsru);
+	}
+
+	return 0;
+}
+
+int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
+{
+	int i;
+	struct device *dev;
+	struct acpi_dmar_atsr *atsr;
+	struct dmar_atsr_unit *atsru;
+
+	atsr = container_of(hdr, struct acpi_dmar_atsr, header);
+	atsru = dmar_find_atsr(atsr);
+	if (!atsru)
+		return 0;
+
+	if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
+		for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
+					  i, dev)
+			return -EBUSY;
+
+	return 0;
+}
+
+static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
+{
+	int sp, ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (g_iommus[iommu->seq_id])
+		return 0;
+
+	if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
+		pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+	if (!ecap_sc_support(iommu->ecap) &&
+	    domain_update_iommu_snooping(iommu)) {
+		pr_warn("IOMMU: %s doesn't support snooping.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+	sp = domain_update_iommu_superpage(iommu) - 1;
+	if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
+		pr_warn("IOMMU: %s doesn't support large page.\n",
+			iommu->name);
+		return -ENXIO;
+	}
+
+	/*
+	 * Disable translation if already enabled prior to OS handover.
+	 */
+	if (iommu->gcmd & DMA_GCMD_TE)
+		iommu_disable_translation(iommu);
+
+	g_iommus[iommu->seq_id] = iommu;
+	ret = iommu_init_domains(iommu);
+	if (ret == 0)
+		ret = iommu_alloc_root_entry(iommu);
+	if (ret)
+		goto out;
+
+	if (dmaru->ignored) {
+		/*
+		 * we always have to disable PMRs or DMA may fail on this device
+		 */
+		if (force_on)
+			iommu_disable_protect_mem_regions(iommu);
+		return 0;
+	}
+
+	intel_iommu_init_qi(iommu);
+	iommu_flush_write_buffer(iommu);
+	ret = dmar_set_interrupt(iommu);
+	if (ret)
+		goto disable_iommu;
+
+	iommu_set_root_entry(iommu);
+	iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+	iommu_enable_translation(iommu);
+
+	if (si_domain) {
+		ret = iommu_attach_domain(si_domain, iommu);
+		if (ret < 0 || si_domain->id != ret)
+			goto disable_iommu;
+		domain_attach_iommu(si_domain, iommu);
+	}
+
+	iommu_disable_protect_mem_regions(iommu);
+	return 0;
+
+disable_iommu:
+	disable_dmar_iommu(iommu);
+out:
+	free_dmar_iommu(iommu);
+	return ret;
+}
+
+int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{
+	int ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (!intel_iommu_enabled)
+		return 0;
+	if (iommu == NULL)
+		return -EINVAL;
+
+	if (insert) {
+		ret = intel_iommu_add(dmaru);
+	} else {
+		disable_dmar_iommu(iommu);
+		free_dmar_iommu(iommu);
+	}
+
+	return ret;
+}
+
 static void intel_iommu_free_dmars(void)
 {
 	struct dmar_rmrr_unit *rmrru, *rmrr_n;
@@ -4467,6 +4628,7 @@
 	.detach_dev	= intel_iommu_detach_device,
 	.map		= intel_iommu_map,
 	.unmap		= intel_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= intel_iommu_iova_to_phys,
 	.add_device	= intel_iommu_add_device,
 	.remove_device	= intel_iommu_remove_device,
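
The one-line additions to set_root_value() and context_set_address_root() at the top of this file clear the address field before OR-ing in the new value; with OR alone, stale address bits would survive if an entry were ever reprogrammed. A small stand-alone demonstration of the difference (the mask follows from the 4 KiB VT-d page size; the values are made up):

#include <stdint.h>
#include <stdio.h>

#define VTD_PAGE_MASK (~0xfffULL)	/* address bits above the 4 KiB offset */

int main(void)
{
	uint64_t entry = 0x11111000ULL | 0x1;	/* old table address + flag bit */
	uint64_t new_addr = 0x22222000ULL;

	/* Plain OR keeps stale address bits from the previous programming: */
	uint64_t buggy = entry | (new_addr & VTD_PAGE_MASK);

	/* Clearing the field first, as the fixed helpers now do: */
	uint64_t fixed = (entry & ~VTD_PAGE_MASK) | (new_addr & VTD_PAGE_MASK);

	printf("or-only: %#llx\n", (unsigned long long)buggy);	/* 0x33333001 */
	printf("fixed:   %#llx\n", (unsigned long long)fixed);	/* 0x22222001 */
	return 0;
}

The same pattern appears twice in the driver because root entries and context entries both carry a page-aligned table address next to flag bits.
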
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 7c80661..27541d4 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -36,7 +36,6 @@
 
 static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static struct hpet_scope ir_hpet[MAX_HPET_TBS];
-static int ir_ioapic_num, ir_hpet_num;
 
 /*
  * Lock ordering:
@@ -206,7 +205,7 @@
 	int i;
 
 	for (i = 0; i < MAX_HPET_TBS; i++)
-		if (ir_hpet[i].id == hpet_id)
+		if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
 			return ir_hpet[i].iommu;
 	return NULL;
 }
@@ -216,7 +215,7 @@
 	int i;
 
 	for (i = 0; i < MAX_IO_APICS; i++)
-		if (ir_ioapic[i].id == apic)
+		if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu)
 			return ir_ioapic[i].iommu;
 	return NULL;
 }
@@ -325,7 +324,7 @@
 
 	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_IO_APICS; i++) {
-		if (ir_ioapic[i].id == apic) {
+		if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
 			sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
 			break;
 		}
@@ -352,7 +351,7 @@
 
 	down_read(&dmar_global_lock);
 	for (i = 0; i < MAX_HPET_TBS; i++) {
-		if (ir_hpet[i].id == id) {
+		if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
 			sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
 			break;
 		}
@@ -473,17 +472,17 @@
 	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
-
-static int intel_setup_irq_remapping(struct intel_iommu *iommu, int mode)
+static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 {
 	struct ir_table *ir_table;
 	struct page *pages;
 	unsigned long *bitmap;
 
-	ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
-					     GFP_ATOMIC);
+	if (iommu->ir_table)
+		return 0;
 
-	if (!iommu->ir_table)
+	ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC);
+	if (!ir_table)
 		return -ENOMEM;
 
 	pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
@@ -492,24 +491,37 @@
 	if (!pages) {
 		pr_err("IR%d: failed to allocate pages of order %d\n",
 		       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
-		kfree(iommu->ir_table);
-		return -ENOMEM;
+		goto out_free_table;
 	}
 
 	bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
 			 sizeof(long), GFP_ATOMIC);
 	if (bitmap == NULL) {
 		pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
-		__free_pages(pages, INTR_REMAP_PAGE_ORDER);
-		kfree(ir_table);
-		return -ENOMEM;
+		goto out_free_pages;
 	}
 
 	ir_table->base = page_address(pages);
 	ir_table->bitmap = bitmap;
-
-	iommu_set_irq_remapping(iommu, mode);
+	iommu->ir_table = ir_table;
 	return 0;
+
+out_free_pages:
+	__free_pages(pages, INTR_REMAP_PAGE_ORDER);
+out_free_table:
+	kfree(ir_table);
+	return -ENOMEM;
+}
+
+static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
+{
+	if (iommu && iommu->ir_table) {
+		free_pages((unsigned long)iommu->ir_table->base,
+			   INTR_REMAP_PAGE_ORDER);
+		kfree(iommu->ir_table->bitmap);
+		kfree(iommu->ir_table);
+		iommu->ir_table = NULL;
+	}
 }
 
 /*
@@ -666,9 +678,10 @@
 		if (!ecap_ir_support(iommu->ecap))
 			continue;
 
-		if (intel_setup_irq_remapping(iommu, eim))
+		if (intel_setup_irq_remapping(iommu))
 			goto error;
 
+		iommu_set_irq_remapping(iommu, eim);
 		setup = 1;
 	}
 
@@ -689,9 +702,11 @@
 	return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
 
 error:
-	/*
-	 * handle error condition gracefully here!
-	 */
+	for_each_iommu(iommu, drhd)
+		if (ecap_ir_support(iommu->ecap)) {
+			iommu_disable_irq_remapping(iommu);
+			intel_teardown_irq_remapping(iommu);
+		}
 
 	if (x2apic_present)
 		pr_warn("Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");
@@ -699,12 +714,13 @@
 	return -1;
 }
 
-static void ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
-				      struct intel_iommu *iommu)
+static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
+				   struct intel_iommu *iommu,
+				   struct acpi_dmar_hardware_unit *drhd)
 {
 	struct acpi_dmar_pci_path *path;
 	u8 bus;
-	int count;
+	int count, free = -1;
 
 	bus = scope->bus;
 	path = (struct acpi_dmar_pci_path *)(scope + 1);
@@ -720,19 +736,36 @@
 					   PCI_SECONDARY_BUS);
 		path++;
 	}
-	ir_hpet[ir_hpet_num].bus   = bus;
-	ir_hpet[ir_hpet_num].devfn = PCI_DEVFN(path->device, path->function);
-	ir_hpet[ir_hpet_num].iommu = iommu;
-	ir_hpet[ir_hpet_num].id    = scope->enumeration_id;
-	ir_hpet_num++;
+
+	for (count = 0; count < MAX_HPET_TBS; count++) {
+		if (ir_hpet[count].iommu == iommu &&
+		    ir_hpet[count].id == scope->enumeration_id)
+			return 0;
+		else if (ir_hpet[count].iommu == NULL && free == -1)
+			free = count;
+	}
+	if (free == -1) {
+		pr_warn("Exceeded Max HPET blocks\n");
+		return -ENOSPC;
+	}
+
+	ir_hpet[free].iommu = iommu;
+	ir_hpet[free].id    = scope->enumeration_id;
+	ir_hpet[free].bus   = bus;
+	ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function);
+	pr_info("HPET id %d under DRHD base 0x%Lx\n",
+		scope->enumeration_id, drhd->address);
+
+	return 0;
 }
 
-static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
-				      struct intel_iommu *iommu)
+static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
+				     struct intel_iommu *iommu,
+				     struct acpi_dmar_hardware_unit *drhd)
 {
 	struct acpi_dmar_pci_path *path;
 	u8 bus;
-	int count;
+	int count, free = -1;
 
 	bus = scope->bus;
 	path = (struct acpi_dmar_pci_path *)(scope + 1);
@@ -749,54 +782,63 @@
 		path++;
 	}
 
-	ir_ioapic[ir_ioapic_num].bus   = bus;
-	ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
-	ir_ioapic[ir_ioapic_num].iommu = iommu;
-	ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;
-	ir_ioapic_num++;
+	for (count = 0; count < MAX_IO_APICS; count++) {
+		if (ir_ioapic[count].iommu == iommu &&
+		    ir_ioapic[count].id == scope->enumeration_id)
+			return 0;
+		else if (ir_ioapic[count].iommu == NULL && free == -1)
+			free = count;
+	}
+	if (free == -1) {
+		pr_warn("Exceeded Max IO APICS\n");
+		return -ENOSPC;
+	}
+
+	ir_ioapic[free].bus   = bus;
+	ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function);
+	ir_ioapic[free].iommu = iommu;
+	ir_ioapic[free].id    = scope->enumeration_id;
+	pr_info("IOAPIC id %d under DRHD base  0x%Lx IOMMU %d\n",
+		scope->enumeration_id, drhd->address, iommu->seq_id);
+
+	return 0;
 }
 
 static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
 				      struct intel_iommu *iommu)
 {
+	int ret = 0;
 	struct acpi_dmar_hardware_unit *drhd;
 	struct acpi_dmar_device_scope *scope;
 	void *start, *end;
 
 	drhd = (struct acpi_dmar_hardware_unit *)header;
-
 	start = (void *)(drhd + 1);
 	end = ((void *)drhd) + header->length;
 
-	while (start < end) {
+	while (start < end && ret == 0) {
 		scope = start;
-		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
-			if (ir_ioapic_num == MAX_IO_APICS) {
-				printk(KERN_WARNING "Exceeded Max IO APICS\n");
-				return -1;
-			}
-
-			printk(KERN_INFO "IOAPIC id %d under DRHD base "
-			       " 0x%Lx IOMMU %d\n", scope->enumeration_id,
-			       drhd->address, iommu->seq_id);
-
-			ir_parse_one_ioapic_scope(scope, iommu);
-		} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
-			if (ir_hpet_num == MAX_HPET_TBS) {
-				printk(KERN_WARNING "Exceeded Max HPET blocks\n");
-				return -1;
-			}
-
-			printk(KERN_INFO "HPET id %d under DRHD base"
-			       " 0x%Lx\n", scope->enumeration_id,
-			       drhd->address);
-
-			ir_parse_one_hpet_scope(scope, iommu);
-		}
+		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC)
+			ret = ir_parse_one_ioapic_scope(scope, iommu, drhd);
+		else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET)
+			ret = ir_parse_one_hpet_scope(scope, iommu, drhd);
 		start += scope->length;
 	}
 
-	return 0;
+	return ret;
+}
+
+static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu)
+{
+	int i;
+
+	for (i = 0; i < MAX_HPET_TBS; i++)
+		if (ir_hpet[i].iommu == iommu)
+			ir_hpet[i].iommu = NULL;
+
+	for (i = 0; i < MAX_IO_APICS; i++)
+		if (ir_ioapic[i].iommu == iommu)
+			ir_ioapic[i].iommu = NULL;
 }
 
 /*
@@ -1171,3 +1213,86 @@
 	.msi_setup_irq		= intel_msi_setup_irq,
 	.alloc_hpet_msi		= intel_alloc_hpet_msi,
 };
+
+/*
+ * Support of Interrupt Remapping Unit Hotplug
+ */
+static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
+{
+	int ret;
+	int eim = x2apic_enabled();
+
+	if (eim && !ecap_eim_support(iommu->ecap)) {
+		pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
+			iommu->reg_phys, iommu->ecap);
+		return -ENODEV;
+	}
+
+	if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
+		pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
+			iommu->reg_phys);
+		return -ENODEV;
+	}
+
+	/* TODO: check all IOAPICs are covered by IOMMU */
+
+	/* Setup Interrupt-remapping now. */
+	ret = intel_setup_irq_remapping(iommu);
+	if (ret) {
+		pr_err("DRHD %Lx: failed to allocate resource\n",
+		       iommu->reg_phys);
+		ir_remove_ioapic_hpet_scope(iommu);
+		return ret;
+	}
+
+	if (!iommu->qi) {
+		/* Clear previous faults. */
+		dmar_fault(-1, iommu);
+		iommu_disable_irq_remapping(iommu);
+		dmar_disable_qi(iommu);
+	}
+
+	/* Enable queued invalidation */
+	ret = dmar_enable_qi(iommu);
+	if (!ret) {
+		iommu_set_irq_remapping(iommu, eim);
+	} else {
+		pr_err("DRHD %Lx: failed to enable queued invalidation, ecap %Lx, ret %d\n",
+		       iommu->reg_phys, iommu->ecap, ret);
+		intel_teardown_irq_remapping(iommu);
+		ir_remove_ioapic_hpet_scope(iommu);
+	}
+
+	return ret;
+}
+
+int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
+{
+	int ret = 0;
+	struct intel_iommu *iommu = dmaru->iommu;
+
+	if (!irq_remapping_enabled)
+		return 0;
+	if (iommu == NULL)
+		return -EINVAL;
+	if (!ecap_ir_support(iommu->ecap))
+		return 0;
+
+	if (insert) {
+		if (!iommu->ir_table)
+			ret = dmar_ir_add(dmaru, iommu);
+	} else {
+		if (iommu->ir_table) {
+			if (!bitmap_empty(iommu->ir_table->bitmap,
+					  INTR_REMAP_TABLE_ENTRIES)) {
+				ret = -EBUSY;
+			} else {
+				iommu_disable_irq_remapping(iommu);
+				intel_teardown_irq_remapping(iommu);
+				ir_remove_ioapic_hpet_scope(iommu);
+			}
+		}
+	}
+
+	return ret;
+}
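
The rewritten ir_parse_one_ioapic_scope() and ir_parse_one_hpet_scope() replace the global ir_ioapic_num/ir_hpet_num counters with a scan that either finds an existing (iommu, id) entry or claims the first free slot, which is what makes the fixed-size tables safe to reuse across hotplug. A minimal user-space sketch of that find-or-claim scan (struct and names are illustrative):

#include <stdio.h>

#define MAX_SLOTS 8

struct slot {
	int id;
	void *owner;		/* NULL means the slot is free */
};

static struct slot table[MAX_SLOTS];

/* Return the index of an existing (owner, id) entry, or claim the first
 * free slot; -1 when the table is exhausted. Same shape as the new
 * ir_parse_one_ioapic_scope()/ir_parse_one_hpet_scope() loops. */
static int find_or_claim(void *owner, int id)
{
	int free = -1;

	for (int i = 0; i < MAX_SLOTS; i++) {
		if (table[i].owner == owner && table[i].id == id)
			return i;		/* already registered */
		if (table[i].owner == NULL && free == -1)
			free = i;		/* remember first hole */
	}
	if (free == -1)
		return -1;

	table[free].owner = owner;
	table[free].id = id;
	return free;
}

int main(void)
{
	int dummy;

	printf("%d\n", find_or_claim(&dummy, 2));	/* claims slot 0 */
	printf("%d\n", find_or_claim(&dummy, 2));	/* finds the same slot */
	return 0;
}
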
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index ed8b048..1bd6335 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -818,7 +818,15 @@
 		kfree(nb);
 		return err;
 	}
-	return bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+
+	err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
+	if (err) {
+		bus_unregister_notifier(bus, nb);
+		kfree(nb);
+		return err;
+	}
+
+	return 0;
 }
 
 /**
@@ -836,13 +844,19 @@
  */
 int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
 {
+	int err;
+
 	if (bus->iommu_ops != NULL)
 		return -EBUSY;
 
 	bus->iommu_ops = ops;
 
 	/* Do IOMMU specific setup for this bus-type */
-	return iommu_bus_init(bus, ops);
+	err = iommu_bus_init(bus, ops);
+	if (err)
+		bus->iommu_ops = NULL;
+
+	return err;
 }
 EXPORT_SYMBOL_GPL(bus_set_iommu);
 
@@ -1124,6 +1138,48 @@
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
 
+size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
+			 struct scatterlist *sg, unsigned int nents, int prot)
+{
+	struct scatterlist *s;
+	size_t mapped = 0;
+	unsigned int i, min_pagesz;
+	int ret;
+
+	if (unlikely(domain->ops->pgsize_bitmap == 0UL))
+		return 0;
+
+	min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
+
+	for_each_sg(sg, s, nents, i) {
+		phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
+
+		/*
+		 * We are mapping on IOMMU page boundaries, so offset within
+		 * the page must be 0. However, the IOMMU may support pages
+		 * smaller than PAGE_SIZE, so s->offset may still represent
+		 * an offset of that boundary within the CPU page.
+		 */
+		if (!IS_ALIGNED(s->offset, min_pagesz))
+			goto out_err;
+
+		ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
+		if (ret)
+			goto out_err;
+
+		mapped += s->length;
+	}
+
+	return mapped;
+
+out_err:
+	/* undo mappings already done */
+	iommu_unmap(domain, iova, mapped);
+
+	return 0;
+
+}
+EXPORT_SYMBOL_GPL(default_iommu_map_sg);
 
 int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
 			       phys_addr_t paddr, u64 size, int prot)
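
default_iommu_map_sg() gives drivers without a hardware-optimised map_sg a common fallback: each scatterlist segment is mapped at the next IOVA with iommu_map(), the mapped length is accumulated, and on any failure everything mapped so far is torn down with a single iommu_unmap() before returning 0. A hedged user-space model of that map-then-unwind shape, with stub map/unmap functions in place of the IOMMU API:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct segment {
	uint64_t phys;
	size_t len;
};

/* Stubs standing in for iommu_map()/iommu_unmap(). */
static int map_one(uint64_t iova, uint64_t phys, size_t len)
{
	printf("map   iova %#llx -> phys %#llx (%zu bytes)\n",
	       (unsigned long long)iova, (unsigned long long)phys, len);
	return phys ? 0 : -1;		/* pretend a zero address fails */
}

static void unmap_range(uint64_t iova, size_t len)
{
	printf("unmap iova %#llx (%zu bytes)\n", (unsigned long long)iova, len);
}

/* Map segments back to back at 'iova'; return bytes mapped, 0 on error. */
static size_t map_sg(uint64_t iova, const struct segment *sg, size_t nents)
{
	size_t mapped = 0;

	for (size_t i = 0; i < nents; i++) {
		if (map_one(iova + mapped, sg[i].phys, sg[i].len)) {
			unmap_range(iova, mapped);	/* undo what succeeded */
			return 0;
		}
		mapped += sg[i].len;
	}
	return mapped;
}

int main(void)
{
	struct segment sg[] = {
		{ 0x80000000ULL, 0x1000 },
		{ 0x80004000ULL, 0x2000 },
	};

	printf("mapped %zu bytes\n", map_sg(0x10000000ULL, sg, 2));
	return 0;
}

Drivers that can do better (for example by batching TLB maintenance) keep providing their own map_sg; the per-driver hunks in this merge only wire the fallback into ops tables that previously had no map_sg at all.
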
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 7dab5cb..99effbb 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -1127,6 +1127,7 @@
 	.detach_dev = ipmmu_detach_device,
 	.map = ipmmu_map,
 	.unmap = ipmmu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = ipmmu_iova_to_phys,
 	.add_device = ipmmu_add_device,
 	.remove_device = ipmmu_remove_device,
@@ -1184,7 +1185,7 @@
 			       dev_name(&pdev->dev), mmu);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to request IRQ %d\n", irq);
-		return irq;
+		return ret;
 	}
 
 	ipmmu_device_reset(mmu);
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 6e3dcc28..e1b0537 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -73,8 +73,7 @@
 
 static void __disable_clocks(struct msm_iommu_drvdata *drvdata)
 {
-	if (drvdata->clk)
-		clk_disable(drvdata->clk);
+	clk_disable(drvdata->clk);
 	clk_disable(drvdata->pclk);
 }
 
@@ -681,6 +680,7 @@
 	.detach_dev = msm_iommu_detach_dev,
 	.map = msm_iommu_map,
 	.unmap = msm_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = msm_iommu_iova_to_phys,
 	.pgsize_bitmap = MSM_IOMMU_PGSIZES,
 };
diff --git a/drivers/iommu/msm_iommu_dev.c b/drivers/iommu/msm_iommu_dev.c
index 61def7cb..b6d01f9 100644
--- a/drivers/iommu/msm_iommu_dev.c
+++ b/drivers/iommu/msm_iommu_dev.c
@@ -131,7 +131,7 @@
 	struct clk *iommu_clk;
 	struct clk *iommu_pclk;
 	struct msm_iommu_drvdata *drvdata;
-	struct msm_iommu_dev *iommu_dev = pdev->dev.platform_data;
+	struct msm_iommu_dev *iommu_dev = dev_get_platdata(&pdev->dev);
 	void __iomem *regs_base;
 	int ret, irq, par;
 
@@ -224,8 +224,7 @@
 
 	platform_set_drvdata(pdev, drvdata);
 
-	if (iommu_clk)
-		clk_disable(iommu_clk);
+	clk_disable(iommu_clk);
 
 	clk_disable(iommu_pclk);
 
@@ -264,7 +263,7 @@
 
 static int msm_iommu_ctx_probe(struct platform_device *pdev)
 {
-	struct msm_iommu_ctx_dev *c = pdev->dev.platform_data;
+	struct msm_iommu_ctx_dev *c = dev_get_platdata(&pdev->dev);
 	struct msm_iommu_drvdata *drvdata;
 	struct msm_iommu_ctx_drvdata *ctx_drvdata;
 	int i, ret;
@@ -323,8 +322,7 @@
 		SET_NSCFG(drvdata->base, mid, 3);
 	}
 
-	if (drvdata->clk)
-		clk_disable(drvdata->clk);
+	clk_disable(drvdata->clk);
 	clk_disable(drvdata->pclk);
 
 	dev_info(&pdev->dev, "context %s using bank %d\n", c->name, c->num);
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 839cd8b..bbb7dce 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1364,6 +1364,7 @@
 	.detach_dev	= omap_iommu_detach_dev,
 	.map		= omap_iommu_map,
 	.unmap		= omap_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= omap_iommu_iova_to_phys,
 	.add_device	= omap_iommu_add_device,
 	.remove_device	= omap_iommu_remove_device,
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
new file mode 100644
index 0000000..b2023af
--- /dev/null
+++ b/drivers/iommu/rockchip-iommu.c
@@ -0,0 +1,1038 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <linux/compiler.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iommu.h>
+#include <linux/jiffies.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+/** MMU register offsets */
+#define RK_MMU_DTE_ADDR		0x00	/* Directory table address */
+#define RK_MMU_STATUS		0x04
+#define RK_MMU_COMMAND		0x08
+#define RK_MMU_PAGE_FAULT_ADDR	0x0C	/* IOVA of last page fault */
+#define RK_MMU_ZAP_ONE_LINE	0x10	/* Shootdown one IOTLB entry */
+#define RK_MMU_INT_RAWSTAT	0x14	/* IRQ status ignoring mask */
+#define RK_MMU_INT_CLEAR	0x18	/* Acknowledge and re-arm irq */
+#define RK_MMU_INT_MASK		0x1C	/* IRQ enable */
+#define RK_MMU_INT_STATUS	0x20	/* IRQ status after masking */
+#define RK_MMU_AUTO_GATING	0x24
+
+#define DTE_ADDR_DUMMY		0xCAFEBABE
+#define FORCE_RESET_TIMEOUT	100	/* ms */
+
+/* RK_MMU_STATUS fields */
+#define RK_MMU_STATUS_PAGING_ENABLED       BIT(0)
+#define RK_MMU_STATUS_PAGE_FAULT_ACTIVE    BIT(1)
+#define RK_MMU_STATUS_STALL_ACTIVE         BIT(2)
+#define RK_MMU_STATUS_IDLE                 BIT(3)
+#define RK_MMU_STATUS_REPLAY_BUFFER_EMPTY  BIT(4)
+#define RK_MMU_STATUS_PAGE_FAULT_IS_WRITE  BIT(5)
+#define RK_MMU_STATUS_STALL_NOT_ACTIVE     BIT(31)
+
+/* RK_MMU_COMMAND command values */
+#define RK_MMU_CMD_ENABLE_PAGING    0  /* Enable memory translation */
+#define RK_MMU_CMD_DISABLE_PAGING   1  /* Disable memory translation */
+#define RK_MMU_CMD_ENABLE_STALL     2  /* Stall paging to allow other cmds */
+#define RK_MMU_CMD_DISABLE_STALL    3  /* Stop stall re-enables paging */
+#define RK_MMU_CMD_ZAP_CACHE        4  /* Shoot down entire IOTLB */
+#define RK_MMU_CMD_PAGE_FAULT_DONE  5  /* Clear page fault */
+#define RK_MMU_CMD_FORCE_RESET      6  /* Reset all registers */
+
+/* RK_MMU_INT_* register fields */
+#define RK_MMU_IRQ_PAGE_FAULT    0x01  /* page fault */
+#define RK_MMU_IRQ_BUS_ERROR     0x02  /* bus read error */
+#define RK_MMU_IRQ_MASK          (RK_MMU_IRQ_PAGE_FAULT | RK_MMU_IRQ_BUS_ERROR)
+
+#define NUM_DT_ENTRIES 1024
+#define NUM_PT_ENTRIES 1024
+
+#define SPAGE_ORDER 12
+#define SPAGE_SIZE (1 << SPAGE_ORDER)
+
+ /*
+  * Support mapping any size that fits in one page table:
+  *   4 KiB to 4 MiB
+  */
+#define RK_IOMMU_PGSIZE_BITMAP 0x007ff000
+
+#define IOMMU_REG_POLL_COUNT_FAST 1000
+
+struct rk_iommu_domain {
+	struct list_head iommus;
+	u32 *dt; /* page directory table */
+	spinlock_t iommus_lock; /* lock for iommus list */
+	spinlock_t dt_lock; /* lock for modifying page directory table */
+};
+
+struct rk_iommu {
+	struct device *dev;
+	void __iomem *base;
+	int irq;
+	struct list_head node; /* entry in rk_iommu_domain.iommus */
+	struct iommu_domain *domain; /* domain to which iommu is attached */
+};
+
+static inline void rk_table_flush(u32 *va, unsigned int count)
+{
+	phys_addr_t pa_start = virt_to_phys(va);
+	phys_addr_t pa_end = virt_to_phys(va + count);
+	size_t size = pa_end - pa_start;
+
+	__cpuc_flush_dcache_area(va, size);
+	outer_flush_range(pa_start, pa_end);
+}
+
+/*
+ * Inspired by _wait_for in intel_drv.h
+ * This is NOT safe for use in interrupt context.
+ *
+ * Note that it's important that we check the condition again after having
+ * timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define rk_wait_for(COND, MS) ({ \
+	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1;	\
+	int ret__ = 0;							\
+	while (!(COND)) {						\
+		if (time_after(jiffies, timeout__)) {			\
+			ret__ = (COND) ? 0 : -ETIMEDOUT;		\
+			break;						\
+		}							\
+		usleep_range(50, 100);					\
+	}								\
+	ret__;								\
+})
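+/*
+ * e.g. rk_wait_for(rk_iommu_is_stall_active(iommu), 1) polls the stall bit
+ * for roughly 1 ms before giving up.
+ */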
+
+/*
+ * The Rockchip rk3288 iommu uses a 2-level page table.
+ * The first level is the "Directory Table" (DT).
+ * The DT consists of 1024 4-byte Directory Table Entries (DTEs), each pointing
+ * to a "Page Table".
+ * The second level consists of up to 1024 "Page Tables" (PTs).
+ * Each PT consists of 1024 4-byte Page Table Entries (PTEs), each pointing to
+ * a 4 KB page of physical memory.
+ *
+ * The DT and each PT fit in a single 4 KB page (1024 entries * 4 bytes).
+ * Each iommu device has an MMU_DTE_ADDR register that contains the physical
+ * address of the start of the DT page.
+ *
+ * The structure of the page table is as follows:
+ *
+ *                   DT
+ * MMU_DTE_ADDR -> +-----+
+ *                 |     |
+ *                 +-----+     PT
+ *                 | DTE | -> +-----+
+ *                 +-----+    |     |     Memory
+ *                 |     |    +-----+     Page
+ *                 |     |    | PTE | -> +-----+
+ *                 +-----+    +-----+    |     |
+ *                            |     |    |     |
+ *                            |     |    |     |
+ *                            +-----+    |     |
+ *                                       |     |
+ *                                       |     |
+ *                                       +-----+
+ */
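+
+/*
+ * With 1024 DTEs, 1024 PTEs per PT and 4 KiB pages, a single DT maps the
+ * entire 32-bit (4 GiB) iova space.
+ */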
+
+/*
+ * Each DTE has a PT address and a valid bit:
+ * +---------------------+-----------+-+
+ * | PT address          | Reserved  |V|
+ * +---------------------+-----------+-+
+ *  31:12 - PT address (PTs always start on a 4 KB boundary)
+ *  11: 1 - Reserved
+ *      0 - 1 if PT @ PT address is valid
+ */
+#define RK_DTE_PT_ADDRESS_MASK    0xfffff000
+#define RK_DTE_PT_VALID           BIT(0)
+
+static inline phys_addr_t rk_dte_pt_address(u32 dte)
+{
+	return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
+}
+
+static inline bool rk_dte_is_pt_valid(u32 dte)
+{
+	return dte & RK_DTE_PT_VALID;
+}
+
+static u32 rk_mk_dte(u32 *pt)
+{
+	phys_addr_t pt_phys = virt_to_phys(pt);
+	return (pt_phys & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
+}
+
+/*
+ * Each PTE has a Page address, some flags and a valid bit:
+ * +---------------------+---+-------+-+
+ * | Page address        |Rsv| Flags |V|
+ * +---------------------+---+-------+-+
+ *  31:12 - Page address (Pages always start on a 4 KB boundary)
+ *  11: 9 - Reserved
+ *   8: 1 - Flags
+ *      8 - Read allocate - allocate cache space on read misses
+ *      7 - Read cache - enable cache & prefetch of data
+ *      6 - Write buffer - enable delaying writes on their way to memory
+ *      5 - Write allocate - allocate cache space on write misses
+ *      4 - Write cache - different writes can be merged together
+ *      3 - Override cache attributes
+ *          if 1, bits 4-8 control cache attributes
+ *          if 0, the system bus defaults are used
+ *      2 - Writable
+ *      1 - Readable
+ *      0 - 1 if Page @ Page address is valid
+ */
+#define RK_PTE_PAGE_ADDRESS_MASK  0xfffff000
+#define RK_PTE_PAGE_FLAGS_MASK    0x000001fe
+#define RK_PTE_PAGE_WRITABLE      BIT(2)
+#define RK_PTE_PAGE_READABLE      BIT(1)
+#define RK_PTE_PAGE_VALID         BIT(0)
+
+static inline phys_addr_t rk_pte_page_address(u32 pte)
+{
+	return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
+}
+
+static inline bool rk_pte_is_page_valid(u32 pte)
+{
+	return pte & RK_PTE_PAGE_VALID;
+}
+
+/* TODO: set cache flags per prot IOMMU_CACHE */
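+/* e.g. rk_mk_pte(0x12345000, IOMMU_READ | IOMMU_WRITE) == 0x12345007 */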
+static u32 rk_mk_pte(phys_addr_t page, int prot)
+{
+	u32 flags = 0;
+	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
+	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
+	page &= RK_PTE_PAGE_ADDRESS_MASK;
+	return page | flags | RK_PTE_PAGE_VALID;
+}
+
+static u32 rk_mk_pte_invalid(u32 pte)
+{
+	return pte & ~RK_PTE_PAGE_VALID;
+}
+
+/*
+ * rk3288 iova (IOMMU Virtual Address) format
+ *  31       22.21       12.11          0
+ * +-----------+-----------+-------------+
+ * | DTE index | PTE index | Page offset |
+ * +-----------+-----------+-------------+
+ *  31:22 - DTE index   - index of DTE in DT
+ *  21:12 - PTE index   - index of PTE in PT @ DTE.pt_address
+ *  11: 0 - Page offset - offset into page @ PTE.page_address
+ */
+#define RK_IOVA_DTE_MASK    0xffc00000
+#define RK_IOVA_DTE_SHIFT   22
+#define RK_IOVA_PTE_MASK    0x003ff000
+#define RK_IOVA_PTE_SHIFT   12
+#define RK_IOVA_PAGE_MASK   0x00000fff
+#define RK_IOVA_PAGE_SHIFT  0
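+/*
+ * e.g. iova 0x00501008 decomposes into dte_index 0x001, pte_index 0x101
+ * and page_offset 0x008.
+ */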
+
+static u32 rk_iova_dte_index(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_DTE_MASK) >> RK_IOVA_DTE_SHIFT;
+}
+
+static u32 rk_iova_pte_index(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_PTE_MASK) >> RK_IOVA_PTE_SHIFT;
+}
+
+static u32 rk_iova_page_offset(dma_addr_t iova)
+{
+	return (u32)(iova & RK_IOVA_PAGE_MASK) >> RK_IOVA_PAGE_SHIFT;
+}
+
+static u32 rk_iommu_read(struct rk_iommu *iommu, u32 offset)
+{
+	return readl(iommu->base + offset);
+}
+
+static void rk_iommu_write(struct rk_iommu *iommu, u32 offset, u32 value)
+{
+	writel(value, iommu->base + offset);
+}
+
+static void rk_iommu_command(struct rk_iommu *iommu, u32 command)
+{
+	writel(command, iommu->base + RK_MMU_COMMAND);
+}
+
+static void rk_iommu_zap_lines(struct rk_iommu *iommu, dma_addr_t iova,
+			       size_t size)
+{
+	dma_addr_t iova_end = iova + size;
+	/*
+	 * TODO(djkurtz): Figure out when it is more efficient to shootdown the
+	 * entire iotlb rather than iterate over individual iovas.
+	 */
+	for (; iova < iova_end; iova += SPAGE_SIZE)
+		rk_iommu_write(iommu, RK_MMU_ZAP_ONE_LINE, iova);
+}
+
+static bool rk_iommu_is_stall_active(struct rk_iommu *iommu)
+{
+	return rk_iommu_read(iommu, RK_MMU_STATUS) & RK_MMU_STATUS_STALL_ACTIVE;
+}
+
+static bool rk_iommu_is_paging_enabled(struct rk_iommu *iommu)
+{
+	return rk_iommu_read(iommu, RK_MMU_STATUS) &
+			     RK_MMU_STATUS_PAGING_ENABLED;
+}
+
+static int rk_iommu_enable_stall(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (rk_iommu_is_stall_active(iommu))
+		return 0;
+
+	/* Stall can only be enabled if paging is enabled */
+	if (!rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_STALL);
+
+	ret = rk_wait_for(rk_iommu_is_stall_active(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Enable stall request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_disable_stall(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (!rk_iommu_is_stall_active(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_STALL);
+
+	ret = rk_wait_for(!rk_iommu_is_stall_active(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Disable stall request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_enable_paging(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_ENABLE_PAGING);
+
+	ret = rk_wait_for(rk_iommu_is_paging_enabled(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Enable paging request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_disable_paging(struct rk_iommu *iommu)
+{
+	int ret;
+
+	if (!rk_iommu_is_paging_enabled(iommu))
+		return 0;
+
+	rk_iommu_command(iommu, RK_MMU_CMD_DISABLE_PAGING);
+
+	ret = rk_wait_for(!rk_iommu_is_paging_enabled(iommu), 1);
+	if (ret)
+		dev_err(iommu->dev, "Disable paging request timed out, status: %#08x\n",
+			rk_iommu_read(iommu, RK_MMU_STATUS));
+
+	return ret;
+}
+
+static int rk_iommu_force_reset(struct rk_iommu *iommu)
+{
+	int ret;
+	u32 dte_addr;
+
+	/*
+	 * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY
+	 * and verifying that the upper 5 nybbles are read back.
+	 */
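+	/* DTE_ADDR_DUMMY (0xCAFEBABE) should read back as 0xCAFEB000 */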
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
+
+	dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
+	if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
+		dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
+		return -EFAULT;
+	}
+
+	rk_iommu_command(iommu, RK_MMU_CMD_FORCE_RESET);
+
+	ret = rk_wait_for(rk_iommu_read(iommu, RK_MMU_DTE_ADDR) == 0x00000000,
+			  FORCE_RESET_TIMEOUT);
+	if (ret)
+		dev_err(iommu->dev, "FORCE_RESET command timed out\n");
+
+	return ret;
+}
+
+static void log_iova(struct rk_iommu *iommu, dma_addr_t iova)
+{
+	u32 dte_index, pte_index, page_offset;
+	u32 mmu_dte_addr;
+	phys_addr_t mmu_dte_addr_phys, dte_addr_phys;
+	u32 *dte_addr;
+	u32 dte;
+	phys_addr_t pte_addr_phys = 0;
+	u32 *pte_addr = NULL;
+	u32 pte = 0;
+	phys_addr_t page_addr_phys = 0;
+	u32 page_flags = 0;
+
+	dte_index = rk_iova_dte_index(iova);
+	pte_index = rk_iova_pte_index(iova);
+	page_offset = rk_iova_page_offset(iova);
+
+	mmu_dte_addr = rk_iommu_read(iommu, RK_MMU_DTE_ADDR);
+	mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
+
+	dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
+	dte_addr = phys_to_virt(dte_addr_phys);
+	dte = *dte_addr;
+
+	if (!rk_dte_is_pt_valid(dte))
+		goto print_it;
+
+	pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
+	pte_addr = phys_to_virt(pte_addr_phys);
+	pte = *pte_addr;
+
+	if (!rk_pte_is_page_valid(pte))
+		goto print_it;
+
+	page_addr_phys = rk_pte_page_address(pte) + page_offset;
+	page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
+
+print_it:
+	dev_err(iommu->dev, "iova = %pad: dte_index: %#03x pte_index: %#03x page_offset: %#03x\n",
+		&iova, dte_index, pte_index, page_offset);
+	dev_err(iommu->dev, "mmu_dte_addr: %pa dte@%pa: %#08x valid: %u pte@%pa: %#08x valid: %u page@%pa flags: %#03x\n",
+		&mmu_dte_addr_phys, &dte_addr_phys, dte,
+		rk_dte_is_pt_valid(dte), &pte_addr_phys, pte,
+		rk_pte_is_page_valid(pte), &page_addr_phys, page_flags);
+}
+
+static irqreturn_t rk_iommu_irq(int irq, void *dev_id)
+{
+	struct rk_iommu *iommu = dev_id;
+	u32 status;
+	u32 int_status;
+	dma_addr_t iova;
+
+	int_status = rk_iommu_read(iommu, RK_MMU_INT_STATUS);
+	if (int_status == 0)
+		return IRQ_NONE;
+
+	iova = rk_iommu_read(iommu, RK_MMU_PAGE_FAULT_ADDR);
+
+	if (int_status & RK_MMU_IRQ_PAGE_FAULT) {
+		int flags;
+
+		status = rk_iommu_read(iommu, RK_MMU_STATUS);
+		flags = (status & RK_MMU_STATUS_PAGE_FAULT_IS_WRITE) ?
+				IOMMU_FAULT_WRITE : IOMMU_FAULT_READ;
+
+		dev_err(iommu->dev, "Page fault at %pad of type %s\n",
+			&iova,
+			(flags == IOMMU_FAULT_WRITE) ? "write" : "read");
+
+		log_iova(iommu, iova);
+
+		/*
+		 * Report page fault to any installed handlers.
+		 * Ignore the return code, though, since we always zap cache
+		 * and clear the page fault anyway.
+		 */
+		if (iommu->domain)
+			report_iommu_fault(iommu->domain, iommu->dev, iova,
+					   flags);
+		else
+			dev_err(iommu->dev, "Page fault while iommu not attached to domain?\n");
+
+		rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_command(iommu, RK_MMU_CMD_PAGE_FAULT_DONE);
+	}
+
+	if (int_status & RK_MMU_IRQ_BUS_ERROR)
+		dev_err(iommu->dev, "BUS_ERROR occurred at %pad\n", &iova);
+
+	if (int_status & ~RK_MMU_IRQ_MASK)
+		dev_err(iommu->dev, "unexpected int_status: %#08x\n",
+			int_status);
+
+	rk_iommu_write(iommu, RK_MMU_INT_CLEAR, int_status);
+
+	return IRQ_HANDLED;
+}
+
+static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
+					 dma_addr_t iova)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	phys_addr_t pt_phys, phys = 0;
+	u32 dte, pte;
+	u32 *page_table;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	dte = rk_domain->dt[rk_iova_dte_index(iova)];
+	if (!rk_dte_is_pt_valid(dte))
+		goto out;
+
+	pt_phys = rk_dte_pt_address(dte);
+	page_table = (u32 *)phys_to_virt(pt_phys);
+	pte = page_table[rk_iova_pte_index(iova)];
+	if (!rk_pte_is_page_valid(pte))
+		goto out;
+
+	phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
+out:
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	return phys;
+}
+
+static void rk_iommu_zap_iova(struct rk_iommu_domain *rk_domain,
+			      dma_addr_t iova, size_t size)
+{
+	struct list_head *pos;
+	unsigned long flags;
+
+	/* shootdown these iova from all iommus using this domain */
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_for_each(pos, &rk_domain->iommus) {
+		struct rk_iommu *iommu;
+		iommu = list_entry(pos, struct rk_iommu, node);
+		rk_iommu_zap_lines(iommu, iova, size);
+	}
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+}
+
+static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
+				  dma_addr_t iova)
+{
+	u32 *page_table, *dte_addr;
+	u32 dte;
+	phys_addr_t pt_phys;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	dte_addr = &rk_domain->dt[rk_iova_dte_index(iova)];
+	dte = *dte_addr;
+	if (rk_dte_is_pt_valid(dte))
+		goto done;
+
+	page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32);
+	if (!page_table)
+		return ERR_PTR(-ENOMEM);
+
+	dte = rk_mk_dte(page_table);
+	*dte_addr = dte;
+
+	rk_table_flush(page_table, NUM_PT_ENTRIES);
+	rk_table_flush(dte_addr, 1);
+
+	/*
+	 * Zap the first iova of the newly allocated page table so the iommu
+	 * evicts any stale value of the new dte from its iotlb.
+	 */
+	rk_iommu_zap_iova(rk_domain, iova, SPAGE_SIZE);
+
+done:
+	pt_phys = rk_dte_pt_address(dte);
+	return (u32 *)phys_to_virt(pt_phys);
+}
+
+static size_t rk_iommu_unmap_iova(struct rk_iommu_domain *rk_domain,
+				  u32 *pte_addr, dma_addr_t iova, size_t size)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	for (pte_count = 0; pte_count < pte_total; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+		if (!rk_pte_is_page_valid(pte))
+			break;
+
+		pte_addr[pte_count] = rk_mk_pte_invalid(pte);
+	}
+
+	rk_table_flush(pte_addr, pte_count);
+
+	return pte_count * SPAGE_SIZE;
+}
+
+static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
+			     dma_addr_t iova, phys_addr_t paddr, size_t size,
+			     int prot)
+{
+	unsigned int pte_count;
+	unsigned int pte_total = size / SPAGE_SIZE;
+	phys_addr_t page_phys;
+
+	assert_spin_locked(&rk_domain->dt_lock);
+
+	for (pte_count = 0; pte_count < pte_total; pte_count++) {
+		u32 pte = pte_addr[pte_count];
+
+		if (rk_pte_is_page_valid(pte))
+			goto unwind;
+
+		pte_addr[pte_count] = rk_mk_pte(paddr, prot);
+
+		paddr += SPAGE_SIZE;
+	}
+
+	rk_table_flush(pte_addr, pte_count);
+
+	return 0;
+unwind:
+	/* Unmap the range of iovas that we just mapped */
+	rk_iommu_unmap_iova(rk_domain, pte_addr, iova, pte_count * SPAGE_SIZE);
+
+	iova += pte_count * SPAGE_SIZE;
+	page_phys = rk_pte_page_address(pte_addr[pte_count]);
+	pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
+	       &iova, &page_phys, &paddr, prot);
+
+	return -EADDRINUSE;
+}
+
+static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
+			phys_addr_t paddr, size_t size, int prot)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	dma_addr_t iova = (dma_addr_t)_iova;
+	u32 *page_table, *pte_addr;
+	int ret;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	/*
+	 * pgsize_bitmap specifies iova sizes that fit in one page table
+	 * (1024 4-KiB pages = 4 MiB).
+	 * So, size will always be 4096 <= size <= 4194304.
+	 * Since iommu_map() guarantees that both iova and size will be
+	 * aligned, we will always only be mapping from a single dte here.
+	 */
+	page_table = rk_dte_get_page_table(rk_domain, iova);
+	if (IS_ERR(page_table)) {
+		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+		return PTR_ERR(page_table);
+	}
+
+	pte_addr = &page_table[rk_iova_pte_index(iova)];
+	ret = rk_iommu_map_iova(rk_domain, pte_addr, iova, paddr, size, prot);
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	return ret;
+}
+
+static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
+			     size_t size)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	dma_addr_t iova = (dma_addr_t)_iova;
+	phys_addr_t pt_phys;
+	u32 dte;
+	u32 *pte_addr;
+	size_t unmap_size;
+
+	spin_lock_irqsave(&rk_domain->dt_lock, flags);
+
+	/*
+	 * pgsize_bitmap specifies iova sizes that fit in one page table
+	 * (1024 4-KiB pages = 4 MiB).
+	 * So, size will always be 4096 <= size <= 4194304.
+	 * Since iommu_unmap() guarantees that both iova and size will be
+	 * aligned, we will always only be unmapping from a single dte here.
+	 */
+	dte = rk_domain->dt[rk_iova_dte_index(iova)];
+	/* Just return 0 if iova is unmapped */
+	if (!rk_dte_is_pt_valid(dte)) {
+		spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+		return 0;
+	}
+
+	pt_phys = rk_dte_pt_address(dte);
+	pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
+	unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, iova, size);
+
+	spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+
+	/* Shootdown iotlb entries for iova range that was just unmapped */
+	rk_iommu_zap_iova(rk_domain, iova, unmap_size);
+
+	return unmap_size;
+}
+
+static struct rk_iommu *rk_iommu_from_dev(struct device *dev)
+{
+	struct iommu_group *group;
+	struct device *iommu_dev;
+	struct rk_iommu *rk_iommu;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return NULL;
+	iommu_dev = iommu_group_get_iommudata(group);
+	rk_iommu = dev_get_drvdata(iommu_dev);
+	iommu_group_put(group);
+
+	return rk_iommu;
+}
+
+static int rk_iommu_attach_device(struct iommu_domain *domain,
+				  struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+	int ret;
+	phys_addr_t dte_addr;
+
+	/*
+	 * Allow 'virtual devices' (e.g., drm) to attach to domain.
+	 * Such a device does not belong to an iommu group.
+	 */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return 0;
+
+	ret = rk_iommu_enable_stall(iommu);
+	if (ret)
+		return ret;
+
+	ret = rk_iommu_force_reset(iommu);
+	if (ret)
+		return ret;
+
+	iommu->domain = domain;
+
+	ret = devm_request_irq(dev, iommu->irq, rk_iommu_irq,
+			       IRQF_SHARED, dev_name(dev), iommu);
+	if (ret)
+		return ret;
+
+	dte_addr = virt_to_phys(rk_domain->dt);
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, dte_addr);
+	rk_iommu_command(iommu, RK_MMU_CMD_ZAP_CACHE);
+	rk_iommu_write(iommu, RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
+
+	ret = rk_iommu_enable_paging(iommu);
+	if (ret)
+		return ret;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_add_tail(&iommu->node, &rk_domain->iommus);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	dev_info(dev, "Attached to iommu domain\n");
+
+	rk_iommu_disable_stall(iommu);
+
+	return 0;
+}
+
+static void rk_iommu_detach_device(struct iommu_domain *domain,
+				   struct device *dev)
+{
+	struct rk_iommu *iommu;
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	unsigned long flags;
+
+	/* Allow 'virtual devices' (e.g., drm) to detach from the domain */
+	iommu = rk_iommu_from_dev(dev);
+	if (!iommu)
+		return;
+
+	spin_lock_irqsave(&rk_domain->iommus_lock, flags);
+	list_del_init(&iommu->node);
+	spin_unlock_irqrestore(&rk_domain->iommus_lock, flags);
+
+	/* Ignore error while disabling, just keep going */
+	rk_iommu_enable_stall(iommu);
+	rk_iommu_disable_paging(iommu);
+	rk_iommu_write(iommu, RK_MMU_INT_MASK, 0);
+	rk_iommu_write(iommu, RK_MMU_DTE_ADDR, 0);
+	rk_iommu_disable_stall(iommu);
+
+	devm_free_irq(dev, iommu->irq, iommu);
+
+	iommu->domain = NULL;
+
+	dev_info(dev, "Detached from iommu domain\n");
+}
+
+static int rk_iommu_domain_init(struct iommu_domain *domain)
+{
+	struct rk_iommu_domain *rk_domain;
+
+	rk_domain = kzalloc(sizeof(*rk_domain), GFP_KERNEL);
+	if (!rk_domain)
+		return -ENOMEM;
+
+	/*
+	 * rk32xx iommus use a 2-level page table.
+	 * Each level 1 (dt) and level 2 (pt) table has 1024 4-byte entries.
+	 * Allocate one 4 KiB page for each table.
+	 */
+	rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32);
+	if (!rk_domain->dt)
+		goto err_dt;
+
+	rk_table_flush(rk_domain->dt, NUM_DT_ENTRIES);
+
+	spin_lock_init(&rk_domain->iommus_lock);
+	spin_lock_init(&rk_domain->dt_lock);
+	INIT_LIST_HEAD(&rk_domain->iommus);
+
+	domain->priv = rk_domain;
+
+	return 0;
+err_dt:
+	kfree(rk_domain);
+	return -ENOMEM;
+}
+
+static void rk_iommu_domain_destroy(struct iommu_domain *domain)
+{
+	struct rk_iommu_domain *rk_domain = domain->priv;
+	int i;
+
+	WARN_ON(!list_empty(&rk_domain->iommus));
+
+	for (i = 0; i < NUM_DT_ENTRIES; i++) {
+		u32 dte = rk_domain->dt[i];
+		if (rk_dte_is_pt_valid(dte)) {
+			phys_addr_t pt_phys = rk_dte_pt_address(dte);
+			u32 *page_table = phys_to_virt(pt_phys);
+			free_page((unsigned long)page_table);
+		}
+	}
+
+	free_page((unsigned long)rk_domain->dt);
+	kfree(domain->priv);
+	domain->priv = NULL;
+}
+
+static bool rk_iommu_is_dev_iommu_master(struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	int ret;
+
+	/*
+	 * An iommu master has an iommus property containing a list of phandles
+	 * to iommu nodes, each with an #iommu-cells property with value 0.
+	 */
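+	/*
+	 * e.g. a master node (names purely illustrative) would contain:
+	 *   iommus = <&vop_mmu>;
+	 * where the vop_mmu iommu node declares #iommu-cells = <0>.
+	 */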
+	ret = of_count_phandle_with_args(np, "iommus", "#iommu-cells");
+	return (ret > 0);
+}
+
+static int rk_iommu_group_set_iommudata(struct iommu_group *group,
+					struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	struct platform_device *pd;
+	int ret;
+	struct of_phandle_args args;
+
+	/*
+	 * An iommu master has an iommus property containing a list of phandles
+	 * to iommu nodes, each with an #iommu-cells property with value 0.
+	 */
+	ret = of_parse_phandle_with_args(np, "iommus", "#iommu-cells", 0,
+					 &args);
+	if (ret) {
+		dev_err(dev, "of_parse_phandle_with_args(%s) => %d\n",
+			np->full_name, ret);
+		return ret;
+	}
+	if (args.args_count != 0) {
+		dev_err(dev, "incorrect number of iommu params found for %s (found %d, expected 0)\n",
+			args.np->full_name, args.args_count);
+		return -EINVAL;
+	}
+
+	pd = of_find_device_by_node(args.np);
+	of_node_put(args.np);
+	if (!pd) {
+		dev_err(dev, "iommu %s not found\n", args.np->full_name);
+		return -EPROBE_DEFER;
+	}
+
+	/* TODO(djkurtz): handle multiple slave iommus for a single master */
+	iommu_group_set_iommudata(group, &pd->dev, NULL);
+
+	return 0;
+}
+
+static int rk_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+	int ret;
+
+	if (!rk_iommu_is_dev_iommu_master(dev))
+		return -ENODEV;
+
+	group = iommu_group_get(dev);
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group)) {
+			dev_err(dev, "Failed to allocate IOMMU group\n");
+			return PTR_ERR(group);
+		}
+	}
+
+	ret = iommu_group_add_device(group, dev);
+	if (ret)
+		goto err_put_group;
+
+	ret = rk_iommu_group_set_iommudata(group, dev);
+	if (ret)
+		goto err_remove_device;
+
+	iommu_group_put(group);
+
+	return 0;
+
+err_remove_device:
+	iommu_group_remove_device(dev);
+err_put_group:
+	iommu_group_put(group);
+	return ret;
+}
+
+static void rk_iommu_remove_device(struct device *dev)
+{
+	if (!rk_iommu_is_dev_iommu_master(dev))
+		return;
+
+	iommu_group_remove_device(dev);
+}
+
+static const struct iommu_ops rk_iommu_ops = {
+	.domain_init = rk_iommu_domain_init,
+	.domain_destroy = rk_iommu_domain_destroy,
+	.attach_dev = rk_iommu_attach_device,
+	.detach_dev = rk_iommu_detach_device,
+	.map = rk_iommu_map,
+	.unmap = rk_iommu_unmap,
+	.add_device = rk_iommu_add_device,
+	.remove_device = rk_iommu_remove_device,
+	.iova_to_phys = rk_iommu_iova_to_phys,
+	.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
+};
+
+static int rk_iommu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct rk_iommu *iommu;
+	struct resource *res;
+
+	iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL);
+	if (!iommu)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, iommu);
+	iommu->dev = dev;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	iommu->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(iommu->base))
+		return PTR_ERR(iommu->base);
+
+	iommu->irq = platform_get_irq(pdev, 0);
+	if (iommu->irq < 0) {
+		dev_err(dev, "Failed to get IRQ, %d\n", iommu->irq);
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int rk_iommu_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id rk_iommu_dt_ids[] = {
+	{ .compatible = "rockchip,iommu" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, rk_iommu_dt_ids);
+#endif
+
+static struct platform_driver rk_iommu_driver = {
+	.probe = rk_iommu_probe,
+	.remove = rk_iommu_remove,
+	.driver = {
+		   .name = "rk_iommu",
+		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(rk_iommu_dt_ids),
+	},
+};
+
+static int __init rk_iommu_init(void)
+{
+	int ret;
+
+	ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
+	if (ret)
+		return ret;
+
+	return platform_driver_register(&rk_iommu_driver);
+}
+static void __exit rk_iommu_exit(void)
+{
+	platform_driver_unregister(&rk_iommu_driver);
+}
+
+subsys_initcall(rk_iommu_init);
+module_exit(rk_iommu_exit);
+
+MODULE_DESCRIPTION("IOMMU API for Rockchip");
+MODULE_AUTHOR("Simon Xue <xxm@rock-chips.com> and Daniel Kurtz <djkurtz@chromium.org>");
+MODULE_ALIAS("platform:rockchip-iommu");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/iommu/shmobile-iommu.c b/drivers/iommu/shmobile-iommu.c
index 1333e6fb..f1b0077 100644
--- a/drivers/iommu/shmobile-iommu.c
+++ b/drivers/iommu/shmobile-iommu.c
@@ -361,6 +361,7 @@
 	.detach_dev = shmobile_iommu_detach_device,
 	.map = shmobile_iommu_map,
 	.unmap = shmobile_iommu_unmap,
+	.map_sg = default_iommu_map_sg,
 	.iova_to_phys = shmobile_iommu_iova_to_phys,
 	.add_device = shmobile_iommu_add_device,
 	.pgsize_bitmap = SZ_1M | SZ_64K | SZ_4K,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 3afdf43..73e845a 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -955,6 +955,7 @@
 	.detach_dev	= smmu_iommu_detach_dev,
 	.map		= smmu_iommu_map,
 	.unmap		= smmu_iommu_unmap,
+	.map_sg		= default_iommu_map_sg,
 	.iova_to_phys	= smmu_iommu_iova_to_phys,
 	.pgsize_bitmap	= SMMU_IOMMU_PGSIZES,
 };