VT-d: handle Invalidation Queue Error to avoid system hang

When hardware detects any error with a descriptor from the invalidation
queue, it stops fetching new descriptors from the queue until software
clears the Invalidation Queue Error bit in the Fault Status register.
Following fix handles the IQE so the kernel won't be trapped in an
infinite loop.

Signed-off-by: Yu Zhao <yu.zhao@intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 2b4162d..8d3e9c2 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -573,19 +573,49 @@
 	}
 }
 
+static int qi_check_fault(struct intel_iommu *iommu, int index)
+{
+	u32 fault;
+	int head;
+	struct q_inval *qi = iommu->qi;
+	int wait_index = (index + 1) % QI_LENGTH;
+
+	fault = readl(iommu->reg + DMAR_FSTS_REG);
+
+	/*
+	 * If IQE happens, the head points to the descriptor associated
+	 * with the error. No new descriptors are fetched until the IQE
+	 * is cleared.
+	 */
+	if (fault & DMA_FSTS_IQE) {
+		head = readl(iommu->reg + DMAR_IQH_REG);
+		if ((head >> 4) == index) {
+			memcpy(&qi->desc[index], &qi->desc[wait_index],
+					sizeof(struct qi_desc));
+			__iommu_flush_cache(iommu, &qi->desc[index],
+					sizeof(struct qi_desc));
+			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Submit the queued invalidation descriptor to the remapping
  * hardware unit and wait for its completion.
  */
-void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
+int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
 {
+	int rc = 0;
 	struct q_inval *qi = iommu->qi;
 	struct qi_desc *hw, wait_desc;
 	int wait_index, index;
 	unsigned long flags;
 
 	if (!qi)
-		return;
+		return 0;
 
 	hw = qi->desc;
 
@@ -603,7 +633,8 @@
 
 	hw[index] = *desc;
 
-	wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
+	wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
+			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
 	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
 
 	hw[wait_index] = wait_desc;
@@ -614,13 +645,11 @@
 	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
 	qi->free_cnt -= 2;
 
-	spin_lock(&iommu->register_lock);
 	/*
 	 * update the HW tail register indicating the presence of
 	 * new descriptors.
 	 */
 	writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
-	spin_unlock(&iommu->register_lock);
 
 	while (qi->desc_status[wait_index] != QI_DONE) {
 		/*
@@ -630,15 +659,21 @@
 		 * a deadlock where the interrupt context can wait indefinitely
 		 * for free slots in the queue.
 		 */
+		rc = qi_check_fault(iommu, index);
+		if (rc)
+			goto out;
+
 		spin_unlock(&qi->q_lock);
 		cpu_relax();
 		spin_lock(&qi->q_lock);
 	}
-
-	qi->desc_status[index] = QI_DONE;
+out:
+	qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
 
 	reclaim_free_desc(qi);
 	spin_unlock_irqrestore(&qi->q_lock, flags);
+
+	return rc;
 }
 
 /*
@@ -651,13 +686,13 @@
 	desc.low = QI_IEC_TYPE;
 	desc.high = 0;
 
+	/* should never fail */
 	qi_submit_sync(&desc, iommu);
 }
 
 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
 		     u64 type, int non_present_entry_flush)
 {
-
 	struct qi_desc desc;
 
 	if (non_present_entry_flush) {
@@ -671,10 +706,7 @@
 			| QI_CC_GRAN(type) | QI_CC_TYPE;
 	desc.high = 0;
 
-	qi_submit_sync(&desc, iommu);
-
-	return 0;
-
+	return qi_submit_sync(&desc, iommu);
 }
 
 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
@@ -704,10 +736,7 @@
 	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
 		| QI_IOTLB_AM(size_order);
 
-	qi_submit_sync(&desc, iommu);
-
-	return 0;
-
+	return qi_submit_sync(&desc, iommu);
 }
 
 /*