/*
 * Copyright © 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>

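/*
 * One entry in the per-IOMMU PASID table. intel_svm_bind_mm() below fills
 * val with the physical address of the process page-table root (mm->pgd)
 * ORed with 1 to mark the entry valid; intel_mm_release() clears it back
 * to zero when the address space goes away.
 */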
struct pasid_entry {
        u64 val;
};

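/*
 * Allocate the per-IOMMU PASID table (and, when the capability advertises
 * it, a PASID state table of the same size). The allocation order is
 * derived from the PSS (PASID Size Supported) field of the extended
 * capability register, clamped so we never ask for a negative order.
 * This also initializes the IDR used to hand out PASID values in
 * intel_svm_bind_mm().
 */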
int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
{
        struct page *pages;
        int order;

        order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
        if (order < 0)
                order = 0;

        pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
                        iommu->name);
                return -ENOMEM;
        }
        iommu->pasid_table = page_address(pages);
        pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);

        if (ecap_dis(iommu->ecap)) {
                pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
                if (pages)
                        iommu->pasid_state_table = page_address(pages);
                else
                        pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
                                iommu->name);
        }

        idr_init(&iommu->pasid_idr);

        return 0;
}

int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
{
        int order;

        order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
        if (order < 0)
                order = 0;

        if (iommu->pasid_table) {
                free_pages((unsigned long)iommu->pasid_table, order);
                iommu->pasid_table = NULL;
        }
        if (iommu->pasid_state_table) {
                free_pages((unsigned long)iommu->pasid_state_table, order);
                iommu->pasid_state_table = NULL;
        }
        idr_destroy(&iommu->pasid_idr);
        return 0;
}

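/*
 * Flush one device's view of a range of the given SVM's address space.
 * This queues an extended-IOTLB invalidation (PASID-selective when the
 * whole space is being flushed, page-selective invalidation isn't
 * supported, or the mask exceeds what the hardware allows; page-selective
 * within the PASID otherwise) and, for ATS-capable devices, a matching
 * device-IOTLB invalidation.
 *
 * The address mask covers the request rounded up to a power of two:
 * e.g. pages == 5 gives mask = ilog2(__roundup_pow_of_two(5)) = 3, i.e.
 * an 8-page invalidation.
 */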
static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
                                       unsigned long address, int pages, int ih)
{
        struct qi_desc desc;
        int mask = ilog2(__roundup_pow_of_two(pages));

        if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) ||
            mask > cap_max_amask_val(svm->iommu->cap)) {
                desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
                        QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
                desc.high = 0;
        } else {
                desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
                        QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
                desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) |
                        QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
        }

        qi_submit_sync(&desc, svm->iommu);

        if (sdev->dev_iotlb) {
                desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
                        QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
                if (mask) {
                        unsigned long adr, delta;

                        /* Least significant zero bits in the address indicate the
                         * range of the request. So mask them out according to the
                         * size. */
                        adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);

                        /* Now ensure that we round down further if the original
                         * request was not aligned w.r.t. its size */
                        delta = address - adr;
                        if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
                                adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
                        desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
                } else {
                        desc.high = QI_DEV_EIOTLB_ADDR(address);
                }
                qi_submit_sync(&desc, svm->iommu);
        }
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
                                  int pages, int ih)
{
        struct intel_svm_dev *sdev;

        rcu_read_lock();
        list_for_each_entry_rcu(sdev, &svm->devs, list)
                intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
        rcu_read_unlock();
}

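/*
 * MMU notifier callbacks: whenever the CPU page tables of the shared mm
 * change (a PTE is updated, a page is unmapped, or a range is freed), the
 * corresponding extended-IOTLB and device-IOTLB entries are flushed so
 * devices cannot keep using stale translations.
 */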
static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
                             unsigned long address, pte_t pte)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        intel_flush_svm_range(svm, address, 1, 1);
}

static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
                                  unsigned long address)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        intel_flush_svm_range(svm, address, 1, 1);
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
                                   struct mm_struct *mm,
                                   unsigned long start, unsigned long end)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        intel_flush_svm_range(svm, start,
                              (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

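/* Queue a PASID-selective PASID-cache invalidation for this device's domain. */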
static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
{
        struct qi_desc desc;

        desc.high = 0;
        desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);

        qi_submit_sync(&desc, svm->iommu);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
        struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

        svm->iommu->pasid_table[svm->pasid].val = 0;

        /* There's no need to do any flush because we can't get here if there
         * are any devices left anyway. */
        WARN_ON(!list_empty(&svm->devs));
}

static const struct mmu_notifier_ops intel_mmuops = {
        .release = intel_mm_release,
        .change_pte = intel_change_pte,
        .invalidate_page = intel_invalidate_page,
        .invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

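/**
 * intel_svm_bind_mm() - bind the current process address space to a device
 * @dev:   device which will issue PASID-tagged DMA
 * @pasid: where to return the allocated PASID, or NULL to only check that
 *         the device and IOMMU are capable of (and enabled for) SVM
 *
 * Allocates a PASID (or reuses the one already bound to current->mm),
 * registers an MMU notifier so IOTLB entries follow CPU page-table updates,
 * and points the PASID-table entry at the process page tables.
 *
 * Returns zero on success with *@pasid set; the caller is then expected to
 * program that PASID into its device by whatever device-specific means it
 * has (not shown here), and to call intel_svm_unbind_mm() with the same
 * PASID when it is done.
 */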
int intel_svm_bind_mm(struct device *dev, int *pasid)
{
        struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
        struct intel_svm_dev *sdev;
        struct intel_svm *svm = NULL;
        int pasid_max;
        int ret;

        BUG_ON(pasid && !current->mm);

        if (WARN_ON(!iommu))
                return -EINVAL;

        if (dev_is_pci(dev)) {
                pasid_max = pci_max_pasids(to_pci_dev(dev));
                if (pasid_max < 0)
                        return -EINVAL;
        } else
                pasid_max = 1 << 20;

        mutex_lock(&pasid_mutex);
        if (pasid) {
                int i;

                idr_for_each_entry(&iommu->pasid_idr, svm, i) {
                        if (svm->mm != current->mm)
                                continue;

                        if (svm->pasid >= pasid_max) {
                                dev_warn(dev,
                                         "Limited PASID width. Cannot use existing PASID %d\n",
                                         svm->pasid);
                                ret = -ENOSPC;
                                goto out;
                        }

                        list_for_each_entry(sdev, &svm->devs, list) {
                                if (dev == sdev->dev) {
                                        sdev->users++;
                                        goto success;
                                }
                        }

                        break;
                }
        }

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev) {
                ret = -ENOMEM;
                goto out;
        }
        sdev->dev = dev;

        ret = intel_iommu_enable_pasid(iommu, sdev);
        if (ret || !pasid) {
                /* If they don't actually want to assign a PASID, this is
                 * just an enabling check/preparation. */
                kfree(sdev);
                goto out;
        }
        /* Finish the setup now we know we're keeping it */
        sdev->users = 1;
        init_rcu_head(&sdev->rcu);

        if (!svm) {
                svm = kzalloc(sizeof(*svm), GFP_KERNEL);
                if (!svm) {
                        ret = -ENOMEM;
                        kfree(sdev);
                        goto out;
                }
                svm->iommu = iommu;

                if (pasid_max > 2 << ecap_pss(iommu->ecap))
                        pasid_max = 2 << ecap_pss(iommu->ecap);

                ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
                                GFP_KERNEL);
                if (ret < 0) {
                        kfree(svm);
                        goto out;
                }
                svm->pasid = ret;
                svm->notifier.ops = &intel_mmuops;
                svm->mm = get_task_mm(current);
                INIT_LIST_HEAD_RCU(&svm->devs);
                ret = -ENOMEM;
                if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) {
                        idr_remove(&svm->iommu->pasid_idr, svm->pasid);
                        kfree(svm);
                        kfree(sdev);
                        goto out;
                }
                iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1;
                wmb();
        }
        list_add_rcu(&sdev->list, &svm->devs);

 success:
        *pasid = svm->pasid;
        ret = 0;
 out:
        mutex_unlock(&pasid_mutex);
        return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_bind_mm);

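/**
 * intel_svm_unbind_mm() - undo a previous intel_svm_bind_mm() for a device
 * @dev:   the device passed to intel_svm_bind_mm()
 * @pasid: the PASID that call returned
 *
 * Drops the device's reference on the PASID. When the last device is gone,
 * the PASID cache and IOTLB are flushed, the MMU notifier is unregistered
 * and the PASID is released. The caller must ensure the device has stopped
 * using the PASID, and has no outstanding page faults for it, before
 * calling.
 *
 * Returns zero on success, -EINVAL if the device/PASID pair was not bound.
 */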
int intel_svm_unbind_mm(struct device *dev, int pasid)
{
        struct intel_svm_dev *sdev;
        struct intel_iommu *iommu;
        struct intel_svm *svm;
        int ret = -EINVAL;

        mutex_lock(&pasid_mutex);
        iommu = intel_svm_device_to_iommu(dev);
        if (!iommu || !iommu->pasid_table)
                goto out;

        svm = idr_find(&iommu->pasid_idr, pasid);
        if (!svm)
                goto out;

        list_for_each_entry(sdev, &svm->devs, list) {
                if (dev == sdev->dev) {
                        ret = 0;
                        sdev->users--;
                        if (!sdev->users) {
                                list_del_rcu(&sdev->list);
                                /* Flush the PASID cache and IOTLB for this device.
                                 * Note that we do depend on the hardware *not* using
                                 * the PASID any more. Just as we depend on other
                                 * devices never using PASIDs that they have no right
                                 * to use. We have a *shared* PASID table, because it's
                                 * large and has to be physically contiguous. So it's
                                 * hard to be as defensive as we might like. */
                                intel_flush_pasid_dev(svm, sdev);
                                intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
                                kfree_rcu(sdev, rcu);

                                if (list_empty(&svm->devs)) {
                                        mmu_notifier_unregister(&svm->notifier, svm->mm);

                                        idr_remove(&svm->iommu->pasid_idr, svm->pasid);
                                        mmput(svm->mm);
                                        /* We mandate that no page faults may be outstanding
                                         * for the PASID when intel_svm_unbind_mm() is called.
                                         * If that is not obeyed, subtle errors will happen.
                                         * Let's make them less subtle... */
                                        memset(svm, 0x6b, sizeof(*svm));
                                        kfree(svm);
                                }
                        }
                        break;
                }
        }
 out:
        mutex_unlock(&pasid_mutex);

        return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);