blob: a64720b5bd34ecb059a58f5a2db58b3ab6f57b44 [file] [log] [blame]
David Woodhouse8a94ade2015-03-24 14:54:56 +00001/*
2 * Copyright © 2015 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>
14 */
15
16#include <linux/intel-iommu.h>
David Woodhouse2f26e0a2015-09-09 11:40:47 +010017#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/intel-svm.h>
21#include <linux/rculist.h>
22#include <linux/pci.h>
23#include <linux/pci-ats.h>
24
25struct pasid_entry {
26 u64 val;
27};
David Woodhouse8a94ade2015-03-24 14:54:56 +000028
David Woodhouse907fea32015-10-13 14:11:13 +010029struct pasid_state_entry {
30 u64 val;
31};
32
David Woodhouse8a94ade2015-03-24 14:54:56 +000033int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
34{
35 struct page *pages;
36 int order;
37
38 order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
39 if (order < 0)
40 order = 0;
41
42 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
43 if (!pages) {
44 pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
45 iommu->name);
46 return -ENOMEM;
47 }
48 iommu->pasid_table = page_address(pages);
49 pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
50
51 if (ecap_dis(iommu->ecap)) {
52 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
53 if (pages)
54 iommu->pasid_state_table = page_address(pages);
55 else
56 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
57 iommu->name);
58 }
59
David Woodhouse2f26e0a2015-09-09 11:40:47 +010060 idr_init(&iommu->pasid_idr);
61
David Woodhouse8a94ade2015-03-24 14:54:56 +000062 return 0;
63}
64
65int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
66{
67 int order;
68
69 order = ecap_pss(iommu->ecap) + 7 - PAGE_SHIFT;
70 if (order < 0)
71 order = 0;
72
73 if (iommu->pasid_table) {
74 free_pages((unsigned long)iommu->pasid_table, order);
75 iommu->pasid_table = NULL;
76 }
77 if (iommu->pasid_state_table) {
78 free_pages((unsigned long)iommu->pasid_state_table, order);
79 iommu->pasid_state_table = NULL;
80 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +010081 idr_destroy(&iommu->pasid_idr);
David Woodhouse8a94ade2015-03-24 14:54:56 +000082 return 0;
83}
David Woodhouse2f26e0a2015-09-09 11:40:47 +010084
85static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
86 unsigned long address, int pages, int ih)
87{
88 struct qi_desc desc;
89 int mask = ilog2(__roundup_pow_of_two(pages));
90
91 if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) ||
92 mask > cap_max_amask_val(svm->iommu->cap)) {
93 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
94 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
95 desc.high = 0;
96 } else {
97 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
98 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
99 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) |
100 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
101 }
102
103 qi_submit_sync(&desc, svm->iommu);
104
105 if (sdev->dev_iotlb) {
106 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
107 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
108 if (mask) {
109 unsigned long adr, delta;
110
111 /* Least significant zero bits in the address indicate the
112 * range of the request. So mask them out according to the
113 * size. */
114 adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);
115
116 /* Now ensure that we round down further if the original
117 * request was not aligned w.r.t. its size */
118 delta = address - adr;
119 if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
120 adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
121 desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
122 } else {
123 desc.high = QI_DEV_EIOTLB_ADDR(address);
124 }
125 qi_submit_sync(&desc, svm->iommu);
126 }
127}
128
129static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
130 int pages, int ih)
131{
132 struct intel_svm_dev *sdev;
133
David Woodhouse907fea32015-10-13 14:11:13 +0100134 /* Try deferred invalidate if available */
135 if (svm->iommu->pasid_state_table &&
136 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
137 return;
138
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100139 rcu_read_lock();
140 list_for_each_entry_rcu(sdev, &svm->devs, list)
141 intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
142 rcu_read_unlock();
143}
144
145static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
146 unsigned long address, pte_t pte)
147{
148 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
149
150 intel_flush_svm_range(svm, address, 1, 1);
151}
152
153static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
154 unsigned long address)
155{
156 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
157
158 intel_flush_svm_range(svm, address, 1, 1);
159}
160
161/* Pages have been freed at this point */
162static void intel_invalidate_range(struct mmu_notifier *mn,
163 struct mm_struct *mm,
164 unsigned long start, unsigned long end)
165{
166 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
167
168 intel_flush_svm_range(svm, start,
169 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0);
170}
171
172
173static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
174{
175 struct qi_desc desc;
176
177 desc.high = 0;
178 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);
179
180 qi_submit_sync(&desc, svm->iommu);
181}
182
183static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
184{
185 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
186
187 svm->iommu->pasid_table[svm->pasid].val = 0;
188
189 /* There's no need to do any flush because we can't get here if there
190 * are any devices left anyway. */
191 WARN_ON(!list_empty(&svm->devs));
192}
193
194static const struct mmu_notifier_ops intel_mmuops = {
195 .release = intel_mm_release,
196 .change_pte = intel_change_pte,
197 .invalidate_page = intel_invalidate_page,
198 .invalidate_range = intel_invalidate_range,
199};
200
/* Held across the whole of intel_svm_bind_mm() and intel_svm_unbind_mm(),
 * covering the PASID IDR lookups/updates and the per-svm device lists. */
static DEFINE_MUTEX(pasid_mutex);
202
203int intel_svm_bind_mm(struct device *dev, int *pasid)
204{
205 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
206 struct intel_svm_dev *sdev;
207 struct intel_svm *svm = NULL;
208 int pasid_max;
209 int ret;
210
211 BUG_ON(pasid && !current->mm);
212
213 if (WARN_ON(!iommu))
214 return -EINVAL;
215
216 if (dev_is_pci(dev)) {
217 pasid_max = pci_max_pasids(to_pci_dev(dev));
218 if (pasid_max < 0)
219 return -EINVAL;
220 } else
221 pasid_max = 1 << 20;
222
223 mutex_lock(&pasid_mutex);
224 if (pasid) {
225 int i;
226
227 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
228 if (svm->mm != current->mm)
229 continue;
230
231 if (svm->pasid >= pasid_max) {
232 dev_warn(dev,
233 "Limited PASID width. Cannot use existing PASID %d\n",
234 svm->pasid);
235 ret = -ENOSPC;
236 goto out;
237 }
238
239 list_for_each_entry(sdev, &svm->devs, list) {
240 if (dev == sdev->dev) {
241 sdev->users++;
242 goto success;
243 }
244 }
245
246 break;
247 }
248 }
249
250 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
251 if (!sdev) {
252 ret = -ENOMEM;
253 goto out;
254 }
255 sdev->dev = dev;
256
257 ret = intel_iommu_enable_pasid(iommu, sdev);
258 if (ret || !pasid) {
259 /* If they don't actually want to assign a PASID, this is
260 * just an enabling check/preparation. */
261 kfree(sdev);
262 goto out;
263 }
264 /* Finish the setup now we know we're keeping it */
265 sdev->users = 1;
266 init_rcu_head(&sdev->rcu);
267
268 if (!svm) {
269 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
270 if (!svm) {
271 ret = -ENOMEM;
272 kfree(sdev);
273 goto out;
274 }
275 svm->iommu = iommu;
276
277 if (pasid_max > 2 << ecap_pss(iommu->ecap))
278 pasid_max = 2 << ecap_pss(iommu->ecap);
279
280 ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
281 GFP_KERNEL);
282 if (ret < 0) {
283 kfree(svm);
284 goto out;
285 }
286 svm->pasid = ret;
287 svm->notifier.ops = &intel_mmuops;
288 svm->mm = get_task_mm(current);
289 INIT_LIST_HEAD_RCU(&svm->devs);
290 ret = -ENOMEM;
291 if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) {
292 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
293 kfree(svm);
294 kfree(sdev);
295 goto out;
296 }
297 iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1;
298 wmb();
299 }
300 list_add_rcu(&sdev->list, &svm->devs);
301
302 success:
303 *pasid = svm->pasid;
304 ret = 0;
305 out:
306 mutex_unlock(&pasid_mutex);
307 return ret;
308}
309EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
310
311int intel_svm_unbind_mm(struct device *dev, int pasid)
312{
313 struct intel_svm_dev *sdev;
314 struct intel_iommu *iommu;
315 struct intel_svm *svm;
316 int ret = -EINVAL;
317
318 mutex_lock(&pasid_mutex);
319 iommu = intel_svm_device_to_iommu(dev);
320 if (!iommu || !iommu->pasid_table)
321 goto out;
322
323 svm = idr_find(&iommu->pasid_idr, pasid);
324 if (!svm)
325 goto out;
326
327 list_for_each_entry(sdev, &svm->devs, list) {
328 if (dev == sdev->dev) {
329 ret = 0;
330 sdev->users--;
331 if (!sdev->users) {
332 list_del_rcu(&sdev->list);
333 /* Flush the PASID cache and IOTLB for this device.
334 * Note that we do depend on the hardware *not* using
335 * the PASID any more. Just as we depend on other
336 * devices never using PASIDs that they have no right
337 * to use. We have a *shared* PASID table, because it's
338 * large and has to be physically contiguous. So it's
339 * hard to be as defensive as we might like. */
340 intel_flush_pasid_dev(svm, sdev);
341 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
342 kfree_rcu(sdev, rcu);
343
344 if (list_empty(&svm->devs)) {
345 mmu_notifier_unregister(&svm->notifier, svm->mm);
346
347 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
348 mmput(svm->mm);
349 /* We mandate that no page faults may be outstanding
350 * for the PASID when intel_svm_unbind_mm() is called.
351 * If that is not obeyed, subtle errors will happen.
352 * Let's make them less subtle... */
353 memset(svm, 0x6b, sizeof(*svm));
354 kfree(svm);
355 }
356 }
357 break;
358 }
359 }
360 out:
361 mutex_unlock(&pasid_mutex);
362
363 return ret;
364}
365EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);