/*
 * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
 * Author: Joerg Roedel <joerg.roedel@amd.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/mmu_notifier.h>
#include <linux/amd-iommu.h>
#include <linux/mm_types.h>
#include <linux/profile.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/iommu.h>
#include <linux/wait.h>
#include <linux/pci.h>
#include <linux/gfp.h>

#include "amd_iommu_types.h"
#include "amd_iommu_proto.h"

MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Joerg Roedel <joerg.roedel@amd.com>");

#define MAX_DEVICES		0x10000
#define PRI_QUEUE_SIZE		512

struct pri_queue {
	atomic_t inflight;
	bool finish;
	int status;
};

struct pasid_state {
	struct list_head list;			/* For global state-list */
	atomic_t count;				/* Reference count */
	struct task_struct *task;		/* Task bound to this PASID */
	struct mm_struct *mm;			/* mm_struct for the faults */
	struct mmu_notifier mn;			/* mmu_notifier handle */
	struct pri_queue pri[PRI_QUEUE_SIZE];	/* PRI tag states */
	struct device_state *device_state;	/* Link to our device_state */
	int pasid;				/* PASID index */
	spinlock_t lock;			/* Protect pri_queues */
	wait_queue_head_t wq;			/* To wait for count == 0 */
};

struct device_state {
	struct list_head list;
	u16 devid;
	atomic_t count;
	struct pci_dev *pdev;
	struct pasid_state **states;
	struct iommu_domain *domain;
	int pasid_levels;
	int max_pasids;
	amd_iommu_invalid_ppr_cb inv_ppr_cb;
	amd_iommu_invalidate_ctx inv_ctx_cb;
	spinlock_t lock;
	wait_queue_head_t wq;
};

struct fault {
	struct work_struct work;
	struct device_state *dev_state;
	struct pasid_state *state;
	struct mm_struct *mm;
	u64 address;
	u16 devid;
	u16 pasid;
	u16 tag;
	u16 finish;
	u16 flags;
};

static LIST_HEAD(state_list);
static spinlock_t state_lock;

/* List and lock for all pasid_states */
static LIST_HEAD(pasid_state_list);
static DEFINE_SPINLOCK(ps_lock);

static struct workqueue_struct *iommu_wq;

/*
 * Empty page table - Used between
 * mmu_notifier_invalidate_range_start and
 * mmu_notifier_invalidate_range_end
 */
static u64 *empty_page_table;

static void free_pasid_states(struct device_state *dev_state);
static void unbind_pasid(struct device_state *dev_state, int pasid);
static int task_exit(struct notifier_block *nb, unsigned long e, void *data);

static u16 device_id(struct pci_dev *pdev)
{
	u16 devid;

	devid = pdev->bus->number;
	devid = (devid << 8) | pdev->devfn;

	return devid;
}

static struct device_state *__get_device_state(u16 devid)
{
	struct device_state *dev_state;

	list_for_each_entry(dev_state, &state_list, list) {
		if (dev_state->devid == devid)
			return dev_state;
	}

	return NULL;
}

static struct device_state *get_device_state(u16 devid)
{
	struct device_state *dev_state;
	unsigned long flags;

	spin_lock_irqsave(&state_lock, flags);
	dev_state = __get_device_state(devid);
	if (dev_state != NULL)
		atomic_inc(&dev_state->count);
	spin_unlock_irqrestore(&state_lock, flags);

	return dev_state;
}

static void free_device_state(struct device_state *dev_state)
{
	/*
	 * First detach device from domain - No more PRI requests will arrive
	 * from that device after it is unbound from the IOMMUv2 domain.
	 */
	iommu_detach_device(dev_state->domain, &dev_state->pdev->dev);

	/* Everything is down now, free the IOMMUv2 domain */
	iommu_domain_free(dev_state->domain);

	/* Finally get rid of the device-state */
	kfree(dev_state);
}

static void put_device_state(struct device_state *dev_state)
{
	if (atomic_dec_and_test(&dev_state->count))
		wake_up(&dev_state->wq);
}

static void put_device_state_wait(struct device_state *dev_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&dev_state->wq, &wait, TASK_UNINTERRUPTIBLE);
	if (!atomic_dec_and_test(&dev_state->count))
		schedule();
	finish_wait(&dev_state->wq, &wait);

	free_device_state(dev_state);
}

static struct notifier_block profile_nb = {
	.notifier_call = task_exit,
};

static void link_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	list_add_tail(&pasid_state->list, &pasid_state_list);
	spin_unlock(&ps_lock);
}

static void __unlink_pasid_state(struct pasid_state *pasid_state)
{
	list_del(&pasid_state->list);
}

static void unlink_pasid_state(struct pasid_state *pasid_state)
{
	spin_lock(&ps_lock);
	__unlink_pasid_state(pasid_state);
	spin_unlock(&ps_lock);
}

/* Must be called under dev_state->lock */
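/*
 * The pasid_state pointers live in a small radix table: each level is one
 * page with 512 entries, indexed by 9 bits of the PASID. The number of
 * levels (dev_state->pasid_levels) is derived from max_pasids in
 * amd_iommu_init_device().
 */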
static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
						  int pasid, bool alloc)
{
	struct pasid_state **root, **ptr;
	int level, index;

	level = dev_state->pasid_levels;
	root = dev_state->states;

	while (true) {

		index = (pasid >> (9 * level)) & 0x1ff;
		ptr = &root[index];

		if (level == 0)
			break;

		if (*ptr == NULL) {
			if (!alloc)
				return NULL;

			*ptr = (void *)get_zeroed_page(GFP_ATOMIC);
			if (*ptr == NULL)
				return NULL;
		}

		root = (struct pasid_state **)*ptr;
		level -= 1;
	}

	return ptr;
}

static int set_pasid_state(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	ret = -ENOMEM;
	if (ptr == NULL)
		goto out_unlock;

	ret = -ENOMEM;
	if (*ptr != NULL)
		goto out_unlock;

	*ptr = pasid_state;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void clear_pasid_state(struct device_state *dev_state, int pasid)
{
	struct pasid_state **ptr;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, true);

	if (ptr == NULL)
		goto out_unlock;

	*ptr = NULL;

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);
}

static struct pasid_state *get_pasid_state(struct device_state *dev_state,
					   int pasid)
{
	struct pasid_state **ptr, *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev_state->lock, flags);
	ptr = __get_pasid_state_ptr(dev_state, pasid, false);

	if (ptr == NULL)
		goto out_unlock;

	ret = *ptr;
	if (ret)
		atomic_inc(&ret->count);

out_unlock:
	spin_unlock_irqrestore(&dev_state->lock, flags);

	return ret;
}

static void free_pasid_state(struct pasid_state *pasid_state)
{
	kfree(pasid_state);
}

static void put_pasid_state(struct pasid_state *pasid_state)
{
	if (atomic_dec_and_test(&pasid_state->count)) {
		put_device_state(pasid_state->device_state);
		wake_up(&pasid_state->wq);
	}
}

static void put_pasid_state_wait(struct pasid_state *pasid_state)
{
	DEFINE_WAIT(wait);

	prepare_to_wait(&pasid_state->wq, &wait, TASK_UNINTERRUPTIBLE);

	if (atomic_dec_and_test(&pasid_state->count))
		put_device_state(pasid_state->device_state);
	else
		schedule();

	finish_wait(&pasid_state->wq, &wait);
	mmput(pasid_state->mm);
	free_pasid_state(pasid_state);
}

static void __unbind_pasid(struct pasid_state *pasid_state)
{
	struct iommu_domain *domain;

	domain = pasid_state->device_state->domain;

	amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
	clear_pasid_state(pasid_state->device_state, pasid_state->pasid);

	/* Make sure no more pending faults are in the queue */
	flush_workqueue(iommu_wq);

	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

	put_pasid_state(pasid_state); /* Reference taken in bind() function */
}

static void unbind_pasid(struct device_state *dev_state, int pasid)
{
	struct pasid_state *pasid_state;

	pasid_state = get_pasid_state(dev_state, pasid);
	if (pasid_state == NULL)
		return;

	unlink_pasid_state(pasid_state);
	__unbind_pasid(pasid_state);
	put_pasid_state_wait(pasid_state); /* Reference taken in this function */
}

static void free_pasid_states_level1(struct pasid_state **tbl)
{
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		free_page((unsigned long)tbl[i]);
	}
}

static void free_pasid_states_level2(struct pasid_state **tbl)
{
	struct pasid_state **ptr;
	int i;

	for (i = 0; i < 512; ++i) {
		if (tbl[i] == NULL)
			continue;

		ptr = (struct pasid_state **)tbl[i];
		free_pasid_states_level1(ptr);
	}
}

static void free_pasid_states(struct device_state *dev_state)
{
	struct pasid_state *pasid_state;
	int i;

	for (i = 0; i < dev_state->max_pasids; ++i) {
		pasid_state = get_pasid_state(dev_state, i);
		if (pasid_state == NULL)
			continue;

		put_pasid_state(pasid_state);
		unbind_pasid(dev_state, i);
	}

	if (dev_state->pasid_levels == 2)
		free_pasid_states_level2(dev_state->states);
	else if (dev_state->pasid_levels == 1)
		free_pasid_states_level1(dev_state->states);
	else if (dev_state->pasid_levels != 0)
		BUG();

	free_page((unsigned long)dev_state->states);
}

static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
{
	return container_of(mn, struct pasid_state, mn);
}

static void __mn_flush_page(struct mmu_notifier *mn,
			    unsigned long address)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state = pasid_state->device_state;

	amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address);
}

static int mn_clear_flush_young(struct mmu_notifier *mn,
				struct mm_struct *mm,
				unsigned long address)
{
	__mn_flush_page(mn, address);

	return 0;
}

static void mn_change_pte(struct mmu_notifier *mn,
			  struct mm_struct *mm,
			  unsigned long address,
			  pte_t pte)
{
	__mn_flush_page(mn, address);
}

static void mn_invalidate_page(struct mmu_notifier *mn,
			       struct mm_struct *mm,
			       unsigned long address)
{
	__mn_flush_page(mn, address);
}

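/*
 * While a range invalidation is in progress, point the device's GCR3 entry
 * at an empty page table so that translations fault instead of using stale
 * mappings; mn_invalidate_range_end() below restores the real page-table
 * root.
 */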
static void mn_invalidate_range_start(struct mmu_notifier *mn,
				      struct mm_struct *mm,
				      unsigned long start, unsigned long end)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state = pasid_state->device_state;

	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
				  __pa(empty_page_table));
}

static void mn_invalidate_range_end(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    unsigned long start, unsigned long end)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;

	pasid_state = mn_to_state(mn);
	dev_state = pasid_state->device_state;

	amd_iommu_domain_set_gcr3(dev_state->domain, pasid_state->pasid,
				  __pa(pasid_state->mm->pgd));
}

static struct mmu_notifier_ops iommu_mn = {
	.clear_flush_young      = mn_clear_flush_young,
	.change_pte             = mn_change_pte,
	.invalidate_page        = mn_invalidate_page,
	.invalidate_range_start = mn_invalidate_range_start,
	.invalidate_range_end   = mn_invalidate_range_end,
};

static void set_pri_tag_status(struct pasid_state *pasid_state,
			       u16 tag, int status)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	pasid_state->pri[tag].status = status;
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

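/*
 * Complete the PPR response for @tag once the last in-flight fault for the
 * tag has been handled and the IOMMU has signalled the final request for it
 * (finish bit set in the PPR message).
 */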
static void finish_pri_tag(struct device_state *dev_state,
			   struct pasid_state *pasid_state,
			   u16 tag)
{
	unsigned long flags;

	spin_lock_irqsave(&pasid_state->lock, flags);
	if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
	    pasid_state->pri[tag].finish) {
		amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
				       pasid_state->pri[tag].status, tag);
		pasid_state->pri[tag].finish = false;
		pasid_state->pri[tag].status = PPR_SUCCESS;
	}
	spin_unlock_irqrestore(&pasid_state->lock, flags);
}

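/*
 * Worker function for the faults queued by ppr_notifier(): try to resolve
 * the faulting address with get_user_pages(); if that fails, let the device
 * driver's invalid-PPR callback pick the response code, defaulting to
 * PPR_INVALID when no callback is registered.
 */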
static void do_fault(struct work_struct *work)
{
	struct fault *fault = container_of(work, struct fault, work);
	int npages, write;
	struct page *page;

	write = !!(fault->flags & PPR_FAULT_WRITE);

	down_read(&fault->state->mm->mmap_sem);
	npages = get_user_pages(fault->state->task, fault->state->mm,
				fault->address, 1, write, 0, &page, NULL);
	up_read(&fault->state->mm->mmap_sem);

	if (npages == 1) {
		put_page(page);
	} else if (fault->dev_state->inv_ppr_cb) {
		int status;

		status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
						      fault->pasid,
						      fault->address,
						      fault->flags);
		switch (status) {
		case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
			set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
			break;
		case AMD_IOMMU_INV_PRI_RSP_INVALID:
			set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
			break;
		case AMD_IOMMU_INV_PRI_RSP_FAIL:
			set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
			break;
		default:
			BUG();
		}
	} else {
		set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
	}

	finish_pri_tag(fault->dev_state, fault->state, fault->tag);

	put_pasid_state(fault->state);

	kfree(fault);
}

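/*
 * Notifier called by the AMD IOMMU driver for every incoming peripheral
 * page request (PPR). The allocation below uses GFP_ATOMIC and the real
 * work is deferred to iommu_wq via do_fault(), since resolving the fault
 * with get_user_pages() may sleep.
 */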
static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
{
	struct amd_iommu_fault *iommu_fault;
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	unsigned long flags;
	struct fault *fault;
	bool finish;
	u16 tag;
	int ret;

	iommu_fault = data;
	tag = iommu_fault->tag & 0x1ff;
	finish = (iommu_fault->tag >> 9) & 1;

	ret = NOTIFY_DONE;
	dev_state = get_device_state(iommu_fault->device_id);
	if (dev_state == NULL)
		goto out;

	pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
	if (pasid_state == NULL) {
		/* We know the device but not the PASID -> send INVALID */
		amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
				       PPR_INVALID, tag);
		goto out_drop_state;
	}

	spin_lock_irqsave(&pasid_state->lock, flags);
	atomic_inc(&pasid_state->pri[tag].inflight);
	if (finish)
		pasid_state->pri[tag].finish = true;
	spin_unlock_irqrestore(&pasid_state->lock, flags);

	fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
	if (fault == NULL) {
		/* We are OOM - send success and let the device re-fault */
		finish_pri_tag(dev_state, pasid_state, tag);
		goto out_drop_state;
	}

	fault->dev_state = dev_state;
	fault->address = iommu_fault->address;
	fault->state = pasid_state;
	fault->tag = tag;
	fault->finish = finish;
	fault->flags = iommu_fault->flags;
	INIT_WORK(&fault->work, do_fault);

	queue_work(iommu_wq, &fault->work);

	ret = NOTIFY_OK;

out_drop_state:
	put_device_state(dev_state);

out:
	return ret;
}

static struct notifier_block ppr_nb = {
	.notifier_call = ppr_notifier,
};

static int task_exit(struct notifier_block *nb, unsigned long e, void *data)
{
	struct pasid_state *pasid_state;
	struct task_struct *task;

	task = data;

	/*
	 * Using this notifier is a hack - but there is no other choice
	 * at the moment. What I really want is a sleeping notifier that
	 * is called when an MM goes down. But such a notifier doesn't
	 * exist yet. The notifier needs to sleep because it has to make
	 * sure that the device does not use the PASID and the address
	 * space anymore before it is destroyed. This includes waiting
	 * for pending PRI requests to pass the workqueue. The
	 * MMU-Notifiers would be a good fit, but they use RCU and so
	 * they are not allowed to sleep. Let's see how we can solve this
	 * in a more intelligent way in the future.
	 */
again:
	spin_lock(&ps_lock);
	list_for_each_entry(pasid_state, &pasid_state_list, list) {
		struct device_state *dev_state;
		int pasid;

		if (pasid_state->task != task)
			continue;

		/* Drop the lock and unbind */
		spin_unlock(&ps_lock);

		dev_state = pasid_state->device_state;
		pasid = pasid_state->pasid;

		if (pasid_state->device_state->inv_ctx_cb)
			dev_state->inv_ctx_cb(dev_state->pdev, pasid);

		unbind_pasid(dev_state, pasid);

		/* Task may be in the list multiple times */
		goto again;
	}
	spin_unlock(&ps_lock);

	return NOTIFY_OK;
}

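/*
 * Bind @pasid on @pdev to the address space of @task: allocate a
 * pasid_state, register an mmu_notifier on the task's mm and program the
 * mm's page-table root into the IOMMU GCR3 table for this PASID.
 */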
int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
			 struct task_struct *task)
{
	struct pasid_state *pasid_state;
	struct device_state *dev_state;
	u16 devid;
	int ret;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);
	dev_state = get_device_state(devid);

	if (dev_state == NULL)
		return -EINVAL;

	ret = -EINVAL;
	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	ret = -ENOMEM;
	pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
	if (pasid_state == NULL)
		goto out;

	atomic_set(&pasid_state->count, 1);
	init_waitqueue_head(&pasid_state->wq);
	spin_lock_init(&pasid_state->lock);

	pasid_state->task = task;
	pasid_state->mm = get_task_mm(task);
	pasid_state->device_state = dev_state;
	pasid_state->pasid = pasid;
	pasid_state->mn.ops = &iommu_mn;

	if (pasid_state->mm == NULL)
		goto out_free;

	mmu_notifier_register(&pasid_state->mn, pasid_state->mm);

	ret = set_pasid_state(dev_state, pasid_state, pasid);
	if (ret)
		goto out_unregister;

	ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
					__pa(pasid_state->mm->pgd));
	if (ret)
		goto out_clear_state;

	link_pasid_state(pasid_state);

	return 0;

out_clear_state:
	clear_pasid_state(dev_state, pasid);

out_unregister:
	mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);

out_free:
	free_pasid_state(pasid_state);

out:
	put_device_state(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_bind_pasid);

void amd_iommu_unbind_pasid(struct pci_dev *pdev, int pasid)
{
	struct device_state *dev_state;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);
	dev_state = get_device_state(devid);
	if (dev_state == NULL)
		return;

	if (pasid < 0 || pasid >= dev_state->max_pasids)
		goto out;

	unbind_pasid(dev_state, pasid);

out:
	put_device_state(dev_state);
}
EXPORT_SYMBOL(amd_iommu_unbind_pasid);

int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
{
	struct device_state *dev_state;
	unsigned long flags;
	int ret, tmp;
	u16 devid;

	might_sleep();

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	if (pasids <= 0 || pasids > (PASID_MASK + 1))
		return -EINVAL;

	devid = device_id(pdev);

	dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
	if (dev_state == NULL)
		return -ENOMEM;

	spin_lock_init(&dev_state->lock);
	init_waitqueue_head(&dev_state->wq);
	dev_state->pdev = pdev;
	dev_state->devid = devid;

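	/*
	 * Figure out how many table levels are needed for the PASID state:
	 * each level resolves 9 bits, so keep adding levels until the
	 * remaining PASID range fits into a single 512-entry page.
	 */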
	tmp = pasids;
	for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
		dev_state->pasid_levels += 1;

	atomic_set(&dev_state->count, 1);
	dev_state->max_pasids = pasids;

	ret = -ENOMEM;
	dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
	if (dev_state->states == NULL)
		goto out_free_dev_state;

	dev_state->domain = iommu_domain_alloc(&pci_bus_type);
	if (dev_state->domain == NULL)
		goto out_free_states;

	amd_iommu_domain_direct_map(dev_state->domain);

	ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
	if (ret)
		goto out_free_domain;

	ret = iommu_attach_device(dev_state->domain, &pdev->dev);
	if (ret != 0)
		goto out_free_domain;

	spin_lock_irqsave(&state_lock, flags);

	if (__get_device_state(devid) != NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		ret = -EBUSY;
		goto out_free_domain;
	}

	list_add_tail(&dev_state->list, &state_list);

	spin_unlock_irqrestore(&state_lock, flags);

	return 0;

out_free_domain:
	iommu_domain_free(dev_state->domain);

out_free_states:
	free_page((unsigned long)dev_state->states);

out_free_dev_state:
	kfree(dev_state);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_init_device);

void amd_iommu_free_device(struct pci_dev *pdev)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;

	if (!amd_iommu_v2_supported())
		return;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	dev_state = __get_device_state(devid);
	if (dev_state == NULL) {
		spin_unlock_irqrestore(&state_lock, flags);
		return;
	}

	list_del(&dev_state->list);

	spin_unlock_irqrestore(&state_lock, flags);

	/* Get rid of any remaining pasid states */
	free_pasid_states(dev_state);

	put_device_state_wait(dev_state);
}
EXPORT_SYMBOL(amd_iommu_free_device);

int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
				 amd_iommu_invalid_ppr_cb cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ppr_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);

int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
				    amd_iommu_invalidate_ctx cb)
{
	struct device_state *dev_state;
	unsigned long flags;
	u16 devid;
	int ret;

	if (!amd_iommu_v2_supported())
		return -ENODEV;

	devid = device_id(pdev);

	spin_lock_irqsave(&state_lock, flags);

	ret = -EINVAL;
	dev_state = __get_device_state(devid);
	if (dev_state == NULL)
		goto out_unlock;

	dev_state->inv_ctx_cb = cb;

	ret = 0;

out_unlock:
	spin_unlock_irqrestore(&state_lock, flags);

	return ret;
}
EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);

static int __init amd_iommu_v2_init(void)
{
	int ret;

	pr_info("AMD IOMMUv2 driver by Joerg Roedel <joerg.roedel@amd.com>\n");

	if (!amd_iommu_v2_supported()) {
		pr_info("AMD IOMMUv2 functionality not available on this system\n");
		/*
		 * Load anyway to provide the symbols to other modules
		 * which may use AMD IOMMUv2 optionally.
		 */
		return 0;
	}

	spin_lock_init(&state_lock);

	ret = -ENOMEM;
	iommu_wq = create_workqueue("amd_iommu_v2");
	if (iommu_wq == NULL)
		goto out;

	ret = -ENOMEM;
	empty_page_table = (u64 *)get_zeroed_page(GFP_KERNEL);
	if (empty_page_table == NULL)
		goto out_destroy_wq;

	amd_iommu_register_ppr_notifier(&ppr_nb);
	profile_event_register(PROFILE_TASK_EXIT, &profile_nb);

	return 0;

out_destroy_wq:
	destroy_workqueue(iommu_wq);

out:
	return ret;
}

static void __exit amd_iommu_v2_exit(void)
{
	struct device_state *dev_state;
	int i;

	if (!amd_iommu_v2_supported())
		return;

	profile_event_unregister(PROFILE_TASK_EXIT, &profile_nb);
	amd_iommu_unregister_ppr_notifier(&ppr_nb);

	flush_workqueue(iommu_wq);

	/*
	 * The loop below might call flush_workqueue(), so call
	 * destroy_workqueue() after it
	 */
	for (i = 0; i < MAX_DEVICES; ++i) {
		dev_state = get_device_state(i);

		if (dev_state == NULL)
			continue;

		WARN_ON_ONCE(1);

		put_device_state(dev_state);
		amd_iommu_free_device(dev_state->pdev);
	}

	destroy_workqueue(iommu_wq);

	free_page((unsigned long)empty_page_table);
}

module_init(amd_iommu_v2_init);
module_exit(amd_iommu_v2_exit);