aio: percpu ioctx refcount

This just converts the ioctx refcount to the new generic dynamic percpu
refcount code.

Signed-off-by: Kent Overstreet <koverstreet@google.com>
Cc: Zach Brown <zab@redhat.com>
Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Asai Thambi S P <asamymuthupa@micron.com>
Cc: Selvan Mani <smani@micron.com>
Cc: Sam Bradshaw <sbradshaw@micron.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Reviewed-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Benjamin LaHaise <bcrl@kvack.org>
diff --git a/fs/aio.c b/fs/aio.c
index bb1a6c4..7b470bf 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -39,6 +39,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/migrate.h>
 #include <linux/ramfs.h>
+#include <linux/percpu-refcount.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -70,7 +71,7 @@
 };
 
 struct kioctx {
-	atomic_t		users;
+	struct percpu_ref	users;
 	atomic_t		dead;
 
 	/* This needs improving */
@@ -103,7 +104,7 @@
 	long			nr_pages;
 
 	struct rcu_head		rcu_head;
-	struct work_struct	rcu_work;
+	struct work_struct	free_work;
 
 	struct {
 		/*
@@ -403,8 +404,9 @@
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx(struct kioctx *ctx)
+static void free_ioctx(struct work_struct *work)
 {
+	struct kioctx *ctx = container_of(work, struct kioctx, free_work);
 	struct aio_ring *ring;
 	struct io_event res;
 	struct kiocb *req;
@@ -462,10 +464,12 @@
 	call_rcu(&ctx->rcu_head, free_ioctx_rcu);
 }
 
-static void put_ioctx(struct kioctx *ctx)
+static void free_ioctx_ref(struct percpu_ref *ref)
 {
-	if (unlikely(atomic_dec_and_test(&ctx->users)))
-		free_ioctx(ctx);
+	struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+	INIT_WORK(&ctx->free_work, free_ioctx);
+	schedule_work(&ctx->free_work);
 }
 
 /* ioctx_alloc
@@ -505,8 +509,9 @@
 
 	ctx->max_reqs = nr_events;
 
-	atomic_set(&ctx->users, 2);
-	atomic_set(&ctx->dead, 0);
+	if (percpu_ref_init(&ctx->users, free_ioctx_ref))
+		goto out_freectx;
+
 	spin_lock_init(&ctx->ctx_lock);
 	spin_lock_init(&ctx->completion_lock);
 	mutex_init(&ctx->ring_lock);
@@ -516,7 +521,7 @@
 
 	ctx->cpu = alloc_percpu(struct kioctx_cpu);
 	if (!ctx->cpu)
-		goto out_freectx;
+		goto out_freeref;
 
 	if (aio_setup_ring(ctx) < 0)
 		goto out_freepcpu;
@@ -535,6 +540,8 @@
 	aio_nr += ctx->max_reqs;
 	spin_unlock(&aio_nr_lock);
 
+	percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+
 	/* now link into global list. */
 	spin_lock(&mm->ioctx_lock);
 	hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
@@ -549,6 +556,8 @@
 	aio_free_ring(ctx);
 out_freepcpu:
 	free_percpu(ctx->cpu);
+out_freeref:
+	free_percpu(ctx->users.pcpu_count);
 out_freectx:
 	if (ctx->aio_ring_file)
 		fput(ctx->aio_ring_file);
@@ -557,22 +566,6 @@
 	return ERR_PTR(err);
 }
 
-static void kill_ioctx_work(struct work_struct *work)
-{
-	struct kioctx *ctx = container_of(work, struct kioctx, rcu_work);
-
-	wake_up_all(&ctx->wait);
-	put_ioctx(ctx);
-}
-
-static void kill_ioctx_rcu(struct rcu_head *head)
-{
-	struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
-
-	INIT_WORK(&ctx->rcu_work, kill_ioctx_work);
-	schedule_work(&ctx->rcu_work);
-}
-
 /* kill_ioctx
  *	Cancels all outstanding aio requests on an aio context.  Used
  *	when the processes owning a context have all exited to encourage
@@ -582,6 +575,8 @@
 {
 	if (!atomic_xchg(&ctx->dead, 1)) {
 		hlist_del_rcu(&ctx->list);
+		/* percpu_ref_kill() will do the necessary call_rcu() */
+		wake_up_all(&ctx->wait);
 
 		/*
 		 * It'd be more correct to do this in free_ioctx(), after all
@@ -598,8 +593,7 @@
 		if (ctx->mmap_size)
 			vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-		/* Between hlist_del_rcu() and dropping the initial ref */
-		call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
+		percpu_ref_kill(&ctx->users);
 	}
 }
 
@@ -633,12 +627,6 @@
 	struct hlist_node *n;
 
 	hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
-		if (1 != atomic_read(&ctx->users))
-			printk(KERN_DEBUG
-				"exit_aio:ioctx still alive: %d %d %d\n",
-				atomic_read(&ctx->users),
-				atomic_read(&ctx->dead),
-				atomic_read(&ctx->reqs_available));
 		/*
 		 * We don't need to bother with munmap() here -
 		 * exit_mmap(mm) is coming and it'll unmap everything.
@@ -757,7 +745,7 @@
 
 	hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
 		if (ctx->user_id == ctx_id) {
-			atomic_inc(&ctx->users);
+			percpu_ref_get(&ctx->users);
 			ret = ctx;
 			break;
 		}
@@ -1054,7 +1042,7 @@
 		ret = put_user(ioctx->user_id, ctxp);
 		if (ret)
 			kill_ioctx(ioctx);
-		put_ioctx(ioctx);
+		percpu_ref_put(&ioctx->users);
 	}
 
 out:
@@ -1072,7 +1060,7 @@
 	struct kioctx *ioctx = lookup_ioctx(ctx);
 	if (likely(NULL != ioctx)) {
 		kill_ioctx(ioctx);
-		put_ioctx(ioctx);
+		percpu_ref_put(&ioctx->users);
 		return 0;
 	}
 	pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1394,7 +1382,7 @@
 	}
 	blk_finish_plug(&plug);
 
-	put_ioctx(ctx);
+	percpu_ref_put(&ctx->users);
 	return i ? i : ret;
 }
 
@@ -1483,7 +1471,7 @@
 			ret = -EFAULT;
 	}
 
-	put_ioctx(ctx);
+	percpu_ref_put(&ctx->users);
 
 	return ret;
 }
@@ -1512,7 +1500,7 @@
 	if (likely(ioctx)) {
 		if (likely(min_nr <= nr && min_nr >= 0))
 			ret = read_events(ioctx, min_nr, nr, events, timeout);
-		put_ioctx(ioctx);
+		percpu_ref_put(&ioctx->users);
 	}
 	return ret;
 }