Blame - block/blk-mq.c - kernel/hikey-linaro

blob: 3c4f1fceef8e0171cab79dd615dbe841bd2bbb02 [file] [log] [blame]

Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1	#include <linux/kernel.h>
				2	#include <linux/module.h>
				3	#include <linux/backing-dev.h>
				4	#include <linux/bio.h>
				5	#include <linux/blkdev.h>
				6	#include <linux/mm.h>
				7	#include <linux/init.h>
				8	#include <linux/slab.h>
				9	#include <linux/workqueue.h>
				10	#include <linux/smp.h>
				11	#include <linux/llist.h>
				12	#include <linux/list_sort.h>
				13	#include <linux/cpu.h>
				14	#include <linux/cache.h>
				15	#include <linux/sched/sysctl.h>
				16	#include <linux/delay.h>
				17
				18	#include <trace/events/block.h>
				19
				20	#include <linux/blk-mq.h>
				21	#include "blk.h"
				22	#include "blk-mq.h"
				23	#include "blk-mq-tag.h"
				24
				25	static DEFINE_MUTEX(all_q_mutex);
				26	static LIST_HEAD(all_q_list);
				27
				28	static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
				29
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	30	static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
				31	unsigned int cpu)
				32	{
				33	return per_cpu_ptr(q->queue_ctx, cpu);
				34	}
				35
				36	/*
				37	* This assumes per-cpu software queueing queues. They could be per-node
				38	* as well, for instance. For now this is hardcoded as-is. Note that we don't
				39	* care about preemption, since we know the ctx's are persistent. This does
				40	* mean that we can't rely on ctx always matching the currently running CPU.
				41	*/
				42	static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
				43	{
				44	return __blk_mq_get_ctx(q, get_cpu());
				45	}
				46
				47	static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
				48	{
				49	put_cpu();
				50	}
				51
				52	/*
				53	* Check if any of the ctx's have pending work in this hardware queue
				54	*/
				55	static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
				56	{
				57	unsigned int i;
				58
				59	for (i = 0; i < hctx->nr_ctx_map; i++)
				60	if (hctx->ctx_map[i])
				61	return true;
				62
				63	return false;
				64	}
				65
				66	/*
				67	* Mark this ctx as having pending work in this hardware queue
				68	*/
				69	static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
				70	struct blk_mq_ctx *ctx)
				71	{
				72	if (!test_bit(ctx->index_hw, hctx->ctx_map))
				73	set_bit(ctx->index_hw, hctx->ctx_map);
				74	}
				75
Christoph Hellwig	081241e	2014-02-20 15:32:36 -0800	[diff] [blame]	76	static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	77	struct blk_mq_ctx *ctx,
Christoph Hellwig	081241e	2014-02-20 15:32:36 -0800	[diff] [blame]	78	gfp_t gfp, bool reserved)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	79	{
				80	struct request *rq;
				81	unsigned int tag;
				82
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	83	tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	84	if (tag != BLK_MQ_TAG_FAIL) {
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	85	rq = hctx->tags->rqs[tag];
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	86
				87	rq->cmd_flags = 0;
				88	if (blk_mq_tag_busy(hctx)) {
				89	rq->cmd_flags = REQ_MQ_INFLIGHT;
				90	atomic_inc(&hctx->nr_active);
				91	}
				92
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	93	rq->tag = tag;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	94	return rq;
				95	}
				96
				97	return NULL;
				98	}
				99
				100	static int blk_mq_queue_enter(struct request_queue *q)
				101	{
				102	int ret;
				103
				104	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
				105	smp_wmb();
				106	/* we have problems to freeze the queue if it's initializing */
				107	if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
				108	return 0;
				109
				110	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
				111
				112	spin_lock_irq(q->queue_lock);
				113	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	114	!blk_queue_bypass(q) || blk_queue_dying(q),
				115	*q->queue_lock);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	116	/* inc usage with lock hold to avoid freeze_queue runs here */
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	117	if (!ret && !blk_queue_dying(q))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	118	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	119	else if (blk_queue_dying(q))
				120	ret = -ENODEV;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	121	spin_unlock_irq(q->queue_lock);
				122
				123	return ret;
				124	}
				125
				126	static void blk_mq_queue_exit(struct request_queue *q)
				127	{
				128	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
				129	}
				130
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	131	static void __blk_mq_drain_queue(struct request_queue *q)
				132	{
				133	while (true) {
				134	s64 count;
				135
				136	spin_lock_irq(q->queue_lock);
				137	count = percpu_counter_sum(&q->mq_usage_counter);
				138	spin_unlock_irq(q->queue_lock);
				139
				140	if (count == 0)
				141	break;
				142	blk_mq_run_queues(q, false);
				143	msleep(10);
				144	}
				145	}
				146
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	147	/*
				148	* Guarantee no request is in use, so we can change any data structure of
				149	* the queue afterward.
				150	*/
				151	static void blk_mq_freeze_queue(struct request_queue *q)
				152	{
				153	bool drain;
				154
				155	spin_lock_irq(q->queue_lock);
				156	drain = !q->bypass_depth++;
				157	queue_flag_set(QUEUE_FLAG_BYPASS, q);
				158	spin_unlock_irq(q->queue_lock);
				159
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	160	if (drain)
				161	__blk_mq_drain_queue(q);
				162	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	163
Ming Lei	43a5e4e	2013-12-26 21:31:35 +0800	[diff] [blame]	164	void blk_mq_drain_queue(struct request_queue *q)
				165	{
				166	__blk_mq_drain_queue(q);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	167	}
				168
				169	static void blk_mq_unfreeze_queue(struct request_queue *q)
				170	{
				171	bool wake = false;
				172
				173	spin_lock_irq(q->queue_lock);
				174	if (!--q->bypass_depth) {
				175	queue_flag_clear(QUEUE_FLAG_BYPASS, q);
				176	wake = true;
				177	}
				178	WARN_ON_ONCE(q->bypass_depth < 0);
				179	spin_unlock_irq(q->queue_lock);
				180	if (wake)
				181	wake_up_all(&q->mq_freeze_wq);
				182	}
				183
				184	bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
				185	{
				186	return blk_mq_has_free_tags(hctx->tags);
				187	}
				188	EXPORT_SYMBOL(blk_mq_can_queue);
				189
Jens Axboe	94eddfb	2013-11-19 09:25:07 -0700	[diff] [blame]	190	static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
				191	struct request *rq, unsigned int rw_flags)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	192	{
Jens Axboe	94eddfb	2013-11-19 09:25:07 -0700	[diff] [blame]	193	if (blk_queue_io_stat(q))
				194	rw_flags |= REQ_IO_STAT;
				195
Christoph Hellwig	af76e55	2014-05-06 12:12:45 +0200	[diff] [blame]	196	INIT_LIST_HEAD(&rq->queuelist);
				197	/* csd/requeue_work/fifo_time is initialized before use */
				198	rq->q = q;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	199	rq->mq_ctx = ctx;
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	200	rq->cmd_flags |= rw_flags;
Christoph Hellwig	af76e55	2014-05-06 12:12:45 +0200	[diff] [blame]	201	rq->cmd_type = 0;
				202	/* do not touch atomic flags, it needs atomic ops against the timer */
				203	rq->cpu = -1;
				204	rq->__data_len = 0;
				205	rq->__sector = (sector_t) -1;
				206	rq->bio = NULL;
				207	rq->biotail = NULL;
				208	INIT_HLIST_NODE(&rq->hash);
				209	RB_CLEAR_NODE(&rq->rb_node);
				210	memset(&rq->flush, 0, max(sizeof(rq->flush), sizeof(rq->elv)));
				211	rq->rq_disk = NULL;
				212	rq->part = NULL;
Ming Lei	0fec08b	2014-01-03 10:00:08 -0700	[diff] [blame]	213	rq->start_time = jiffies;
Christoph Hellwig	af76e55	2014-05-06 12:12:45 +0200	[diff] [blame]	214	#ifdef CONFIG_BLK_CGROUP
				215	rq->rl = NULL;
Ming Lei	0fec08b	2014-01-03 10:00:08 -0700	[diff] [blame]	216	set_start_time_ns(rq);
Christoph Hellwig	af76e55	2014-05-06 12:12:45 +0200	[diff] [blame]	217	rq->io_start_time_ns = 0;
				218	#endif
				219	rq->nr_phys_segments = 0;
				220	#if defined(CONFIG_BLK_DEV_INTEGRITY)
				221	rq->nr_integrity_segments = 0;
				222	#endif
				223	rq->ioprio = 0;
				224	rq->special = NULL;
				225	/* tag was already set */
				226	rq->errors = 0;
				227	memset(rq->__cmd, 0, sizeof(rq->__cmd));
				228	rq->cmd = rq->__cmd;
				229	rq->cmd_len = BLK_MAX_CDB;
				230
				231	rq->extra_len = 0;
				232	rq->sense_len = 0;
				233	rq->resid_len = 0;
				234	rq->sense = NULL;
				235
				236	rq->deadline = 0;
				237	INIT_LIST_HEAD(&rq->timeout_list);
				238	rq->timeout = 0;
				239	rq->retries = 0;
				240	rq->end_io = NULL;
				241	rq->end_io_data = NULL;
				242	rq->next_rq = NULL;
				243
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	244	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
				245	}
				246
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	247	static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
				248	int rw, gfp_t gfp,
				249	bool reserved)
				250	{
				251	struct request *rq;
				252
				253	do {
				254	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
				255	struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
				256
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	257	rq = __blk_mq_alloc_request(hctx, ctx, gfp & ~__GFP_WAIT,
				258	reserved);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	259	if (rq) {
Jens Axboe	94eddfb	2013-11-19 09:25:07 -0700	[diff] [blame]	260	blk_mq_rq_ctx_init(q, ctx, rq, rw);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	261	break;
Jeff Moyer	959a35f	2013-12-03 14:23:00 -0700	[diff] [blame]	262	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	263
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	264	if (gfp & __GFP_WAIT) {
				265	__blk_mq_run_hw_queue(hctx);
				266	blk_mq_put_ctx(ctx);
				267	} else {
				268	blk_mq_put_ctx(ctx);
Jeff Moyer	959a35f	2013-12-03 14:23:00 -0700	[diff] [blame]	269	break;
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	270	}
Jeff Moyer	959a35f	2013-12-03 14:23:00 -0700	[diff] [blame]	271
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	272	blk_mq_wait_for_tags(hctx, reserved);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	273	} while (1);
				274
				275	return rq;
				276	}
				277
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	278	struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	279	{
				280	struct request *rq;
				281
				282	if (blk_mq_queue_enter(q))
				283	return NULL;
				284
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	285	rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
Jeff Moyer	959a35f	2013-12-03 14:23:00 -0700	[diff] [blame]	286	if (rq)
				287	blk_mq_put_ctx(rq->mq_ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	288	return rq;
				289	}
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	290	EXPORT_SYMBOL(blk_mq_alloc_request);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	291
				292	struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw,
				293	gfp_t gfp)
				294	{
				295	struct request *rq;
				296
				297	if (blk_mq_queue_enter(q))
				298	return NULL;
				299
				300	rq = blk_mq_alloc_request_pinned(q, rw, gfp, true);
Jeff Moyer	959a35f	2013-12-03 14:23:00 -0700	[diff] [blame]	301	if (rq)
				302	blk_mq_put_ctx(rq->mq_ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	303	return rq;
				304	}
				305	EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
				306
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	307	static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
				308	struct blk_mq_ctx *ctx, struct request *rq)
				309	{
				310	const int tag = rq->tag;
				311	struct request_queue *q = rq->q;
				312
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	313	if (rq->cmd_flags & REQ_MQ_INFLIGHT)
				314	atomic_dec(&hctx->nr_active);
				315
Christoph Hellwig	af76e55	2014-05-06 12:12:45 +0200	[diff] [blame]	316	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	317	blk_mq_put_tag(hctx, tag, &ctx->last_tag);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	318	blk_mq_queue_exit(q);
				319	}
				320
				321	void blk_mq_free_request(struct request *rq)
				322	{
				323	struct blk_mq_ctx *ctx = rq->mq_ctx;
				324	struct blk_mq_hw_ctx *hctx;
				325	struct request_queue *q = rq->q;
				326
				327	ctx->rq_completed[rq_is_sync(rq)]++;
				328
				329	hctx = q->mq_ops->map_queue(q, ctx->cpu);
				330	__blk_mq_free_request(hctx, ctx, rq);
				331	}
				332
Christoph Hellwig	8727af4	2014-04-14 10:30:08 +0200	[diff] [blame]	333	/*
				334	* Clone all relevant state from a request that has been put on hold in
				335	* the flush state machine into the preallocated flush request that hangs
				336	* off the request queue.
				337	*
				338	* For a driver the flush request should be invisible, that's why we are
				339	* impersonating the original request here.
				340	*/
				341	void blk_mq_clone_flush_request(struct request *flush_rq,
				342	struct request *orig_rq)
				343	{
				344	struct blk_mq_hw_ctx *hctx =
				345	orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
				346
				347	flush_rq->mq_ctx = orig_rq->mq_ctx;
				348	flush_rq->tag = orig_rq->tag;
				349	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
				350	hctx->cmd_size);
				351	}
				352
Christoph Hellwig	63151a4	2014-04-16 09:44:52 +0200	[diff] [blame]	353	inline void __blk_mq_end_io(struct request *rq, int error)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	354	{
Ming Lei	0d11e6a	2013-12-05 10:50:39 -0700	[diff] [blame]	355	blk_account_io_done(rq);
				356
Christoph Hellwig	91b6363	2014-04-16 09:44:53 +0200	[diff] [blame]	357	if (rq->end_io) {
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	358	rq->end_io(rq, error);
Christoph Hellwig	91b6363	2014-04-16 09:44:53 +0200	[diff] [blame]	359	} else {
				360	if (unlikely(blk_bidi_rq(rq)))
				361	blk_mq_free_request(rq->next_rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	362	blk_mq_free_request(rq);
Christoph Hellwig	91b6363	2014-04-16 09:44:53 +0200	[diff] [blame]	363	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	364	}
Christoph Hellwig	63151a4	2014-04-16 09:44:52 +0200	[diff] [blame]	365	EXPORT_SYMBOL(__blk_mq_end_io);
				366
				367	void blk_mq_end_io(struct request *rq, int error)
				368	{
				369	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
				370	BUG();
				371	__blk_mq_end_io(rq, error);
				372	}
				373	EXPORT_SYMBOL(blk_mq_end_io);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	374
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	375	static void __blk_mq_complete_request_remote(void *data)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	376	{
Christoph Hellwig	3d6efbf	2014-01-08 09:33:37 -0800	[diff] [blame]	377	struct request *rq = data;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	378
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	379	rq->q->softirq_done_fn(rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	380	}
				381
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	382	void __blk_mq_complete_request(struct request *rq)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	383	{
				384	struct blk_mq_ctx *ctx = rq->mq_ctx;
Christoph Hellwig	3853520	2014-04-25 02:32:53 -0700	[diff] [blame]	385	bool shared = false;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	386	int cpu;
				387
Christoph Hellwig	3853520	2014-04-25 02:32:53 -0700	[diff] [blame]	388	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	389	rq->q->softirq_done_fn(rq);
				390	return;
				391	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	392
				393	cpu = get_cpu();
Christoph Hellwig	3853520	2014-04-25 02:32:53 -0700	[diff] [blame]	394	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
				395	shared = cpus_share_cache(cpu, ctx->cpu);
				396
				397	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	398	rq->csd.func = __blk_mq_complete_request_remote;
Christoph Hellwig	3d6efbf	2014-01-08 09:33:37 -0800	[diff] [blame]	399	rq->csd.info = rq;
				400	rq->csd.flags = 0;
Frederic Weisbecker	c46fff2	2014-02-24 16:40:02 +0100	[diff] [blame]	401	smp_call_function_single_async(ctx->cpu, &rq->csd);
Christoph Hellwig	3d6efbf	2014-01-08 09:33:37 -0800	[diff] [blame]	402	} else {
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	403	rq->q->softirq_done_fn(rq);
Christoph Hellwig	3d6efbf	2014-01-08 09:33:37 -0800	[diff] [blame]	404	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	405	put_cpu();
				406	}
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	407
				408	/**
				409	* blk_mq_complete_request - end I/O on a request
				410	* @rq: the request being processed
				411	*
				412	* Description:
				413	* Ends all I/O on a request. It does not handle partial completions.
				414	* The actual completion happens out-of-order, through a IPI handler.
				415	**/
				416	void blk_mq_complete_request(struct request *rq)
				417	{
				418	if (unlikely(blk_should_fake_timeout(rq->q)))
				419	return;
				420	if (!blk_mark_rq_complete(rq))
				421	__blk_mq_complete_request(rq);
				422	}
				423	EXPORT_SYMBOL(blk_mq_complete_request);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	424
Christoph Hellwig	49f5baa	2014-02-11 08:27:14 -0800	[diff] [blame]	425	static void blk_mq_start_request(struct request *rq, bool last)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	426	{
				427	struct request_queue *q = rq->q;
				428
				429	trace_block_rq_issue(q, rq);
				430
Christoph Hellwig	742ee69	2014-04-14 10:30:06 +0200	[diff] [blame]	431	rq->resid_len = blk_rq_bytes(rq);
Christoph Hellwig	91b6363	2014-04-16 09:44:53 +0200	[diff] [blame]	432	if (unlikely(blk_bidi_rq(rq)))
				433	rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);
Christoph Hellwig	742ee69	2014-04-14 10:30:06 +0200	[diff] [blame]	434
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	435	/*
				436	* Just mark start time and set the started bit. Due to memory
				437	* ordering, we know we'll see the correct deadline as long as
				438	* REQ_ATOMIC_STARTED is seen.
				439	*/
				440	rq->deadline = jiffies + q->rq_timeout;
Jens Axboe	87ee7b1	2014-04-24 08:51:47 -0600	[diff] [blame]	441
				442	/*
				443	* Mark us as started and clear complete. Complete might have been
				444	* set if requeue raced with timeout, which then marked it as
				445	* complete. So be sure to clear complete again when we start
				446	* the request, otherwise we'll ignore the completion event.
				447	*/
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	448	set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
Jens Axboe	87ee7b1	2014-04-24 08:51:47 -0600	[diff] [blame]	449	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
Christoph Hellwig	49f5baa	2014-02-11 08:27:14 -0800	[diff] [blame]	450
				451	if (q->dma_drain_size && blk_rq_bytes(rq)) {
				452	/*
				453	* Make sure space for the drain appears. We know we can do
				454	* this because max_hw_segments has been adjusted to be one
				455	* fewer than the device can handle.
				456	*/
				457	rq->nr_phys_segments++;
				458	}
				459
				460	/*
				461	* Flag the last request in the series so that drivers know when IO
				462	* should be kicked off, if they don't do it on a per-request basis.
				463	*
				464	* Note: the flag isn't the only condition drivers should do kick off.
				465	* If drive is busy, the last request might not have the bit set.
				466	*/
				467	if (last)
				468	rq->cmd_flags |= REQ_END;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	469	}
				470
Christoph Hellwig	ed0791b	2014-04-16 09:44:57 +0200	[diff] [blame]	471	static void __blk_mq_requeue_request(struct request *rq)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	472	{
				473	struct request_queue *q = rq->q;
				474
				475	trace_block_rq_requeue(q, rq);
				476	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
Christoph Hellwig	49f5baa	2014-02-11 08:27:14 -0800	[diff] [blame]	477
				478	rq->cmd_flags &= ~REQ_END;
				479
				480	if (q->dma_drain_size && blk_rq_bytes(rq))
				481	rq->nr_phys_segments--;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	482	}
				483
Christoph Hellwig	ed0791b	2014-04-16 09:44:57 +0200	[diff] [blame]	484	void blk_mq_requeue_request(struct request *rq)
				485	{
Christoph Hellwig	ed0791b	2014-04-16 09:44:57 +0200	[diff] [blame]	486	__blk_mq_requeue_request(rq);
				487	blk_clear_rq_complete(rq);
				488
Christoph Hellwig	ed0791b	2014-04-16 09:44:57 +0200	[diff] [blame]	489	BUG_ON(blk_queued_rq(rq));
				490	blk_mq_insert_request(rq, true, true, false);
				491	}
				492	EXPORT_SYMBOL(blk_mq_requeue_request);
				493
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	494	struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
				495	{
				496	return tags->rqs[tag];
				497	}
				498	EXPORT_SYMBOL(blk_mq_tag_to_rq);
				499
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	500	struct blk_mq_timeout_data {
				501	struct blk_mq_hw_ctx *hctx;
				502	unsigned long *next;
				503	unsigned int *next_set;
				504	};
				505
				506	static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
				507	{
				508	struct blk_mq_timeout_data *data = __data;
				509	struct blk_mq_hw_ctx *hctx = data->hctx;
				510	unsigned int tag;
				511
				512	/* It may not be in flight yet (this is where
				513	* the REQ_ATOMIC_STARTED flag comes in). The requests are
				514	* statically allocated, so we know it's always safe to access the
				515	* memory associated with a bit offset into ->rqs[].
				516	*/
				517	tag = 0;
				518	do {
				519	struct request *rq;
				520
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	521	tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag);
				522	if (tag >= hctx->tags->nr_tags)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	523	break;
				524
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	525	rq = blk_mq_tag_to_rq(hctx->tags, tag++);
				526	if (rq->q != hctx->queue)
				527	continue;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	528	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
				529	continue;
				530
				531	blk_rq_check_expired(rq, data->next, data->next_set);
				532	} while (1);
				533	}
				534
				535	static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
				536	unsigned long *next,
				537	unsigned int *next_set)
				538	{
				539	struct blk_mq_timeout_data data = {
				540	.hctx = hctx,
				541	.next = next,
				542	.next_set = next_set,
				543	};
				544
				545	/*
				546	* Ask the tagging code to iterate busy requests, so we can
				547	* check them for timeout.
				548	*/
				549	blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
				550	}
				551
Jens Axboe	87ee7b1	2014-04-24 08:51:47 -0600	[diff] [blame]	552	static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq)
				553	{
				554	struct request_queue *q = rq->q;
				555
				556	/*
				557	* We know that complete is set at this point. If STARTED isn't set
				558	* anymore, then the request isn't active and the "timeout" should
				559	* just be ignored. This can happen due to the bitflag ordering.
				560	* Timeout first checks if STARTED is set, and if it is, assumes
				561	* the request is active. But if we race with completion, then
				562	* we both flags will get cleared. So check here again, and ignore
				563	* a timeout event with a request that isn't active.
				564	*/
				565	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
				566	return BLK_EH_NOT_HANDLED;
				567
				568	if (!q->mq_ops->timeout)
				569	return BLK_EH_RESET_TIMER;
				570
				571	return q->mq_ops->timeout(rq);
				572	}
				573
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	574	static void blk_mq_rq_timer(unsigned long data)
				575	{
				576	struct request_queue *q = (struct request_queue *) data;
				577	struct blk_mq_hw_ctx *hctx;
				578	unsigned long next = 0;
				579	int i, next_set = 0;
				580
				581	queue_for_each_hw_ctx(q, hctx, i)
				582	blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);
				583
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	584	if (next_set) {
				585	next = blk_rq_timeout(round_jiffies_up(next));
				586	mod_timer(&q->timeout, next);
				587	} else {
				588	queue_for_each_hw_ctx(q, hctx, i)
				589	blk_mq_tag_idle(hctx);
				590	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	591	}
				592
				593	/*
				594	* Reverse check our software queue for entries that we could potentially
				595	* merge with. Currently includes a hand-wavy stop count of 8, to not spend
				596	* too much time checking for merges.
				597	*/
				598	static bool blk_mq_attempt_merge(struct request_queue *q,
				599	struct blk_mq_ctx *ctx, struct bio *bio)
				600	{
				601	struct request *rq;
				602	int checked = 8;
				603
				604	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
				605	int el_ret;
				606
				607	if (!checked--)
				608	break;
				609
				610	if (!blk_rq_merge_ok(rq, bio))
				611	continue;
				612
				613	el_ret = blk_try_merge(rq, bio);
				614	if (el_ret == ELEVATOR_BACK_MERGE) {
				615	if (bio_attempt_back_merge(q, rq, bio)) {
				616	ctx->rq_merged++;
				617	return true;
				618	}
				619	break;
				620	} else if (el_ret == ELEVATOR_FRONT_MERGE) {
				621	if (bio_attempt_front_merge(q, rq, bio)) {
				622	ctx->rq_merged++;
				623	return true;
				624	}
				625	break;
				626	}
				627	}
				628
				629	return false;
				630	}
				631
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	632	/*
				633	* Run this hardware queue, pulling any software queues mapped to it in.
				634	* Note that this function currently has various problems around ordering
				635	* of IO. In particular, we'd like FIFO behaviour on handling existing
				636	* items on the hctx->dispatch list. Ignore that for now.
				637	*/
				638	static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
				639	{
				640	struct request_queue *q = hctx->queue;
				641	struct blk_mq_ctx *ctx;
				642	struct request *rq;
				643	LIST_HEAD(rq_list);
				644	int bit, queued;
				645
Jens Axboe	fd1270d	2014-04-16 09:23:48 -0600	[diff] [blame]	646	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	647
Jens Axboe	5d12f90	2014-03-19 15:25:02 -0600	[diff] [blame]	648	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	649	return;
				650
				651	hctx->run++;
				652
				653	/*
				654	* Touch any software queue that has pending entries.
				655	*/
				656	for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) {
				657	clear_bit(bit, hctx->ctx_map);
				658	ctx = hctx->ctxs[bit];
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	659
				660	spin_lock(&ctx->lock);
				661	list_splice_tail_init(&ctx->rq_list, &rq_list);
				662	spin_unlock(&ctx->lock);
				663	}
				664
				665	/*
				666	* If we have previous entries on our dispatch list, grab them
				667	* and stuff them at the front for more fair dispatch.
				668	*/
				669	if (!list_empty_careful(&hctx->dispatch)) {
				670	spin_lock(&hctx->lock);
				671	if (!list_empty(&hctx->dispatch))
				672	list_splice_init(&hctx->dispatch, &rq_list);
				673	spin_unlock(&hctx->lock);
				674	}
				675
				676	/*
				677	* Delete and return all entries from our dispatch list
				678	*/
				679	queued = 0;
				680
				681	/*
				682	* Now process all the entries, sending them to the driver.
				683	*/
				684	while (!list_empty(&rq_list)) {
				685	int ret;
				686
				687	rq = list_first_entry(&rq_list, struct request, queuelist);
				688	list_del_init(&rq->queuelist);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	689
Christoph Hellwig	49f5baa	2014-02-11 08:27:14 -0800	[diff] [blame]	690	blk_mq_start_request(rq, list_empty(&rq_list));
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	691
				692	ret = q->mq_ops->queue_rq(hctx, rq);
				693	switch (ret) {
				694	case BLK_MQ_RQ_QUEUE_OK:
				695	queued++;
				696	continue;
				697	case BLK_MQ_RQ_QUEUE_BUSY:
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	698	list_add(&rq->queuelist, &rq_list);
Christoph Hellwig	ed0791b	2014-04-16 09:44:57 +0200	[diff] [blame]	699	__blk_mq_requeue_request(rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	700	break;
				701	default:
				702	pr_err("blk-mq: bad return on queue: %d\n", ret);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	703	case BLK_MQ_RQ_QUEUE_ERROR:
Christoph Hellwig	1e93b8c	2014-02-11 08:27:13 -0800	[diff] [blame]	704	rq->errors = -EIO;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	705	blk_mq_end_io(rq, rq->errors);
				706	break;
				707	}
				708
				709	if (ret == BLK_MQ_RQ_QUEUE_BUSY)
				710	break;
				711	}
				712
				713	if (!queued)
				714	hctx->dispatched[0]++;
				715	else if (queued < (1 << (BLK_MQ_MAX_DISPATCH_ORDER - 1)))
				716	hctx->dispatched[ilog2(queued) + 1]++;
				717
				718	/*
				719	* Any items that need requeuing? Stuff them into hctx->dispatch,
				720	* that is where we will continue on next queue run.
				721	*/
				722	if (!list_empty(&rq_list)) {
				723	spin_lock(&hctx->lock);
				724	list_splice(&rq_list, &hctx->dispatch);
				725	spin_unlock(&hctx->lock);
				726	}
				727	}
				728
Jens Axboe	506e931	2014-05-07 10:26:44 -0600	[diff] [blame]	729	/*
				730	* It'd be great if the workqueue API had a way to pass
				731	* in a mask and had some smarts for more clever placement.
				732	* For now we just round-robin here, switching for every
				733	* BLK_MQ_CPU_WORK_BATCH queued items.
				734	*/
				735	static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
				736	{
				737	int cpu = hctx->next_cpu;
				738
				739	if (--hctx->next_cpu_batch <= 0) {
				740	int next_cpu;
				741
				742	next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
				743	if (next_cpu >= nr_cpu_ids)
				744	next_cpu = cpumask_first(hctx->cpumask);
				745
				746	hctx->next_cpu = next_cpu;
				747	hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
				748	}
				749
				750	return cpu;
				751	}
				752
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	753	void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
				754	{
Jens Axboe	5d12f90	2014-03-19 15:25:02 -0600	[diff] [blame]	755	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	756	return;
				757
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	758	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	759	__blk_mq_run_hw_queue(hctx);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	760	else if (hctx->queue->nr_hw_queues == 1)
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	761	kblockd_schedule_delayed_work(&hctx->run_work, 0);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	762	else {
				763	unsigned int cpu;
				764
Jens Axboe	506e931	2014-05-07 10:26:44 -0600	[diff] [blame]	765	cpu = blk_mq_hctx_next_cpu(hctx);
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	766	kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	767	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	768	}
				769
				770	void blk_mq_run_queues(struct request_queue *q, bool async)
				771	{
				772	struct blk_mq_hw_ctx *hctx;
				773	int i;
				774
				775	queue_for_each_hw_ctx(q, hctx, i) {
				776	if ((!blk_mq_hctx_has_pending(hctx) &&
				777	list_empty_careful(&hctx->dispatch)) \|\|
Jens Axboe	5d12f90	2014-03-19 15:25:02 -0600	[diff] [blame]	778	test_bit(BLK_MQ_S_STOPPED, &hctx->state))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	779	continue;
				780
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	781	preempt_disable();
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	782	blk_mq_run_hw_queue(hctx, async);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	783	preempt_enable();
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	784	}
				785	}
				786	EXPORT_SYMBOL(blk_mq_run_queues);
				787
				788	void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
				789	{
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	790	cancel_delayed_work(&hctx->run_work);
				791	cancel_delayed_work(&hctx->delay_work);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	792	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
				793	}
				794	EXPORT_SYMBOL(blk_mq_stop_hw_queue);
				795
Christoph Hellwig	280d45f	2013-10-25 14:45:58 +0100	[diff] [blame]	796	void blk_mq_stop_hw_queues(struct request_queue *q)
				797	{
				798	struct blk_mq_hw_ctx *hctx;
				799	int i;
				800
				801	queue_for_each_hw_ctx(q, hctx, i)
				802	blk_mq_stop_hw_queue(hctx);
				803	}
				804	EXPORT_SYMBOL(blk_mq_stop_hw_queues);
				805
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	806	void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
				807	{
				808	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	809
				810	preempt_disable();
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	811	__blk_mq_run_hw_queue(hctx);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	812	preempt_enable();
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	813	}
				814	EXPORT_SYMBOL(blk_mq_start_hw_queue);
				815
Christoph Hellwig	2f26855	2014-04-16 09:44:56 +0200	[diff] [blame]	816	void blk_mq_start_hw_queues(struct request_queue *q)
				817	{
				818	struct blk_mq_hw_ctx *hctx;
				819	int i;
				820
				821	queue_for_each_hw_ctx(q, hctx, i)
				822	blk_mq_start_hw_queue(hctx);
				823	}
				824	EXPORT_SYMBOL(blk_mq_start_hw_queues);
				825
				826
Christoph Hellwig	1b4a325	2014-04-16 09:44:54 +0200	[diff] [blame]	827	void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	828	{
				829	struct blk_mq_hw_ctx *hctx;
				830	int i;
				831
				832	queue_for_each_hw_ctx(q, hctx, i) {
				833	if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
				834	continue;
				835
				836	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	837	preempt_disable();
Christoph Hellwig	1b4a325	2014-04-16 09:44:54 +0200	[diff] [blame]	838	blk_mq_run_hw_queue(hctx, async);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	839	preempt_enable();
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	840	}
				841	}
				842	EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
				843
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	844	static void blk_mq_run_work_fn(struct work_struct *work)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	845	{
				846	struct blk_mq_hw_ctx *hctx;
				847
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	848	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	849
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	850	__blk_mq_run_hw_queue(hctx);
				851	}
				852
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	853	static void blk_mq_delay_work_fn(struct work_struct *work)
				854	{
				855	struct blk_mq_hw_ctx *hctx;
				856
				857	hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work);
				858
				859	if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state))
				860	__blk_mq_run_hw_queue(hctx);
				861	}
				862
				863	void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
				864	{
				865	unsigned long tmo = msecs_to_jiffies(msecs);
				866
				867	if (hctx->queue->nr_hw_queues == 1)
				868	kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
				869	else {
				870	unsigned int cpu;
				871
Jens Axboe	506e931	2014-05-07 10:26:44 -0600	[diff] [blame]	872	cpu = blk_mq_hctx_next_cpu(hctx);
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	873	kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
				874	}
				875	}
				876	EXPORT_SYMBOL(blk_mq_delay_queue);
				877
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	878	static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
Christoph Hellwig	72a0a36	2014-02-07 10:22:36 -0800	[diff] [blame]	879	struct request *rq, bool at_head)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	880	{
				881	struct blk_mq_ctx *ctx = rq->mq_ctx;
				882
Jens Axboe	01b983c	2013-11-19 18:59:10 -0700	[diff] [blame]	883	trace_block_rq_insert(hctx->queue, rq);
				884
Christoph Hellwig	72a0a36	2014-02-07 10:22:36 -0800	[diff] [blame]	885	if (at_head)
				886	list_add(&rq->queuelist, &ctx->rq_list);
				887	else
				888	list_add_tail(&rq->queuelist, &ctx->rq_list);
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	889
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	890	blk_mq_hctx_mark_pending(hctx, ctx);
				891
				892	/*
				893	* We do this early, to ensure we are on the right CPU.
				894	*/
Jens Axboe	87ee7b1	2014-04-24 08:51:47 -0600	[diff] [blame]	895	blk_add_timer(rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	896	}
				897
Christoph Hellwig	eeabc85	2014-03-21 08:57:37 -0600	[diff] [blame]	898	void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
				899	bool async)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	900	{
				901	struct request_queue *q = rq->q;
				902	struct blk_mq_hw_ctx *hctx;
Christoph Hellwig	eeabc85	2014-03-21 08:57:37 -0600	[diff] [blame]	903	struct blk_mq_ctx ctx = rq->mq_ctx, current_ctx;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	904
				905	current_ctx = blk_mq_get_ctx(q);
Christoph Hellwig	eeabc85	2014-03-21 08:57:37 -0600	[diff] [blame]	906	if (!cpu_online(ctx->cpu))
				907	rq->mq_ctx = ctx = current_ctx;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	908
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	909	hctx = q->mq_ops->map_queue(q, ctx->cpu);
				910
Christoph Hellwig	eeabc85	2014-03-21 08:57:37 -0600	[diff] [blame]	911	if (rq->cmd_flags & (REQ_FLUSH \| REQ_FUA) &&
				912	!(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
				913	blk_insert_flush(rq);
				914	} else {
				915	spin_lock(&ctx->lock);
				916	__blk_mq_insert_request(hctx, rq, at_head);
				917	spin_unlock(&ctx->lock);
				918	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	919
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	920	if (run_queue)
				921	blk_mq_run_hw_queue(hctx, async);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	922
				923	blk_mq_put_ctx(current_ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	924	}
				925
				926	static void blk_mq_insert_requests(struct request_queue *q,
				927	struct blk_mq_ctx *ctx,
				928	struct list_head *list,
				929	int depth,
				930	bool from_schedule)
				931
				932	{
				933	struct blk_mq_hw_ctx *hctx;
				934	struct blk_mq_ctx *current_ctx;
				935
				936	trace_block_unplug(q, depth, !from_schedule);
				937
				938	current_ctx = blk_mq_get_ctx(q);
				939
				940	if (!cpu_online(ctx->cpu))
				941	ctx = current_ctx;
				942	hctx = q->mq_ops->map_queue(q, ctx->cpu);
				943
				944	/*
				945	* preemption doesn't flush plug list, so it's possible ctx->cpu is
				946	* offline now
				947	*/
				948	spin_lock(&ctx->lock);
				949	while (!list_empty(list)) {
				950	struct request *rq;
				951
				952	rq = list_first_entry(list, struct request, queuelist);
				953	list_del_init(&rq->queuelist);
				954	rq->mq_ctx = ctx;
Christoph Hellwig	72a0a36	2014-02-07 10:22:36 -0800	[diff] [blame]	955	__blk_mq_insert_request(hctx, rq, false);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	956	}
				957	spin_unlock(&ctx->lock);
				958
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	959	blk_mq_run_hw_queue(hctx, from_schedule);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	960	blk_mq_put_ctx(current_ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	961	}
				962
				963	static int plug_ctx_cmp(void priv, struct list_head a, struct list_head *b)
				964	{
				965	struct request *rqa = container_of(a, struct request, queuelist);
				966	struct request *rqb = container_of(b, struct request, queuelist);
				967
				968	return !(rqa->mq_ctx < rqb->mq_ctx \|\|
				969	(rqa->mq_ctx == rqb->mq_ctx &&
				970	blk_rq_pos(rqa) < blk_rq_pos(rqb)));
				971	}
				972
				973	void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
				974	{
				975	struct blk_mq_ctx *this_ctx;
				976	struct request_queue *this_q;
				977	struct request *rq;
				978	LIST_HEAD(list);
				979	LIST_HEAD(ctx_list);
				980	unsigned int depth;
				981
				982	list_splice_init(&plug->mq_list, &list);
				983
				984	list_sort(NULL, &list, plug_ctx_cmp);
				985
				986	this_q = NULL;
				987	this_ctx = NULL;
				988	depth = 0;
				989
				990	while (!list_empty(&list)) {
				991	rq = list_entry_rq(list.next);
				992	list_del_init(&rq->queuelist);
				993	BUG_ON(!rq->q);
				994	if (rq->mq_ctx != this_ctx) {
				995	if (this_ctx) {
				996	blk_mq_insert_requests(this_q, this_ctx,
				997	&ctx_list, depth,
				998	from_schedule);
				999	}
				1000
				1001	this_ctx = rq->mq_ctx;
				1002	this_q = rq->q;
				1003	depth = 0;
				1004	}
				1005
				1006	depth++;
				1007	list_add_tail(&rq->queuelist, &ctx_list);
				1008	}
				1009
				1010	/*
				1011	* If 'this_ctx' is set, we know we have entries to complete
				1012	* on 'ctx_list'. Do those.
				1013	*/
				1014	if (this_ctx) {
				1015	blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
				1016	from_schedule);
				1017	}
				1018	}
				1019
				1020	static void blk_mq_bio_to_request(struct request rq, struct bio bio)
				1021	{
				1022	init_request_from_bio(rq, bio);
				1023	blk_account_io_start(rq, 1);
				1024	}
				1025
				1026	static void blk_mq_make_request(struct request_queue q, struct bio bio)
				1027	{
				1028	struct blk_mq_hw_ctx *hctx;
				1029	struct blk_mq_ctx *ctx;
				1030	const int is_sync = rw_is_sync(bio->bi_rw);
				1031	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH \| REQ_FUA);
				1032	int rw = bio_data_dir(bio);
				1033	struct request *rq;
				1034	unsigned int use_plug, request_count = 0;
				1035
				1036	/*
				1037	* If we have multiple hardware queues, just go directly to
				1038	* one of those for sync IO.
				1039	*/
				1040	use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) \|\| !is_sync);
				1041
				1042	blk_queue_bounce(q, &bio);
				1043
Nicholas Bellinger	14ec77f	2014-02-07 13:45:39 -0700	[diff] [blame]	1044	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
				1045	bio_endio(bio, -EIO);
				1046	return;
				1047	}
				1048
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1049	if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
				1050	return;
				1051
				1052	if (blk_mq_queue_enter(q)) {
				1053	bio_endio(bio, -EIO);
				1054	return;
				1055	}
				1056
				1057	ctx = blk_mq_get_ctx(q);
				1058	hctx = q->mq_ops->map_queue(q, ctx->cpu);
				1059
Shaohua Li	27fbf4e8	2014-02-19 20:20:21 +0800	[diff] [blame]	1060	if (is_sync)
				1061	rw \|= REQ_SYNC;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1062	trace_block_getrq(q, bio, rw);
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	1063	rq = __blk_mq_alloc_request(hctx, ctx, GFP_ATOMIC, false);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1064	if (likely(rq))
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	1065	blk_mq_rq_ctx_init(q, ctx, rq, rw);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1066	else {
				1067	blk_mq_put_ctx(ctx);
				1068	trace_block_sleeprq(q, bio, rw);
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	1069	rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT\|GFP_ATOMIC,
				1070	false);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1071	ctx = rq->mq_ctx;
				1072	hctx = q->mq_ops->map_queue(q, ctx->cpu);
				1073	}
				1074
				1075	hctx->queued++;
				1076
				1077	if (unlikely(is_flush_fua)) {
				1078	blk_mq_bio_to_request(rq, bio);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1079	blk_insert_flush(rq);
				1080	goto run_queue;
				1081	}
				1082
				1083	/*
				1084	* A task plug currently exists. Since this is completely lockless,
				1085	* utilize that to temporarily store requests until the task is
				1086	* either done or scheduled away.
				1087	*/
				1088	if (use_plug) {
				1089	struct blk_plug *plug = current->plug;
				1090
				1091	if (plug) {
				1092	blk_mq_bio_to_request(rq, bio);
Shaohua Li	92f399c	2013-10-29 12:01:03 -0600	[diff] [blame]	1093	if (list_empty(&plug->mq_list))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1094	trace_block_plug(q);
				1095	else if (request_count >= BLK_MAX_REQUEST_COUNT) {
				1096	blk_flush_plug_list(plug, false);
				1097	trace_block_plug(q);
				1098	}
				1099	list_add_tail(&rq->queuelist, &plug->mq_list);
				1100	blk_mq_put_ctx(ctx);
				1101	return;
				1102	}
				1103	}
				1104
Jens Axboe	c6d600c	2014-04-30 13:43:56 -0600	[diff] [blame]	1105	if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) {
				1106	init_request_from_bio(rq, bio);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1107
Jens Axboe	c6d600c	2014-04-30 13:43:56 -0600	[diff] [blame]	1108	spin_lock(&ctx->lock);
				1109	insert_rq:
Christoph Hellwig	72a0a36	2014-02-07 10:22:36 -0800	[diff] [blame]	1110	__blk_mq_insert_request(hctx, rq, false);
Jens Axboe	c6d600c	2014-04-30 13:43:56 -0600	[diff] [blame]	1111	spin_unlock(&ctx->lock);
				1112	blk_account_io_start(rq, 1);
				1113	} else {
				1114	spin_lock(&ctx->lock);
				1115	if (!blk_mq_attempt_merge(q, ctx, bio)) {
				1116	init_request_from_bio(rq, bio);
				1117	goto insert_rq;
				1118	}
				1119
				1120	spin_unlock(&ctx->lock);
				1121	__blk_mq_free_request(hctx, ctx, rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1122	}
				1123
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1124
				1125	/*
				1126	* For a SYNC request, send it to the hardware immediately. For an
				1127	* ASYNC request, just ensure that we run it later on. The latter
				1128	* allows for merging opportunities and more efficient dispatching.
				1129	*/
				1130	run_queue:
				1131	blk_mq_run_hw_queue(hctx, !is_sync \|\| is_flush_fua);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1132	blk_mq_put_ctx(ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1133	}
				1134
				1135	/*
				1136	* Default mapping to a software queue, since we use one per CPU.
				1137	*/
				1138	struct blk_mq_hw_ctx blk_mq_map_queue(struct request_queue q, const int cpu)
				1139	{
				1140	return q->queue_hw_ctx[q->mq_map[cpu]];
				1141	}
				1142	EXPORT_SYMBOL(blk_mq_map_queue);
				1143
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1144	struct blk_mq_hw_ctx blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set set,
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1145	unsigned int hctx_index)
				1146	{
Jens Axboe	4bb659b	2014-05-09 09:36:49 -0600	[diff] [blame]	1147	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL,
				1148	set->numa_node);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1149	}
				1150	EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue);
				1151
				1152	void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
				1153	unsigned int hctx_index)
				1154	{
				1155	kfree(hctx);
				1156	}
				1157	EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
				1158
				1159	static void blk_mq_hctx_notify(void *data, unsigned long action,
				1160	unsigned int cpu)
				1161	{
				1162	struct blk_mq_hw_ctx *hctx = data;
Jens Axboe	bccb5f7	2014-04-04 21:34:48 -0600	[diff] [blame]	1163	struct request_queue *q = hctx->queue;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1164	struct blk_mq_ctx *ctx;
				1165	LIST_HEAD(tmp);
				1166
				1167	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
				1168	return;
				1169
				1170	/*
				1171	* Move ctx entries to new CPU, if this one is going away.
				1172	*/
Jens Axboe	bccb5f7	2014-04-04 21:34:48 -0600	[diff] [blame]	1173	ctx = __blk_mq_get_ctx(q, cpu);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1174
				1175	spin_lock(&ctx->lock);
				1176	if (!list_empty(&ctx->rq_list)) {
				1177	list_splice_init(&ctx->rq_list, &tmp);
				1178	clear_bit(ctx->index_hw, hctx->ctx_map);
				1179	}
				1180	spin_unlock(&ctx->lock);
				1181
				1182	if (list_empty(&tmp))
				1183	return;
				1184
Jens Axboe	bccb5f7	2014-04-04 21:34:48 -0600	[diff] [blame]	1185	ctx = blk_mq_get_ctx(q);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1186	spin_lock(&ctx->lock);
				1187
				1188	while (!list_empty(&tmp)) {
				1189	struct request *rq;
				1190
				1191	rq = list_first_entry(&tmp, struct request, queuelist);
				1192	rq->mq_ctx = ctx;
				1193	list_move_tail(&rq->queuelist, &ctx->rq_list);
				1194	}
				1195
Jens Axboe	bccb5f7	2014-04-04 21:34:48 -0600	[diff] [blame]	1196	hctx = q->mq_ops->map_queue(q, ctx->cpu);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1197	blk_mq_hctx_mark_pending(hctx, ctx);
				1198
				1199	spin_unlock(&ctx->lock);
Jens Axboe	bccb5f7	2014-04-04 21:34:48 -0600	[diff] [blame]	1200
				1201	blk_mq_run_hw_queue(hctx, true);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1202	blk_mq_put_ctx(ctx);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1203	}
				1204
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1205	static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
				1206	struct blk_mq_tags *tags, unsigned int hctx_idx)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1207	{
				1208	struct page *page;
				1209
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1210	if (tags->rqs && set->ops->exit_request) {
Christoph Hellwig	e9b267d	2014-04-15 13:59:10 -0600	[diff] [blame]	1211	int i;
				1212
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1213	for (i = 0; i < tags->nr_tags; i++) {
				1214	if (!tags->rqs[i])
Christoph Hellwig	e9b267d	2014-04-15 13:59:10 -0600	[diff] [blame]	1215	continue;
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1216	set->ops->exit_request(set->driver_data, tags->rqs[i],
				1217	hctx_idx, i);
Christoph Hellwig	e9b267d	2014-04-15 13:59:10 -0600	[diff] [blame]	1218	}
				1219	}
				1220
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1221	while (!list_empty(&tags->page_list)) {
				1222	page = list_first_entry(&tags->page_list, struct page, lru);
Dave Hansen	6753471	2014-01-08 20:17:46 -0700	[diff] [blame]	1223	list_del_init(&page->lru);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1224	__free_pages(page, page->private);
				1225	}
				1226
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1227	kfree(tags->rqs);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1228
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1229	blk_mq_free_tags(tags);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1230	}
				1231
				1232	static size_t order_to_size(unsigned int order)
				1233	{
Ming Lei	4ca0850	2014-04-19 18:00:18 +0800	[diff] [blame]	1234	return (size_t)PAGE_SIZE << order;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1235	}
				1236
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1237	static struct blk_mq_tags blk_mq_init_rq_map(struct blk_mq_tag_set set,
				1238	unsigned int hctx_idx)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1239	{
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1240	struct blk_mq_tags *tags;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1241	unsigned int i, j, entries_per_page, max_order = 4;
				1242	size_t rq_size, left;
				1243
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1244	tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags,
				1245	set->numa_node);
				1246	if (!tags)
				1247	return NULL;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1248
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1249	INIT_LIST_HEAD(&tags->page_list);
				1250
				1251	tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *),
				1252	GFP_KERNEL, set->numa_node);
				1253	if (!tags->rqs) {
				1254	blk_mq_free_tags(tags);
				1255	return NULL;
				1256	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1257
				1258	/*
				1259	* rq_size is the size of the request plus driver payload, rounded
				1260	* to the cacheline size
				1261	*/
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1262	rq_size = round_up(sizeof(struct request) + set->cmd_size,
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1263	cache_line_size());
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1264	left = rq_size * set->queue_depth;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1265
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1266	for (i = 0; i < set->queue_depth; ) {
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1267	int this_order = max_order;
				1268	struct page *page;
				1269	int to_do;
				1270	void *p;
				1271
				1272	while (left < order_to_size(this_order - 1) && this_order)
				1273	this_order--;
				1274
				1275	do {
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1276	page = alloc_pages_node(set->numa_node, GFP_KERNEL,
				1277	this_order);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1278	if (page)
				1279	break;
				1280	if (!this_order--)
				1281	break;
				1282	if (order_to_size(this_order) < rq_size)
				1283	break;
				1284	} while (1);
				1285
				1286	if (!page)
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1287	goto fail;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1288
				1289	page->private = this_order;
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1290	list_add_tail(&page->lru, &tags->page_list);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1291
				1292	p = page_address(page);
				1293	entries_per_page = order_to_size(this_order) / rq_size;
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1294	to_do = min(entries_per_page, set->queue_depth - i);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1295	left -= to_do * rq_size;
				1296	for (j = 0; j < to_do; j++) {
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1297	tags->rqs[i] = p;
				1298	if (set->ops->init_request) {
				1299	if (set->ops->init_request(set->driver_data,
				1300	tags->rqs[i], hctx_idx, i,
				1301	set->numa_node))
				1302	goto fail;
Christoph Hellwig	e9b267d	2014-04-15 13:59:10 -0600	[diff] [blame]	1303	}
				1304
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1305	p += rq_size;
				1306	i++;
				1307	}
				1308	}
				1309
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1310	return tags;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1311
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1312	fail:
				1313	pr_warn("%s: failed to allocate requests\n", __func__);
				1314	blk_mq_free_rq_map(set, tags, hctx_idx);
				1315	return NULL;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1316	}
				1317
				1318	static int blk_mq_init_hw_queues(struct request_queue *q,
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1319	struct blk_mq_tag_set *set)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1320	{
				1321	struct blk_mq_hw_ctx *hctx;
				1322	unsigned int i, j;
				1323
				1324	/*
				1325	* Initialize hardware queues
				1326	*/
				1327	queue_for_each_hw_ctx(q, hctx, i) {
				1328	unsigned int num_maps;
				1329	int node;
				1330
				1331	node = hctx->numa_node;
				1332	if (node == NUMA_NO_NODE)
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1333	node = hctx->numa_node = set->numa_node;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1334
Christoph Hellwig	70f4db6	2014-04-16 10:48:08 -0600	[diff] [blame]	1335	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
				1336	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1337	spin_lock_init(&hctx->lock);
				1338	INIT_LIST_HEAD(&hctx->dispatch);
				1339	hctx->queue = q;
				1340	hctx->queue_num = i;
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1341	hctx->flags = set->flags;
				1342	hctx->cmd_size = set->cmd_size;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1343
				1344	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
				1345	blk_mq_hctx_notify, hctx);
				1346	blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
				1347
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1348	hctx->tags = set->tags[i];
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1349
				1350	/*
				1351	* Allocate space for all possible cpus to avoid allocation in
				1352	* runtime
				1353	*/
				1354	hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
				1355	GFP_KERNEL, node);
				1356	if (!hctx->ctxs)
				1357	break;
				1358
				1359	num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG;
				1360	hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long),
				1361	GFP_KERNEL, node);
				1362	if (!hctx->ctx_map)
				1363	break;
				1364
				1365	hctx->nr_ctx_map = num_maps;
				1366	hctx->nr_ctx = 0;
				1367
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1368	if (set->ops->init_hctx &&
				1369	set->ops->init_hctx(hctx, set->driver_data, i))
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1370	break;
				1371	}
				1372
				1373	if (i == q->nr_hw_queues)
				1374	return 0;
				1375
				1376	/*
				1377	* Init failed
				1378	*/
				1379	queue_for_each_hw_ctx(q, hctx, j) {
				1380	if (i == j)
				1381	break;
				1382
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1383	if (set->ops->exit_hctx)
				1384	set->ops->exit_hctx(hctx, j);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1385
				1386	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1387	kfree(hctx->ctxs);
Ming Lei	11471e0	2014-04-19 18:00:16 +0800	[diff] [blame]	1388	kfree(hctx->ctx_map);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1389	}
				1390
				1391	return 1;
				1392	}
				1393
				1394	static void blk_mq_init_cpu_queues(struct request_queue *q,
				1395	unsigned int nr_hw_queues)
				1396	{
				1397	unsigned int i;
				1398
				1399	for_each_possible_cpu(i) {
				1400	struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
				1401	struct blk_mq_hw_ctx *hctx;
				1402
				1403	memset(__ctx, 0, sizeof(*__ctx));
				1404	__ctx->cpu = i;
				1405	spin_lock_init(&__ctx->lock);
				1406	INIT_LIST_HEAD(&__ctx->rq_list);
				1407	__ctx->queue = q;
				1408
				1409	/* If the cpu isn't online, the cpu is mapped to first hctx */
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1410	if (!cpu_online(i))
				1411	continue;
				1412
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1413	hctx = q->mq_ops->map_queue(q, i);
				1414	cpumask_set_cpu(i, hctx->cpumask);
				1415	hctx->nr_ctx++;
				1416
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1417	/*
				1418	* Set local node, IFF we have more than one hw queue. If
				1419	* not, we remain on the home node of the device
				1420	*/
				1421	if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
				1422	hctx->numa_node = cpu_to_node(i);
				1423	}
				1424	}
				1425
				1426	static void blk_mq_map_swqueue(struct request_queue *q)
				1427	{
				1428	unsigned int i;
				1429	struct blk_mq_hw_ctx *hctx;
				1430	struct blk_mq_ctx *ctx;
				1431
				1432	queue_for_each_hw_ctx(q, hctx, i) {
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1433	cpumask_clear(hctx->cpumask);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1434	hctx->nr_ctx = 0;
				1435	}
				1436
				1437	/*
				1438	* Map software to hardware queues
				1439	*/
				1440	queue_for_each_ctx(q, ctx, i) {
				1441	/* If the cpu isn't online, the cpu is mapped to first hctx */
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1442	if (!cpu_online(i))
				1443	continue;
				1444
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1445	hctx = q->mq_ops->map_queue(q, i);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1446	cpumask_set_cpu(i, hctx->cpumask);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1447	ctx->index_hw = hctx->nr_ctx;
				1448	hctx->ctxs[hctx->nr_ctx++] = ctx;
				1449	}
Jens Axboe	506e931	2014-05-07 10:26:44 -0600	[diff] [blame]	1450
				1451	queue_for_each_hw_ctx(q, hctx, i) {
				1452	hctx->next_cpu = cpumask_first(hctx->cpumask);
				1453	hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
				1454	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1455	}
				1456
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	1457	static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
				1458	{
				1459	struct blk_mq_hw_ctx *hctx;
				1460	struct request_queue *q;
				1461	bool shared;
				1462	int i;
				1463
				1464	if (set->tag_list.next == set->tag_list.prev)
				1465	shared = false;
				1466	else
				1467	shared = true;
				1468
				1469	list_for_each_entry(q, &set->tag_list, tag_set_list) {
				1470	blk_mq_freeze_queue(q);
				1471
				1472	queue_for_each_hw_ctx(q, hctx, i) {
				1473	if (shared)
				1474	hctx->flags \|= BLK_MQ_F_TAG_SHARED;
				1475	else
				1476	hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
				1477	}
				1478	blk_mq_unfreeze_queue(q);
				1479	}
				1480	}
				1481
				1482	static void blk_mq_del_queue_tag_set(struct request_queue *q)
				1483	{
				1484	struct blk_mq_tag_set *set = q->tag_set;
				1485
				1486	blk_mq_freeze_queue(q);
				1487
				1488	mutex_lock(&set->tag_list_lock);
				1489	list_del_init(&q->tag_set_list);
				1490	blk_mq_update_tag_set_depth(set);
				1491	mutex_unlock(&set->tag_list_lock);
				1492
				1493	blk_mq_unfreeze_queue(q);
				1494	}
				1495
				1496	static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
				1497	struct request_queue *q)
				1498	{
				1499	q->tag_set = set;
				1500
				1501	mutex_lock(&set->tag_list_lock);
				1502	list_add_tail(&q->tag_set_list, &set->tag_list);
				1503	blk_mq_update_tag_set_depth(set);
				1504	mutex_unlock(&set->tag_list_lock);
				1505	}
				1506
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1507	struct request_queue blk_mq_init_queue(struct blk_mq_tag_set set)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1508	{
				1509	struct blk_mq_hw_ctx **hctxs;
				1510	struct blk_mq_ctx *ctx;
				1511	struct request_queue *q;
				1512	int i;
				1513
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1514	ctx = alloc_percpu(struct blk_mq_ctx);
				1515	if (!ctx)
				1516	return ERR_PTR(-ENOMEM);
				1517
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1518	hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
				1519	set->numa_node);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1520
				1521	if (!hctxs)
				1522	goto err_percpu;
				1523
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1524	for (i = 0; i < set->nr_hw_queues; i++) {
				1525	hctxs[i] = set->ops->alloc_hctx(set, i);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1526	if (!hctxs[i])
				1527	goto err_hctxs;
				1528
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1529	if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
				1530	goto err_hctxs;
				1531
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	1532	atomic_set(&hctxs[i]->nr_active, 0);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1533	hctxs[i]->numa_node = NUMA_NO_NODE;
				1534	hctxs[i]->queue_num = i;
				1535	}
				1536
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1537	q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1538	if (!q)
				1539	goto err_hctxs;
				1540
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1541	q->mq_map = blk_mq_make_queue_map(set);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1542	if (!q->mq_map)
				1543	goto err_map;
				1544
				1545	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
				1546	blk_queue_rq_timeout(q, 30000);
				1547
				1548	q->nr_queues = nr_cpu_ids;
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1549	q->nr_hw_queues = set->nr_hw_queues;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1550
				1551	q->queue_ctx = ctx;
				1552	q->queue_hw_ctx = hctxs;
				1553
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1554	q->mq_ops = set->ops;
Jens Axboe	94eddfb	2013-11-19 09:25:07 -0700	[diff] [blame]	1555	q->queue_flags \|= QUEUE_FLAG_MQ_DEFAULT;
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1556
Christoph Hellwig	1be036e	2014-02-07 10:22:39 -0800	[diff] [blame]	1557	q->sg_reserved_size = INT_MAX;
				1558
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1559	blk_queue_make_request(q, blk_mq_make_request);
Jens Axboe	87ee7b1	2014-04-24 08:51:47 -0600	[diff] [blame]	1560	blk_queue_rq_timed_out(q, blk_mq_rq_timed_out);
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1561	if (set->timeout)
				1562	blk_queue_rq_timeout(q, set->timeout);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1563
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1564	if (set->ops->complete)
				1565	blk_queue_softirq_done(q, set->ops->complete);
Christoph Hellwig	30a91cb	2014-02-10 03:24:38 -0800	[diff] [blame]	1566
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1567	blk_mq_init_flush(q);
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1568	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1569
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1570	q->flush_rq = kzalloc(round_up(sizeof(struct request) +
				1571	set->cmd_size, cache_line_size()),
				1572	GFP_KERNEL);
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	1573	if (!q->flush_rq)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1574	goto err_hw;
				1575
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1576	if (blk_mq_init_hw_queues(q, set))
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	1577	goto err_flush_rq;
				1578
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1579	blk_mq_map_swqueue(q);
				1580
				1581	mutex_lock(&all_q_mutex);
				1582	list_add_tail(&q->all_q_node, &all_q_list);
				1583	mutex_unlock(&all_q_mutex);
				1584
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	1585	blk_mq_add_queue_tag_set(set, q);
				1586
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1587	return q;
Christoph Hellwig	1874198	2014-02-10 09:29:00 -0700	[diff] [blame]	1588
				1589	err_flush_rq:
				1590	kfree(q->flush_rq);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1591	err_hw:
				1592	kfree(q->mq_map);
				1593	err_map:
				1594	blk_cleanup_queue(q);
				1595	err_hctxs:
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1596	for (i = 0; i < set->nr_hw_queues; i++) {
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1597	if (!hctxs[i])
				1598	break;
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1599	free_cpumask_var(hctxs[i]->cpumask);
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1600	set->ops->free_hctx(hctxs[i], i);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1601	}
				1602	kfree(hctxs);
				1603	err_percpu:
				1604	free_percpu(ctx);
				1605	return ERR_PTR(-ENOMEM);
				1606	}
				1607	EXPORT_SYMBOL(blk_mq_init_queue);
				1608
				1609	void blk_mq_free_queue(struct request_queue *q)
				1610	{
				1611	struct blk_mq_hw_ctx *hctx;
				1612	int i;
				1613
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	1614	blk_mq_del_queue_tag_set(q);
				1615
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1616	queue_for_each_hw_ctx(q, hctx, i) {
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1617	kfree(hctx->ctx_map);
				1618	kfree(hctx->ctxs);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1619	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
				1620	if (q->mq_ops->exit_hctx)
				1621	q->mq_ops->exit_hctx(hctx, i);
Jens Axboe	e4043dc	2014-04-09 10:18:23 -0600	[diff] [blame]	1622	free_cpumask_var(hctx->cpumask);
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1623	q->mq_ops->free_hctx(hctx, i);
				1624	}
				1625
				1626	free_percpu(q->queue_ctx);
				1627	kfree(q->queue_hw_ctx);
				1628	kfree(q->mq_map);
				1629
				1630	q->queue_ctx = NULL;
				1631	q->queue_hw_ctx = NULL;
				1632	q->mq_map = NULL;
				1633
				1634	mutex_lock(&all_q_mutex);
				1635	list_del_init(&q->all_q_node);
				1636	mutex_unlock(&all_q_mutex);
				1637	}
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1638
				1639	/* Basically redo blk_mq_init_queue with queue frozen */
Paul Gortmaker	f618ef7	2013-11-14 08:26:02 -0700	[diff] [blame]	1640	static void blk_mq_queue_reinit(struct request_queue *q)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1641	{
				1642	blk_mq_freeze_queue(q);
				1643
				1644	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
				1645
				1646	/*
				1647	* redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
				1648	* we should change hctx numa_node according to new topology (this
				1649	* involves free and re-allocate memory, worthy doing?)
				1650	*/
				1651
				1652	blk_mq_map_swqueue(q);
				1653
				1654	blk_mq_unfreeze_queue(q);
				1655	}
				1656
Paul Gortmaker	f618ef7	2013-11-14 08:26:02 -0700	[diff] [blame]	1657	static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
				1658	unsigned long action, void *hcpu)
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1659	{
				1660	struct request_queue *q;
				1661
				1662	/*
Jens Axboe	9fccfed	2014-05-08 14:50:19 -0600	[diff] [blame]	1663	* Before new mappings are established, hotadded cpu might already
				1664	* start handling requests. This doesn't break anything as we map
				1665	* offline CPUs to first hardware queue. We will re-init the queue
				1666	* below to get optimal settings.
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1667	*/
				1668	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
				1669	action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
				1670	return NOTIFY_OK;
				1671
				1672	mutex_lock(&all_q_mutex);
				1673	list_for_each_entry(q, &all_q_list, all_q_node)
				1674	blk_mq_queue_reinit(q);
				1675	mutex_unlock(&all_q_mutex);
				1676	return NOTIFY_OK;
				1677	}
				1678
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1679	int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
				1680	{
				1681	int i;
				1682
				1683	if (!set->nr_hw_queues)
				1684	return -EINVAL;
				1685	if (!set->queue_depth \|\| set->queue_depth > BLK_MQ_MAX_DEPTH)
				1686	return -EINVAL;
				1687	if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
				1688	return -EINVAL;
				1689
				1690	if (!set->nr_hw_queues \|\|
				1691	!set->ops->queue_rq \|\| !set->ops->map_queue \|\|
				1692	!set->ops->alloc_hctx \|\| !set->ops->free_hctx)
				1693	return -EINVAL;
				1694
				1695
Ming Lei	48479005	2014-04-19 18:00:17 +0800	[diff] [blame]	1696	set->tags = kmalloc_node(set->nr_hw_queues *
				1697	sizeof(struct blk_mq_tags *),
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1698	GFP_KERNEL, set->numa_node);
				1699	if (!set->tags)
				1700	goto out;
				1701
				1702	for (i = 0; i < set->nr_hw_queues; i++) {
				1703	set->tags[i] = blk_mq_init_rq_map(set, i);
				1704	if (!set->tags[i])
				1705	goto out_unwind;
				1706	}
				1707
Jens Axboe	0d2602c	2014-05-13 15:10:52 -0600	[diff] [blame^]	1708	mutex_init(&set->tag_list_lock);
				1709	INIT_LIST_HEAD(&set->tag_list);
				1710
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1711	return 0;
				1712
				1713	out_unwind:
				1714	while (--i >= 0)
				1715	blk_mq_free_rq_map(set, set->tags[i], i);
				1716	out:
				1717	return -ENOMEM;
				1718	}
				1719	EXPORT_SYMBOL(blk_mq_alloc_tag_set);
				1720
				1721	void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
				1722	{
				1723	int i;
				1724
				1725	for (i = 0; i < set->nr_hw_queues; i++)
				1726	blk_mq_free_rq_map(set, set->tags[i], i);
Ming Lei	981bd18	2014-04-24 00:07:34 +0800	[diff] [blame]	1727	kfree(set->tags);
Christoph Hellwig	24d2f90	2014-04-15 14:14:00 -0600	[diff] [blame]	1728	}
				1729	EXPORT_SYMBOL(blk_mq_free_tag_set);
				1730
Jens Axboe	676141e	2014-03-20 13:29:18 -0600	[diff] [blame]	1731	void blk_mq_disable_hotplug(void)
				1732	{
				1733	mutex_lock(&all_q_mutex);
				1734	}
				1735
				1736	void blk_mq_enable_hotplug(void)
				1737	{
				1738	mutex_unlock(&all_q_mutex);
				1739	}
				1740
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1741	static int __init blk_mq_init(void)
				1742	{
Jens Axboe	320ae51	2013-10-24 09:20:05 +0100	[diff] [blame]	1743	blk_mq_cpu_init();
				1744
				1745	/* Must be called after percpu_counter_hotcpu_callback() */
				1746	hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
				1747
				1748	return 0;
				1749	}
				1750	subsys_initcall(blk_mq_init);