/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-hw-handler.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#include <asm/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	const char *hw_handler_name;
	struct work_struct activate_path;
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */
	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */

	struct work_struct process_queued_ios;
	struct bio_list queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath)
		pgpath->path.is_active = 1;

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		INIT_WORK(&m->activate_path, activate_path);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}

static int map_io(struct multipath *m, struct bio *bio,
		  struct dm_mpath_io *mpio, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	unsigned long flags;
	struct pgpath *pgpath;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		bio_list_add(&m->queued_ios, bio);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath)
		bio->bi_bdev = pgpath->path.dev->bdev;
	else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct bio *bio = NULL, *next;
	struct dm_mpath_io *mpio;
	union map_info *info;

	spin_lock_irqsave(&m->lock, flags);
	bio = bio_list_get(&m->queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	while (bio) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		info = dm_get_mapinfo(bio);
		mpio = info->ptr;

		r = map_io(m, bio, mpio, 1);
		if (r < 0)
			bio_endio(bio, r);
		else if (r == DM_MAPIO_REMAPPED)
			generic_make_request(bio);
		else if (r == DM_MAPIO_REQUEUE)
			bio_endio(bio, -EIO);

		bio = next;
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL;
	unsigned init_required = 0, must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->queue_size)
		goto out;

	if (!m->current_pgpath)
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress) {
		m->pg_init_count++;
		m->pg_init_required = 0;
		m->pg_init_in_progress = 1;
		init_required = 1;
	}

out:
	spin_unlock_irqrestore(&m->lock, flags);

	if (init_required)
		queue_work(kmpath_handlerd, &m->activate_path);

	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
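/*
 * Purely illustrative example of a parameter string in the format above
 * (the device number and the choice of the round-robin selector are made
 * up, not taken from this file): no feature args, no hardware handler,
 * one priority group which is also the initial group, a round-robin
 * selector with no selector args, and a single path on 8:16 with no
 * per-path selector args:
 *
 *     0 0 1 1 round-robin 0 1 0 8:16
 */
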
struct param {
	unsigned min;
	unsigned max;
	char *error;
};

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
	if (!str ||
	    (sscanf(str, "%u", v) != 1) ||
	    (*v < param->min) ||
	    (*v > param->max)) {
		*error = param->error;
		return -EINVAL;
	}

	return 0;
}

struct arg_set {
	unsigned argc;
	char **argv;
};

static char *shift(struct arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON (as->argc < n);
	as->argc -= n;
	as->argv += n;
}

static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r)
		return -EINVAL;

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return NULL;
	}

	p = alloc_pgpath();
	if (!p)
		return NULL;

	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

 bad:
	free_pgpath(p);
	return NULL;
}

static struct priority_group *parse_priority_group(struct arg_set *as,
						   struct multipath *m)
{
	static struct param _params[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_params;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return NULL;
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return NULL;
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_params = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct arg_set path_args;

		if (as->argc < nr_params)
			goto bad;

		path_args.argc = nr_params;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (!pgpath)
			goto bad;

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		consume(as, nr_params);
	}

	return pg;

 bad:
	free_priority_group(pg, ti);
	return NULL;
}

static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	struct dm_target *ti = m->ti;

	static struct param _params[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (read_param(_params, shift(as), &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
	request_module("scsi_dh_%s", m->hw_handler_name);
	if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
		ti->error = "unknown hardware handler type";
		return -EINVAL;
	}
	consume(as, hw_argc - 1);

	return 0;
}

static int parse_features(struct arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *param_name;

	static struct param _params[] = {
		{0, 3, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
	};

	r = read_param(_params, shift(as), &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		param_name = shift(as);
		argc--;

		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
		    (argc >= 1)) {
			r = read_param(_params + 1, shift(as),
				       &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, "invalid number of priority groups"},
		{1, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (!pg) {
			r = -EINVAL;
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	return 0;

 bad:
	free_multipath(m);
	return r;
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	flush_workqueue(kmpath_handlerd);
	flush_workqueue(kmultipathd);
	free_multipath(m);
}

/*
 * Map bios, recording original fields for later in case we have to resubmit
 */
static int multipath_map(struct dm_target *ti, struct bio *bio,
			 union map_info *map_context)
{
	int r;
	struct dm_mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
	dm_bio_record(&mpio->details, bio);

	map_context->ptr = mpio;
	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
	r = map_io(m, bio, mpio, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->path.is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->path.is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->path.is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->path.is_active = 1;

	m->current_pgpath = NULL;
	if (!m->nr_valid_paths++ && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = 0;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	queue_work(kmultipathd, &m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	queue_work(kmultipathd, &m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

/*
 * pg_init must call this when it has completed its initialisation
 */
void dm_pg_init_complete(struct dm_path *path, unsigned err_flags)
{
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/*
	 * If requested, retry pg_init until maximum number of retries exceeded.
	 * If retry not requested and PG already bypassed, always fail the path.
	 */
	if (err_flags & MP_RETRY) {
		if (pg_init_limit_reached(m, pgpath))
			err_flags |= MP_FAIL_PATH;
	} else if (err_flags && pg->bypassed)
		err_flags |= MP_FAIL_PATH;

	if (err_flags & MP_FAIL_PATH)
		fail_path(pgpath);

	if (err_flags & MP_BYPASS_PG)
		bypass_pg(m, pg, 1);

	spin_lock_irqsave(&m->lock, flags);
	if (err_flags & ~MP_RETRY) {
		m->current_pgpath = NULL;
		m->current_pg = NULL;
	} else if (!m->pg_init_required)
		m->queue_io = 0;

	m->pg_init_in_progress = 0;
	queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

static void pg_init_done(struct dm_path *path, int errors)
{
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Cannot failover device because scsi_dh_%s was not "
		      "loaded.", m->hw_handler_name);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
	case SCSI_DH_RETRY:
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		DMERR("Could not failover device. Error %d.", errors);
		m->current_pgpath = NULL;
		m->current_pg = NULL;
	} else if (!m->pg_init_required) {
		m->queue_io = 0;
		pg->bypassed = 0;
	}

	m->pg_init_in_progress = 0;
	queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	int ret;
	struct multipath *m =
		container_of(work, struct multipath, activate_path);
	struct dm_path *path = &m->current_pgpath->path;

	ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
	pg_init_done(path, ret);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct bio *bio,
		     int error, struct dm_mpath_io *mpio)
{
	unsigned long flags;

	if (!error)
		return 0;	/* I/O complete */

	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
		return error;

	if (error == -EOPNOTSUPP)
		return error;

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (__must_push_back(m)) {
			spin_unlock_irqrestore(&m->lock, flags);
			return DM_ENDIO_REQUEUE;
		} else if (!m->queue_if_no_path) {
			spin_unlock_irqrestore(&m->lock, flags);
			return -EIO;
		} else {
			spin_unlock_irqrestore(&m->lock, flags);
			goto requeue;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

 requeue:
	dm_bio_restore(&mpio->details, bio);

	/* queue for the daemon to resubmit or fail */
	spin_lock_irqsave(&m->lock, flags);
	bio_list_add(&m->queued_ios, bio);
	m->queue_size++;
	if (!m->queue_io)
		queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	return DM_ENDIO_INCOMPLETE;	/* io not complete */
}

static int multipath_end_io(struct dm_target *ti, struct bio *bio,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	r = do_end_io(m, bio, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path);
	}
	if (r != DM_ENDIO_INCOMPLETE)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
			pg_num = 1;

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->path.is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

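/*
 * Messages handled below, sent through the device-mapper message
 * interface (for example via "dmsetup message"):
 *
 *     queue_if_no_path | fail_if_no_path
 *     disable_group <#pg> | enable_group <#pg> | switch_group <#pg>
 *     reinstate_path <device> | fail_path <device>
 */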
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	if (argc == 1) {
		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
			return queue_if_no_path(m, 1, 0);
		else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
			return queue_if_no_path(m, 0, 0);
	}

	if (argc != 2)
		goto error;

	if (!strnicmp(argv[0], MESG_STR("disable_group")))
		return bypass_pg_num(m, argv[1], 1);
	else if (!strnicmp(argv[0], MESG_STR("enable_group")))
		return bypass_pg_num(m, argv[1], 0);
	else if (!strnicmp(argv[0], MESG_STR("switch_group")))
		return switch_pg_num(m, argv[1]);
	else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
		action = reinstate_path;
	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
		action = fail_path;
	else
		goto error;

	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		return -EINVAL;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

	return r;

error:
	DMWARN("Unrecognised multipath message received.");
	return -EINVAL;
}

static int multipath_ioctl(struct dm_target *ti, struct inode *inode,
			   struct file *filp, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct block_device *bdev = NULL;
	unsigned long flags;
	struct file fake_file = {};
	struct dentry fake_dentry = {};
	int r = 0;

	fake_file.f_path.dentry = &fake_dentry;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		fake_dentry.d_inode = bdev->bd_inode;
		fake_file.f_mode = m->current_pgpath->path.dev->mode;
	}

	if (m->queue_io)
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file,
					 bdev->bd_disk, cmd, arg);
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 0, 5},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map = multipath_map,
	.end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = create_workqueue("kmpathd");
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		destroy_workqueue(kmultipathd);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	int r;

	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	r = dm_unregister_target(&multipath_target);
	if (r < 0)
		DMERR("target unregister failed %d", r);
	kmem_cache_destroy(_mpio_cache);
}

EXPORT_SYMBOL_GPL(dm_pg_init_complete);

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");