blob: b2ab8489d0eb3bc8dec6e638a01e2c4f165ced36 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Copyright (C) 2003 Sistina Software Limited.
3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
4 *
5 * This file is released under the GPL.
6 */
7
8#include "dm.h"
9#include "dm-path-selector.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070010#include "dm-bio-list.h"
11#include "dm-bio-record.h"
Mike Andersonb15546f2007-10-19 22:48:02 +010012#include "dm-uevent.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070013
14#include <linux/ctype.h>
15#include <linux/init.h>
16#include <linux/mempool.h>
17#include <linux/module.h>
18#include <linux/pagemap.h>
19#include <linux/slab.h>
20#include <linux/time.h>
21#include <linux/workqueue.h>
Chandra Seetharamancfae5c92008-05-01 14:50:11 -070022#include <scsi/scsi_dh.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <asm/atomic.h>
24
Alasdair G Kergon72d94862006-06-26 00:27:35 -070025#define DM_MSG_PREFIX "multipath"
Linus Torvalds1da177e2005-04-16 15:20:36 -070026#define MESG_STR(x) x, sizeof(x)
27
/*
 * Path properties: one instance per physical path within a priority
 * group.  Lives on priority_group.pgpaths.
 */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status: nonzero = usable */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;		/* Underlying dm device handle */
};
38
39#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
40
/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;	/* Chooses among this PG's paths */

	unsigned pg_num;		/* Reference number (1-based) */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;	/* List of struct pgpath */
};
57
/* Multipath context: per-target state for one multipath device. */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;		/* Protects the mutable state below */

	const char *hw_handler_name;	/* scsi_dh module suffix, kstrdup'd */
	struct work_struct activate_path;	/* Runs on kmpath_handlerd */
	struct pgpath *pgpath_to_activate;	/* Target of next pg_init */
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */
	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */

	struct work_struct process_queued_ios;	/* Runs on kmultipathd */
	struct bio_list queued_ios;	/* Bios held back while queueing */
	unsigned queue_size;		/* Length of queued_ios */

	struct work_struct trigger_event;	/* Defers dm_table_event() */

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;
};
97
/*
 * Context information attached to each bio we process; allocated from
 * mpio_pool and stashed in the bio's map_info.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;		/* Path the bio was mapped to, or NULL */
	struct dm_bio_details details;	/* Saved fields for resubmission */
};
105
106typedef int (*action_fn) (struct pgpath *pgpath);
107
108#define MIN_IOS 256 /* Mempool size */
109
Christoph Lametere18b8902006-12-06 20:33:20 -0800110static struct kmem_cache *_mpio_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700112static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
David Howellsc4028952006-11-22 14:57:56 +0000113static void process_queued_ios(struct work_struct *work);
114static void trigger_event(struct work_struct *work);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700115static void activate_path(struct work_struct *work);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
117
118/*-----------------------------------------------
119 * Allocation routines
120 *-----------------------------------------------*/
121
122static struct pgpath *alloc_pgpath(void)
123{
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700124 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700125
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700126 if (pgpath)
Kiyoshi Ueda66800732008-10-10 13:36:58 +0100127 pgpath->is_active = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
129 return pgpath;
130}
131
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100132static void free_pgpath(struct pgpath *pgpath)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133{
134 kfree(pgpath);
135}
136
137static struct priority_group *alloc_priority_group(void)
138{
139 struct priority_group *pg;
140
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700141 pg = kzalloc(sizeof(*pg), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700143 if (pg)
144 INIT_LIST_HEAD(&pg->pgpaths);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145
146 return pg;
147}
148
/*
 * Tear down every pgpath on the given list: detach the hardware
 * handler (if one is configured), drop the dm device reference,
 * clear any pending pg_init pointer to the path, and free it.
 */
static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	unsigned long flags;
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		/* Don't let activate_path run against a freed pgpath. */
		spin_lock_irqsave(&m->lock, flags);
		if (m->pgpath_to_activate == pgpath)
			m->pgpath_to_activate = NULL;
		spin_unlock_irqrestore(&m->lock, flags);
		free_pgpath(pgpath);
	}
}
167
/*
 * Destroy a priority group: tear down its path selector (if one was
 * attached), release all of its paths, then free the PG itself.
 */
static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	/* ps->type is only set once parse_path_selector() succeeded */
	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}
181
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700182static struct multipath *alloc_multipath(struct dm_target *ti)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183{
184 struct multipath *m;
185
Micha³ Miros³awe69fae52006-10-03 01:15:34 -0700186 m = kzalloc(sizeof(*m), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 if (m) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188 INIT_LIST_HEAD(&m->priority_groups);
189 spin_lock_init(&m->lock);
190 m->queue_io = 1;
David Howellsc4028952006-11-22 14:57:56 +0000191 INIT_WORK(&m->process_queued_ios, process_queued_ios);
192 INIT_WORK(&m->trigger_event, trigger_event);
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700193 INIT_WORK(&m->activate_path, activate_path);
Matthew Dobson93d23412006-03-26 01:37:50 -0800194 m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700195 if (!m->mpio_pool) {
196 kfree(m);
197 return NULL;
198 }
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700199 m->ti = ti;
200 ti->private = m;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 }
202
203 return m;
204}
205
/*
 * Free a multipath context and everything hanging off it: all
 * priority groups (and their paths), the handler name and the
 * mpio mempool.  Caller must ensure no work items still reference m.
 */
static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}
219
220
221/*-----------------------------------------------
222 * Path selection
223 *-----------------------------------------------*/
224
225static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
226{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227 m->current_pg = pgpath->pg;
228
229 /* Must we initialise the PG first, and queue I/O till it's ready? */
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700230 if (m->hw_handler_name) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 m->pg_init_required = 1;
232 m->queue_io = 1;
233 } else {
234 m->pg_init_required = 0;
235 m->queue_io = 0;
236 }
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100237
238 m->pg_init_count = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239}
240
241static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
242{
Josef "Jeff" Sipekc922d5f2006-12-08 02:36:33 -0800243 struct dm_path *path;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244
245 path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
246 if (!path)
247 return -ENXIO;
248
249 m->current_pgpath = path_to_pgpath(path);
250
251 if (m->current_pg != pg)
252 __switch_pg(m, m->current_pgpath);
253
254 return 0;
255}
256
/*
 * Select the pgpath to use for the next I/O, honouring a pending PG
 * switch, the current PG, and PG bypass flags, in that order.  Clears
 * current_pgpath/current_pg if nothing usable remains.  Called with
 * m->lock held.
 */
static void __choose_pgpath(struct multipath *m)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}
295
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800296/*
297 * Check whether bios must be queued in the device-mapper core rather
298 * than here in the target.
299 *
300 * m->lock must be held on entry.
301 *
302 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
303 * same value then we are not between multipath_presuspend()
304 * and multipath_resume() calls and we have no need to check
305 * for the DMF_NOFLUSH_SUSPENDING flag.
306 */
307static int __must_push_back(struct multipath *m)
308{
309 return (m->queue_if_no_path != m->saved_queue_if_no_path &&
310 dm_noflush_suspending(m->ti));
311}
312
Alasdair G Kergon028867a2007-07-12 17:26:32 +0100313static int map_io(struct multipath *m, struct bio *bio,
314 struct dm_mpath_io *mpio, unsigned was_queued)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315{
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -0800316 int r = DM_MAPIO_REMAPPED;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317 unsigned long flags;
318 struct pgpath *pgpath;
319
320 spin_lock_irqsave(&m->lock, flags);
321
322 /* Do we need to select a new pgpath? */
323 if (!m->current_pgpath ||
324 (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
325 __choose_pgpath(m);
326
327 pgpath = m->current_pgpath;
328
329 if (was_queued)
330 m->queue_size--;
331
332 if ((pgpath && m->queue_io) ||
Alasdair G Kergon436d4102005-07-12 15:53:03 -0700333 (!pgpath && m->queue_if_no_path)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 /* Queue for the daemon to resubmit */
335 bio_list_add(&m->queued_ios, bio);
336 m->queue_size++;
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700337 if ((m->pg_init_required && !m->pg_init_in_progress) ||
338 !m->queue_io)
Alasdair G Kergonc5573082005-05-05 16:16:07 -0700339 queue_work(kmultipathd, &m->process_queued_ios);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 pgpath = NULL;
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -0800341 r = DM_MAPIO_SUBMITTED;
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800342 } else if (pgpath)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 bio->bi_bdev = pgpath->path.dev->bdev;
Kiyoshi Ueda45e15722006-12-08 02:41:10 -0800344 else if (__must_push_back(m))
345 r = DM_MAPIO_REQUEUE;
346 else
347 r = -EIO; /* Failed */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348
349 mpio->pgpath = pgpath;
350
351 spin_unlock_irqrestore(&m->lock, flags);
352
353 return r;
354}
355
356/*
357 * If we run out of usable paths, should we queue I/O or error it?
358 */
Alasdair G Kergon485ef692005-09-27 21:45:45 -0700359static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
360 unsigned save_old_value)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361{
362 unsigned long flags;
363
364 spin_lock_irqsave(&m->lock, flags);
365
Alasdair G Kergon485ef692005-09-27 21:45:45 -0700366 if (save_old_value)
367 m->saved_queue_if_no_path = m->queue_if_no_path;
368 else
369 m->saved_queue_if_no_path = queue_if_no_path;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 m->queue_if_no_path = queue_if_no_path;
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700371 if (!m->queue_if_no_path && m->queue_size)
Alasdair G Kergonc5573082005-05-05 16:16:07 -0700372 queue_work(kmultipathd, &m->process_queued_ios);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700373
374 spin_unlock_irqrestore(&m->lock, flags);
375
376 return 0;
377}
378
379/*-----------------------------------------------------------------
380 * The multipath daemon is responsible for resubmitting queued ios.
381 *---------------------------------------------------------------*/
382
/*
 * Drain the queued-bio list and remap each bio.  Bios that fail map
 * outright are completed with the error; remapped ones are reissued;
 * DM_MAPIO_REQUEUE cannot be pushed back from here, so those are
 * failed with -EIO.  DM_MAPIO_SUBMITTED bios were re-queued by
 * map_io() itself and need no action.
 */
static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct bio *bio = NULL, *next;
	struct dm_mpath_io *mpio;
	union map_info *info;

	/* Snapshot the whole queue under the lock, then work unlocked. */
	spin_lock_irqsave(&m->lock, flags);
	bio = bio_list_get(&m->queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	while (bio) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		info = dm_get_mapinfo(bio);
		mpio = info->ptr;

		r = map_io(m, bio, mpio, 1);
		if (r < 0)
			bio_endio(bio, r);
		else if (r == DM_MAPIO_REMAPPED)
			generic_make_request(bio);
		else if (r == DM_MAPIO_REQUEUE)
			bio_endio(bio, -EIO);

		bio = next;
	}
}
413
David Howellsc4028952006-11-22 14:57:56 +0000414static void process_queued_ios(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415{
David Howellsc4028952006-11-22 14:57:56 +0000416 struct multipath *m =
417 container_of(work, struct multipath, process_queued_ios);
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700418 struct pgpath *pgpath = NULL;
419 unsigned init_required = 0, must_queue = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 unsigned long flags;
421
422 spin_lock_irqsave(&m->lock, flags);
423
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700424 if (!m->queue_size)
425 goto out;
426
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 if (!m->current_pgpath)
428 __choose_pgpath(m);
429
430 pgpath = m->current_pgpath;
Chandra Seetharaman7253a332008-10-01 14:39:27 +0100431 m->pgpath_to_activate = m->current_pgpath;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700433 if ((pgpath && !m->queue_io) ||
434 (!pgpath && !m->queue_if_no_path))
435 must_queue = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700437 if (m->pg_init_required && !m->pg_init_in_progress) {
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100438 m->pg_init_count++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439 m->pg_init_required = 0;
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700440 m->pg_init_in_progress = 1;
441 init_required = 1;
442 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443
Alasdair G Kergonc3cd4f62005-07-12 15:53:04 -0700444out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 spin_unlock_irqrestore(&m->lock, flags);
446
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700447 if (init_required)
448 queue_work(kmpath_handlerd, &m->activate_path);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
450 if (!must_queue)
451 dispatch_queued_ios(m);
452}
453
454/*
455 * An event is triggered whenever a path is taken out of use.
456 * Includes path failure and PG bypass.
457 */
David Howellsc4028952006-11-22 14:57:56 +0000458static void trigger_event(struct work_struct *work)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
David Howellsc4028952006-11-22 14:57:56 +0000460 struct multipath *m =
461 container_of(work, struct multipath, trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462
463 dm_table_event(m->ti->table);
464}
465
466/*-----------------------------------------------------------------
467 * Constructor/argument parsing:
468 * <#multipath feature args> [<arg>]*
469 * <#hw_handler args> [hw_handler [<arg>]*]
470 * <#priority groups>
471 * <initial priority group>
472 * [<selector> <#selector args> [<arg>]*
473 * <#paths> <#per-path selector args>
474 * [<path> [<arg>]* ]+ ]+
475 *---------------------------------------------------------------*/
/*
 * Bounds and error message used by read_param() to validate one
 * numeric table argument.
 */
struct param {
	unsigned min;	/* Smallest acceptable value */
	unsigned max;	/* Largest acceptable value */
	char *error;	/* Stored in ti->error on failure */
};
481
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482static int read_param(struct param *param, char *str, unsigned *v, char **error)
483{
484 if (!str ||
485 (sscanf(str, "%u", v) != 1) ||
486 (*v < param->min) ||
487 (*v > param->max)) {
488 *error = param->error;
489 return -EINVAL;
490 }
491
492 return 0;
493}
494
/* Cursor over the remaining constructor arguments. */
struct arg_set {
	unsigned argc;	/* Arguments left */
	char **argv;	/* Next argument */
};
499
500static char *shift(struct arg_set *as)
501{
502 char *r;
503
504 if (as->argc) {
505 as->argc--;
506 r = *as->argv;
507 as->argv++;
508 return r;
509 }
510
511 return NULL;
512}
513
/* Drop the next n arguments; callers must not over-consume. */
static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON (as->argc < n);
	as->argc -= n;
	as->argv += n;
}
520
/*
 * Parse "<selector> <#selector args> [<arg>]*" and attach the selector
 * to pg->ps.  pg->ps.type is only set on full success, so
 * free_priority_group() can tell whether destroy() is needed.
 * Returns 0 or a negative errno with ti->error set.
 */
static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}
556
/*
 * Parse one "<path> [<arg>]*" clause: open the device, attach the
 * hardware handler if one is configured, and register the path with
 * the PG's path selector.  Returns the new pgpath or an ERR_PTR;
 * ti->error is set on most failures (hw/selector failures rely on r).
 */
static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->hw_handler_name) {
		r = scsi_dh_attach(bdev_get_queue(p->path.dev->bdev),
				   m->hw_handler_name);
		if (r < 0) {
			dm_put_device(ti, p->path.dev);
			goto bad;
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

 bad:
	free_pgpath(p);
	return ERR_PTR(r);
}
602
603static struct priority_group *parse_priority_group(struct arg_set *as,
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700604 struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605{
606 static struct param _params[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700607 {1, 1024, "invalid number of paths"},
608 {0, 1024, "invalid number of selector args"}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 };
610
611 int r;
612 unsigned i, nr_selector_args, nr_params;
613 struct priority_group *pg;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700614 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
616 if (as->argc < 2) {
617 as->argc = 0;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100618 ti->error = "not enough priority group arguments";
619 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700620 }
621
622 pg = alloc_priority_group();
623 if (!pg) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700624 ti->error = "couldn't allocate priority group";
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100625 return ERR_PTR(-ENOMEM);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 }
627 pg->m = m;
628
629 r = parse_path_selector(as, pg, ti);
630 if (r)
631 goto bad;
632
633 /*
634 * read the paths
635 */
636 r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
637 if (r)
638 goto bad;
639
640 r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
641 if (r)
642 goto bad;
643
644 nr_params = 1 + nr_selector_args;
645 for (i = 0; i < pg->nr_pgpaths; i++) {
646 struct pgpath *pgpath;
647 struct arg_set path_args;
648
Mikulas Patocka148acff2008-07-21 12:00:30 +0100649 if (as->argc < nr_params) {
650 ti->error = "not enough path parameters";
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 goto bad;
Mikulas Patocka148acff2008-07-21 12:00:30 +0100652 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700653
654 path_args.argc = nr_params;
655 path_args.argv = as->argv;
656
657 pgpath = parse_path(&path_args, &pg->ps, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100658 if (IS_ERR(pgpath)) {
659 r = PTR_ERR(pgpath);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 goto bad;
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100661 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
663 pgpath->pg = pg;
664 list_add_tail(&pgpath->list, &pg->pgpaths);
665 consume(as, nr_params);
666 }
667
668 return pg;
669
670 bad:
671 free_priority_group(pg, ti);
Benjamin Marzinski01460f32008-10-10 13:36:57 +0100672 return ERR_PTR(r);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673}
674
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700675static int parse_hw_handler(struct arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677 unsigned hw_argc;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700678 struct dm_target *ti = m->ti;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679
680 static struct param _params[] = {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700681 {0, 1024, "invalid number of hardware handler args"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682 };
683
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700684 if (read_param(_params, shift(as), &hw_argc, &ti->error))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685 return -EINVAL;
686
687 if (!hw_argc)
688 return 0;
689
Chandra Seetharamancfae5c92008-05-01 14:50:11 -0700690 m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
691 request_module("scsi_dh_%s", m->hw_handler_name);
692 if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
Alasdair G Kergon72d94862006-06-26 00:27:35 -0700693 ti->error = "unknown hardware handler type";
Chandra Seetharamanfe9233f2008-05-23 18:16:40 -0700694 kfree(m->hw_handler_name);
695 m->hw_handler_name = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700696 return -EINVAL;
697 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 consume(as, hw_argc - 1);
699
700 return 0;
701}
702
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700703static int parse_features(struct arg_set *as, struct multipath *m)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704{
705 int r;
706 unsigned argc;
Micha³ Miros³aw28f16c22006-10-03 01:15:33 -0700707 struct dm_target *ti = m->ti;
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100708 const char *param_name;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700709
710 static struct param _params[] = {
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100711 {0, 3, "invalid number of feature args"},
712 {1, 50, "pg_init_retries must be between 1 and 50"},
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 };
714
715 r = read_param(_params, shift(as), &argc, &ti->error);
716 if (r)
717 return -EINVAL;
718
719 if (!argc)
720 return 0;
721
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100722 do {
723 param_name = shift(as);
724 argc--;
725
726 if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
727 r = queue_if_no_path(m, 1, 0);
728 continue;
729 }
730
731 if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
732 (argc >= 1)) {
733 r = read_param(_params + 1, shift(as),
734 &m->pg_init_retries, &ti->error);
735 argc--;
736 continue;
737 }
738
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 ti->error = "Unrecognised multipath feature request";
Dave Wysochanskic9e45582007-10-19 22:47:53 +0100740 r = -EINVAL;
741 } while (argc && !r);
742
743 return r;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700744}
745
/*
 * Target constructor.  Argument layout (see block comment above):
 * features, hardware handler, PG count, initial PG number, then one
 * clause per priority group.  On any failure the partially built
 * context is torn down via free_multipath().
 */
static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, "invalid number of priority groups"},
		{1, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		/* next_pg_num counts down to the requested initial PG */
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	return 0;

 bad:
	free_multipath(m);
	return r;
}
816
817static void multipath_dtr(struct dm_target *ti)
818{
819 struct multipath *m = (struct multipath *) ti->private;
Alasdair G Kergona044d012005-07-12 15:53:02 -0700820
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -0700821 flush_workqueue(kmpath_handlerd);
Alasdair G Kergona044d012005-07-12 15:53:02 -0700822 flush_workqueue(kmultipathd);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 free_multipath(m);
824}
825
826/*
827 * Map bios, recording original fields for later in case we have to resubmit
828 */
/*
 * dm map function: allocate an mpio context, record the bio's original
 * fields for possible resubmission, mark it failfast and hand it to
 * map_io().  The mpio is released again if dm takes the bio back
 * (error or requeue), since end_io will not see it in those cases.
 */
static int multipath_map(struct dm_target *ti, struct bio *bio,
			 union map_info *map_context)
{
	int r;
	struct dm_mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	/* GFP_NOIO alloc from a mempool: may sleep but will not fail */
	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
	dm_bio_record(&mpio->details, bio);

	map_context->ptr = mpio;
	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
	r = map_io(m, bio, mpio, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		mempool_free(mpio, m->mpio_pool);

	return r;
}
847
848/*
849 * Take a path out of use.
850 */
/*
 * Take a path out of use: inform the path selector, update counters,
 * drop it as the current path if needed, and emit uevent/table events.
 * Idempotent for already-failed paths.  Always returns 0.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}
882
/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	/* Already active - nothing to do. */
	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	/* Force path re-selection so the reinstated path can be considered. */
	m->current_pgpath = NULL;
	/* If this is the first valid path again, flush any queued I/O. */
	if (!m->nr_valid_paths++ && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}
924
925/*
926 * Fail or reinstate all paths that match the provided struct dm_dev.
927 */
928static int action_dev(struct multipath *m, struct dm_dev *dev,
929 action_fn action)
930{
931 int r = 0;
932 struct pgpath *pgpath;
933 struct priority_group *pg;
934
935 list_for_each_entry(pg, &m->priority_groups, list) {
936 list_for_each_entry(pgpath, &pg->pgpaths, list) {
937 if (pgpath->path.dev == dev)
938 r = action(pgpath);
939 }
940 }
941
942 return r;
943}
944
945/*
946 * Temporarily try to avoid having to use the specified PG
947 */
948static void bypass_pg(struct multipath *m, struct priority_group *pg,
949 int bypassed)
950{
951 unsigned long flags;
952
953 spin_lock_irqsave(&m->lock, flags);
954
955 pg->bypassed = bypassed;
956 m->current_pgpath = NULL;
957 m->current_pg = NULL;
958
959 spin_unlock_irqrestore(&m->lock, flags);
960
Alasdair G Kergonc5573082005-05-05 16:16:07 -0700961 queue_work(kmultipathd, &m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700962}
963
964/*
965 * Switch to using the specified PG from the next I/O that gets mapped
966 */
967static int switch_pg_num(struct multipath *m, const char *pgstr)
968{
969 struct priority_group *pg;
970 unsigned pgnum;
971 unsigned long flags;
972
973 if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
974 (pgnum > m->nr_priority_groups)) {
975 DMWARN("invalid PG number supplied to switch_pg_num");
976 return -EINVAL;
977 }
978
979 spin_lock_irqsave(&m->lock, flags);
980 list_for_each_entry(pg, &m->priority_groups, list) {
981 pg->bypassed = 0;
982 if (--pgnum)
983 continue;
984
985 m->current_pgpath = NULL;
986 m->current_pg = NULL;
987 m->next_pg = pg;
988 }
989 spin_unlock_irqrestore(&m->lock, flags);
990
Alasdair G Kergonc5573082005-05-05 16:16:07 -0700991 queue_work(kmultipathd, &m->trigger_event);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992 return 0;
993}
994
995/*
996 * Set/clear bypassed status of a PG.
997 * PGs are numbered upwards from 1 in the order they were declared.
998 */
999static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1000{
1001 struct priority_group *pg;
1002 unsigned pgnum;
1003
1004 if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1005 (pgnum > m->nr_priority_groups)) {
1006 DMWARN("invalid PG number supplied to bypass_pg");
1007 return -EINVAL;
1008 }
1009
1010 list_for_each_entry(pg, &m->priority_groups, list) {
1011 if (!--pgnum)
1012 break;
1013 }
1014
1015 bypass_pg(m, pg, bypassed);
1016 return 0;
1017}
1018
1019/*
Dave Wysochanskic9e45582007-10-19 22:47:53 +01001020 * Should we retry pg_init immediately?
1021 */
1022static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1023{
1024 unsigned long flags;
1025 int limit_reached = 0;
1026
1027 spin_lock_irqsave(&m->lock, flags);
1028
1029 if (m->pg_init_count <= m->pg_init_retries)
1030 m->pg_init_required = 1;
1031 else
1032 limit_reached = 1;
1033
1034 spin_unlock_irqrestore(&m->lock, flags);
1035
1036 return limit_reached;
1037}
1038
/*
 * Completion handler for a hardware-handler path activation attempt.
 * 'errors' is a SCSI_DH_* status code from scsi_dh_activate().
 */
static void pg_init_done(struct dm_path *path, int errors)
{
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			/* No handler was requested, so NOSYS is harmless. */
			errors = 0;
			break;
		}
		DMERR("Cannot failover device because scsi_dh_%s was not "
		      "loaded.", m->hw_handler_name);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
	case SCSI_DH_RETRY:
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		/* Transient: retry unless the configured retry limit is hit. */
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		DMERR("Could not failover device. Error %d.", errors);
		/* Force a full path re-selection on the next mapped I/O. */
		m->current_pgpath = NULL;
		m->current_pg = NULL;
	} else if (!m->pg_init_required) {
		/* Initialisation complete: allow queued I/O to be dispatched. */
		m->queue_io = 0;
		pg->bypassed = 0;
	}

	m->pg_init_in_progress = 0;
	queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}
1100
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001101static void activate_path(struct work_struct *work)
1102{
1103 int ret;
1104 struct multipath *m =
1105 container_of(work, struct multipath, activate_path);
Chandra Seetharaman7253a332008-10-01 14:39:27 +01001106 struct dm_path *path;
1107 unsigned long flags;
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001108
Chandra Seetharaman7253a332008-10-01 14:39:27 +01001109 spin_lock_irqsave(&m->lock, flags);
1110 path = &m->pgpath_to_activate->path;
1111 m->pgpath_to_activate = NULL;
1112 spin_unlock_irqrestore(&m->lock, flags);
1113 if (!path)
1114 return;
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001115 ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
1116 pg_init_done(path, ret);
1117}
1118
/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct bio *bio,
		     int error, struct dm_mpath_io *mpio)
{
	unsigned long flags;

	if (!error)
		return 0;	/* I/O complete */

	/* Readahead failures are non-fatal - do not trigger path failover. */
	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
		return error;

	/* e.g. an operation the device does not support: pass the error up. */
	if (error == -EOPNOTSUPP)
		return error;

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		/* No paths left: push back, error, or queue per configuration. */
		if (__must_push_back(m)) {
			spin_unlock_irqrestore(&m->lock, flags);
			return DM_ENDIO_REQUEUE;
		} else if (!m->queue_if_no_path) {
			spin_unlock_irqrestore(&m->lock, flags);
			return -EIO;
		} else {
			spin_unlock_irqrestore(&m->lock, flags);
			goto requeue;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	/* Fail the path that serviced this bio before retrying elsewhere. */
	if (mpio->pgpath)
		fail_path(mpio->pgpath);

 requeue:
	/* Restore the bio to its pre-map state before resubmitting it. */
	dm_bio_restore(&mpio->details, bio);

	/* queue for the daemon to resubmit or fail */
	spin_lock_irqsave(&m->lock, flags);
	bio_list_add(&m->queued_ios, bio);
	m->queue_size++;
	if (!m->queue_io)
		queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	return DM_ENDIO_INCOMPLETE;	/* io not complete */
}
1167
1168static int multipath_end_io(struct dm_target *ti, struct bio *bio,
1169 int error, union map_info *map_context)
1170{
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001171 struct multipath *m = ti->private;
1172 struct dm_mpath_io *mpio = map_context->ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173 struct pgpath *pgpath = mpio->pgpath;
1174 struct path_selector *ps;
1175 int r;
1176
1177 r = do_end_io(m, bio, error, mpio);
1178 if (pgpath) {
1179 ps = &pgpath->pg->ps;
1180 if (ps->type->end_io)
1181 ps->type->end_io(ps, &pgpath->path);
1182 }
Kiyoshi Uedad2a7ad22006-12-08 02:41:06 -08001183 if (r != DM_ENDIO_INCOMPLETE)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001184 mempool_free(mpio, m->mpio_pool);
1185
1186 return r;
1187}
1188
1189/*
1190 * Suspend can't complete until all the I/O is processed so if
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001191 * the last path fails we must error any remaining I/O.
1192 * Note that if the freeze_bdev fails while suspending, the
1193 * queue_if_no_path state is lost - userspace should reset it.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001194 */
1195static void multipath_presuspend(struct dm_target *ti)
1196{
1197 struct multipath *m = (struct multipath *) ti->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001198
Alasdair G Kergon485ef692005-09-27 21:45:45 -07001199 queue_if_no_path(m, 0, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200}
1201
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001202/*
1203 * Restore the queue_if_no_path setting.
1204 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205static void multipath_resume(struct dm_target *ti)
1206{
1207 struct multipath *m = (struct multipath *) ti->private;
1208 unsigned long flags;
1209
1210 spin_lock_irqsave(&m->lock, flags);
Alasdair G Kergon436d4102005-07-12 15:53:03 -07001211 m->queue_if_no_path = m->saved_queue_if_no_path;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001212 spin_unlock_irqrestore(&m->lock, flags);
1213}
1214
/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		/* Argument count first, then each feature that is enabled. */
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
	}

	/* Hardware handler (table output only). */
	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	/* Initial group: next_pg takes precedence over current_pg. */
	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
			pg_num = 1;

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			/* Delegate per-PG status to the path selector. */
			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}
1335
/*
 * Handle messages sent to the target via the DM message interface:
 *   queue_if_no_path / fail_if_no_path          (no argument)
 *   disable_group / enable_group / switch_group <pg_number>
 *   reinstate_path / fail_path                  <device>
 */
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	if (argc == 1) {
		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
			return queue_if_no_path(m, 1, 0);
		else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
			return queue_if_no_path(m, 0, 0);
	}

	if (argc != 2)
		goto error;

	if (!strnicmp(argv[0], MESG_STR("disable_group")))
		return bypass_pg_num(m, argv[1], 1);
	else if (!strnicmp(argv[0], MESG_STR("enable_group")))
		return bypass_pg_num(m, argv[1], 0);
	else if (!strnicmp(argv[0], MESG_STR("switch_group")))
		return switch_pg_num(m, argv[1]);
	else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
		action = reinstate_path;
	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
		action = fail_path;
	else
		goto error;

	/* Resolve the device name and apply the action to every matching path. */
	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		return -EINVAL;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

	return r;

error:
	DMWARN("Unrecognised multipath message received.");
	return -EINVAL;
}
1384
/*
 * Forward ioctls to the currently selected underlying path's block
 * device.  A stack-allocated fake file/dentry pair carries the mode
 * and inode that blkdev_driver_ioctl() expects.
 */
static int multipath_ioctl(struct dm_target *ti, struct inode *inode,
			   struct file *filp, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct block_device *bdev = NULL;
	unsigned long flags;
	struct file fake_file = {};
	struct dentry fake_dentry = {};
	int r = 0;

	fake_file.f_path.dentry = &fake_dentry;

	spin_lock_irqsave(&m->lock, flags);

	/* Select a path now if none is cached. */
	if (!m->current_pgpath)
		__choose_pgpath(m);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		fake_dentry.d_inode = bdev->bd_inode;
		fake_file.f_mode = m->current_pgpath->path.dev->mode;
	}

	/* -EAGAIN while a pg_init is outstanding; -EIO if no path exists. */
	if (m->queue_io)
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	/* r == 0 implies bdev is valid, so the dereference below is safe. */
	return r ? : blkdev_driver_ioctl(bdev->bd_inode, &fake_file,
					 bdev->bd_disk, cmd, arg);
}
1419
/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
/* device-mapper target registration table for "multipath" */
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 0, 5},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map = multipath_map,
	.end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
};
1437
1438static int __init dm_multipath_init(void)
1439{
1440 int r;
1441
1442 /* allocate a slab for the dm_ios */
Alasdair G Kergon028867a2007-07-12 17:26:32 +01001443 _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 if (!_mpio_cache)
1445 return -ENOMEM;
1446
1447 r = dm_register_target(&multipath_target);
1448 if (r < 0) {
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01001449 DMERR("register failed %d", r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001450 kmem_cache_destroy(_mpio_cache);
1451 return -EINVAL;
1452 }
1453
Alasdair G Kergonc5573082005-05-05 16:16:07 -07001454 kmultipathd = create_workqueue("kmpathd");
1455 if (!kmultipathd) {
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01001456 DMERR("failed to create workqueue kmpathd");
Alasdair G Kergonc5573082005-05-05 16:16:07 -07001457 dm_unregister_target(&multipath_target);
1458 kmem_cache_destroy(_mpio_cache);
1459 return -ENOMEM;
1460 }
1461
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001462 /*
1463 * A separate workqueue is used to handle the device handlers
1464 * to avoid overloading existing workqueue. Overloading the
1465 * old workqueue would also create a bottleneck in the
1466 * path of the storage hardware device activation.
1467 */
1468 kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
1469 if (!kmpath_handlerd) {
1470 DMERR("failed to create workqueue kmpath_handlerd");
1471 destroy_workqueue(kmultipathd);
1472 dm_unregister_target(&multipath_target);
1473 kmem_cache_destroy(_mpio_cache);
1474 return -ENOMEM;
1475 }
1476
Alasdair G Kergon72d94862006-06-26 00:27:35 -07001477 DMINFO("version %u.%u.%u loaded",
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 multipath_target.version[0], multipath_target.version[1],
1479 multipath_target.version[2]);
1480
1481 return r;
1482}
1483
1484static void __exit dm_multipath_exit(void)
1485{
1486 int r;
1487
Chandra Seetharamanbab7cfc2008-05-01 14:50:22 -07001488 destroy_workqueue(kmpath_handlerd);
Alasdair G Kergonc5573082005-05-05 16:16:07 -07001489 destroy_workqueue(kmultipathd);
1490
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491 r = dm_unregister_target(&multipath_target);
1492 if (r < 0)
Alasdair G Kergon0cd33122007-07-12 17:27:01 +01001493 DMERR("target unregister failed %d", r);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001494 kmem_cache_destroy(_mpio_cache);
1495}
1496
Linus Torvalds1da177e2005-04-16 15:20:36 -07001497module_init(dm_multipath_init);
1498module_exit(dm_multipath_exit);
1499
1500MODULE_DESCRIPTION(DM_NAME " multipath target");
1501MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1502MODULE_LICENSE("GPL");