blob: 43b90485448d5e09cb9981b89b875167425ba241 [file] [log] [blame]
/*
 * Copyright (C) 2015, SUSE
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 */
10
11
12#include <linux/module.h>
Guoqing Jiang7bcda712016-08-12 13:42:42 +080013#include <linux/kthread.h>
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060014#include <linux/dlm.h>
15#include <linux/sched.h>
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050016#include <linux/raid/md_p.h>
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060017#include "md.h"
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -050018#include "bitmap.h"
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -050019#include "md-cluster.h"
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060020
21#define LVB_SIZE 64
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -050022#define NEW_DEV_TIMEOUT 5000
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -060023
/* Per-lock state wrapping a DLM lock plus the bookkeeping needed to
 * drive it synchronously from process context. */
struct dlm_lock_resource {
	dlm_lockspace_t *ls;
	struct dlm_lksb lksb;
	char *name; /* lock name. */
	uint32_t flags; /* flags to pass to dlm_lock() */
	wait_queue_head_t sync_locking; /* wait queue for synchronized locking */
	bool sync_locking_done; /* set by the completion AST (sync_ast) */
	void (*bast)(void *arg, int mode); /* blocking AST function pointer*/
	struct mddev *mddev; /* pointing back to mddev. */
	int mode; /* last granted DLM mode (DLM_LOCK_IV before first grant) */
};
35
/* A region [lo, hi] another node is resyncing; kept on
 * md_cluster_info->suspend_list (protected by suspend_lock) so this
 * node can avoid that range (e.g. for read balancing). */
struct suspend_info {
	int slot;		/* cluster slot (0-based) of the resyncing node */
	sector_t lo;
	sector_t hi;
	struct list_head list;
};
42
/* On-wire layout of a resync range stored in a bitmap lockres LVB;
 * both fields are little-endian. */
struct resync_info {
	__le64 lo;
	__le64 hi;
};
47
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -060048/* md_cluster_info flags */
49#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
Goldwyn Rodrigues90382ed2015-06-24 09:30:32 -050050#define MD_CLUSTER_SUSPEND_READ_BALANCING 2
Guoqing Jiangeece0752015-07-10 17:01:21 +080051#define MD_CLUSTER_BEGIN_JOIN_CLUSTER 3
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -060052
/* Lock the send communication. This is done through
 * bit manipulation as opposed to a mutex in order to
 * accommodate lock and hold. See next comment.
 */
#define MD_CLUSTER_SEND_LOCK 4
57#define MD_CLUSTER_SEND_LOCK 4
/* If cluster operations (such as adding a disk) must lock the
 * communication channel, so as to perform extra operations
 * (update metadata) and no other operation is allowed on the
 * MD. Token needs to be locked and held until the operation
 * completes with a md_update_sb(), which would eventually release
 * the lock.
 */
65#define MD_CLUSTER_SEND_LOCKED_ALREADY 5
/* We should receive messages only after the node has joined the cluster
 * and set up all the related info such as bitmap and personality */
68#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
69#define MD_CLUSTER_PENDING_RECV_EVENT 7
Guoqing Jiang8b9277c2015-12-21 10:51:00 +110070
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -060071
struct md_cluster_info {
	/* dlm lock space and resources for clustered raid. */
	dlm_lockspace_t *lockspace;
	int slot_number;		/* this node's DLM slot, 1-based */
	struct completion completion;	/* signalled when the join completes */
	struct mutex recv_mutex;	/* serializes receive vs. token holders */
	struct dlm_lock_resource *bitmap_lockres;
	struct dlm_lock_resource **other_bitmap_lockres;
	struct dlm_lock_resource *resync_lockres;
	struct list_head suspend_list;	/* of struct suspend_info */
	spinlock_t suspend_lock;	/* protects suspend_list */
	struct md_thread *recovery_thread;
	unsigned long recovery_map;	/* slots whose bitmaps need recovery */
	/* communication lock resources */
	struct dlm_lock_resource *ack_lockres;
	struct dlm_lock_resource *message_lockres;
	struct dlm_lock_resource *token_lockres;
	struct dlm_lock_resource *no_new_dev_lockres;
	struct md_thread *recv_thread;
	struct completion newdisk_completion;
	wait_queue_head_t wait;		/* for the MD_CLUSTER_SEND_LOCK bit lock */
	unsigned long state;		/* MD_CLUSTER_* flag bits */
	/* record the region in RESYNCING message */
	sector_t sync_low;
	sector_t sync_hi;
};
98
/* Inter-node message types carried in cluster_msg->type. */
enum msg_type {
	METADATA_UPDATED = 0,	/* superblock changed; receiver should reload it */
	RESYNCING,		/* sender is resyncing [low, high] */
	NEWDISK,		/* new device added; raise userspace uevent */
	REMOVE,			/* remove the device in raid_slot */
	RE_ADD,			/* clear Faulty on the device in raid_slot */
	BITMAP_NEEDS_SYNC,	/* recover the bitmap of the slot in 'slot' */
};
107
/* On-wire message exchanged through the MESSAGE lockres LVB;
 * all integer fields are little-endian. */
struct cluster_msg {
	__le32 type;		/* enum msg_type */
	__le32 slot;		/* sender's slot (0-based) */
	/* TODO: Unionize this for smaller footprint */
	__le64 low;		/* resync range start (RESYNCING) */
	__le64 high;		/* resync range end (RESYNCING) */
	char uuid[16];		/* device uuid (NEWDISK) */
	__le32 raid_slot;	/* rdev number (NEWDISK/REMOVE/RE_ADD) */
};
117
118static void sync_ast(void *arg)
119{
120 struct dlm_lock_resource *res;
121
NeilBrown2e2a7cd2015-10-19 15:42:18 +1100122 res = arg;
Guoqing Jiangfccb60a2016-08-12 13:42:41 +0800123 res->sync_locking_done = true;
124 wake_up(&res->sync_locking);
Goldwyn Rodrigues47741b72014-03-07 13:49:26 -0600125}
126
/*
 * Issue a dlm_lock() request and wait (uninterruptibly) for its
 * completion AST.  On a successful grant the mode is recorded in
 * res->mode.  Returns the dlm_lock() error, otherwise the status
 * block result (0 on success).
 */
static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
{
	int ret = 0;

	ret = dlm_lock(res->ls, mode, &res->lksb,
			res->flags, res->name, strlen(res->name),
			0, sync_ast, res, res->bast);
	if (ret)
		return ret;
	wait_event(res->sync_locking, res->sync_locking_done);
	/* re-arm for the next synchronous request on this resource */
	res->sync_locking_done = false;
	if (res->lksb.sb_status == 0)
		res->mode = mode;
	return res->lksb.sb_status;
}
142
143static int dlm_unlock_sync(struct dlm_lock_resource *res)
144{
145 return dlm_lock_sync(res, DLM_LOCK_NL);
146}
147
/*
 * A variation of dlm_lock_sync() whose wait for the grant can be
 * interrupted by kthread_should_stop() (used by the recovery thread).
 * Returns -EPERM when interrupted before the grant, otherwise behaves
 * like dlm_lock_sync().  @mddev is currently unused in the body.
 */
static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode,
	struct mddev *mddev)
{
	int ret = 0;

	ret = dlm_lock(res->ls, mode, &res->lksb,
			res->flags, res->name, strlen(res->name),
			0, sync_ast, res, res->bast);
	if (ret)
		return ret;

	wait_event(res->sync_locking, res->sync_locking_done
				      || kthread_should_stop());
	if (!res->sync_locking_done) {
		/*
		 * the convert queue contains the lock request when request is
		 * interrupted, and sync_ast could still be run, so need to
		 * cancel the request and reset completion
		 */
		ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL,
			&res->lksb, res);
		res->sync_locking_done = false;
		if (unlikely(ret != 0))
			pr_info("failed to cancel previous lock request "
				 "%s return %d\n", res->name, ret);
		return -EPERM;
	} else
		res->sync_locking_done = false;
	if (res->lksb.sb_status == 0)
		res->mode = mode;
	return res->lksb.sb_status;
}
184
/*
 * Allocate and initialise a dlm_lock_resource and acquire it in NL
 * mode so all later requests can be conversions (DLM_LKF_CONVERT).
 * @with_lvb additionally allocates a lock value block.  Returns NULL
 * on any failure (allocation or the initial NL acquisition).
 */
static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
		char *name, void (*bastfn)(void *arg, int mode), int with_lvb)
{
	struct dlm_lock_resource *res = NULL;
	int ret, namelen;
	struct md_cluster_info *cinfo = mddev->cluster_info;

	res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL);
	if (!res)
		return NULL;
	init_waitqueue_head(&res->sync_locking);
	res->sync_locking_done = false;
	res->ls = cinfo->lockspace;
	res->mddev = mddev;
	res->mode = DLM_LOCK_IV;
	namelen = strlen(name);
	res->name = kzalloc(namelen + 1, GFP_KERNEL);
	if (!res->name) {
		pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
		goto out_err;
	}
	strlcpy(res->name, name, namelen + 1);
	if (with_lvb) {
		res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
		if (!res->lksb.sb_lvbptr) {
			pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name);
			goto out_err;
		}
		res->flags = DLM_LKF_VALBLK;
	}

	if (bastfn)
		res->bast = bastfn;

	/* EXPEDITE applies only to this first NL request */
	res->flags |= DLM_LKF_EXPEDITE;

	ret = dlm_lock_sync(res, DLM_LOCK_NL);
	if (ret) {
		pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name);
		goto out_err;
	}
	/* from now on every request is a conversion of the held NL lock */
	res->flags &= ~DLM_LKF_EXPEDITE;
	res->flags |= DLM_LKF_CONVERT;

	return res;
out_err:
	kfree(res->lksb.sb_lvbptr);
	kfree(res->name);
	kfree(res);
	return NULL;
}
236
/*
 * Force-unlock a lock resource and free it.  On a successful unlock,
 * wait for sync_ast() to fire before freeing so the DLM no longer
 * references the lksb.  Safe to call with NULL.
 */
static void lockres_free(struct dlm_lock_resource *res)
{
	int ret = 0;

	if (!res)
		return;

	/*
	 * use FORCEUNLOCK flag, so we can unlock even the lock is on the
	 * waiting or convert queue
	 */
	ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_FORCEUNLOCK,
		&res->lksb, res);
	if (unlikely(ret != 0))
		pr_err("failed to unlock %s return %d\n", res->name, ret);
	else
		wait_event(res->sync_locking, res->sync_locking_done);

	kfree(res->name);
	kfree(res->lksb.sb_lvbptr);
	kfree(res);
}
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -0600259
NeilBrown30661b42015-10-19 15:44:00 +1100260static void add_resync_info(struct dlm_lock_resource *lockres,
261 sector_t lo, sector_t hi)
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500262{
263 struct resync_info *ri;
264
265 ri = (struct resync_info *)lockres->lksb.sb_lvbptr;
266 ri->lo = cpu_to_le64(lo);
267 ri->hi = cpu_to_le64(hi);
268}
269
270static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
271{
272 struct resync_info ri;
273 struct suspend_info *s = NULL;
274 sector_t hi = 0;
275
276 dlm_lock_sync(lockres, DLM_LOCK_CR);
277 memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
278 hi = le64_to_cpu(ri.hi);
Guoqing Jiangcf97a342015-10-16 15:40:22 +0800279 if (hi > 0) {
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500280 s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
281 if (!s)
282 goto out;
283 s->hi = hi;
284 s->lo = le64_to_cpu(ri.lo);
285 }
286 dlm_unlock_sync(lockres);
287out:
288 return s;
289}
290
/*
 * Recovery-thread worker: for every slot set in cinfo->recovery_map,
 * drop any suspend region recorded for that slot, grab the failed
 * node's bitmap lock in PW (interruptibly, so the thread can be
 * stopped), merge its bitmap into ours, and kick a resync if the
 * merged bits lower our recovery_cp.
 */
static void recover_bitmaps(struct md_thread *thread)
{
	struct mddev *mddev = thread->mddev;
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct dlm_lock_resource *bm_lockres;
	char str[64];
	int slot, ret;
	struct suspend_info *s, *tmp;
	sector_t lo, hi;

	while (cinfo->recovery_map) {
		slot = fls64((u64)cinfo->recovery_map) - 1;

		/* Clear suspend_area associated with the bitmap */
		spin_lock_irq(&cinfo->suspend_lock);
		list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
			if (slot == s->slot) {
				list_del(&s->list);
				kfree(s);
			}
		spin_unlock_irq(&cinfo->suspend_lock);

		snprintf(str, 64, "bitmap%04d", slot);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres) {
			pr_err("md-cluster: Cannot initialize bitmaps\n");
			goto clear_bit;
		}

		ret = dlm_lock_sync_interruptible(bm_lockres, DLM_LOCK_PW, mddev);
		if (ret) {
			pr_err("md-cluster: Could not DLM lock %s: %d\n",
					str, ret);
			goto clear_bit;
		}
		ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true);
		if (ret) {
			pr_err("md-cluster: Could not copy data from bitmap %d\n", slot);
			goto clear_bit;
		}
		if (hi > 0) {
			if (lo < mddev->recovery_cp)
				mddev->recovery_cp = lo;
			/* wake up thread to continue resync in case resync
			 * is not finished */
			if (mddev->recovery_cp != MaxSector) {
				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
				md_wakeup_thread(mddev->thread);
			}
		}
clear_bit:
		/* lockres_free(NULL) is a no-op, so this is safe on the
		 * init-failure path too */
		lockres_free(bm_lockres);
		clear_bit(slot, &cinfo->recovery_map);
	}
}
346
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500347static void recover_prep(void *arg)
348{
Goldwyn Rodrigues90382ed2015-06-24 09:30:32 -0500349 struct mddev *mddev = arg;
350 struct md_cluster_info *cinfo = mddev->cluster_info;
351 set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500352}
353
Guoqing Jiang05cd0e52015-07-10 16:54:03 +0800354static void __recover_slot(struct mddev *mddev, int slot)
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500355{
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500356 struct md_cluster_info *cinfo = mddev->cluster_info;
357
Guoqing Jiang05cd0e52015-07-10 16:54:03 +0800358 set_bit(slot, &cinfo->recovery_map);
Goldwyn Rodriguese94987d2014-06-07 00:45:22 -0500359 if (!cinfo->recovery_thread) {
360 cinfo->recovery_thread = md_register_thread(recover_bitmaps,
361 mddev, "recover");
362 if (!cinfo->recovery_thread) {
363 pr_warn("md-cluster: Could not create recovery thread\n");
364 return;
365 }
366 }
367 md_wakeup_thread(cinfo->recovery_thread);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500368}
369
Guoqing Jiang05cd0e52015-07-10 16:54:03 +0800370static void recover_slot(void *arg, struct dlm_slot *slot)
371{
372 struct mddev *mddev = arg;
373 struct md_cluster_info *cinfo = mddev->cluster_info;
374
375 pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
376 mddev->bitmap_info.cluster_name,
377 slot->nodeid, slot->slot,
378 cinfo->slot_number);
379 /* deduct one since dlm slot starts from one while the num of
380 * cluster-md begins with 0 */
381 __recover_slot(mddev, slot->slot - 1);
382}
383
/*
 * DLM recovery-finished callback: record our slot number, finish the
 * join handshake when we are in the middle of joining, and re-enable
 * read balancing.
 */
static void recover_done(void *arg, struct dlm_slot *slots,
		int num_slots, int our_slot,
		uint32_t generation)
{
	struct mddev *mddev = arg;
	struct md_cluster_info *cinfo = mddev->cluster_info;

	cinfo->slot_number = our_slot;
	/* the completion only needs to be completed when this node joins
	 * the cluster; it doesn't need to run during another node's failure */
	if (test_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state)) {
		complete(&cinfo->completion);
		clear_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
	}
	clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
}
400
/* These ops are called when a node joins the cluster, and to do lock
 * recovery when a node failure occurs */
static const struct dlm_lockspace_ops md_ls_ops = {
	.recover_prep = recover_prep,
	.recover_slot = recover_slot,
	.recover_done = recover_done,
};
408
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500409/*
410 * The BAST function for the ack lock resource
411 * This function wakes up the receive thread in
412 * order to receive and process the message.
413 */
414static void ack_bast(void *arg, int mode)
415{
NeilBrown2e2a7cd2015-10-19 15:42:18 +1100416 struct dlm_lock_resource *res = arg;
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500417 struct md_cluster_info *cinfo = res->mddev->cluster_info;
418
Guoqing Jiang51e453a2016-05-04 02:17:09 -0400419 if (mode == DLM_LOCK_EX) {
420 if (test_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state))
421 md_wakeup_thread(cinfo->recv_thread);
422 else
423 set_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state);
424 }
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500425}
426
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500427static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
428{
429 struct suspend_info *s, *tmp;
430
431 list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
432 if (slot == s->slot) {
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500433 list_del(&s->list);
434 kfree(s);
435 break;
436 }
437}
438
Goldwyn Rodriguesb8ca8462015-10-09 11:27:01 -0500439static void remove_suspend_info(struct mddev *mddev, int slot)
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500440{
Goldwyn Rodriguesb8ca8462015-10-09 11:27:01 -0500441 struct md_cluster_info *cinfo = mddev->cluster_info;
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500442 spin_lock_irq(&cinfo->suspend_lock);
443 __remove_suspend_info(cinfo, slot);
444 spin_unlock_irq(&cinfo->suspend_lock);
Goldwyn Rodriguesb8ca8462015-10-09 11:27:01 -0500445 mddev->pers->quiesce(mddev, 2);
Goldwyn Rodriguese59721c2014-06-07 02:30:30 -0500446}
447
448
/*
 * Handle a RESYNCING message from @slot.  hi == 0 means the sender
 * finished: remove its suspend region and let our own resync continue.
 * Otherwise sync our bitmap with the previously announced range,
 * record [lo, hi] as suspended, and drain in-flight I/O around the
 * list update via the personality's quiesce calls.
 */
static void process_suspend_info(struct mddev *mddev,
		int slot, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct suspend_info *s;

	if (!hi) {
		remove_suspend_info(mddev, slot);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
		return;
	}

	/*
	 * The bitmaps are not same for different nodes
	 * if RESYNCING is happening in one node, then
	 * the node which received the RESYNCING message
	 * probably will perform resync with the region
	 * [lo, hi] again, so we could reduce resync time
	 * a lot if we can ensure that the bitmaps among
	 * different nodes are match up well.
	 *
	 * sync_low/hi is used to record the region which
	 * arrived in the previous RESYNCING message,
	 *
	 * Call bitmap_sync_with_cluster to clear
	 * NEEDED_MASK and set RESYNC_MASK since
	 * resync thread is running in another node,
	 * so we don't need to do the resync again
	 * with the same section */
	bitmap_sync_with_cluster(mddev, cinfo->sync_low,
					cinfo->sync_hi,
					lo, hi);
	cinfo->sync_low = lo;
	cinfo->sync_hi = hi;

	s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
	if (!s)
		return;
	s->slot = slot;
	s->lo = lo;
	s->hi = hi;
	/* quiesce(1)/quiesce(0) drains outstanding I/O before the new
	 * suspend region is installed */
	mddev->pers->quiesce(mddev, 1);
	mddev->pers->quiesce(mddev, 0);
	spin_lock_irq(&cinfo->suspend_lock);
	/* Remove existing entry (if exists) before adding */
	__remove_suspend_info(cinfo, slot);
	list_add(&s->list, &cinfo->suspend_list);
	spin_unlock_irq(&cinfo->suspend_lock);
	mddev->pers->quiesce(mddev, 2);
}
500
/*
 * NEWDISK message: forward the device uuid and raid slot to userspace
 * via a KOBJ_CHANGE uevent, then wait (bounded) for the new-disk
 * handshake to complete.
 */
static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
{
	char disk_uuid[64];
	struct md_cluster_info *cinfo = mddev->cluster_info;
	char event_name[] = "EVENT=ADD_DEVICE";
	char raid_slot[16];
	char *envp[] = {event_name, disk_uuid, raid_slot, NULL};
	int len;

	len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
	sprintf(disk_uuid + len, "%pU", cmsg->uuid);
	snprintf(raid_slot, 16, "RAID_DISK=%d", le32_to_cpu(cmsg->raid_slot));
	pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
	init_completion(&cinfo->newdisk_completion);
	set_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
	kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp);
	/* NOTE(review): NEW_DEV_TIMEOUT (5000) is passed as raw jiffies,
	 * so the real timeout varies with HZ — confirm whether
	 * msecs_to_jiffies() was intended here. */
	wait_for_completion_timeout(&cinfo->newdisk_completion,
			NEW_DEV_TIMEOUT);
	clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
}
521
522
/*
 * METADATA_UPDATED message: note which rdev changed, flag the MD
 * thread to reload the superblock, and re-take the no-new-dev lock
 * in CR mode before waking the thread.
 */
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	mddev->good_device_nr = le32_to_cpu(msg->raid_slot);
	set_bit(MD_RELOAD_SB, &mddev->flags);
	dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
	md_wakeup_thread(mddev->thread);
}
531
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500532static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg)
533{
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800534 struct md_rdev *rdev;
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500535
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800536 rcu_read_lock();
537 rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot));
Guoqing Jiang659b2542015-12-21 10:50:59 +1100538 if (rdev) {
539 set_bit(ClusterRemove, &rdev->flags);
540 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
541 md_wakeup_thread(mddev->thread);
542 }
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500543 else
Guoqing Jiangfaeff832015-10-12 17:21:21 +0800544 pr_warn("%s: %d Could not find disk(%d) to REMOVE\n",
545 __func__, __LINE__, le32_to_cpu(msg->raid_slot));
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800546 rcu_read_unlock();
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500547}
548
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500549static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg)
550{
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800551 struct md_rdev *rdev;
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500552
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800553 rcu_read_lock();
554 rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot));
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500555 if (rdev && test_bit(Faulty, &rdev->flags))
556 clear_bit(Faulty, &rdev->flags);
557 else
Guoqing Jiangfaeff832015-10-12 17:21:21 +0800558 pr_warn("%s: %d Could not find disk(%d) which is faulty",
559 __func__, __LINE__, le32_to_cpu(msg->raid_slot));
Guoqing Jiang5f0aa212016-08-12 13:42:39 +0800560 rcu_read_unlock();
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500561}
562
Guoqing Jiang1fa9a1a2016-05-03 22:22:15 -0400563static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500564{
Guoqing Jiang1fa9a1a2016-05-03 22:22:15 -0400565 int ret = 0;
566
Guoqing Jiang256f5b22015-10-12 17:21:23 +0800567 if (WARN(mddev->cluster_info->slot_number - 1 == le32_to_cpu(msg->slot),
568 "node %d received it's own msg\n", le32_to_cpu(msg->slot)))
Guoqing Jiang1fa9a1a2016-05-03 22:22:15 -0400569 return -1;
Guoqing Jiangcf97a342015-10-16 15:40:22 +0800570 switch (le32_to_cpu(msg->type)) {
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500571 case METADATA_UPDATED:
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500572 process_metadata_update(mddev, msg);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500573 break;
574 case RESYNCING:
Guoqing Jiangcf97a342015-10-16 15:40:22 +0800575 process_suspend_info(mddev, le32_to_cpu(msg->slot),
576 le64_to_cpu(msg->low),
577 le64_to_cpu(msg->high));
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500578 break;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500579 case NEWDISK:
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500580 process_add_new_disk(mddev, msg);
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500581 break;
582 case REMOVE:
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500583 process_remove_disk(mddev, msg);
584 break;
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500585 case RE_ADD:
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -0500586 process_readd_disk(mddev, msg);
587 break;
Guoqing Jiangdc737d72015-07-10 16:54:04 +0800588 case BITMAP_NEEDS_SYNC:
Guoqing Jiangcf97a342015-10-16 15:40:22 +0800589 __recover_slot(mddev, le32_to_cpu(msg->slot));
Guoqing Jiangdc737d72015-07-10 16:54:04 +0800590 break;
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500591 default:
Guoqing Jiang1fa9a1a2016-05-03 22:22:15 -0400592 ret = -1;
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -0500593 pr_warn("%s:%d Received unknown message from %d\n",
594 __func__, __LINE__, msg->slot);
kbuild test robot09dd1af2015-02-28 09:16:08 +0800595 }
Guoqing Jiang1fa9a1a2016-05-03 22:22:15 -0400596 return ret;
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500597}
598
/*
 * thread for receiving message (woken by ack_bast()).  Sequence: take
 * MESSAGE in CR, copy the LVB, process the message, then release and
 * re-take ACK around an up-convert of MESSAGE to PR so the sender —
 * who waits for all ACK holders to release — can complete.
 * recv_mutex serializes this against lock_token().
 */
static void recv_daemon(struct md_thread *thread)
{
	struct md_cluster_info *cinfo = thread->mddev->cluster_info;
	struct dlm_lock_resource *ack_lockres = cinfo->ack_lockres;
	struct dlm_lock_resource *message_lockres = cinfo->message_lockres;
	struct cluster_msg msg;
	int ret;

	mutex_lock(&cinfo->recv_mutex);
	/*get CR on Message*/
	if (dlm_lock_sync(message_lockres, DLM_LOCK_CR)) {
		pr_err("md/raid1:failed to get CR on MESSAGE\n");
		mutex_unlock(&cinfo->recv_mutex);
		return;
	}

	/* read lvb and wake up thread to process this message_lockres */
	memcpy(&msg, message_lockres->lksb.sb_lvbptr, sizeof(struct cluster_msg));
	ret = process_recvd_msg(thread->mddev, &msg);
	if (ret)
		goto out;

	/*release CR on ack_lockres*/
	ret = dlm_unlock_sync(ack_lockres);
	if (unlikely(ret != 0))
		pr_info("unlock ack failed return %d\n", ret);
	/*up-convert to PR on message_lockres*/
	ret = dlm_lock_sync(message_lockres, DLM_LOCK_PR);
	if (unlikely(ret != 0))
		pr_info("lock PR on msg failed return %d\n", ret);
	/*get CR on ack_lockres again*/
	ret = dlm_lock_sync(ack_lockres, DLM_LOCK_CR);
	if (unlikely(ret != 0))
		pr_info("lock CR on ack failed return %d\n", ret);
out:
	/*release CR on message_lockres*/
	ret = dlm_unlock_sync(message_lockres);
	if (unlikely(ret != 0))
		pr_info("unlock msg failed return %d\n", ret);
	mutex_unlock(&cinfo->recv_mutex);
}
643
/* lock_token()
 * Takes the lock on the TOKEN lock resource so no other
 * node can communicate while the operation is underway.
 * Also takes recv_mutex so locally received messages are not
 * processed concurrently (released by unlock_comm()).
 *
 * NOTE(review): recv_mutex is taken even when the TOKEN acquisition
 * failed, and callers currently ignore the error — confirm before
 * propagating it without releasing the mutex.
 */
static int lock_token(struct md_cluster_info *cinfo)
{
	int error;

	error = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
	if (error)
		pr_err("md-cluster(%s:%d): failed to get EX on TOKEN (%d)\n",
				__func__, __LINE__, error);

	/* Lock the receive sequence */
	mutex_lock(&cinfo->recv_mutex);
	return error;
}
661
/* lock_comm()
 * Sets the MD_CLUSTER_SEND_LOCK bit to lock the send channel (a bit
 * lock rather than a mutex so it can be held across operations — see
 * MD_CLUSTER_SEND_LOCKED_ALREADY), then takes the cluster-wide TOKEN.
 */
static int lock_comm(struct md_cluster_info *cinfo)
{
	wait_event(cinfo->wait,
		   !test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state));

	return lock_token(cinfo);
}
672
/* unlock_comm()
 * Reverse of lock_comm(): release recv_mutex and the TOKEN lock, then
 * drop the SEND_LOCK bit and wake any sender parked in lock_comm().
 */
static void unlock_comm(struct md_cluster_info *cinfo)
{
	WARN_ON(cinfo->token_lockres->mode != DLM_LOCK_EX);
	mutex_unlock(&cinfo->recv_mutex);
	dlm_unlock_sync(cinfo->token_lockres);
	clear_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state);
	wake_up(&cinfo->wait);
}
681
/* __sendmsg()
 * This function performs the actual sending of the message. This function is
 * usually called after performing the encompassing operation
 * The function:
 * 1. Grabs the message lockresource in EX mode
 * 2. Copies the message to the message LVB
 * 3. Downconverts message lockresource to CW
 * 4. Upconverts ack lock resource from CR to EX. This forces the BAST on other nodes
 * and the other nodes read the message. The thread will wait here until all other
 * nodes have released ack lock resource.
 * 5. Downconvert ack lockresource to CR
 */
static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
{
	int error;
	int slot = cinfo->slot_number - 1;

	cmsg->slot = cpu_to_le32(slot);
	/*get EX on Message*/
	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
	if (error) {
		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
		goto failed_message;
	}

	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
			sizeof(struct cluster_msg));
	/*down-convert EX to CW on Message*/
	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_CW);
	if (error) {
		pr_err("md-cluster: failed to convert EX to CW on MESSAGE(%d)\n",
				error);
		goto failed_ack;
	}

	/*up-convert CR to EX on Ack*/
	error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_EX);
	if (error) {
		pr_err("md-cluster: failed to convert CR to EX on ACK(%d)\n",
				error);
		goto failed_ack;
	}

	/*down-convert EX to CR on Ack*/
	error = dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR);
	if (error) {
		pr_err("md-cluster: failed to convert EX to CR on ACK(%d)\n",
				error);
		goto failed_ack;
	}

failed_ack:
	error = dlm_unlock_sync(cinfo->message_lockres);
	if (unlikely(error != 0)) {
		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
			error);
		/* in case the message can't be released due to some reason:
		 * this goto deliberately retries the unlock forever, since
		 * leaving MESSAGE held would wedge the cluster-wide channel */
		goto failed_ack;
	}
failed_message:
	return error;
}
744
/*
 * Take the send channel + token, broadcast one cluster message, release.
 * Previously the lock_comm() result was ignored, so a failed token
 * acquisition still sent the message without holding the token.
 */
static int sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
{
	int ret;

	ret = lock_comm(cinfo);
	if (ret)
		return ret;
	ret = __sendmsg(cinfo, cmsg);
	unlock_comm(cinfo);
	return ret;
}
754
/*
 * Walk every peer slot's bitmap lock resource and collect resync state.
 *
 * For each slot other than our own:
 *  - try to take the "bitmap%04d" lock PW with NOQUEUE;
 *  - -EAGAIN means that node holds its bitmap lock (it is alive and may
 *    be resyncing): record its resync window on cinfo->suspend_list;
 *  - on success the slot is unowned: copy its on-disk bitmap into ours
 *    and, if it shows dirty regions below our recovery_cp, schedule
 *    recovery.
 *
 * Returns 0 on success or a negative errno (lock/alloc/copy failure).
 */
static int gather_all_resync_info(struct mddev *mddev, int total_slots)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	int i, ret = 0;
	struct dlm_lock_resource *bm_lockres;
	struct suspend_info *s;
	char str[64];
	sector_t lo, hi;


	for (i = 0; i < total_slots; i++) {
		memset(str, '\0', 64);
		snprintf(str, 64, "bitmap%04d", i);
		bm_lockres = lockres_init(mddev, str, NULL, 1);
		if (!bm_lockres)
			return -ENOMEM;
		if (i == (cinfo->slot_number - 1)) {
			/* Our own slot — nothing to gather from ourselves. */
			lockres_free(bm_lockres);
			continue;
		}

		/* NOQUEUE: fail immediately instead of waiting on the owner. */
		bm_lockres->flags |= DLM_LKF_NOQUEUE;
		ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
		if (ret == -EAGAIN) {
			/* Slot is held by a live node; read its resync window. */
			memset(bm_lockres->lksb.sb_lvbptr, '\0', LVB_SIZE);
			s = read_resync_info(mddev, bm_lockres);
			if (s) {
				pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
						__func__, __LINE__,
						(unsigned long long) s->lo,
						(unsigned long long) s->hi, i);
				spin_lock_irq(&cinfo->suspend_lock);
				s->slot = i;
				list_add(&s->list, &cinfo->suspend_list);
				spin_unlock_irq(&cinfo->suspend_lock);
			}
			/* -EAGAIN is expected here, not an error. */
			ret = 0;
			lockres_free(bm_lockres);
			continue;
		}
		if (ret) {
			lockres_free(bm_lockres);
			goto out;
		}

		/* Read the disk bitmap sb and check if it needs recovery */
		ret = bitmap_copy_from_slot(mddev, i, &lo, &hi, false);
		if (ret) {
			pr_warn("md-cluster: Could not gather bitmaps from slot %d", i);
			lockres_free(bm_lockres);
			continue;
		}
		if ((hi > 0) && (lo < mddev->recovery_cp)) {
			/* Peer left dirty regions behind — pull recovery_cp back. */
			set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
			mddev->recovery_cp = lo;
			md_check_recovery(mddev);
		}

		lockres_free(bm_lockres);
	}
out:
	return ret;
}
818
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500819static int join(struct mddev *mddev, int nodes)
820{
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500821 struct md_cluster_info *cinfo;
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500822 int ret, ops_rv;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500823 char str[64];
824
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500825 cinfo = kzalloc(sizeof(struct md_cluster_info), GFP_KERNEL);
826 if (!cinfo)
827 return -ENOMEM;
828
Guoqing Jiang9e3072e2015-07-10 17:01:18 +0800829 INIT_LIST_HEAD(&cinfo->suspend_list);
830 spin_lock_init(&cinfo->suspend_lock);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500831 init_completion(&cinfo->completion);
Guoqing Jiangeece0752015-07-10 17:01:21 +0800832 set_bit(MD_CLUSTER_BEGIN_JOIN_CLUSTER, &cinfo->state);
Guoqing Jiang8b9277c2015-12-21 10:51:00 +1100833 init_waitqueue_head(&cinfo->wait);
834 mutex_init(&cinfo->recv_mutex);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500835
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500836 mddev->cluster_info = cinfo;
837
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500838 memset(str, 0, 64);
Guoqing Jiangb89f7042015-07-10 16:54:02 +0800839 sprintf(str, "%pU", mddev->uuid);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500840 ret = dlm_new_lockspace(str, mddev->bitmap_info.cluster_name,
841 DLM_LSFL_FS, LVB_SIZE,
842 &md_ls_ops, mddev, &ops_rv, &cinfo->lockspace);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500843 if (ret)
844 goto err;
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500845 wait_for_completion(&cinfo->completion);
Guoqing Jiang8c58f022015-04-21 11:25:52 -0500846 if (nodes < cinfo->slot_number) {
847 pr_err("md-cluster: Slot allotted(%d) is greater than available slots(%d).",
848 cinfo->slot_number, nodes);
Goldwyn Rodriguesb97e92572014-06-06 11:50:56 -0500849 ret = -ERANGE;
850 goto err;
851 }
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500852 /* Initiate the communication resources */
853 ret = -ENOMEM;
854 cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
855 if (!cinfo->recv_thread) {
856 pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
857 goto err;
858 }
859 cinfo->message_lockres = lockres_init(mddev, "message", NULL, 1);
860 if (!cinfo->message_lockres)
861 goto err;
862 cinfo->token_lockres = lockres_init(mddev, "token", NULL, 0);
863 if (!cinfo->token_lockres)
864 goto err;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500865 cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
866 if (!cinfo->no_new_dev_lockres)
867 goto err;
868
Guoqing Jiang15352122016-05-02 11:33:12 -0400869 ret = dlm_lock_sync(cinfo->token_lockres, DLM_LOCK_EX);
870 if (ret) {
871 ret = -EAGAIN;
872 pr_err("md-cluster: can't join cluster to avoid lock issue\n");
873 goto err;
874 }
875 cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000876 if (!cinfo->ack_lockres) {
877 ret = -ENOMEM;
Guoqing Jiang15352122016-05-02 11:33:12 -0400878 goto err;
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000879 }
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500880 /* get sync CR lock on ACK. */
881 if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
882 pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
883 ret);
Guoqing Jiang15352122016-05-02 11:33:12 -0400884 dlm_unlock_sync(cinfo->token_lockres);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500885 /* get sync CR lock on no-new-dev. */
886 if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
887 pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
888
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -0500889
890 pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
891 snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
892 cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000893 if (!cinfo->bitmap_lockres) {
894 ret = -ENOMEM;
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -0500895 goto err;
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000896 }
Goldwyn Rodrigues54519c52014-06-06 12:12:32 -0500897 if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
898 pr_err("Failed to get bitmap lock\n");
899 ret = -EINVAL;
900 goto err;
901 }
902
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -0500903 cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000904 if (!cinfo->resync_lockres) {
905 ret = -ENOMEM;
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -0500906 goto err;
Wei Yongjun0f6187d2016-08-21 14:42:25 +0000907 }
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -0500908
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500909 return 0;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500910err:
Guoqing Jiang5b0fb332016-05-02 11:33:11 -0400911 md_unregister_thread(&cinfo->recovery_thread);
912 md_unregister_thread(&cinfo->recv_thread);
Goldwyn Rodrigues46646802014-06-07 01:08:29 -0500913 lockres_free(cinfo->message_lockres);
914 lockres_free(cinfo->token_lockres);
915 lockres_free(cinfo->ack_lockres);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -0500916 lockres_free(cinfo->no_new_dev_lockres);
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -0500917 lockres_free(cinfo->resync_lockres);
Goldwyn Rodrigues96ae9232014-06-06 12:35:34 -0500918 lockres_free(cinfo->bitmap_lockres);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500919 if (cinfo->lockspace)
920 dlm_release_lockspace(cinfo->lockspace, 2);
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500921 mddev->cluster_info = NULL;
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500922 kfree(cinfo);
Goldwyn Rodriguesc4ce8672014-03-29 10:20:02 -0500923 return ret;
Goldwyn Rodriguesedb39c92014-03-29 10:01:53 -0500924}
925
Guoqing Jiang51e453a2016-05-04 02:17:09 -0400926static void load_bitmaps(struct mddev *mddev, int total_slots)
927{
928 struct md_cluster_info *cinfo = mddev->cluster_info;
929
930 /* load all the node's bitmap info for resync */
931 if (gather_all_resync_info(mddev, total_slots))
932 pr_err("md-cluster: failed to gather all resyn infos\n");
933 set_bit(MD_CLUSTER_ALREADY_IN_CLUSTER, &cinfo->state);
934 /* wake up recv thread in case something need to be handled */
935 if (test_and_clear_bit(MD_CLUSTER_PENDING_RECV_EVENT, &cinfo->state))
936 md_wakeup_thread(cinfo->recv_thread);
937}
938
Guoqing Jiang09995412015-10-01 00:09:18 +0800939static void resync_bitmap(struct mddev *mddev)
940{
941 struct md_cluster_info *cinfo = mddev->cluster_info;
942 struct cluster_msg cmsg = {0};
943 int err;
944
945 cmsg.type = cpu_to_le32(BITMAP_NEEDS_SYNC);
946 err = sendmsg(cinfo, &cmsg);
947 if (err)
948 pr_err("%s:%d: failed to send BITMAP_NEEDS_SYNC message (%d)\n",
949 __func__, __LINE__, err);
950}
951
static void unlock_all_bitmaps(struct mddev *mddev);
/*
 * Leave the cluster: notify peers if our bitmap is still dirty, stop the
 * helper threads, release every lock resource and drop the lockspace.
 * Teardown order matters — threads must stop before their lock resources
 * are freed.
 */
static int leave(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	if (!cinfo)
		return 0;

	/* BITMAP_NEEDS_SYNC message should be sent when node
	 * is leaving the cluster with dirty bitmap, also we
	 * can only deliver it when dlm connection is available */
	if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
		resync_bitmap(mddev);

	md_unregister_thread(&cinfo->recovery_thread);
	md_unregister_thread(&cinfo->recv_thread);
	lockres_free(cinfo->message_lockres);
	lockres_free(cinfo->token_lockres);
	lockres_free(cinfo->ack_lockres);
	lockres_free(cinfo->no_new_dev_lockres);
	lockres_free(cinfo->resync_lockres);
	lockres_free(cinfo->bitmap_lockres);
	unlock_all_bitmaps(mddev);
	dlm_release_lockspace(cinfo->lockspace, 2);
	return 0;
}
978
Goldwyn Rodriguescf921cc2014-03-30 00:42:49 -0500979/* slot_number(): Returns the MD slot number to use
980 * DLM starts the slot numbers from 1, wheras cluster-md
981 * wants the number to be from zero, so we deduct one
982 */
983static int slot_number(struct mddev *mddev)
984{
985 struct md_cluster_info *cinfo = mddev->cluster_info;
986
987 return cinfo->slot_number - 1;
988}
989
/*
 * Check if the communication is already locked, else lock the communication
 * channel.
 * If it is already locked, token is in EX mode, and hence lock_token()
 * should not be called.
 */
static int metadata_update_start(struct mddev *mddev)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;

	/*
	 * Proceed either by winning the send channel outright, or by
	 * inheriting it from add_new_disk(), which left the token held
	 * and SEND_LOCKED_ALREADY set for us to consume.
	 */
	wait_event(cinfo->wait,
		   !test_and_set_bit(MD_CLUSTER_SEND_LOCK, &cinfo->state) ||
		   test_and_clear_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state));

	/* If token is already locked, return 0 */
	if (cinfo->token_lockres->mode == DLM_LOCK_EX)
		return 0;

	return lock_token(cinfo);
}
1010
1011static int metadata_update_finish(struct mddev *mddev)
1012{
1013 struct md_cluster_info *cinfo = mddev->cluster_info;
1014 struct cluster_msg cmsg;
Goldwyn Rodrigues70bcecd2015-08-21 10:33:39 -05001015 struct md_rdev *rdev;
1016 int ret = 0;
NeilBrownba2746b2015-10-16 13:48:35 +11001017 int raid_slot = -1;
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -05001018
1019 memset(&cmsg, 0, sizeof(cmsg));
1020 cmsg.type = cpu_to_le32(METADATA_UPDATED);
Goldwyn Rodrigues70bcecd2015-08-21 10:33:39 -05001021 /* Pick up a good active device number to send.
1022 */
1023 rdev_for_each(rdev, mddev)
1024 if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
NeilBrownba2746b2015-10-16 13:48:35 +11001025 raid_slot = rdev->desc_nr;
Goldwyn Rodrigues70bcecd2015-08-21 10:33:39 -05001026 break;
1027 }
NeilBrownba2746b2015-10-16 13:48:35 +11001028 if (raid_slot >= 0) {
1029 cmsg.raid_slot = cpu_to_le32(raid_slot);
Goldwyn Rodrigues70bcecd2015-08-21 10:33:39 -05001030 ret = __sendmsg(cinfo, &cmsg);
NeilBrownba2746b2015-10-16 13:48:35 +11001031 } else
Goldwyn Rodrigues70bcecd2015-08-21 10:33:39 -05001032 pr_warn("md-cluster: No good device id found to send\n");
Guoqing Jiang8b9277c2015-12-21 10:51:00 +11001033 clear_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state);
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -05001034 unlock_comm(cinfo);
1035 return ret;
1036}
1037
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001038static void metadata_update_cancel(struct mddev *mddev)
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -05001039{
1040 struct md_cluster_info *cinfo = mddev->cluster_info;
Guoqing Jiang8b9277c2015-12-21 10:51:00 +11001041 clear_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state);
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001042 unlock_comm(cinfo);
Goldwyn Rodrigues293467a2014-06-07 01:44:51 -05001043}
1044
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -05001045static int resync_start(struct mddev *mddev)
1046{
1047 struct md_cluster_info *cinfo = mddev->cluster_info;
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -05001048 return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
1049}
1050
/*
 * Publish our current resync window [lo, hi] to the cluster: store it in
 * the bitmap lock's LVB, then broadcast a RESYNCING message. A (0, 0)
 * window means "resync finished".
 */
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
	struct resync_info ri;
	struct cluster_msg cmsg = {0};

	/* do not send zero again, if we have sent before */
	if (hi == 0) {
		memcpy(&ri, cinfo->bitmap_lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
		if (le64_to_cpu(ri.hi) == 0)
			return 0;
	}

	add_resync_info(cinfo->bitmap_lockres, lo, hi);
	/* Re-acquire the lock to refresh LVB */
	/* NOTE(review): the dlm_lock_sync() result is ignored here — on
	 * failure the LVB may be stale; confirm this is acceptable. */
	dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW);
	cmsg.type = cpu_to_le32(RESYNCING);
	cmsg.low = cpu_to_le64(lo);
	cmsg.high = cpu_to_le64(hi);

	return sendmsg(cinfo, &cmsg);
}
1073
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -05001074static int resync_finish(struct mddev *mddev)
1075{
1076 struct md_cluster_info *cinfo = mddev->cluster_info;
Goldwyn Rodriguesc186b122015-09-30 13:20:35 -05001077 dlm_unlock_sync(cinfo->resync_lockres);
1078 return resync_info_update(mddev, 0, 0);
1079}
1080
Goldwyn Rodrigues90382ed2015-06-24 09:30:32 -05001081static int area_resyncing(struct mddev *mddev, int direction,
1082 sector_t lo, sector_t hi)
Goldwyn Rodrigues589a1c42014-06-07 02:39:37 -05001083{
1084 struct md_cluster_info *cinfo = mddev->cluster_info;
1085 int ret = 0;
1086 struct suspend_info *s;
1087
Goldwyn Rodrigues90382ed2015-06-24 09:30:32 -05001088 if ((direction == READ) &&
1089 test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
1090 return 1;
1091
Goldwyn Rodrigues589a1c42014-06-07 02:39:37 -05001092 spin_lock_irq(&cinfo->suspend_lock);
1093 if (list_empty(&cinfo->suspend_list))
1094 goto out;
1095 list_for_each_entry(s, &cinfo->suspend_list, list)
1096 if (hi > s->lo && lo < s->hi) {
1097 ret = 1;
1098 break;
1099 }
1100out:
1101 spin_unlock_irq(&cinfo->suspend_lock);
1102 return ret;
1103}
1104
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001105/* add_new_disk() - initiates a disk add
1106 * However, if this fails before writing md_update_sb(),
1107 * add_new_disk_cancel() must be called to release token lock
1108 */
1109static int add_new_disk(struct mddev *mddev, struct md_rdev *rdev)
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001110{
1111 struct md_cluster_info *cinfo = mddev->cluster_info;
1112 struct cluster_msg cmsg;
1113 int ret = 0;
1114 struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
1115 char *uuid = sb->device_uuid;
1116
1117 memset(&cmsg, 0, sizeof(cmsg));
1118 cmsg.type = cpu_to_le32(NEWDISK);
1119 memcpy(cmsg.uuid, uuid, 16);
Guoqing Jiangfaeff832015-10-12 17:21:21 +08001120 cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001121 lock_comm(cinfo);
1122 ret = __sendmsg(cinfo, &cmsg);
1123 if (ret)
1124 return ret;
1125 cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
1126 ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
1127 cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
1128 /* Some node does not "see" the device */
1129 if (ret == -EAGAIN)
1130 ret = -ENOENT;
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001131 if (ret)
1132 unlock_comm(cinfo);
Guoqing Jiang8b9277c2015-12-21 10:51:00 +11001133 else {
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001134 dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
Guoqing Jiange19508f2015-12-21 10:51:01 +11001135 /* Since MD_CHANGE_DEVS will be set in add_bound_rdev which
1136 * will run soon after add_new_disk, the below path will be
1137 * invoked:
1138 * md_wakeup_thread(mddev->thread)
1139 * -> conf->thread (raid1d)
1140 * -> md_check_recovery -> md_update_sb
1141 * -> metadata_update_start/finish
1142 * MD_CLUSTER_SEND_LOCKED_ALREADY will be cleared eventually.
1143 *
1144 * For other failure cases, metadata_update_cancel and
1145 * add_new_disk_cancel also clear below bit as well.
1146 * */
Guoqing Jiang8b9277c2015-12-21 10:51:00 +11001147 set_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state);
1148 wake_up(&cinfo->wait);
1149 }
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001150 return ret;
1151}
1152
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001153static void add_new_disk_cancel(struct mddev *mddev)
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001154{
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001155 struct md_cluster_info *cinfo = mddev->cluster_info;
Guoqing Jiang8b9277c2015-12-21 10:51:00 +11001156 clear_bit(MD_CLUSTER_SEND_LOCKED_ALREADY, &cinfo->state);
Goldwyn Rodriguesdbb64f82015-10-01 13:20:27 -05001157 unlock_comm(cinfo);
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001158}
1159
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -06001160static int new_disk_ack(struct mddev *mddev, bool ack)
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001161{
1162 struct md_cluster_info *cinfo = mddev->cluster_info;
1163
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -06001164 if (!test_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state)) {
1165 pr_warn("md-cluster(%s): Spurious cluster confirmation\n", mdname(mddev));
1166 return -EINVAL;
1167 }
1168
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001169 if (ack)
1170 dlm_unlock_sync(cinfo->no_new_dev_lockres);
1171 complete(&cinfo->newdisk_completion);
Goldwyn Rodriguesfa8259d2015-03-02 10:55:49 -06001172 return 0;
Goldwyn Rodrigues1aee41f2014-10-29 18:51:31 -05001173}
1174
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -05001175static int remove_disk(struct mddev *mddev, struct md_rdev *rdev)
1176{
Guoqing Jiangaee177a2015-10-12 17:21:24 +08001177 struct cluster_msg cmsg = {0};
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -05001178 struct md_cluster_info *cinfo = mddev->cluster_info;
Guoqing Jiangfaeff832015-10-12 17:21:21 +08001179 cmsg.type = cpu_to_le32(REMOVE);
1180 cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
Goldwyn Rodrigues54a88392015-12-21 10:51:00 +11001181 return sendmsg(cinfo, &cmsg);
Goldwyn Rodrigues88bcfef2015-04-14 10:44:44 -05001182}
1183
Guoqing Jiangf6a2dc62015-12-21 10:51:00 +11001184static int lock_all_bitmaps(struct mddev *mddev)
1185{
1186 int slot, my_slot, ret, held = 1, i = 0;
1187 char str[64];
1188 struct md_cluster_info *cinfo = mddev->cluster_info;
1189
1190 cinfo->other_bitmap_lockres = kzalloc((mddev->bitmap_info.nodes - 1) *
1191 sizeof(struct dlm_lock_resource *),
1192 GFP_KERNEL);
1193 if (!cinfo->other_bitmap_lockres) {
1194 pr_err("md: can't alloc mem for other bitmap locks\n");
1195 return 0;
1196 }
1197
1198 my_slot = slot_number(mddev);
1199 for (slot = 0; slot < mddev->bitmap_info.nodes; slot++) {
1200 if (slot == my_slot)
1201 continue;
1202
1203 memset(str, '\0', 64);
1204 snprintf(str, 64, "bitmap%04d", slot);
1205 cinfo->other_bitmap_lockres[i] = lockres_init(mddev, str, NULL, 1);
1206 if (!cinfo->other_bitmap_lockres[i])
1207 return -ENOMEM;
1208
1209 cinfo->other_bitmap_lockres[i]->flags |= DLM_LKF_NOQUEUE;
1210 ret = dlm_lock_sync(cinfo->other_bitmap_lockres[i], DLM_LOCK_PW);
1211 if (ret)
1212 held = -1;
1213 i++;
1214 }
1215
1216 return held;
1217}
1218
1219static void unlock_all_bitmaps(struct mddev *mddev)
1220{
1221 struct md_cluster_info *cinfo = mddev->cluster_info;
1222 int i;
1223
1224 /* release other node's bitmap lock if they are existed */
1225 if (cinfo->other_bitmap_lockres) {
1226 for (i = 0; i < mddev->bitmap_info.nodes - 1; i++) {
1227 if (cinfo->other_bitmap_lockres[i]) {
Guoqing Jiangf6a2dc62015-12-21 10:51:00 +11001228 lockres_free(cinfo->other_bitmap_lockres[i]);
1229 }
1230 }
1231 kfree(cinfo->other_bitmap_lockres);
1232 }
1233}
1234
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -05001235static int gather_bitmaps(struct md_rdev *rdev)
1236{
1237 int sn, err;
1238 sector_t lo, hi;
Guoqing Jiangaee177a2015-10-12 17:21:24 +08001239 struct cluster_msg cmsg = {0};
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -05001240 struct mddev *mddev = rdev->mddev;
1241 struct md_cluster_info *cinfo = mddev->cluster_info;
1242
Guoqing Jiangfaeff832015-10-12 17:21:21 +08001243 cmsg.type = cpu_to_le32(RE_ADD);
1244 cmsg.raid_slot = cpu_to_le32(rdev->desc_nr);
Goldwyn Rodrigues97f6cd32015-04-14 10:45:42 -05001245 err = sendmsg(cinfo, &cmsg);
1246 if (err)
1247 goto out;
1248
1249 for (sn = 0; sn < mddev->bitmap_info.nodes; sn++) {
1250 if (sn == (cinfo->slot_number - 1))
1251 continue;
1252 err = bitmap_copy_from_slot(mddev, sn, &lo, &hi, false);
1253 if (err) {
1254 pr_warn("md-cluster: Could not gather bitmaps from slot %d", sn);
1255 goto out;
1256 }
1257 if ((hi > 0) && (lo < mddev->recovery_cp))
1258 mddev->recovery_cp = lo;
1259 }
1260out:
1261 return err;
1262}
1263
/*
 * Hooks handed to the md core via register_md_cluster_operations();
 * md calls through this vector for all cluster-aware array handling.
 */
static struct md_cluster_operations cluster_ops = {
	.join = join,
	.leave = leave,
	.slot_number = slot_number,
	.resync_start = resync_start,
	.resync_finish = resync_finish,
	.resync_info_update = resync_info_update,
	.metadata_update_start = metadata_update_start,
	.metadata_update_finish = metadata_update_finish,
	.metadata_update_cancel = metadata_update_cancel,
	.area_resyncing = area_resyncing,
	.add_new_disk = add_new_disk,
	.add_new_disk_cancel = add_new_disk_cancel,
	.new_disk_ack = new_disk_ack,
	.remove_disk = remove_disk,
	.load_bitmaps = load_bitmaps,
	.gather_bitmaps = gather_bitmaps,
	.lock_all_bitmaps = lock_all_bitmaps,
	.unlock_all_bitmaps = unlock_all_bitmaps,
};
1284
/* Module init: register the cluster operations vector with the md core. */
static int __init cluster_init(void)
{
	pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
	pr_info("Registering Cluster MD functions\n");
	register_md_cluster_operations(&cluster_ops, THIS_MODULE);
	return 0;
}
1292
/* Module exit: detach the cluster operations from the md core. */
static void cluster_exit(void)
{
	unregister_md_cluster_operations();
}
1297
1298module_init(cluster_init);
1299module_exit(cluster_exit);
Guoqing Jiang86b57272015-10-12 17:21:25 +08001300MODULE_AUTHOR("SUSE");
Goldwyn Rodrigues8e854e92014-03-07 11:21:15 -06001301MODULE_LICENSE("GPL");
1302MODULE_DESCRIPTION("Clustering support for MD");