/*
 * Copyright (C) 2012 Red Hat. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison.h"
#include "dm-bio-record.h"
#include "dm-cache-metadata.h"

#include <linux/dm-io.h>
#include <linux/dm-kcopyd.h>
#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>

#define DM_MSG_PREFIX "cache"

DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(cache_copy_throttle,
	"A percentage of time allocated for copying to and/or from cache");

/*----------------------------------------------------------------*/

#define IOT_RESOLUTION 4

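/*
 * Tracks sectors of in-flight IO so we can tell whether a device has
 * been idle for a given number of jiffies (see iot_idle_for()).
 */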
struct io_tracker {
	spinlock_t lock;

	/*
	 * Sectors of in-flight IO.
	 */
	sector_t in_flight;

	/*
	 * The time, in jiffies, when this device became idle (if it is
	 * indeed idle).
	 */
	unsigned long idle_time;
	unsigned long last_update_time;
};

static void iot_init(struct io_tracker *iot)
{
	spin_lock_init(&iot->lock);
	iot->in_flight = 0ul;
	iot->idle_time = 0ul;
	iot->last_update_time = jiffies;
}

static bool __iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	if (iot->in_flight)
		return false;

	return time_after(jiffies, iot->idle_time + jifs);
}

static bool iot_idle_for(struct io_tracker *iot, unsigned long jifs)
{
	bool r;
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	r = __iot_idle_for(iot, jifs);
	spin_unlock_irqrestore(&iot->lock, flags);

	return r;
}

static void iot_io_begin(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	iot->in_flight += len;
	spin_unlock_irqrestore(&iot->lock, flags);
}

static void __iot_io_end(struct io_tracker *iot, sector_t len)
{
	iot->in_flight -= len;
	if (!iot->in_flight)
		iot->idle_time = jiffies;
}

static void iot_io_end(struct io_tracker *iot, sector_t len)
{
	unsigned long flags;

	spin_lock_irqsave(&iot->lock, flags);
	__iot_io_end(iot, len);
	spin_unlock_irqrestore(&iot->lock, flags);
}

/*----------------------------------------------------------------*/

/*
 * Glossary:
 *
 * oblock: index of an origin block
 * cblock: index of a cache block
 * promotion: movement of a block from origin to cache
 * demotion: movement of a block from cache to origin
 * migration: movement of a block between the origin and cache device,
 *	      either direction
 */

/*----------------------------------------------------------------*/

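/*
 * Small helpers for the vmalloc'd bitsets used to hold per-block state
 * (one bit per block, rounded up to whole unsigned longs).
 */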
static size_t bitset_size_in_bytes(unsigned nr_entries)
{
	return sizeof(unsigned long) * dm_div_up(nr_entries, BITS_PER_LONG);
}

static unsigned long *alloc_bitset(unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	return vzalloc(s);
}

static void clear_bitset(void *bitset, unsigned nr_entries)
{
	size_t s = bitset_size_in_bytes(nr_entries);
	memset(bitset, 0, s);
}

static void free_bitset(unsigned long *bits)
{
	vfree(bits);
}

/*----------------------------------------------------------------*/

/*
 * There are a couple of places where we let a bio run, but want to do some
 * work before calling its endio function.  We do this by temporarily
 * changing the endio fn.
 */
struct dm_hook_info {
	bio_end_io_t *bi_end_io;
	void *bi_private;
};

static void dm_hook_bio(struct dm_hook_info *h, struct bio *bio,
			bio_end_io_t *bi_end_io, void *bi_private)
{
	h->bi_end_io = bio->bi_end_io;
	h->bi_private = bio->bi_private;

	bio->bi_end_io = bi_end_io;
	bio->bi_private = bi_private;
}

static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio)
{
	bio->bi_end_io = h->bi_end_io;
	bio->bi_private = h->bi_private;
}

/*----------------------------------------------------------------*/

#define MIGRATION_POOL_SIZE 128
#define COMMIT_PERIOD HZ
#define MIGRATION_COUNT_WINDOW 10

/*
 * The block size of the device holding cache data must be
 * between 32KB and 1GB.
 */
#define DATA_DEV_BLOCK_SIZE_MIN_SECTORS (32 * 1024 >> SECTOR_SHIFT)
#define DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)

/*
 * FIXME: the cache is read/write for the time being.
 */
enum cache_metadata_mode {
	CM_WRITE,		/* metadata may be changed */
	CM_READ_ONLY,		/* metadata may not be changed */
};

enum cache_io_mode {
	/*
	 * Data is written to cached blocks only.  These blocks are marked
	 * dirty.  If you lose the cache device you will lose data.
	 * Potential performance increase for both reads and writes.
	 */
	CM_IO_WRITEBACK,

	/*
	 * Data is written to both cache and origin.  Blocks are never
	 * dirty.  Potential performance benefit for reads only.
	 */
	CM_IO_WRITETHROUGH,

	/*
	 * A degraded mode useful for various cache coherency situations
	 * (eg, rolling back snapshots).  Reads and writes always go to the
	 * origin.  If a write goes to a cached oblock, then the cache
	 * block is invalidated.
	 */
	CM_IO_PASSTHROUGH
};

struct cache_features {
	enum cache_metadata_mode mode;
	enum cache_io_mode io_mode;
};

struct cache_stats {
	atomic_t read_hit;
	atomic_t read_miss;
	atomic_t write_hit;
	atomic_t write_miss;
	atomic_t demotion;
	atomic_t promotion;
	atomic_t copies_avoided;
	atomic_t cache_cell_clash;
	atomic_t commit_count;
	atomic_t discard_count;
};

/*
 * Defines a range of cblocks, begin to (end - 1) are in the range.  end is
 * the one-past-the-end value.
 */
struct cblock_range {
	dm_cblock_t begin;
	dm_cblock_t end;
};

struct invalidation_request {
	struct list_head list;
	struct cblock_range *cblocks;

	atomic_t complete;
	int err;

	wait_queue_head_t result_wait;
};

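/*
 * All the state for a single cache target instance: the three underlying
 * devices, the metadata handle, the dirty and discard bitsets, migration
 * bookkeeping and the policy.
 */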
struct cache {
	struct dm_target *ti;
	struct dm_target_callbacks callbacks;

	struct dm_cache_metadata *cmd;

	/*
	 * Metadata is written to this device.
	 */
	struct dm_dev *metadata_dev;

	/*
	 * The slower of the two data devices.  Typically a spindle.
	 */
	struct dm_dev *origin_dev;

	/*
	 * The faster of the two data devices.  Typically an SSD.
	 */
	struct dm_dev *cache_dev;

	/*
	 * Size of the origin device in _complete_ blocks and native sectors.
	 */
	dm_oblock_t origin_blocks;
	sector_t origin_sectors;

	/*
	 * Size of the cache device in blocks.
	 */
	dm_cblock_t cache_size;

	/*
	 * Fields for converting from sectors to blocks.
	 */
	uint32_t sectors_per_block;
	int sectors_per_block_shift;

	spinlock_t lock;
	struct bio_list deferred_bios;
	struct bio_list deferred_flush_bios;
	struct bio_list deferred_writethrough_bios;
	struct list_head quiesced_migrations;
	struct list_head completed_migrations;
	struct list_head need_commit_migrations;
	sector_t migration_threshold;
	wait_queue_head_t migration_wait;
	atomic_t nr_allocated_migrations;

	/*
	 * The number of in flight migrations that are performing
	 * background io. eg, promotion, writeback.
	 */
	atomic_t nr_io_migrations;

	wait_queue_head_t quiescing_wait;
	atomic_t quiescing;
	atomic_t quiescing_ack;

	/*
	 * cache_size entries, dirty if set
	 */
	atomic_t nr_dirty;
	unsigned long *dirty_bitset;

	/*
	 * origin_blocks entries, discarded if set.
	 */
	dm_dblock_t discard_nr_blocks;
	unsigned long *discard_bitset;
	uint32_t discard_block_size; /* a power of 2 times sectors per block */

	/*
	 * Rather than reconstructing the table line for the status we just
	 * save it and regurgitate.
	 */
	unsigned nr_ctr_args;
	const char **ctr_args;

	struct dm_kcopyd_client *copier;
	struct workqueue_struct *wq;
	struct work_struct worker;

	struct delayed_work waker;
	unsigned long last_commit_jiffies;

	struct dm_bio_prison *prison;
	struct dm_deferred_set *all_io_ds;

	mempool_t *migration_pool;

	struct dm_cache_policy *policy;
	unsigned policy_nr_args;

	bool need_tick_bio:1;
	bool sized:1;
	bool invalidate:1;
	bool commit_requested:1;
	bool loaded_mappings:1;
	bool loaded_discards:1;

	/*
	 * Cache features such as write-through.
	 */
	struct cache_features features;

	struct cache_stats stats;

	/*
	 * Invalidation fields.
	 */
	spinlock_t invalidation_lock;
	struct list_head invalidation_requests;

	struct io_tracker origin_tracker;
};

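/*
 * Per-bio state, stored in the space dm core reserves for the target;
 * see get_per_bio_data_size() for why there are two sizes.
 */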
struct per_bio_data {
	bool tick:1;
	unsigned req_nr:2;
	struct dm_deferred_entry *all_io_entry;
	struct dm_hook_info hook_info;
	sector_t len;

	/*
	 * writethrough fields.  These MUST remain at the end of this
	 * structure and the 'cache' member must be the first as it
	 * is used to determine the offset of the writethrough fields.
	 */
	struct cache *cache;
	dm_cblock_t cblock;
	struct dm_bio_details bio_details;
};

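/*
 * Describes a single block movement (promotion, demotion, writeback or
 * discard) that is in flight; the flags record which kind it is.
 */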
struct dm_cache_migration {
	struct list_head list;
	struct cache *cache;

	unsigned long start_jiffies;
	dm_oblock_t old_oblock;
	dm_oblock_t new_oblock;
	dm_cblock_t cblock;

	bool err:1;
	bool discard:1;
	bool writeback:1;
	bool demote:1;
	bool promote:1;
	bool requeue_holder:1;
	bool invalidate:1;

	struct dm_bio_prison_cell *old_ocell;
	struct dm_bio_prison_cell *new_ocell;
};

/*
 * Processing a bio in the worker thread may require these memory
 * allocations.  We prealloc to avoid deadlocks (the same worker thread
 * frees them back to the mempool).
 */
struct prealloc {
	struct dm_cache_migration *mg;
	struct dm_bio_prison_cell *cell1;
	struct dm_bio_prison_cell *cell2;
};

static void wake_worker(struct cache *cache)
{
	queue_work(cache->wq, &cache->worker);
}

/*----------------------------------------------------------------*/

static struct dm_bio_prison_cell *alloc_prison_cell(struct cache *cache)
{
	/* FIXME: change to use a local slab. */
	return dm_bio_prison_alloc_cell(cache->prison, GFP_NOWAIT);
}

static void free_prison_cell(struct cache *cache, struct dm_bio_prison_cell *cell)
{
	dm_bio_prison_free_cell(cache->prison, cell);
}

static struct dm_cache_migration *alloc_migration(struct cache *cache)
{
	struct dm_cache_migration *mg;

	mg = mempool_alloc(cache->migration_pool, GFP_NOWAIT);
	if (mg) {
		mg->cache = cache;
		atomic_inc(&mg->cache->nr_allocated_migrations);
	}

	return mg;
}

static void free_migration(struct dm_cache_migration *mg)
{
	if (atomic_dec_and_test(&mg->cache->nr_allocated_migrations))
		wake_up(&mg->cache->migration_wait);

	mempool_free(mg, mg->cache->migration_pool);
}

static int prealloc_data_structs(struct cache *cache, struct prealloc *p)
{
	if (!p->mg) {
		p->mg = alloc_migration(cache);
		if (!p->mg)
			return -ENOMEM;
	}

	if (!p->cell1) {
		p->cell1 = alloc_prison_cell(cache);
		if (!p->cell1)
			return -ENOMEM;
	}

	if (!p->cell2) {
		p->cell2 = alloc_prison_cell(cache);
		if (!p->cell2)
			return -ENOMEM;
	}

	return 0;
}

static void prealloc_free_structs(struct cache *cache, struct prealloc *p)
{
	if (p->cell2)
		free_prison_cell(cache, p->cell2);

	if (p->cell1)
		free_prison_cell(cache, p->cell1);

	if (p->mg)
		free_migration(p->mg);
}

static struct dm_cache_migration *prealloc_get_migration(struct prealloc *p)
{
	struct dm_cache_migration *mg = p->mg;

	BUG_ON(!mg);
	p->mg = NULL;

	return mg;
}

/*
 * You must have a cell within the prealloc struct to return.  If not this
 * function will BUG() rather than returning NULL.
 */
static struct dm_bio_prison_cell *prealloc_get_cell(struct prealloc *p)
{
	struct dm_bio_prison_cell *r = NULL;

	if (p->cell1) {
		r = p->cell1;
		p->cell1 = NULL;

	} else if (p->cell2) {
		r = p->cell2;
		p->cell2 = NULL;
	} else
		BUG();

	return r;
}

/*
 * You can't have more than two cells in a prealloc struct.  BUG() will be
 * called if you try and overfill.
 */
static void prealloc_put_cell(struct prealloc *p, struct dm_bio_prison_cell *cell)
{
	if (!p->cell2)
		p->cell2 = cell;

	else if (!p->cell1)
		p->cell1 = cell;

	else
		BUG();
}

/*----------------------------------------------------------------*/

static void build_key(dm_oblock_t begin, dm_oblock_t end, struct dm_cell_key *key)
{
	key->virtual = 0;
	key->dev = 0;
	key->block_begin = from_oblock(begin);
	key->block_end = from_oblock(end);
}

/*
 * The caller hands in a preallocated cell, and a free function for it.
 * The cell will be freed if there's an error, or if it wasn't used because
 * a cell with that key already exists.
 */
typedef void (*cell_free_fn)(void *context, struct dm_bio_prison_cell *cell);

static int bio_detain_range(struct cache *cache, dm_oblock_t oblock_begin, dm_oblock_t oblock_end,
			    struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
			    cell_free_fn free_fn, void *free_context,
			    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;

	build_key(oblock_begin, oblock_end, &key);
	r = dm_bio_detain(cache->prison, &key, bio, cell_prealloc, cell_result);
	if (r)
		free_fn(free_context, cell_prealloc);

	return r;
}

static int bio_detain(struct cache *cache, dm_oblock_t oblock,
		      struct bio *bio, struct dm_bio_prison_cell *cell_prealloc,
		      cell_free_fn free_fn, void *free_context,
		      struct dm_bio_prison_cell **cell_result)
{
	dm_oblock_t end = to_oblock(from_oblock(oblock) + 1ULL);
	return bio_detain_range(cache, oblock, end, bio,
				cell_prealloc, free_fn, free_context, cell_result);
}

static int get_cell(struct cache *cache,
		    dm_oblock_t oblock,
		    struct prealloc *structs,
		    struct dm_bio_prison_cell **cell_result)
{
	int r;
	struct dm_cell_key key;
	struct dm_bio_prison_cell *cell_prealloc;

	cell_prealloc = prealloc_get_cell(structs);

	build_key(oblock, to_oblock(from_oblock(oblock) + 1ULL), &key);
	r = dm_get_cell(cache->prison, &key, cell_prealloc, cell_result);
	if (r)
		prealloc_put_cell(structs, cell_prealloc);

	return r;
}

/*----------------------------------------------------------------*/

static bool is_dirty(struct cache *cache, dm_cblock_t b)
{
	return test_bit(from_cblock(b), cache->dirty_bitset);
}

static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) {
		atomic_inc(&cache->nr_dirty);
		policy_set_dirty(cache->policy, oblock);
	}
}

static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock)
{
	if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) {
		policy_clear_dirty(cache->policy, oblock);
		if (atomic_dec_return(&cache->nr_dirty) == 0)
			dm_table_event(cache->ti->table);
	}
}

/*----------------------------------------------------------------*/

static bool block_size_is_power_of_two(struct cache *cache)
{
	return cache->sectors_per_block_shift >= 0;
}

/* gcc on ARM generates spurious references to __udivdi3 and __umoddi3 */
#if defined(CONFIG_ARM) && __GNUC__ == 4 && __GNUC_MINOR__ <= 6
__always_inline
#endif
static dm_block_t block_div(dm_block_t b, uint32_t n)
{
	do_div(b, n);

	return b;
}

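/*
 * The discard bitset is kept at a coarser granularity than the cache
 * blocks (discard_block_size is a multiple of the cache block size), so
 * we have to convert between origin blocks and discard blocks.
 */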
static dm_block_t oblocks_per_dblock(struct cache *cache)
{
	dm_block_t oblocks = cache->discard_block_size;

	if (block_size_is_power_of_two(cache))
		oblocks >>= cache->sectors_per_block_shift;
	else
		oblocks = block_div(oblocks, cache->sectors_per_block);

	return oblocks;
}

static dm_dblock_t oblock_to_dblock(struct cache *cache, dm_oblock_t oblock)
{
	return to_dblock(block_div(from_oblock(oblock),
				   oblocks_per_dblock(cache)));
}

static dm_oblock_t dblock_to_oblock(struct cache *cache, dm_dblock_t dblock)
{
	return to_oblock(from_dblock(dblock) * oblocks_per_dblock(cache));
}

static void set_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	BUG_ON(from_dblock(b) >= from_dblock(cache->discard_nr_blocks));
	atomic_inc(&cache->stats.discard_count);

	spin_lock_irqsave(&cache->lock, flags);
	set_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void clear_discard(struct cache *cache, dm_dblock_t b)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	clear_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static bool is_discarded(struct cache *cache, dm_dblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(b), cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

static bool is_discarded_oblock(struct cache *cache, dm_oblock_t b)
{
	int r;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	r = test_bit(from_dblock(oblock_to_dblock(cache, b)),
		     cache->discard_bitset);
	spin_unlock_irqrestore(&cache->lock, flags);

	return r;
}

/*----------------------------------------------------------------*/

static void load_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	dm_cache_metadata_get_stats(cache->cmd, &stats);
	atomic_set(&cache->stats.read_hit, stats.read_hits);
	atomic_set(&cache->stats.read_miss, stats.read_misses);
	atomic_set(&cache->stats.write_hit, stats.write_hits);
	atomic_set(&cache->stats.write_miss, stats.write_misses);
}

static void save_stats(struct cache *cache)
{
	struct dm_cache_statistics stats;

	stats.read_hits = atomic_read(&cache->stats.read_hit);
	stats.read_misses = atomic_read(&cache->stats.read_miss);
	stats.write_hits = atomic_read(&cache->stats.write_hit);
	stats.write_misses = atomic_read(&cache->stats.write_miss);

	dm_cache_metadata_set_stats(cache->cmd, &stats);
}

/*----------------------------------------------------------------
 * Per bio data
 *--------------------------------------------------------------*/

/*
 * If using writeback, leave out struct per_bio_data's writethrough fields.
 */
#define PB_DATA_SIZE_WB (offsetof(struct per_bio_data, cache))
#define PB_DATA_SIZE_WT (sizeof(struct per_bio_data))

static bool writethrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITETHROUGH;
}

static bool writeback_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_WRITEBACK;
}

static bool passthrough_mode(struct cache_features *f)
{
	return f->io_mode == CM_IO_PASSTHROUGH;
}

static size_t get_per_bio_data_size(struct cache *cache)
{
	return writethrough_mode(&cache->features) ? PB_DATA_SIZE_WT : PB_DATA_SIZE_WB;
}

static struct per_bio_data *get_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = dm_per_bio_data(bio, data_size);
	BUG_ON(!pb);
	return pb;
}

static struct per_bio_data *init_per_bio_data(struct bio *bio, size_t data_size)
{
	struct per_bio_data *pb = get_per_bio_data(bio, data_size);

	pb->tick = false;
	pb->req_nr = dm_bio_get_target_bio_nr(bio);
	pb->all_io_entry = NULL;
	pb->len = 0;

	return pb;
}

/*----------------------------------------------------------------
 * Remapping
 *--------------------------------------------------------------*/
static void remap_to_origin(struct cache *cache, struct bio *bio)
{
	bio->bi_bdev = cache->origin_dev->bdev;
}

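/*
 * Point the bio at the cache device and translate its sector: the start
 * of the cache block (cblock * sectors_per_block) plus the offset within
 * the block, using a shift/mask when the block size is a power of two.
 */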
static void remap_to_cache(struct cache *cache, struct bio *bio,
			   dm_cblock_t cblock)
{
	sector_t bi_sector = bio->bi_iter.bi_sector;
	sector_t block = from_cblock(cblock);

	bio->bi_bdev = cache->cache_dev->bdev;
	if (!block_size_is_power_of_two(cache))
		bio->bi_iter.bi_sector =
			(block * cache->sectors_per_block) +
			sector_div(bi_sector, cache->sectors_per_block);
	else
		bio->bi_iter.bi_sector =
			(block << cache->sectors_per_block_shift) |
			(bi_sector & (cache->sectors_per_block - 1));
}

static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
{
	unsigned long flags;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	spin_lock_irqsave(&cache->lock, flags);
	if (cache->need_tick_bio &&
	    !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
		pb->tick = true;
		cache->need_tick_bio = false;
	}
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void remap_to_origin_clear_discard(struct cache *cache, struct bio *bio,
					  dm_oblock_t oblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_origin(cache, bio);
	if (bio_data_dir(bio) == WRITE)
		clear_discard(cache, oblock_to_dblock(cache, oblock));
}

static void remap_to_cache_dirty(struct cache *cache, struct bio *bio,
				 dm_oblock_t oblock, dm_cblock_t cblock)
{
	check_if_tick_bio_needed(cache, bio);
	remap_to_cache(cache, bio, cblock);
	if (bio_data_dir(bio) == WRITE) {
		set_dirty(cache, oblock, cblock);
		clear_discard(cache, oblock_to_dblock(cache, oblock));
	}
}

static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
{
	sector_t block_nr = bio->bi_iter.bi_sector;

	if (!block_size_is_power_of_two(cache))
		(void) sector_div(block_nr, cache->sectors_per_block);
	else
		block_nr >>= cache->sectors_per_block_shift;

	return to_oblock(block_nr);
}

static int bio_triggers_commit(struct cache *cache, struct bio *bio)
{
	return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
}

/*
 * You must increment the deferred set whilst the prison cell is held.  To
 * encourage this, we ask for 'cell' to be passed in.
 */
static void inc_ds(struct cache *cache, struct bio *bio,
		   struct dm_bio_prison_cell *cell)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(!cell);
	BUG_ON(pb->all_io_entry);

	pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
}

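/*
 * Only non-discard IO heading for the origin device is accounted in
 * origin_tracker; the length is remembered in the per-bio data so
 * accounted_complete() can subtract it again.
 */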
static bool accountable_bio(struct cache *cache, struct bio *bio)
{
	return ((bio->bi_bdev == cache->origin_dev->bdev) &&
		!(bio->bi_rw & REQ_DISCARD));
}

static void accounted_begin(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	if (accountable_bio(cache, bio)) {
		pb->len = bio_sectors(bio);
		iot_io_begin(&cache->origin_tracker, pb->len);
	}
}

static void accounted_complete(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	iot_io_end(&cache->origin_tracker, pb->len);
}

static void accounted_request(struct cache *cache, struct bio *bio)
{
	accounted_begin(cache, bio);
	generic_make_request(bio);
}

static void issue(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	if (!bio_triggers_commit(cache, bio)) {
		accounted_request(cache, bio);
		return;
	}

	/*
	 * Batch together any bios that trigger commits and then issue a
	 * single commit for them in do_worker().
	 */
	spin_lock_irqsave(&cache->lock, flags);
	cache->commit_requested = true;
	bio_list_add(&cache->deferred_flush_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
{
	inc_ds(cache, bio, cell);
	issue(cache, bio);
}

static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_writethrough_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void writethrough_endio(struct bio *bio, int err)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	dm_unhook_bio(&pb->hook_info, bio);

	if (err) {
		bio_endio(bio, err);
		return;
	}

	dm_bio_restore(&pb->bio_details, bio);
	remap_to_cache(pb->cache, bio, pb->cblock);

	/*
	 * We can't issue this bio directly, since we're in interrupt
	 * context.  So it gets put on a bio list for processing by the
	 * worker thread.
	 */
	defer_writethrough_bio(pb->cache, bio);
}

/*
 * When running in writethrough mode we need to send writes to clean blocks
 * to both the cache and origin devices.  In future we'd like to clone the
 * bio and send them in parallel, but for now we're doing them in
 * series as this is easier.
 */
static void remap_to_origin_then_cache(struct cache *cache, struct bio *bio,
				       dm_oblock_t oblock, dm_cblock_t cblock)
{
	struct per_bio_data *pb = get_per_bio_data(bio, PB_DATA_SIZE_WT);

	pb->cache = cache;
	pb->cblock = cblock;
	dm_hook_bio(&pb->hook_info, bio, writethrough_endio, NULL);
	dm_bio_record(&pb->bio_details, bio);

	remap_to_origin_clear_discard(pb->cache, bio, oblock);
}

/*----------------------------------------------------------------
 * Migration processing
 *
 * Migration covers moving data from the origin device to the cache, or
 * vice versa.
 *--------------------------------------------------------------*/
static void inc_io_migrations(struct cache *cache)
{
	atomic_inc(&cache->nr_io_migrations);
}

static void dec_io_migrations(struct cache *cache)
{
	atomic_dec(&cache->nr_io_migrations);
}

static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
			 bool holder)
{
	(holder ? dm_cell_release : dm_cell_release_no_holder)
		(cache->prison, cell, &cache->deferred_bios);
	free_prison_cell(cache, cell);
}

static void cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell,
		       bool holder)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	__cell_defer(cache, cell, holder);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void free_io_migration(struct dm_cache_migration *mg)
{
	dec_io_migrations(mg->cache);
	free_migration(mg);
}

static void migration_failure(struct dm_cache_migration *mg)
{
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN_LIMIT("writeback failed; couldn't copy block");
		set_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);

	} else if (mg->demote) {
		DMWARN_LIMIT("demotion failed; couldn't copy block");
		policy_force_mapping(cache->policy, mg->new_oblock, mg->old_oblock);

		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);
		if (mg->promote)
			cell_defer(cache, mg->new_ocell, true);
	} else {
		DMWARN_LIMIT("promotion failed; couldn't copy block");
		policy_remove_mapping(cache->policy, mg->new_oblock);
		cell_defer(cache, mg->new_ocell, true);
	}

	free_io_migration(mg);
}

static void migration_success_pre_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		clear_dirty(cache, mg->old_oblock, mg->cblock);
		cell_defer(cache, mg->old_ocell, false);
		free_io_migration(mg);
		return;

	} else if (mg->demote) {
		if (dm_cache_remove_mapping(cache->cmd, mg->cblock)) {
			DMWARN_LIMIT("demotion failed; couldn't update on disk metadata");
			policy_force_mapping(cache->policy, mg->new_oblock,
					     mg->old_oblock);
			if (mg->promote)
				cell_defer(cache, mg->new_ocell, true);
			free_io_migration(mg);
			return;
		}
	} else {
		if (dm_cache_insert_mapping(cache->cmd, mg->cblock, mg->new_oblock)) {
			DMWARN_LIMIT("promotion failed; couldn't update on disk metadata");
			policy_remove_mapping(cache->policy, mg->new_oblock);
			free_io_migration(mg);
			return;
		}
	}

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->need_commit_migrations);
	cache->commit_requested = true;
	spin_unlock_irqrestore(&cache->lock, flags);
}

static void migration_success_post_commit(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	if (mg->writeback) {
		DMWARN("writeback unexpectedly triggered commit");
		return;

	} else if (mg->demote) {
		cell_defer(cache, mg->old_ocell, mg->promote ? false : true);

		if (mg->promote) {
			mg->demote = false;

			spin_lock_irqsave(&cache->lock, flags);
			list_add_tail(&mg->list, &cache->quiesced_migrations);
			spin_unlock_irqrestore(&cache->lock, flags);

		} else {
			if (mg->invalidate)
				policy_remove_mapping(cache->policy, mg->old_oblock);
			free_io_migration(mg);
		}

	} else {
		if (mg->requeue_holder) {
			clear_dirty(cache, mg->new_oblock, mg->cblock);
			cell_defer(cache, mg->new_ocell, true);
		} else {
			/*
			 * The block was promoted via an overwrite, so it's dirty.
			 */
			set_dirty(cache, mg->new_oblock, mg->cblock);
			bio_endio(mg->new_ocell->holder, 0);
			cell_defer(cache, mg->new_ocell, false);
		}
		free_io_migration(mg);
	}
}

static void copy_complete(int read_err, unsigned long write_err, void *context)
{
	unsigned long flags;
	struct dm_cache_migration *mg = (struct dm_cache_migration *) context;
	struct cache *cache = mg->cache;

	if (read_err || write_err)
		mg->err = true;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_copy(struct dm_cache_migration *mg)
{
	int r;
	struct dm_io_region o_region, c_region;
	struct cache *cache = mg->cache;
	sector_t cblock = from_cblock(mg->cblock);

	o_region.bdev = cache->origin_dev->bdev;
	o_region.count = cache->sectors_per_block;

	c_region.bdev = cache->cache_dev->bdev;
	c_region.sector = cblock * cache->sectors_per_block;
	c_region.count = cache->sectors_per_block;

	if (mg->writeback || mg->demote) {
		/* demote */
		o_region.sector = from_oblock(mg->old_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &c_region, 1, &o_region, 0, copy_complete, mg);
	} else {
		/* promote */
		o_region.sector = from_oblock(mg->new_oblock) * cache->sectors_per_block;
		r = dm_kcopyd_copy(cache->copier, &o_region, 1, &c_region, 0, copy_complete, mg);
	}

	if (r < 0) {
		DMERR_LIMIT("issuing migration failed");
		migration_failure(mg);
	}
}

static void overwrite_endio(struct bio *bio, int err)
{
	struct dm_cache_migration *mg = bio->bi_private;
	struct cache *cache = mg->cache;
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
	unsigned long flags;

	dm_unhook_bio(&pb->hook_info, bio);

	if (err)
		mg->err = true;

	mg->requeue_holder = false;

	spin_lock_irqsave(&cache->lock, flags);
	list_add_tail(&mg->list, &cache->completed_migrations);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(mg->cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
	remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);

	/*
	 * No need to inc_ds() here, since the cell will be held for the
	 * duration of the io.
	 */
	accounted_request(mg->cache, bio);
}

static bool bio_writes_complete_block(struct cache *cache, struct bio *bio)
{
	return (bio_data_dir(bio) == WRITE) &&
		(bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT));
}

static void avoid_copy(struct dm_cache_migration *mg)
{
	atomic_inc(&mg->cache->stats.copies_avoided);
	migration_success_pre_commit(mg);
}

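/*
 * Round the bio's sector range inwards to whole discard blocks.  If the
 * bio covers less than one discard block then b == e and nothing gets
 * marked.
 */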
static void calc_discard_block_range(struct cache *cache, struct bio *bio,
				     dm_dblock_t *b, dm_dblock_t *e)
{
	sector_t sb = bio->bi_iter.bi_sector;
	sector_t se = bio_end_sector(bio);

	*b = to_dblock(dm_sector_div_up(sb, cache->discard_block_size));

	if (se - sb < cache->discard_block_size)
		*e = *b;
	else
		*e = to_dblock(block_div(se, cache->discard_block_size));
}

static void issue_discard(struct dm_cache_migration *mg)
{
	dm_dblock_t b, e;
	struct bio *bio = mg->new_ocell->holder;

	calc_discard_block_range(mg->cache, bio, &b, &e);
	while (b != e) {
		set_discard(mg->cache, b);
		b = to_dblock(from_dblock(b) + 1);
	}

	bio_endio(bio, 0);
	cell_defer(mg->cache, mg->new_ocell, false);
	free_migration(mg);
}

static void issue_copy_or_discard(struct dm_cache_migration *mg)
{
	bool avoid;
	struct cache *cache = mg->cache;

	if (mg->discard) {
		issue_discard(mg);
		return;
	}

	if (mg->writeback || mg->demote)
		avoid = !is_dirty(cache, mg->cblock) ||
			is_discarded_oblock(cache, mg->old_oblock);
	else {
		struct bio *bio = mg->new_ocell->holder;

		avoid = is_discarded_oblock(cache, mg->new_oblock);

		if (writeback_mode(&cache->features) &&
		    !avoid && bio_writes_complete_block(cache, bio)) {
			issue_overwrite(mg, bio);
			return;
		}
	}

	avoid ? avoid_copy(mg) : issue_copy(mg);
}

static void complete_migration(struct dm_cache_migration *mg)
{
	if (mg->err)
		migration_failure(mg);
	else
		migration_success_pre_commit(mg);
}

static void process_migrations(struct cache *cache, struct list_head *head,
			       void (*fn)(struct dm_cache_migration *))
{
	unsigned long flags;
	struct list_head list;
	struct dm_cache_migration *mg, *tmp;

	INIT_LIST_HEAD(&list);
	spin_lock_irqsave(&cache->lock, flags);
	list_splice_init(head, &list);
	spin_unlock_irqrestore(&cache->lock, flags);

	list_for_each_entry_safe(mg, tmp, &list, list)
		fn(mg);
}

static void __queue_quiesced_migration(struct dm_cache_migration *mg)
{
	list_add_tail(&mg->list, &mg->cache->quiesced_migrations);
}

static void queue_quiesced_migration(struct dm_cache_migration *mg)
{
	unsigned long flags;
	struct cache *cache = mg->cache;

	spin_lock_irqsave(&cache->lock, flags);
	__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void queue_quiesced_migrations(struct cache *cache, struct list_head *work)
{
	unsigned long flags;
	struct dm_cache_migration *mg, *tmp;

	spin_lock_irqsave(&cache->lock, flags);
	list_for_each_entry_safe(mg, tmp, work, list)
		__queue_quiesced_migration(mg);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void check_for_quiesced_migrations(struct cache *cache,
					  struct per_bio_data *pb)
{
	struct list_head work;

	if (!pb->all_io_entry)
		return;

	INIT_LIST_HEAD(&work);
	dm_deferred_entry_dec(pb->all_io_entry, &work);

	if (!list_empty(&work))
		queue_quiesced_migrations(cache, &work);
}

static void quiesce_migration(struct dm_cache_migration *mg)
{
	if (!dm_deferred_set_add_work(mg->cache->all_io_ds, &mg->list))
		queue_quiesced_migration(mg);
}

static void promote(struct cache *cache, struct prealloc *structs,
		    dm_oblock_t oblock, dm_cblock_t cblock,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->new_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void writeback(struct cache *cache, struct prealloc *structs,
		      dm_oblock_t oblock, dm_cblock_t cblock,
		      struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = true;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void demote_then_promote(struct cache *cache, struct prealloc *structs,
				dm_oblock_t old_oblock, dm_oblock_t new_oblock,
				dm_cblock_t cblock,
				struct dm_bio_prison_cell *old_ocell,
				struct dm_bio_prison_cell *new_ocell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = true;
	mg->requeue_holder = true;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_oblock = old_oblock;
	mg->new_oblock = new_oblock;
	mg->cblock = cblock;
	mg->old_ocell = old_ocell;
	mg->new_ocell = new_ocell;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

/*
 * Invalidate a cache entry.  No writeback occurs; any changes in the cache
 * block are thrown away.
 */
static void invalidate(struct cache *cache, struct prealloc *structs,
		       dm_oblock_t oblock, dm_cblock_t cblock,
		       struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = false;
	mg->writeback = false;
	mg->demote = true;
	mg->promote = false;
	mg->requeue_holder = true;
	mg->invalidate = true;
	mg->cache = cache;
	mg->old_oblock = oblock;
	mg->cblock = cblock;
	mg->old_ocell = cell;
	mg->new_ocell = NULL;
	mg->start_jiffies = jiffies;

	inc_io_migrations(cache);
	quiesce_migration(mg);
}

static void discard(struct cache *cache, struct prealloc *structs,
		    struct dm_bio_prison_cell *cell)
{
	struct dm_cache_migration *mg = prealloc_get_migration(structs);

	mg->err = false;
	mg->discard = true;
	mg->writeback = false;
	mg->demote = false;
	mg->promote = false;
	mg->requeue_holder = false;
	mg->invalidate = false;
	mg->cache = cache;
	mg->old_ocell = NULL;
	mg->new_ocell = cell;
	mg->start_jiffies = jiffies;

	quiesce_migration(mg);
}

/*----------------------------------------------------------------
 * bio processing
 *--------------------------------------------------------------*/
static void defer_bio(struct cache *cache, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);
	bio_list_add(&cache->deferred_bios, bio);
	spin_unlock_irqrestore(&cache->lock, flags);

	wake_worker(cache);
}

static void process_flush_bio(struct cache *cache, struct bio *bio)
{
	size_t pb_data_size = get_per_bio_data_size(cache);
	struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);

	BUG_ON(bio->bi_iter.bi_size);
	if (!pb->req_nr)
		remap_to_origin(cache, bio);
	else
		remap_to_cache(cache, bio, 0);

	/*
	 * REQ_FLUSH is not directed at any particular block so we don't
	 * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
	 * by dm-core.
	 */
	issue(cache, bio);
}

static void process_discard_bio(struct cache *cache, struct prealloc *structs,
				struct bio *bio)
{
	int r;
	dm_dblock_t b, e;
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;

	calc_discard_block_range(cache, bio, &b, &e);
	if (b == e) {
		bio_endio(bio, 0);
		return;
	}

	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain_range(cache, dblock_to_oblock(cache, b), dblock_to_oblock(cache, e), bio, cell_prealloc,
			     (cell_free_fn) prealloc_put_cell,
			     structs, &new_ocell);
	if (r > 0)
		return;

	discard(cache, structs, new_ocell);
}

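/*
 * Background copies are throttled by migration_threshold (in sectors):
 * only start another migration if the current volume of in-flight
 * migration IO stays below it.
 */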
static bool spare_migration_bandwidth(struct cache *cache)
{
	sector_t current_volume = (atomic_read(&cache->nr_io_migrations) + 1) *
		cache->sectors_per_block;
	return current_volume < cache->migration_threshold;
}

static void inc_hit_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_hit : &cache->stats.write_hit);
}

static void inc_miss_counter(struct cache *cache, struct bio *bio)
{
	atomic_inc(bio_data_dir(bio) == READ ?
		   &cache->stats.read_miss : &cache->stats.write_miss);
}

/*----------------------------------------------------------------*/

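/*
 * Passed to the policy as a policy_locker: when the policy wants to
 * demote an old oblock it calls back into cell_locker(), which detains
 * that block using one of the preallocated cells.
 */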
struct old_oblock_lock {
	struct policy_locker locker;
	struct cache *cache;
	struct prealloc *structs;
	struct dm_bio_prison_cell *cell;
};

static int null_locker(struct policy_locker *locker, dm_oblock_t b)
{
	/* This should never be called */
	BUG();
	return 0;
}

static int cell_locker(struct policy_locker *locker, dm_oblock_t b)
{
	struct old_oblock_lock *l = container_of(locker, struct old_oblock_lock, locker);
	struct dm_bio_prison_cell *cell_prealloc = prealloc_get_cell(l->structs);

	return bio_detain(l->cache, b, NULL, cell_prealloc,
			  (cell_free_fn) prealloc_put_cell,
			  l->structs, &l->cell);
}

static void process_bio(struct cache *cache, struct prealloc *structs,
			struct bio *bio)
{
	int r;
	bool release_cell = true;
	dm_oblock_t block = get_bio_block(cache, bio);
	struct dm_bio_prison_cell *cell_prealloc, *new_ocell;
	struct policy_result lookup_result;
	bool passthrough = passthrough_mode(&cache->features);
	bool discarded_block, can_migrate;
	struct old_oblock_lock ool;

	/*
	 * Check to see if that block is currently migrating.
	 */
	cell_prealloc = prealloc_get_cell(structs);
	r = bio_detain(cache, block, bio, cell_prealloc,
		       (cell_free_fn) prealloc_put_cell,
		       structs, &new_ocell);
	if (r > 0)
		return;

	discarded_block = is_discarded_oblock(cache, block);
	can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));

	ool.locker.fn = cell_locker;
	ool.cache = cache;
	ool.structs = structs;
	ool.cell = NULL;
	r = policy_map(cache->policy, block, true, can_migrate, discarded_block,
		       bio, &ool.locker, &lookup_result);

	if (r == -EWOULDBLOCK)
		/* migration has been denied */
		lookup_result.op = POLICY_MISS;

	switch (lookup_result.op) {
	case POLICY_HIT:
		if (passthrough) {
			inc_miss_counter(cache, bio);

			/*
			 * Passthrough always maps to the origin,
			 * invalidating any cache blocks that are written
			 * to.
			 */

			if (bio_data_dir(bio) == WRITE) {
				atomic_inc(&cache->stats.demotion);
				invalidate(cache, structs, block, lookup_result.cblock, new_ocell);
				release_cell = false;

			} else {
				/* FIXME: factor out issue_origin() */
				remap_to_origin_clear_discard(cache, bio, block);
				inc_and_issue(cache, bio, new_ocell);
			}
		} else {
			inc_hit_counter(cache, bio);

			if (bio_data_dir(bio) == WRITE &&
			    writethrough_mode(&cache->features) &&
			    !is_dirty(cache, lookup_result.cblock)) {
				remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);

			} else {
				remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
				inc_and_issue(cache, bio, new_ocell);
			}
		}

		break;

	case POLICY_MISS:
		inc_miss_counter(cache, bio);
		remap_to_origin_clear_discard(cache, bio, block);
		inc_and_issue(cache, bio, new_ocell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001654 break;
1655
1656 case POLICY_NEW:
1657 atomic_inc(&cache->stats.promotion);
1658 promote(cache, structs, block, lookup_result.cblock, new_ocell);
1659 release_cell = false;
1660 break;
1661
1662 case POLICY_REPLACE:
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001663 atomic_inc(&cache->stats.demotion);
1664 atomic_inc(&cache->stats.promotion);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001665 demote_then_promote(cache, structs, lookup_result.old_oblock,
1666 block, lookup_result.cblock,
Joe Thornberfb4100a2015-05-20 10:30:32 +01001667 ool.cell, new_ocell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001668 release_cell = false;
1669 break;
1670
1671 default:
1672 DMERR_LIMIT("%s: erroring bio, unknown policy op: %u", __func__,
1673 (unsigned) lookup_result.op);
1674 bio_io_error(bio);
1675 }
1676
1677 if (release_cell)
1678 cell_defer(cache, new_ocell, false);
1679}
1680
1681static int need_commit_due_to_time(struct cache *cache)
1682{
Manuel Schölling0f30af92014-05-22 22:42:37 +02001683 return !time_in_range(jiffies, cache->last_commit_jiffies,
1684 cache->last_commit_jiffies + COMMIT_PERIOD);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001685}
1686
1687static int commit_if_needed(struct cache *cache)
1688{
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001689 int r = 0;
1690
1691 if ((cache->commit_requested || need_commit_due_to_time(cache)) &&
1692 dm_cache_changed_this_transaction(cache->cmd)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001693 atomic_inc(&cache->stats.commit_count);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001694 cache->commit_requested = false;
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001695 r = dm_cache_commit(cache->cmd, false);
1696 cache->last_commit_jiffies = jiffies;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001697 }
1698
Heinz Mauelshagenffcbcb62013-10-14 17:24:43 +02001699 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001700}
1701
1702static void process_deferred_bios(struct cache *cache)
1703{
1704 unsigned long flags;
1705 struct bio_list bios;
1706 struct bio *bio;
1707 struct prealloc structs;
1708
1709 memset(&structs, 0, sizeof(structs));
1710 bio_list_init(&bios);
1711
1712 spin_lock_irqsave(&cache->lock, flags);
1713 bio_list_merge(&bios, &cache->deferred_bios);
1714 bio_list_init(&cache->deferred_bios);
1715 spin_unlock_irqrestore(&cache->lock, flags);
1716
1717 while (!bio_list_empty(&bios)) {
1718 /*
1719 * If we've got no free migration structs, and processing
1720 * this bio might require one, we pause until there are some
1721 * prepared mappings to process.
1722 */
1723 if (prealloc_data_structs(cache, &structs)) {
1724 spin_lock_irqsave(&cache->lock, flags);
1725 bio_list_merge(&cache->deferred_bios, &bios);
1726 spin_unlock_irqrestore(&cache->lock, flags);
1727 break;
1728 }
1729
1730 bio = bio_list_pop(&bios);
1731
1732 if (bio->bi_rw & REQ_FLUSH)
1733 process_flush_bio(cache, bio);
1734 else if (bio->bi_rw & REQ_DISCARD)
Joe Thornber7ae34e72014-11-06 10:18:04 +00001735 process_discard_bio(cache, &structs, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001736 else
1737 process_bio(cache, &structs, bio);
1738 }
1739
1740 prealloc_free_structs(cache, &structs);
1741}
1742
1743static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
1744{
1745 unsigned long flags;
1746 struct bio_list bios;
1747 struct bio *bio;
1748
1749 bio_list_init(&bios);
1750
1751 spin_lock_irqsave(&cache->lock, flags);
1752 bio_list_merge(&bios, &cache->deferred_flush_bios);
1753 bio_list_init(&cache->deferred_flush_bios);
1754 spin_unlock_irqrestore(&cache->lock, flags);
1755
Joe Thornber8c081b52014-05-13 16:18:38 +01001756 /*
1757 * These bios have already been through inc_ds()
1758 */
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001759 while ((bio = bio_list_pop(&bios)))
Joe Thornber066dbaa32015-05-15 15:18:01 +01001760 submit_bios ? accounted_request(cache, bio) : bio_io_error(bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001761}
1762
Joe Thornbere2e74d62013-03-20 17:21:27 +00001763static void process_deferred_writethrough_bios(struct cache *cache)
1764{
1765 unsigned long flags;
1766 struct bio_list bios;
1767 struct bio *bio;
1768
1769 bio_list_init(&bios);
1770
1771 spin_lock_irqsave(&cache->lock, flags);
1772 bio_list_merge(&bios, &cache->deferred_writethrough_bios);
1773 bio_list_init(&cache->deferred_writethrough_bios);
1774 spin_unlock_irqrestore(&cache->lock, flags);
1775
Joe Thornber8c081b52014-05-13 16:18:38 +01001776 /*
1777 * These bios have already been through inc_ds()
1778 */
Joe Thornbere2e74d62013-03-20 17:21:27 +00001779 while ((bio = bio_list_pop(&bios)))
Joe Thornber066dbaa32015-05-15 15:18:01 +01001780 accounted_request(cache, bio);
Joe Thornbere2e74d62013-03-20 17:21:27 +00001781}
1782
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001783static void writeback_some_dirty_blocks(struct cache *cache)
1784{
1785 int r = 0;
1786 dm_oblock_t oblock;
1787 dm_cblock_t cblock;
1788 struct prealloc structs;
1789 struct dm_bio_prison_cell *old_ocell;
1790
1791 memset(&structs, 0, sizeof(structs));
1792
1793 while (spare_migration_bandwidth(cache)) {
1794 if (prealloc_data_structs(cache, &structs))
1795 break;
1796
1797 r = policy_writeback_work(cache->policy, &oblock, &cblock);
1798 if (r)
1799 break;
1800
1801 r = get_cell(cache, oblock, &structs, &old_ocell);
1802 if (r) {
1803 policy_set_dirty(cache->policy, oblock);
1804 break;
1805 }
1806
1807 writeback(cache, &structs, oblock, cblock, old_ocell);
1808 }
1809
1810 prealloc_free_structs(cache, &structs);
1811}
1812
1813/*----------------------------------------------------------------
Joe Thornber65790ff2013-11-08 16:39:50 +00001814 * Invalidations.
1815 * Dropping something from the cache *without* writing back.
1816 *--------------------------------------------------------------*/
1817
1818static void process_invalidation_request(struct cache *cache, struct invalidation_request *req)
1819{
1820 int r = 0;
1821 uint64_t begin = from_cblock(req->cblocks->begin);
1822 uint64_t end = from_cblock(req->cblocks->end);
1823
1824 while (begin != end) {
1825 r = policy_remove_cblock(cache->policy, to_cblock(begin));
1826 if (!r) {
1827 r = dm_cache_remove_mapping(cache->cmd, to_cblock(begin));
1828 if (r)
1829 break;
1830
1831 } else if (r == -ENODATA) {
1832 /* harmless, already unmapped */
1833 r = 0;
1834
1835 } else {
1836 DMERR("policy_remove_cblock failed");
1837 break;
1838 }
1839
1840 begin++;
1841 }
1842
1843 cache->commit_requested = true;
1844
1845 req->err = r;
1846 atomic_set(&req->complete, 1);
1847
1848 wake_up(&req->result_wait);
1849}
1850
1851static void process_invalidation_requests(struct cache *cache)
1852{
1853 struct list_head list;
1854 struct invalidation_request *req, *tmp;
1855
1856 INIT_LIST_HEAD(&list);
1857 spin_lock(&cache->invalidation_lock);
1858 list_splice_init(&cache->invalidation_requests, &list);
1859 spin_unlock(&cache->invalidation_lock);
1860
1861 list_for_each_entry_safe (req, tmp, &list, list)
1862 process_invalidation_request(cache, req);
1863}
1864
1865/*----------------------------------------------------------------
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001866 * Main worker loop
1867 *--------------------------------------------------------------*/
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001868static bool is_quiescing(struct cache *cache)
1869{
Joe Thornber238f8362013-10-30 17:29:30 +00001870 return atomic_read(&cache->quiescing);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001871}
1872
Joe Thornber66cb1912013-10-30 17:11:58 +00001873static void ack_quiescing(struct cache *cache)
1874{
1875 if (is_quiescing(cache)) {
1876 atomic_inc(&cache->quiescing_ack);
1877 wake_up(&cache->quiescing_wait);
1878 }
1879}
1880
1881static void wait_for_quiescing_ack(struct cache *cache)
1882{
1883 wait_event(cache->quiescing_wait, atomic_read(&cache->quiescing_ack));
1884}
1885
1886static void start_quiescing(struct cache *cache)
1887{
Joe Thornber238f8362013-10-30 17:29:30 +00001888 atomic_inc(&cache->quiescing);
Joe Thornber66cb1912013-10-30 17:11:58 +00001889 wait_for_quiescing_ack(cache);
1890}
1891
1892static void stop_quiescing(struct cache *cache)
1893{
Joe Thornber238f8362013-10-30 17:29:30 +00001894 atomic_set(&cache->quiescing, 0);
Joe Thornber66cb1912013-10-30 17:11:58 +00001895 atomic_set(&cache->quiescing_ack, 0);
1896}
1897
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001898static void wait_for_migrations(struct cache *cache)
1899{
Joe Thornbera59db672015-01-23 10:16:16 +00001900 wait_event(cache->migration_wait, !atomic_read(&cache->nr_allocated_migrations));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001901}
1902
1903static void stop_worker(struct cache *cache)
1904{
1905 cancel_delayed_work(&cache->waker);
1906 flush_workqueue(cache->wq);
1907}
1908
1909static void requeue_deferred_io(struct cache *cache)
1910{
1911 struct bio *bio;
1912 struct bio_list bios;
1913
1914 bio_list_init(&bios);
1915 bio_list_merge(&bios, &cache->deferred_bios);
1916 bio_list_init(&cache->deferred_bios);
1917
1918 while ((bio = bio_list_pop(&bios)))
1919 bio_endio(bio, DM_ENDIO_REQUEUE);
1920}
1921
1922static int more_work(struct cache *cache)
1923{
1924 if (is_quiescing(cache))
1925 return !list_empty(&cache->quiesced_migrations) ||
1926 !list_empty(&cache->completed_migrations) ||
1927 !list_empty(&cache->need_commit_migrations);
1928 else
1929 return !bio_list_empty(&cache->deferred_bios) ||
1930 !bio_list_empty(&cache->deferred_flush_bios) ||
Joe Thornbere2e74d62013-03-20 17:21:27 +00001931 !bio_list_empty(&cache->deferred_writethrough_bios) ||
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001932 !list_empty(&cache->quiesced_migrations) ||
1933 !list_empty(&cache->completed_migrations) ||
Joe Thornber65790ff2013-11-08 16:39:50 +00001934 !list_empty(&cache->need_commit_migrations) ||
1935 cache->invalidate;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001936}
1937
1938static void do_worker(struct work_struct *ws)
1939{
1940 struct cache *cache = container_of(ws, struct cache, worker);
1941
1942 do {
Joe Thornber66cb1912013-10-30 17:11:58 +00001943 if (!is_quiescing(cache)) {
1944 writeback_some_dirty_blocks(cache);
1945 process_deferred_writethrough_bios(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001946 process_deferred_bios(cache);
Joe Thornber65790ff2013-11-08 16:39:50 +00001947 process_invalidation_requests(cache);
Joe Thornber66cb1912013-10-30 17:11:58 +00001948 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001949
Joe Thornber7ae34e72014-11-06 10:18:04 +00001950 process_migrations(cache, &cache->quiesced_migrations, issue_copy_or_discard);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001951 process_migrations(cache, &cache->completed_migrations, complete_migration);
1952
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001953 if (commit_if_needed(cache)) {
1954 process_deferred_flush_bios(cache, false);
Joe Thornber304affa2014-06-24 15:36:58 -04001955 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001956
1957 /*
1958 * FIXME: rollback metadata or just go into a
1959 * failure mode and error everything
1960 */
1961 } else {
1962 process_deferred_flush_bios(cache, true);
1963 process_migrations(cache, &cache->need_commit_migrations,
1964 migration_success_post_commit);
1965 }
Joe Thornber66cb1912013-10-30 17:11:58 +00001966
1967 ack_quiescing(cache);
1968
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001969 } while (more_work(cache));
1970}
1971
1972/*
1973 * We want to commit periodically so that not too much
1974 * unwritten metadata builds up.
1975 */
1976static void do_waker(struct work_struct *ws)
1977{
1978 struct cache *cache = container_of(to_delayed_work(ws), struct cache, waker);
Joe Thornberf8350da2013-05-10 14:37:16 +01001979 policy_tick(cache->policy);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00001980 wake_worker(cache);
1981 queue_delayed_work(cache->wq, &cache->waker, COMMIT_PERIOD);
1982}
1983
1984/*----------------------------------------------------------------*/
1985
1986static int is_congested(struct dm_dev *dev, int bdi_bits)
1987{
1988 struct request_queue *q = bdev_get_queue(dev->bdev);
1989 return bdi_congested(&q->backing_dev_info, bdi_bits);
1990}
1991
1992static int cache_is_congested(struct dm_target_callbacks *cb, int bdi_bits)
1993{
1994 struct cache *cache = container_of(cb, struct cache, callbacks);
1995
1996 return is_congested(cache->origin_dev, bdi_bits) ||
1997 is_congested(cache->cache_dev, bdi_bits);
1998}
1999
2000/*----------------------------------------------------------------
2001 * Target methods
2002 *--------------------------------------------------------------*/
2003
2004/*
2005 * This function gets called on the error paths of the constructor, so we
2006 * have to cope with a partially initialised struct.
2007 */
2008static void destroy(struct cache *cache)
2009{
2010 unsigned i;
2011
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002012 if (cache->migration_pool)
2013 mempool_destroy(cache->migration_pool);
2014
2015 if (cache->all_io_ds)
2016 dm_deferred_set_destroy(cache->all_io_ds);
2017
2018 if (cache->prison)
2019 dm_bio_prison_destroy(cache->prison);
2020
2021 if (cache->wq)
2022 destroy_workqueue(cache->wq);
2023
2024 if (cache->dirty_bitset)
2025 free_bitset(cache->dirty_bitset);
2026
2027 if (cache->discard_bitset)
2028 free_bitset(cache->discard_bitset);
2029
2030 if (cache->copier)
2031 dm_kcopyd_client_destroy(cache->copier);
2032
2033 if (cache->cmd)
2034 dm_cache_metadata_close(cache->cmd);
2035
2036 if (cache->metadata_dev)
2037 dm_put_device(cache->ti, cache->metadata_dev);
2038
2039 if (cache->origin_dev)
2040 dm_put_device(cache->ti, cache->origin_dev);
2041
2042 if (cache->cache_dev)
2043 dm_put_device(cache->ti, cache->cache_dev);
2044
2045 if (cache->policy)
2046 dm_cache_policy_destroy(cache->policy);
2047
2048 for (i = 0; i < cache->nr_ctr_args ; i++)
2049 kfree(cache->ctr_args[i]);
2050 kfree(cache->ctr_args);
2051
2052 kfree(cache);
2053}
2054
2055static void cache_dtr(struct dm_target *ti)
2056{
2057 struct cache *cache = ti->private;
2058
2059 destroy(cache);
2060}
2061
2062static sector_t get_dev_size(struct dm_dev *dev)
2063{
2064 return i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
2065}
2066
2067/*----------------------------------------------------------------*/
2068
2069/*
2070 * Construct a cache device mapping.
2071 *
2072 * cache <metadata dev> <cache dev> <origin dev> <block size>
2073 * <#feature args> [<feature arg>]*
2074 * <policy> <#policy args> [<policy arg>]*
2075 *
2076 * metadata dev : fast device holding the persistent metadata
2077 * cache dev : fast device holding cached data blocks
2078 * origin dev : slow device holding original data blocks
2079 * block size : cache unit size in sectors
2080 *
2081 * #feature args : number of feature arguments passed
 2082 * feature args : writethrough or passthrough. (The default is writeback.)
2083 *
2084 * policy : the replacement policy to use
2085 * #policy args : an even number of policy arguments corresponding
2086 * to key/value pairs passed to the policy
2087 * policy args : key/value pairs passed to the policy
2088 * E.g. 'sequential_threshold 1024'
2089 * See cache-policies.txt for details.
2090 *
2091 * Optional feature arguments are:
2092 * writethrough : write through caching that prohibits cache block
2093 * content from being different from origin block content.
2094 * Without this argument, the default behaviour is to write
2095 * back cache block contents later for performance reasons,
2096 * so they may differ from the corresponding origin blocks.
2097 */
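/*
 * Illustrative example only (a rough sketch, not taken from this driver):
 * the device paths, sizes and policy arguments below are invented.  A
 * table line for a 20GiB origin cached with 512-sector (256KiB) blocks
 * in writethrough mode might look something like:
 *
 *   dmsetup create cached --table \
 *     '0 41943040 cache /dev/fast-meta /dev/fast-data /dev/slow 512 1 writethrough default 2 sequential_threshold 1024'
 */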
2098struct cache_args {
2099 struct dm_target *ti;
2100
2101 struct dm_dev *metadata_dev;
2102
2103 struct dm_dev *cache_dev;
2104 sector_t cache_sectors;
2105
2106 struct dm_dev *origin_dev;
2107 sector_t origin_sectors;
2108
2109 uint32_t block_size;
2110
2111 const char *policy_name;
2112 int policy_argc;
2113 const char **policy_argv;
2114
2115 struct cache_features features;
2116};
2117
2118static void destroy_cache_args(struct cache_args *ca)
2119{
2120 if (ca->metadata_dev)
2121 dm_put_device(ca->ti, ca->metadata_dev);
2122
2123 if (ca->cache_dev)
2124 dm_put_device(ca->ti, ca->cache_dev);
2125
2126 if (ca->origin_dev)
2127 dm_put_device(ca->ti, ca->origin_dev);
2128
2129 kfree(ca);
2130}
2131
2132static bool at_least_one_arg(struct dm_arg_set *as, char **error)
2133{
2134 if (!as->argc) {
2135 *error = "Insufficient args";
2136 return false;
2137 }
2138
2139 return true;
2140}
2141
2142static int parse_metadata_dev(struct cache_args *ca, struct dm_arg_set *as,
2143 char **error)
2144{
2145 int r;
2146 sector_t metadata_dev_size;
2147 char b[BDEVNAME_SIZE];
2148
2149 if (!at_least_one_arg(as, error))
2150 return -EINVAL;
2151
2152 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2153 &ca->metadata_dev);
2154 if (r) {
2155 *error = "Error opening metadata device";
2156 return r;
2157 }
2158
2159 metadata_dev_size = get_dev_size(ca->metadata_dev);
2160 if (metadata_dev_size > DM_CACHE_METADATA_MAX_SECTORS_WARNING)
2161 DMWARN("Metadata device %s is larger than %u sectors: excess space will not be used.",
 2162 bdevname(ca->metadata_dev->bdev, b), DM_CACHE_METADATA_MAX_SECTORS);
2163
2164 return 0;
2165}
2166
2167static int parse_cache_dev(struct cache_args *ca, struct dm_arg_set *as,
2168 char **error)
2169{
2170 int r;
2171
2172 if (!at_least_one_arg(as, error))
2173 return -EINVAL;
2174
2175 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2176 &ca->cache_dev);
2177 if (r) {
2178 *error = "Error opening cache device";
2179 return r;
2180 }
2181 ca->cache_sectors = get_dev_size(ca->cache_dev);
2182
2183 return 0;
2184}
2185
2186static int parse_origin_dev(struct cache_args *ca, struct dm_arg_set *as,
2187 char **error)
2188{
2189 int r;
2190
2191 if (!at_least_one_arg(as, error))
2192 return -EINVAL;
2193
2194 r = dm_get_device(ca->ti, dm_shift_arg(as), FMODE_READ | FMODE_WRITE,
2195 &ca->origin_dev);
2196 if (r) {
2197 *error = "Error opening origin device";
2198 return r;
2199 }
2200
2201 ca->origin_sectors = get_dev_size(ca->origin_dev);
2202 if (ca->ti->len > ca->origin_sectors) {
2203 *error = "Device size larger than cached device";
2204 return -EINVAL;
2205 }
2206
2207 return 0;
2208}
2209
2210static int parse_block_size(struct cache_args *ca, struct dm_arg_set *as,
2211 char **error)
2212{
Mike Snitzer05473042013-08-16 10:54:19 -04002213 unsigned long block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002214
2215 if (!at_least_one_arg(as, error))
2216 return -EINVAL;
2217
Mike Snitzer05473042013-08-16 10:54:19 -04002218 if (kstrtoul(dm_shift_arg(as), 10, &block_size) || !block_size ||
2219 block_size < DATA_DEV_BLOCK_SIZE_MIN_SECTORS ||
2220 block_size > DATA_DEV_BLOCK_SIZE_MAX_SECTORS ||
2221 block_size & (DATA_DEV_BLOCK_SIZE_MIN_SECTORS - 1)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002222 *error = "Invalid data block size";
2223 return -EINVAL;
2224 }
2225
Mike Snitzer05473042013-08-16 10:54:19 -04002226 if (block_size > ca->cache_sectors) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002227 *error = "Data block size is larger than the cache device";
2228 return -EINVAL;
2229 }
2230
Mike Snitzer05473042013-08-16 10:54:19 -04002231 ca->block_size = block_size;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002232
2233 return 0;
2234}
2235
2236static void init_features(struct cache_features *cf)
2237{
2238 cf->mode = CM_WRITE;
Joe Thornber2ee57d52013-10-24 14:10:29 -04002239 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002240}
2241
2242static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
2243 char **error)
2244{
2245 static struct dm_arg _args[] = {
2246 {0, 1, "Invalid number of cache feature arguments"},
2247 };
2248
2249 int r;
2250 unsigned argc;
2251 const char *arg;
2252 struct cache_features *cf = &ca->features;
2253
2254 init_features(cf);
2255
2256 r = dm_read_arg_group(_args, as, &argc, error);
2257 if (r)
2258 return -EINVAL;
2259
2260 while (argc--) {
2261 arg = dm_shift_arg(as);
2262
2263 if (!strcasecmp(arg, "writeback"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002264 cf->io_mode = CM_IO_WRITEBACK;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002265
2266 else if (!strcasecmp(arg, "writethrough"))
Joe Thornber2ee57d52013-10-24 14:10:29 -04002267 cf->io_mode = CM_IO_WRITETHROUGH;
2268
2269 else if (!strcasecmp(arg, "passthrough"))
2270 cf->io_mode = CM_IO_PASSTHROUGH;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002271
2272 else {
2273 *error = "Unrecognised cache feature requested";
2274 return -EINVAL;
2275 }
2276 }
2277
2278 return 0;
2279}
2280
2281static int parse_policy(struct cache_args *ca, struct dm_arg_set *as,
2282 char **error)
2283{
2284 static struct dm_arg _args[] = {
2285 {0, 1024, "Invalid number of policy arguments"},
2286 };
2287
2288 int r;
2289
2290 if (!at_least_one_arg(as, error))
2291 return -EINVAL;
2292
2293 ca->policy_name = dm_shift_arg(as);
2294
2295 r = dm_read_arg_group(_args, as, &ca->policy_argc, error);
2296 if (r)
2297 return -EINVAL;
2298
2299 ca->policy_argv = (const char **)as->argv;
2300 dm_consume_args(as, ca->policy_argc);
2301
2302 return 0;
2303}
2304
2305static int parse_cache_args(struct cache_args *ca, int argc, char **argv,
2306 char **error)
2307{
2308 int r;
2309 struct dm_arg_set as;
2310
2311 as.argc = argc;
2312 as.argv = argv;
2313
2314 r = parse_metadata_dev(ca, &as, error);
2315 if (r)
2316 return r;
2317
2318 r = parse_cache_dev(ca, &as, error);
2319 if (r)
2320 return r;
2321
2322 r = parse_origin_dev(ca, &as, error);
2323 if (r)
2324 return r;
2325
2326 r = parse_block_size(ca, &as, error);
2327 if (r)
2328 return r;
2329
2330 r = parse_features(ca, &as, error);
2331 if (r)
2332 return r;
2333
2334 r = parse_policy(ca, &as, error);
2335 if (r)
2336 return r;
2337
2338 return 0;
2339}
2340
2341/*----------------------------------------------------------------*/
2342
2343static struct kmem_cache *migration_cache;
2344
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002345#define NOT_CORE_OPTION 1
2346
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002347static int process_config_option(struct cache *cache, const char *key, const char *value)
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002348{
2349 unsigned long tmp;
2350
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002351 if (!strcasecmp(key, "migration_threshold")) {
2352 if (kstrtoul(value, 10, &tmp))
Alasdair G Kergon2c73c472013-05-10 14:37:21 +01002353 return -EINVAL;
2354
2355 cache->migration_threshold = tmp;
2356 return 0;
2357 }
2358
2359 return NOT_CORE_OPTION;
2360}
2361
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002362static int set_config_value(struct cache *cache, const char *key, const char *value)
2363{
2364 int r = process_config_option(cache, key, value);
2365
2366 if (r == NOT_CORE_OPTION)
2367 r = policy_set_config_value(cache->policy, key, value);
2368
2369 if (r)
2370 DMWARN("bad config value for %s: %s", key, value);
2371
2372 return r;
2373}
2374
2375static int set_config_values(struct cache *cache, int argc, const char **argv)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002376{
2377 int r = 0;
2378
2379 if (argc & 1) {
2380 DMWARN("Odd number of policy arguments given but they should be <key> <value> pairs.");
2381 return -EINVAL;
2382 }
2383
2384 while (argc) {
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002385 r = set_config_value(cache, argv[0], argv[1]);
2386 if (r)
2387 break;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002388
2389 argc -= 2;
2390 argv += 2;
2391 }
2392
2393 return r;
2394}
2395
2396static int create_cache_policy(struct cache *cache, struct cache_args *ca,
2397 char **error)
2398{
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002399 struct dm_cache_policy *p = dm_cache_policy_create(ca->policy_name,
2400 cache->cache_size,
2401 cache->origin_sectors,
2402 cache->sectors_per_block);
2403 if (IS_ERR(p)) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002404 *error = "Error creating cache's policy";
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002405 return PTR_ERR(p);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002406 }
Mikulas Patocka4cb3e1d2013-10-01 18:35:39 -04002407 cache->policy = p;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002408
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002409 return 0;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002410}
2411
Joe Thornber08b18452014-11-06 14:38:01 +00002412/*
Joe Thornber2bb812d2014-11-26 16:07:50 +00002413 * We want the discard block size to be at least as large as the cache
 2414 * block size, and to have no more than 2^14 discard blocks across the origin.
Joe Thornber08b18452014-11-06 14:38:01 +00002415 */
2416#define MAX_DISCARD_BLOCKS (1 << 14)
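/*
 * Worked example (numbers invented for illustration): with 128-sector
 * (64KiB) cache blocks and a 2TiB origin (2^32 sectors), the code below
 * starts with a 128-sector discard block and keeps doubling while the
 * origin would need more than MAX_DISCARD_BLOCKS discard blocks.  It
 * settles on 2^18 sectors (128MiB), since 2^32 / 2^18 == 2^14.
 */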
2417
2418static bool too_many_discard_blocks(sector_t discard_block_size,
2419 sector_t origin_size)
2420{
2421 (void) sector_div(origin_size, discard_block_size);
2422
2423 return origin_size > MAX_DISCARD_BLOCKS;
2424}
2425
2426static sector_t calculate_discard_block_size(sector_t cache_block_size,
2427 sector_t origin_size)
2428{
Joe Thornber2bb812d2014-11-26 16:07:50 +00002429 sector_t discard_block_size = cache_block_size;
Joe Thornber08b18452014-11-06 14:38:01 +00002430
2431 if (origin_size)
2432 while (too_many_discard_blocks(discard_block_size, origin_size))
2433 discard_block_size *= 2;
2434
2435 return discard_block_size;
2436}
2437
Joe Thornberd1d92202014-11-11 11:58:32 +00002438static void set_cache_size(struct cache *cache, dm_cblock_t size)
2439{
2440 dm_block_t nr_blocks = from_cblock(size);
2441
2442 if (nr_blocks > (1 << 20) && cache->cache_size != size)
2443 DMWARN_LIMIT("You have created a cache device with a lot of individual cache blocks (%llu)\n"
2444 "All these mappings can consume a lot of kernel memory, and take some time to read/write.\n"
2445 "Please consider increasing the cache block size to reduce the overall cache block count.",
2446 (unsigned long long) nr_blocks);
2447
2448 cache->cache_size = size;
2449}
2450
Joe Thornberf8350da2013-05-10 14:37:16 +01002451#define DEFAULT_MIGRATION_THRESHOLD 2048
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002452
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002453static int cache_create(struct cache_args *ca, struct cache **result)
2454{
2455 int r = 0;
2456 char **error = &ca->ti->error;
2457 struct cache *cache;
2458 struct dm_target *ti = ca->ti;
2459 dm_block_t origin_blocks;
2460 struct dm_cache_metadata *cmd;
2461 bool may_format = ca->features.mode == CM_WRITE;
2462
2463 cache = kzalloc(sizeof(*cache), GFP_KERNEL);
2464 if (!cache)
2465 return -ENOMEM;
2466
2467 cache->ti = ca->ti;
2468 ti->private = cache;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002469 ti->num_flush_bios = 2;
2470 ti->flush_supported = true;
2471
2472 ti->num_discard_bios = 1;
2473 ti->discards_supported = true;
2474 ti->discard_zeroes_data_unsupported = true;
Joe Thornber25726292014-11-24 14:05:16 +00002475 ti->split_discard_bios = false;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002476
Joe Thornber8c5008f2013-05-10 14:37:18 +01002477 cache->features = ca->features;
Mike Snitzer19b00922013-04-05 15:36:34 +01002478 ti->per_bio_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002479
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002480 cache->callbacks.congested_fn = cache_is_congested;
2481 dm_table_add_target_callbacks(ti->table, &cache->callbacks);
2482
2483 cache->metadata_dev = ca->metadata_dev;
2484 cache->origin_dev = ca->origin_dev;
2485 cache->cache_dev = ca->cache_dev;
2486
2487 ca->metadata_dev = ca->origin_dev = ca->cache_dev = NULL;
2488
2489 /* FIXME: factor out this whole section */
2490 origin_blocks = cache->origin_sectors = ca->origin_sectors;
Joe Thornber414dd672013-03-20 17:21:25 +00002491 origin_blocks = block_div(origin_blocks, ca->block_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002492 cache->origin_blocks = to_oblock(origin_blocks);
2493
2494 cache->sectors_per_block = ca->block_size;
2495 if (dm_set_target_max_io_len(ti, cache->sectors_per_block)) {
2496 r = -EINVAL;
2497 goto bad;
2498 }
2499
2500 if (ca->block_size & (ca->block_size - 1)) {
2501 dm_block_t cache_size = ca->cache_sectors;
2502
2503 cache->sectors_per_block_shift = -1;
Joe Thornber414dd672013-03-20 17:21:25 +00002504 cache_size = block_div(cache_size, ca->block_size);
Joe Thornberd1d92202014-11-11 11:58:32 +00002505 set_cache_size(cache, to_cblock(cache_size));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002506 } else {
2507 cache->sectors_per_block_shift = __ffs(ca->block_size);
Joe Thornberd1d92202014-11-11 11:58:32 +00002508 set_cache_size(cache, to_cblock(ca->cache_sectors >> cache->sectors_per_block_shift));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002509 }
2510
2511 r = create_cache_policy(cache, ca, error);
2512 if (r)
2513 goto bad;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002514
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002515 cache->policy_nr_args = ca->policy_argc;
Joe Thornber2f14f4b2013-05-10 14:37:21 +01002516 cache->migration_threshold = DEFAULT_MIGRATION_THRESHOLD;
2517
2518 r = set_config_values(cache, ca->policy_argc, ca->policy_argv);
2519 if (r) {
2520 *error = "Error setting cache policy's config values";
2521 goto bad;
2522 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002523
2524 cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
2525 ca->block_size, may_format,
2526 dm_cache_policy_get_hint_size(cache->policy));
2527 if (IS_ERR(cmd)) {
2528 *error = "Error creating metadata object";
2529 r = PTR_ERR(cmd);
2530 goto bad;
2531 }
2532 cache->cmd = cmd;
2533
Joe Thornber2ee57d52013-10-24 14:10:29 -04002534 if (passthrough_mode(&cache->features)) {
2535 bool all_clean;
2536
2537 r = dm_cache_metadata_all_clean(cache->cmd, &all_clean);
2538 if (r) {
2539 *error = "dm_cache_metadata_all_clean() failed";
2540 goto bad;
2541 }
2542
2543 if (!all_clean) {
2544 *error = "Cannot enter passthrough mode unless all blocks are clean";
2545 r = -EINVAL;
2546 goto bad;
2547 }
2548 }
2549
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002550 spin_lock_init(&cache->lock);
2551 bio_list_init(&cache->deferred_bios);
2552 bio_list_init(&cache->deferred_flush_bios);
Joe Thornbere2e74d62013-03-20 17:21:27 +00002553 bio_list_init(&cache->deferred_writethrough_bios);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002554 INIT_LIST_HEAD(&cache->quiesced_migrations);
2555 INIT_LIST_HEAD(&cache->completed_migrations);
2556 INIT_LIST_HEAD(&cache->need_commit_migrations);
Joe Thornbera59db672015-01-23 10:16:16 +00002557 atomic_set(&cache->nr_allocated_migrations, 0);
2558 atomic_set(&cache->nr_io_migrations, 0);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002559 init_waitqueue_head(&cache->migration_wait);
2560
Joe Thornber66cb1912013-10-30 17:11:58 +00002561 init_waitqueue_head(&cache->quiescing_wait);
Joe Thornber238f8362013-10-30 17:29:30 +00002562 atomic_set(&cache->quiescing, 0);
Joe Thornber66cb1912013-10-30 17:11:58 +00002563 atomic_set(&cache->quiescing_ack, 0);
2564
Wei Yongjunfa4d6832013-05-10 14:37:14 +01002565 r = -ENOMEM;
Anssi Hannula44fa8162014-08-01 11:55:47 -04002566 atomic_set(&cache->nr_dirty, 0);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002567 cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size));
2568 if (!cache->dirty_bitset) {
2569 *error = "could not allocate dirty bitset";
2570 goto bad;
2571 }
2572 clear_bitset(cache->dirty_bitset, from_cblock(cache->cache_size));
2573
Joe Thornber08b18452014-11-06 14:38:01 +00002574 cache->discard_block_size =
2575 calculate_discard_block_size(cache->sectors_per_block,
2576 cache->origin_sectors);
Joe Thornber25726292014-11-24 14:05:16 +00002577 cache->discard_nr_blocks = to_dblock(dm_sector_div_up(cache->origin_sectors,
2578 cache->discard_block_size));
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002579 cache->discard_bitset = alloc_bitset(from_dblock(cache->discard_nr_blocks));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002580 if (!cache->discard_bitset) {
2581 *error = "could not allocate discard bitset";
2582 goto bad;
2583 }
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002584 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002585
2586 cache->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle);
2587 if (IS_ERR(cache->copier)) {
2588 *error = "could not create kcopyd client";
2589 r = PTR_ERR(cache->copier);
2590 goto bad;
2591 }
2592
2593 cache->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM);
2594 if (!cache->wq) {
2595 *error = "could not create workqueue for metadata object";
2596 goto bad;
2597 }
2598 INIT_WORK(&cache->worker, do_worker);
2599 INIT_DELAYED_WORK(&cache->waker, do_waker);
2600 cache->last_commit_jiffies = jiffies;
2601
Joe Thornbera195db22014-10-06 16:30:06 -04002602 cache->prison = dm_bio_prison_create();
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002603 if (!cache->prison) {
2604 *error = "could not create bio prison";
2605 goto bad;
2606 }
2607
2608 cache->all_io_ds = dm_deferred_set_create();
2609 if (!cache->all_io_ds) {
2610 *error = "could not create all_io deferred set";
2611 goto bad;
2612 }
2613
2614 cache->migration_pool = mempool_create_slab_pool(MIGRATION_POOL_SIZE,
2615 migration_cache);
2616 if (!cache->migration_pool) {
2617 *error = "Error creating cache's migration mempool";
2618 goto bad;
2619 }
2620
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002621 cache->need_tick_bio = true;
2622 cache->sized = false;
Joe Thornber65790ff2013-11-08 16:39:50 +00002623 cache->invalidate = false;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002624 cache->commit_requested = false;
2625 cache->loaded_mappings = false;
2626 cache->loaded_discards = false;
2627
2628 load_stats(cache);
2629
2630 atomic_set(&cache->stats.demotion, 0);
2631 atomic_set(&cache->stats.promotion, 0);
2632 atomic_set(&cache->stats.copies_avoided, 0);
2633 atomic_set(&cache->stats.cache_cell_clash, 0);
2634 atomic_set(&cache->stats.commit_count, 0);
2635 atomic_set(&cache->stats.discard_count, 0);
2636
Joe Thornber65790ff2013-11-08 16:39:50 +00002637 spin_lock_init(&cache->invalidation_lock);
2638 INIT_LIST_HEAD(&cache->invalidation_requests);
2639
Joe Thornber066dbaa32015-05-15 15:18:01 +01002640 iot_init(&cache->origin_tracker);
2641
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002642 *result = cache;
2643 return 0;
2644
2645bad:
2646 destroy(cache);
2647 return r;
2648}
2649
2650static int copy_ctr_args(struct cache *cache, int argc, const char **argv)
2651{
2652 unsigned i;
2653 const char **copy;
2654
2655 copy = kcalloc(argc, sizeof(*copy), GFP_KERNEL);
2656 if (!copy)
2657 return -ENOMEM;
2658 for (i = 0; i < argc; i++) {
2659 copy[i] = kstrdup(argv[i], GFP_KERNEL);
2660 if (!copy[i]) {
2661 while (i--)
2662 kfree(copy[i]);
2663 kfree(copy);
2664 return -ENOMEM;
2665 }
2666 }
2667
2668 cache->nr_ctr_args = argc;
2669 cache->ctr_args = copy;
2670
2671 return 0;
2672}
2673
2674static int cache_ctr(struct dm_target *ti, unsigned argc, char **argv)
2675{
2676 int r = -EINVAL;
2677 struct cache_args *ca;
2678 struct cache *cache = NULL;
2679
2680 ca = kzalloc(sizeof(*ca), GFP_KERNEL);
2681 if (!ca) {
2682 ti->error = "Error allocating memory for cache";
2683 return -ENOMEM;
2684 }
2685 ca->ti = ti;
2686
2687 r = parse_cache_args(ca, argc, argv, &ti->error);
2688 if (r)
2689 goto out;
2690
2691 r = cache_create(ca, &cache);
Heinz Mauelshagen617a0b82013-03-20 17:21:26 +00002692 if (r)
2693 goto out;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002694
2695 r = copy_ctr_args(cache, argc - 3, (const char **)argv + 3);
2696 if (r) {
2697 destroy(cache);
2698 goto out;
2699 }
2700
2701 ti->private = cache;
2702
2703out:
2704 destroy_cache_args(ca);
2705 return r;
2706}
2707
Joe Thornber8c081b52014-05-13 16:18:38 +01002708static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002709{
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002710 int r;
2711 dm_oblock_t block = get_bio_block(cache, bio);
Mike Snitzer19b00922013-04-05 15:36:34 +01002712 size_t pb_data_size = get_per_bio_data_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002713 bool can_migrate = false;
2714 bool discarded_block;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002715 struct policy_result lookup_result;
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002716 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
Joe Thornberfb4100a2015-05-20 10:30:32 +01002717 struct old_oblock_lock ool;
2718
2719 ool.locker.fn = null_locker;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002720
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002721 if (unlikely(from_oblock(block) >= from_oblock(cache->origin_blocks))) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002722 /*
2723 * This can only occur if the io goes to a partial block at
2724 * the end of the origin device. We don't cache these.
2725 * Just remap to the origin and carry on.
2726 */
Heinz Mauelshagene893fba2014-03-12 16:13:39 +01002727 remap_to_origin(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002728 return DM_MAPIO_REMAPPED;
2729 }
2730
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002731 if (bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD)) {
2732 defer_bio(cache, bio);
2733 return DM_MAPIO_SUBMITTED;
2734 }
2735
2736 /*
2737 * Check to see if that block is currently migrating.
2738 */
Joe Thornber8c081b52014-05-13 16:18:38 +01002739 *cell = alloc_prison_cell(cache);
2740 if (!*cell) {
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002741 defer_bio(cache, bio);
2742 return DM_MAPIO_SUBMITTED;
2743 }
2744
Joe Thornber8c081b52014-05-13 16:18:38 +01002745 r = bio_detain(cache, block, bio, *cell,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002746 (cell_free_fn) free_prison_cell,
Joe Thornber8c081b52014-05-13 16:18:38 +01002747 cache, cell);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002748 if (r) {
2749 if (r < 0)
2750 defer_bio(cache, bio);
2751
2752 return DM_MAPIO_SUBMITTED;
2753 }
2754
2755 discarded_block = is_discarded_oblock(cache, block);
2756
2757 r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
Joe Thornberfb4100a2015-05-20 10:30:32 +01002758 bio, &ool.locker, &lookup_result);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002759 if (r == -EWOULDBLOCK) {
Joe Thornber8c081b52014-05-13 16:18:38 +01002760 cell_defer(cache, *cell, true);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002761 return DM_MAPIO_SUBMITTED;
2762
2763 } else if (r) {
2764 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
Joe Thornber8c081b52014-05-13 16:18:38 +01002765 cell_defer(cache, *cell, false);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002766 bio_io_error(bio);
2767 return DM_MAPIO_SUBMITTED;
2768 }
2769
Joe Thornber2ee57d52013-10-24 14:10:29 -04002770 r = DM_MAPIO_REMAPPED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002771 switch (lookup_result.op) {
2772 case POLICY_HIT:
Joe Thornber2ee57d52013-10-24 14:10:29 -04002773 if (passthrough_mode(&cache->features)) {
2774 if (bio_data_dir(bio) == WRITE) {
2775 /*
2776 * We need to invalidate this block, so
2777 * defer for the worker thread.
2778 */
Joe Thornber8c081b52014-05-13 16:18:38 +01002779 cell_defer(cache, *cell, true);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002780 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002781
Joe Thornber2ee57d52013-10-24 14:10:29 -04002782 } else {
Joe Thornber2ee57d52013-10-24 14:10:29 -04002783 inc_miss_counter(cache, bio);
2784 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002785 }
2786
2787 } else {
2788 inc_hit_counter(cache, bio);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002789 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
2790 !is_dirty(cache, lookup_result.cblock))
2791 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
2792 else
2793 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002794 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002795 break;
2796
2797 case POLICY_MISS:
2798 inc_miss_counter(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002799 if (pb->req_nr != 0) {
2800 /*
2801 * This is a duplicate writethrough io that is no
2802 * longer needed because the block has been demoted.
2803 */
2804 bio_endio(bio, 0);
Joe Thornber8c081b52014-05-13 16:18:38 +01002805 cell_defer(cache, *cell, false);
2806 r = DM_MAPIO_SUBMITTED;
2807
2808 } else
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002809 remap_to_origin_clear_discard(cache, bio, block);
Joe Thornber8c081b52014-05-13 16:18:38 +01002810
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002811 break;
2812
2813 default:
2814 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
2815 (unsigned) lookup_result.op);
Joe Thornber8c081b52014-05-13 16:18:38 +01002816 cell_defer(cache, *cell, false);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002817 bio_io_error(bio);
Joe Thornber2ee57d52013-10-24 14:10:29 -04002818 r = DM_MAPIO_SUBMITTED;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002819 }
2820
Joe Thornber2ee57d52013-10-24 14:10:29 -04002821 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002822}
2823
Joe Thornber8c081b52014-05-13 16:18:38 +01002824static int cache_map(struct dm_target *ti, struct bio *bio)
2825{
2826 int r;
Joe Thornberf824a2a2014-11-28 09:48:25 +00002827 struct dm_bio_prison_cell *cell = NULL;
Joe Thornber8c081b52014-05-13 16:18:38 +01002828 struct cache *cache = ti->private;
2829
2830 r = __cache_map(cache, bio, &cell);
Joe Thornber066dbaa32015-05-15 15:18:01 +01002831 if (r == DM_MAPIO_REMAPPED) {
2832 accounted_begin(cache, bio);
2833
2834 if (cell) {
2835 inc_ds(cache, bio, cell);
2836 cell_defer(cache, cell, false);
2837 }
Joe Thornber8c081b52014-05-13 16:18:38 +01002838 }
2839
2840 return r;
2841}
2842
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002843static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2844{
2845 struct cache *cache = ti->private;
2846 unsigned long flags;
Mike Snitzer19b00922013-04-05 15:36:34 +01002847 size_t pb_data_size = get_per_bio_data_size(cache);
2848 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002849
2850 if (pb->tick) {
2851 policy_tick(cache->policy);
2852
2853 spin_lock_irqsave(&cache->lock, flags);
2854 cache->need_tick_bio = true;
2855 spin_unlock_irqrestore(&cache->lock, flags);
2856 }
2857
2858 check_for_quiesced_migrations(cache, pb);
Joe Thornber066dbaa32015-05-15 15:18:01 +01002859 accounted_complete(cache, bio);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002860
2861 return 0;
2862}
2863
2864static int write_dirty_bitset(struct cache *cache)
2865{
2866 unsigned i, r;
2867
2868 for (i = 0; i < from_cblock(cache->cache_size); i++) {
2869 r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
2870 is_dirty(cache, to_cblock(i)));
2871 if (r)
2872 return r;
2873 }
2874
2875 return 0;
2876}
2877
2878static int write_discard_bitset(struct cache *cache)
2879{
2880 unsigned i, r;
2881
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002882 r = dm_cache_discard_bitset_resize(cache->cmd, cache->discard_block_size,
2883 cache->discard_nr_blocks);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002884 if (r) {
2885 DMERR("could not resize on-disk discard bitset");
2886 return r;
2887 }
2888
Joe Thornber1bad9bc2014-11-07 14:47:07 +00002889 for (i = 0; i < from_dblock(cache->discard_nr_blocks); i++) {
2890 r = dm_cache_set_discard(cache->cmd, to_dblock(i),
2891 is_discarded(cache, to_dblock(i)));
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002892 if (r)
2893 return r;
2894 }
2895
2896 return 0;
2897}
2898
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002899/*
2900 * returns true on success
2901 */
2902static bool sync_metadata(struct cache *cache)
2903{
2904 int r1, r2, r3, r4;
2905
2906 r1 = write_dirty_bitset(cache);
2907 if (r1)
2908 DMERR("could not write dirty bitset");
2909
2910 r2 = write_discard_bitset(cache);
2911 if (r2)
2912 DMERR("could not write discard bitset");
2913
2914 save_stats(cache);
2915
Joe Thornber05966612014-04-03 16:16:44 +01002916 r3 = dm_cache_write_hints(cache->cmd, cache->policy);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00002917 if (r3)
2918 DMERR("could not write hints");
2919
2920 /*
2921 * If writing the above metadata failed, we still commit, but don't
2922 * set the clean shutdown flag. This will effectively force every
2923 * dirty bit to be set on reload.
2924 */
2925 r4 = dm_cache_commit(cache->cmd, !r1 && !r2 && !r3);
2926 if (r4)
2927 DMERR("could not write cache metadata. Data loss may occur.");
2928
2929 return !r1 && !r2 && !r3 && !r4;
2930}
2931
2932static void cache_postsuspend(struct dm_target *ti)
2933{
2934 struct cache *cache = ti->private;
2935
2936 start_quiescing(cache);
2937 wait_for_migrations(cache);
2938 stop_worker(cache);
2939 requeue_deferred_io(cache);
2940 stop_quiescing(cache);
2941
2942 (void) sync_metadata(cache);
2943}
2944
2945static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock,
2946 bool dirty, uint32_t hint, bool hint_valid)
2947{
2948 int r;
2949 struct cache *cache = context;
2950
2951 r = policy_load_mapping(cache->policy, oblock, cblock, hint, hint_valid);
2952 if (r)
2953 return r;
2954
2955 if (dirty)
2956 set_dirty(cache, oblock, cblock);
2957 else
2958 clear_dirty(cache, oblock, cblock);
2959
2960 return 0;
2961}
2962
Joe Thornber3e2e1c32014-11-24 14:06:22 +00002963/*
 2964 * The discard block size in the on-disk metadata is not
 2965 * necessarily the same as the one we're currently using. So we have to
2966 * be careful to only set the discarded attribute if we know it
2967 * covers a complete block of the new size.
2968 */
2969struct discard_load_info {
2970 struct cache *cache;
2971
2972 /*
2973 * These blocks are sized using the on disk dblock size, rather
2974 * than the current one.
2975 */
2976 dm_block_t block_size;
2977 dm_block_t discard_begin, discard_end;
2978};
2979
2980static void discard_load_info_init(struct cache *cache,
2981 struct discard_load_info *li)
2982{
2983 li->cache = cache;
2984 li->discard_begin = li->discard_end = 0;
2985}
2986
2987static void set_discard_range(struct discard_load_info *li)
2988{
2989 sector_t b, e;
2990
2991 if (li->discard_begin == li->discard_end)
2992 return;
2993
2994 /*
2995 * Convert to sectors.
2996 */
2997 b = li->discard_begin * li->block_size;
2998 e = li->discard_end * li->block_size;
2999
3000 /*
3001 * Then convert back to the current dblock size.
3002 */
3003 b = dm_sector_div_up(b, li->cache->discard_block_size);
3004 sector_div(e, li->cache->discard_block_size);
3005
3006 /*
3007 * The origin may have shrunk, so we need to check we're still in
3008 * bounds.
3009 */
3010 if (e > from_dblock(li->cache->discard_nr_blocks))
3011 e = from_dblock(li->cache->discard_nr_blocks);
3012
3013 for (; b < e; b++)
3014 set_discard(li->cache, to_dblock(b));
3015}
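/*
 * A worked example of the conversion above (numbers invented for
 * illustration): suppose the metadata was written with 1024-sector
 * discard blocks and dblocks [3, 7) were discarded, i.e. sectors
 * [3072, 7168).  If we are now using 2048-sector discard blocks, b is
 * rounded up to 2 and e rounded down to 3, so only the current dblock 2
 * (sectors [4096, 6144)) is marked discarded; the partially covered
 * blocks at either end are left alone.
 */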
3016
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003017static int load_discard(void *context, sector_t discard_block_size,
Joe Thornber1bad9bc2014-11-07 14:47:07 +00003018 dm_dblock_t dblock, bool discard)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003019{
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003020 struct discard_load_info *li = context;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003021
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003022 li->block_size = discard_block_size;
Joe Thornber1bad9bc2014-11-07 14:47:07 +00003023
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003024 if (discard) {
3025 if (from_dblock(dblock) == li->discard_end)
3026 /*
3027 * We're already in a discard range, just extend it.
3028 */
3029 li->discard_end = li->discard_end + 1ULL;
3030
3031 else {
3032 /*
3033 * Emit the old range and start a new one.
3034 */
3035 set_discard_range(li);
3036 li->discard_begin = from_dblock(dblock);
3037 li->discard_end = li->discard_begin + 1ULL;
3038 }
3039 } else {
3040 set_discard_range(li);
3041 li->discard_begin = li->discard_end = 0;
3042 }
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003043
3044 return 0;
3045}
3046
Joe Thornberf494a9c2013-10-31 13:55:49 -04003047static dm_cblock_t get_cache_dev_size(struct cache *cache)
3048{
3049 sector_t size = get_dev_size(cache->cache_dev);
3050 (void) sector_div(size, cache->sectors_per_block);
3051 return to_cblock(size);
3052}
3053
3054static bool can_resize(struct cache *cache, dm_cblock_t new_size)
3055{
3056 if (from_cblock(new_size) > from_cblock(cache->cache_size))
3057 return true;
3058
3059 /*
3060 * We can't drop a dirty block when shrinking the cache.
3061 */
3062 while (from_cblock(new_size) < from_cblock(cache->cache_size)) {
3063 new_size = to_cblock(from_cblock(new_size) + 1);
3064 if (is_dirty(cache, new_size)) {
3065 DMERR("unable to shrink cache; cache block %llu is dirty",
3066 (unsigned long long) from_cblock(new_size));
3067 return false;
3068 }
3069 }
3070
3071 return true;
3072}
3073
3074static int resize_cache_dev(struct cache *cache, dm_cblock_t new_size)
3075{
3076 int r;
3077
Vincent Pelletier08844802013-11-30 12:58:42 +01003078 r = dm_cache_resize(cache->cmd, new_size);
Joe Thornberf494a9c2013-10-31 13:55:49 -04003079 if (r) {
3080 DMERR("could not resize cache metadata");
3081 return r;
3082 }
3083
Joe Thornberd1d92202014-11-11 11:58:32 +00003084 set_cache_size(cache, new_size);
Joe Thornberf494a9c2013-10-31 13:55:49 -04003085
3086 return 0;
3087}
3088
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003089static int cache_preresume(struct dm_target *ti)
3090{
3091 int r = 0;
3092 struct cache *cache = ti->private;
Joe Thornberf494a9c2013-10-31 13:55:49 -04003093 dm_cblock_t csize = get_cache_dev_size(cache);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003094
3095 /*
3096 * Check to see if the cache has resized.
3097 */
Joe Thornberf494a9c2013-10-31 13:55:49 -04003098 if (!cache->sized) {
3099 r = resize_cache_dev(cache, csize);
3100 if (r)
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003101 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003102
3103 cache->sized = true;
Joe Thornberf494a9c2013-10-31 13:55:49 -04003104
3105 } else if (csize != cache->cache_size) {
3106 if (!can_resize(cache, csize))
3107 return -EINVAL;
3108
3109 r = resize_cache_dev(cache, csize);
3110 if (r)
3111 return r;
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003112 }
3113
3114 if (!cache->loaded_mappings) {
Mike Snitzerea2dd8c2013-03-20 17:21:28 +00003115 r = dm_cache_load_mappings(cache->cmd, cache->policy,
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003116 load_mapping, cache);
3117 if (r) {
3118 DMERR("could not load cache mappings");
3119 return r;
3120 }
3121
3122 cache->loaded_mappings = true;
3123 }
3124
3125 if (!cache->loaded_discards) {
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003126 struct discard_load_info li;
3127
3128 /*
3129 * The discard bitset could have been resized, or the
3130 * discard block size changed. To be safe we start by
3131 * setting every dblock to not discarded.
3132 */
3133 clear_bitset(cache->discard_bitset, from_dblock(cache->discard_nr_blocks));
3134
3135 discard_load_info_init(cache, &li);
3136 r = dm_cache_load_discards(cache->cmd, load_discard, &li);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003137 if (r) {
3138 DMERR("could not load origin discards");
3139 return r;
3140 }
Joe Thornber3e2e1c32014-11-24 14:06:22 +00003141 set_discard_range(&li);
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003142
3143 cache->loaded_discards = true;
3144 }
3145
3146 return r;
3147}
3148
3149static void cache_resume(struct dm_target *ti)
3150{
3151 struct cache *cache = ti->private;
3152
3153 cache->need_tick_bio = true;
3154 do_waker(&cache->waker.work);
3155}
3156
3157/*
3158 * Status format:
3159 *
Mike Snitzer6a388612014-01-09 16:04:12 -05003160 * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
3161 * <cache block size> <#used cache blocks>/<#total cache blocks>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003162 * <#read hits> <#read misses> <#write hits> <#write misses>
Mike Snitzer6a388612014-01-09 16:04:12 -05003163 * <#demotions> <#promotions> <#dirty>
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003164 * <#features> <features>*
3165 * <#core args> <core args>
Mike Snitzer2e68c4e2014-01-15 21:06:55 -05003166 * <policy name> <#policy args> <policy args>*
Joe Thornberc6b4fcb2013-03-01 22:45:51 +00003167 */
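/*
 * For illustration only, an invented STATUSTYPE_INFO line following the
 * layout above might read:
 *
 *   8 27/2048 512 189/4096 1432 227 945 61 7 13 4 1 writeback 2 migration_threshold 2048 <policy name> <policy args>*
 *
 * i.e. 8-sector metadata blocks (27 of 2048 used), 512-sector cache
 * blocks (189 of 4096 resident), the hit/miss/demotion/promotion/dirty
 * counters, one feature arg, the two core args, then whatever the policy
 * chooses to emit.
 */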
static void cache_status(struct dm_target *ti, status_type_t type,
			 unsigned status_flags, char *result, unsigned maxlen)
{
	int r = 0;
	unsigned i;
	ssize_t sz = 0;
	dm_block_t nr_free_blocks_metadata = 0;
	dm_block_t nr_blocks_metadata = 0;
	char buf[BDEVNAME_SIZE];
	struct cache *cache = ti->private;
	dm_cblock_t residency;

	switch (type) {
	case STATUSTYPE_INFO:
		/* Commit to ensure statistics aren't out-of-date */
		if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti)) {
			r = dm_cache_commit(cache->cmd, false);
			if (r)
				DMERR("could not commit metadata for accurate status");
		}

		r = dm_cache_get_free_metadata_block_count(cache->cmd,
							   &nr_free_blocks_metadata);
		if (r) {
			DMERR("could not get metadata free block count");
			goto err;
		}

		r = dm_cache_get_metadata_dev_size(cache->cmd, &nr_blocks_metadata);
		if (r) {
			DMERR("could not get metadata device size");
			goto err;
		}

		residency = policy_residency(cache->policy);

		DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
		       (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
		       (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
		       (unsigned long long)nr_blocks_metadata,
		       cache->sectors_per_block,
		       (unsigned long long) from_cblock(residency),
		       (unsigned long long) from_cblock(cache->cache_size),
		       (unsigned) atomic_read(&cache->stats.read_hit),
		       (unsigned) atomic_read(&cache->stats.read_miss),
		       (unsigned) atomic_read(&cache->stats.write_hit),
		       (unsigned) atomic_read(&cache->stats.write_miss),
		       (unsigned) atomic_read(&cache->stats.demotion),
		       (unsigned) atomic_read(&cache->stats.promotion),
		       (unsigned long) atomic_read(&cache->nr_dirty));

		if (writethrough_mode(&cache->features))
			DMEMIT("1 writethrough ");
		else if (passthrough_mode(&cache->features))
			DMEMIT("1 passthrough ");
		else if (writeback_mode(&cache->features))
			DMEMIT("1 writeback ");
		else {
			DMERR("internal error: unknown io mode: %d", (int) cache->features.io_mode);
			goto err;
		}

		DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);

		DMEMIT("%s ", dm_cache_policy_get_name(cache->policy));
		if (sz < maxlen) {
			r = policy_emit_config_values(cache->policy, result + sz, maxlen - sz);
			if (r)
				DMERR("policy_emit_config_values returned %d", r);
		}

		break;

	case STATUSTYPE_TABLE:
		format_dev_t(buf, cache->metadata_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->cache_dev->bdev->bd_dev);
		DMEMIT("%s ", buf);
		format_dev_t(buf, cache->origin_dev->bdev->bd_dev);
		DMEMIT("%s", buf);

		for (i = 0; i < cache->nr_ctr_args - 1; i++)
			DMEMIT(" %s", cache->ctr_args[i]);
		if (cache->nr_ctr_args)
			DMEMIT(" %s", cache->ctr_args[cache->nr_ctr_args - 1]);
	}

	return;

err:
	DMEMIT("Error");
}

/*
 * A cache block range can take two forms:
 *
 * i) A single cblock, eg. '3456'
 * ii) A begin and end cblock with a dash between, eg. 123-234
 */
static int parse_cblock_range(struct cache *cache, const char *str,
			      struct cblock_range *result)
{
	char dummy;
	uint64_t b, e;
	int r;

	/*
	 * Try and parse form (ii) first.
	 */
	r = sscanf(str, "%llu-%llu%c", &b, &e, &dummy);
	if (r < 0)
		return r;

	if (r == 2) {
		result->begin = to_cblock(b);
		result->end = to_cblock(e);
		return 0;
	}

	/*
	 * That didn't work, try form (i).
	 */
	r = sscanf(str, "%llu%c", &b, &dummy);
	if (r < 0)
		return r;

	if (r == 1) {
		result->begin = to_cblock(b);
		result->end = to_cblock(from_cblock(result->begin) + 1u);
		return 0;
	}

	DMERR("invalid cblock range '%s'", str);
	return -EINVAL;
}

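/*
 * Check the range lies within the cache and is non-empty; range->end is
 * exclusive.
 */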
static int validate_cblock_range(struct cache *cache, struct cblock_range *range)
{
	uint64_t b = from_cblock(range->begin);
	uint64_t e = from_cblock(range->end);
	uint64_t n = from_cblock(cache->cache_size);

	if (b >= n) {
		DMERR("begin cblock out of range: %llu >= %llu", b, n);
		return -EINVAL;
	}

	if (e > n) {
		DMERR("end cblock out of range: %llu > %llu", e, n);
		return -EINVAL;
	}

	if (b >= e) {
		DMERR("invalid cblock range: %llu >= %llu", b, e);
		return -EINVAL;
	}

	return 0;
}

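/*
 * Queue the range on the cache's invalidation list, kick the worker and
 * wait for it to signal completion.
 */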
static int request_invalidation(struct cache *cache, struct cblock_range *range)
{
	struct invalidation_request req;

	INIT_LIST_HEAD(&req.list);
	req.cblocks = range;
	atomic_set(&req.complete, 0);
	req.err = 0;
	init_waitqueue_head(&req.result_wait);

	spin_lock(&cache->invalidation_lock);
	list_add(&req.list, &cache->invalidation_requests);
	spin_unlock(&cache->invalidation_lock);
	wake_worker(cache);

	wait_event(req.result_wait, atomic_read(&req.complete));
	return req.err;
}

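/*
 * Invalidation is only permitted in passthrough mode, which guarantees
 * the cache contents are clean, so dropping a mapping cannot lose data.
 */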
static int process_invalidate_cblocks_message(struct cache *cache, unsigned count,
					      const char **cblock_ranges)
{
	int r = 0;
	unsigned i;
	struct cblock_range range;

	if (!passthrough_mode(&cache->features)) {
		DMERR("cache has to be in passthrough mode for invalidation");
		return -EPERM;
	}

	for (i = 0; i < count; i++) {
		r = parse_cblock_range(cache, cblock_ranges[i], &range);
		if (r)
			break;

		r = validate_cblock_range(cache, &range);
		if (r)
			break;

		/*
		 * Pass the begin and end cblocks to the worker and wake it.
		 */
		r = request_invalidation(cache, &range);
		if (r)
			break;
	}

	return r;
}

/*
 * Supports
 *	"<key> <value>"
 * and
 *	"invalidate_cblocks [(<begin>)|(<begin>-<end>)]*"
 *
 * The key migration_threshold is supported by the cache target core.
 */
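/*
 * For example, to invalidate cblock 2345 and the range 3000-3100 on a
 * cache device (device name and numbers purely illustrative):
 *
 *   dmsetup message my_cache 0 invalidate_cblocks 2345 3000-3100
 */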
static int cache_message(struct dm_target *ti, unsigned argc, char **argv)
{
	struct cache *cache = ti->private;

	if (!argc)
		return -EINVAL;

	if (!strcasecmp(argv[0], "invalidate_cblocks"))
		return process_invalidate_cblocks_message(cache, argc - 1, (const char **) argv + 1);

	if (argc != 2)
		return -EINVAL;

	return set_config_value(cache, argv[0], argv[1]);
}

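/*
 * Report the cache and origin devices to the device-mapper core so it
 * can stack queue limits and track device dependencies.
 */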
static int cache_iterate_devices(struct dm_target *ti,
				 iterate_devices_callout_fn fn, void *data)
{
	int r = 0;
	struct cache *cache = ti->private;

	r = fn(ti, cache->cache_dev, 0, get_dev_size(cache->cache_dev), data);
	if (!r)
		r = fn(ti, cache->origin_dev, 0, ti->len, data);

	return r;
}

/*
 * We assume I/O is going to the origin (which is the device more likely
 * to have restrictions, e.g. from being striped).  Looking up the exact
 * location of the data would be expensive and could easily be out of
 * date by the time the bio is submitted.
 */
static int cache_bvec_merge(struct dm_target *ti,
			    struct bvec_merge_data *bvm,
			    struct bio_vec *biovec, int max_size)
{
	struct cache *cache = ti->private;
	struct request_queue *q = bdev_get_queue(cache->origin_dev->bdev);

	if (!q->merge_bvec_fn)
		return max_size;

	bvm->bi_bdev = cache->origin_dev->bdev;
	return min(max_size, q->merge_bvec_fn(q, bvm, biovec));
}

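/*
 * Discard limits are derived from the cache's internal discard block
 * size: granularity matches one discard block, and a single discard bio
 * is capped at 1024 discard blocks (or the origin size, if smaller).
 */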
static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
	/*
	 * FIXME: these limits may be incompatible with the cache device
	 */
	limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
					    cache->origin_sectors);
	limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
}

static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct cache *cache = ti->private;
	uint64_t io_opt_sectors = limits->io_opt >> SECTOR_SHIFT;

	/*
	 * If the system-determined stacked limits are compatible with the
	 * cache's blocksize (io_opt is a factor) do not override them.
	 */
	if (io_opt_sectors < cache->sectors_per_block ||
	    do_div(io_opt_sectors, cache->sectors_per_block)) {
		blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
		blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
	}
	set_discard_limits(cache, limits);
}

/*----------------------------------------------------------------*/

static struct target_type cache_target = {
	.name = "cache",
	.version = {1, 6, 0},
	.module = THIS_MODULE,
	.ctr = cache_ctr,
	.dtr = cache_dtr,
	.map = cache_map,
	.end_io = cache_end_io,
	.postsuspend = cache_postsuspend,
	.preresume = cache_preresume,
	.resume = cache_resume,
	.status = cache_status,
	.message = cache_message,
	.iterate_devices = cache_iterate_devices,
	.merge = cache_bvec_merge,
	.io_hints = cache_io_hints,
};

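/*
 * Register the target with the device-mapper core and create the slab
 * cache backing the per-block migration structures.
 */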
static int __init dm_cache_init(void)
{
	int r;

	r = dm_register_target(&cache_target);
	if (r) {
		DMERR("cache target registration failed: %d", r);
		return r;
	}

	migration_cache = KMEM_CACHE(dm_cache_migration, 0);
	if (!migration_cache) {
		dm_unregister_target(&cache_target);
		return -ENOMEM;
	}

	return 0;
}

static void __exit dm_cache_exit(void)
{
	dm_unregister_target(&cache_target);
	kmem_cache_destroy(migration_cache);
}

module_init(dm_cache_init);
module_exit(dm_cache_exit);

MODULE_DESCRIPTION(DM_NAME " cache target");
MODULE_AUTHOR("Joe Thornber <ejt@redhat.com>");
MODULE_LICENSE("GPL");