Blame - drivers/md/dm-snap-persistent.c - kernel/hikey-linaro

blob: 936b34e0959fdd5cdeaa6eddc51dee888fb05447 [file] [log] [blame]

Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
				3	* Copyright (C) 2006-2008 Red Hat GmbH
				4	*
				5	* This file is released under the GPL.
				6	*/
				7
				8	#include "dm-exception-store.h"
				9	#include "dm-snap.h"
				10
				11	#include <linux/mm.h>
				12	#include <linux/pagemap.h>
				13	#include <linux/vmalloc.h>
				14	#include <linux/slab.h>
				15	#include <linux/dm-io.h>
				16
				17	#define DM_MSG_PREFIX "persistent snapshot"
				18	#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
				19
				20	/*-----------------------------------------------------------------
				21	* Persistent snapshots, by persistent we mean that the snapshot
				22	* will survive a reboot.
				23	*---------------------------------------------------------------*/
				24
				25	/*
				26	* We need to store a record of which parts of the origin have
				27	* been copied to the snapshot device. The snapshot code
				28	* requires that we copy exception chunks to chunk aligned areas
				29	* of the COW store. It makes sense therefore, to store the
				30	* metadata in chunk size blocks.
				31	*
				32	* There is no backward or forward compatibility implemented,
				33	* snapshots with different disk versions than the kernel will
				34	* not be usable. It is expected that "lvcreate" will blank out
				35	* the start of a fresh COW device before calling the snapshot
				36	* constructor.
				37	*
				38	* The first chunk of the COW device just contains the header.
				39	* After this there is a chunk filled with exception metadata,
				40	* followed by as many exception chunks as can fit in the
				41	* metadata areas.
				42	*
				43	* All on disk structures are in little-endian format. The end
				44	* of the exceptions info is indicated by an exception with a
				45	* new_chunk of 0, which is invalid since it would point to the
				46	* header chunk.
				47	*/
				48
				49	/*
				50	* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
				51	*/
				52	#define SNAP_MAGIC 0x70416e53
				53
				54	/*
				55	* The on-disk version of the metadata.
				56	*/
				57	#define SNAPSHOT_DISK_VERSION 1
				58
				59	struct disk_header {
				60	uint32_t magic;
				61
				62	/*
				63	* Is this snapshot valid. There is no way of recovering
				64	* an invalid snapshot.
				65	*/
				66	uint32_t valid;
				67
				68	/*
				69	* Simple, incrementing version. no backward
				70	* compatibility.
				71	*/
				72	uint32_t version;
				73
				74	/* In sectors */
				75	uint32_t chunk_size;
				76	};
				77
				78	struct disk_exception {
				79	uint64_t old_chunk;
				80	uint64_t new_chunk;
				81	};
				82
				83	struct commit_callback {
				84	void (callback)(void , int success);
				85	void *context;
				86	};
				87
				88	/*
				89	* The top level structure for a persistent exception store.
				90	*/
				91	struct pstore {
				92	struct dm_snapshot snap; / up pointer to my snapshot */
				93	int version;
				94	int valid;
				95	uint32_t exceptions_per_area;
				96
				97	/*
				98	* Now that we have an asynchronous kcopyd there is no
				99	* need for large chunk sizes, so it wont hurt to have a
				100	* whole chunks worth of metadata in memory at once.
				101	*/
				102	void *area;
				103
				104	/*
				105	* An area of zeros used to clear the next area.
				106	*/
				107	void *zero_area;
				108
				109	/*
				110	* Used to keep track of which metadata area the data in
				111	* 'chunk' refers to.
				112	*/
				113	chunk_t current_area;
				114
				115	/*
				116	* The next free chunk for an exception.
				117	*/
				118	chunk_t next_free;
				119
				120	/*
				121	* The index of next free exception in the current
				122	* metadata area.
				123	*/
				124	uint32_t current_committed;
				125
				126	atomic_t pending_count;
				127	uint32_t callback_count;
				128	struct commit_callback *callbacks;
				129	struct dm_io_client *io_client;
				130
				131	struct workqueue_struct *metadata_wq;
				132	};
				133
				134	static unsigned sectors_to_pages(unsigned sectors)
				135	{
				136	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
				137	}
				138
				139	static int alloc_area(struct pstore *ps)
				140	{
				141	int r = -ENOMEM;
				142	size_t len;
				143
				144	len = ps->snap->chunk_size << SECTOR_SHIFT;
				145
				146	/*
				147	* Allocate the chunk_size block of memory that will hold
				148	* a single metadata area.
				149	*/
				150	ps->area = vmalloc(len);
				151	if (!ps->area)
				152	return r;
				153
				154	ps->zero_area = vmalloc(len);
				155	if (!ps->zero_area) {
				156	vfree(ps->area);
				157	return r;
				158	}
				159	memset(ps->zero_area, 0, len);
				160
				161	return 0;
				162	}
				163
				164	static void free_area(struct pstore *ps)
				165	{
				166	vfree(ps->area);
				167	ps->area = NULL;
				168	vfree(ps->zero_area);
				169	ps->zero_area = NULL;
				170	}
				171
				172	struct mdata_req {
				173	struct dm_io_region *where;
				174	struct dm_io_request *io_req;
				175	struct work_struct work;
				176	int result;
				177	};
				178
				179	static void do_metadata(struct work_struct *work)
				180	{
				181	struct mdata_req *req = container_of(work, struct mdata_req, work);
				182
				183	req->result = dm_io(req->io_req, 1, req->where, NULL);
				184	}
				185
				186	/*
				187	* Read or write a chunk aligned and sized block of data from a device.
				188	*/
				189	static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
				190	{
				191	struct dm_io_region where = {
				192	.bdev = ps->snap->cow->bdev,
				193	.sector = ps->snap->chunk_size * chunk,
				194	.count = ps->snap->chunk_size,
				195	};
				196	struct dm_io_request io_req = {
				197	.bi_rw = rw,
				198	.mem.type = DM_IO_VMA,
				199	.mem.ptr.vma = ps->area,
				200	.client = ps->io_client,
				201	.notify.fn = NULL,
				202	};
				203	struct mdata_req req;
				204
				205	if (!metadata)
				206	return dm_io(&io_req, 1, &where, NULL);
				207
				208	req.where = &where;
				209	req.io_req = &io_req;
				210
				211	/*
				212	* Issue the synchronous I/O from a different thread
				213	* to avoid generic_make_request recursion.
				214	*/
				215	INIT_WORK(&req.work, do_metadata);
				216	queue_work(ps->metadata_wq, &req.work);
				217	flush_workqueue(ps->metadata_wq);
				218
				219	return req.result;
				220	}
				221
				222	/*
				223	* Convert a metadata area index to a chunk index.
				224	*/
				225	static chunk_t area_location(struct pstore *ps, chunk_t area)
				226	{
				227	return 1 + ((ps->exceptions_per_area + 1) * area);
				228	}
				229
				230	/*
				231	* Read or write a metadata area. Remembering to skip the first
				232	* chunk which holds the header.
				233	*/
				234	static int area_io(struct pstore *ps, int rw)
				235	{
				236	int r;
				237	chunk_t chunk;
				238
				239	chunk = area_location(ps, ps->current_area);
				240
				241	r = chunk_io(ps, chunk, rw, 0);
				242	if (r)
				243	return r;
				244
				245	return 0;
				246	}
				247
				248	static void zero_memory_area(struct pstore *ps)
				249	{
				250	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
				251	}
				252
				253	static int zero_disk_area(struct pstore *ps, chunk_t area)
				254	{
				255	struct dm_io_region where = {
				256	.bdev = ps->snap->cow->bdev,
				257	.sector = ps->snap->chunk_size * area_location(ps, area),
				258	.count = ps->snap->chunk_size,
				259	};
				260	struct dm_io_request io_req = {
				261	.bi_rw = WRITE,
				262	.mem.type = DM_IO_VMA,
				263	.mem.ptr.vma = ps->zero_area,
				264	.client = ps->io_client,
				265	.notify.fn = NULL,
				266	};
				267
				268	return dm_io(&io_req, 1, &where, NULL);
				269	}
				270
				271	static int read_header(struct pstore ps, int new_snapshot)
				272	{
				273	int r;
				274	struct disk_header *dh;
				275	chunk_t chunk_size;
				276	int chunk_size_supplied = 1;
				277
				278	/*
				279	* Use default chunk size (or hardsect_size, if larger) if none supplied
				280	*/
				281	if (!ps->snap->chunk_size) {
				282	ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
				283	bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
				284	ps->snap->chunk_mask = ps->snap->chunk_size - 1;
				285	ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
				286	chunk_size_supplied = 0;
				287	}
				288
				289	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
				290	chunk_size));
				291	if (IS_ERR(ps->io_client))
				292	return PTR_ERR(ps->io_client);
				293
				294	r = alloc_area(ps);
				295	if (r)
				296	return r;
				297
				298	r = chunk_io(ps, 0, READ, 1);
				299	if (r)
				300	goto bad;
				301
				302	dh = (struct disk_header *) ps->area;
				303
				304	if (le32_to_cpu(dh->magic) == 0) {
				305	*new_snapshot = 1;
				306	return 0;
				307	}
				308
				309	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
				310	DMWARN("Invalid or corrupt snapshot");
				311	r = -ENXIO;
				312	goto bad;
				313	}
				314
				315	*new_snapshot = 0;
				316	ps->valid = le32_to_cpu(dh->valid);
				317	ps->version = le32_to_cpu(dh->version);
				318	chunk_size = le32_to_cpu(dh->chunk_size);
				319
				320	if (!chunk_size_supplied \|\| ps->snap->chunk_size == chunk_size)
				321	return 0;
				322
				323	DMWARN("chunk size %llu in device metadata overrides "
				324	"table chunk size of %llu.",
				325	(unsigned long long)chunk_size,
				326	(unsigned long long)ps->snap->chunk_size);
				327
				328	/* We had a bogus chunk_size. Fix stuff up. */
				329	free_area(ps);
				330
				331	ps->snap->chunk_size = chunk_size;
				332	ps->snap->chunk_mask = chunk_size - 1;
				333	ps->snap->chunk_shift = ffs(chunk_size) - 1;
				334
				335	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
				336	ps->io_client);
				337	if (r)
				338	return r;
				339
				340	r = alloc_area(ps);
				341	return r;
				342
				343	bad:
				344	free_area(ps);
				345	return r;
				346	}
				347
				348	static int write_header(struct pstore *ps)
				349	{
				350	struct disk_header *dh;
				351
				352	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
				353
				354	dh = (struct disk_header *) ps->area;
				355	dh->magic = cpu_to_le32(SNAP_MAGIC);
				356	dh->valid = cpu_to_le32(ps->valid);
				357	dh->version = cpu_to_le32(ps->version);
				358	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
				359
				360	return chunk_io(ps, 0, WRITE, 1);
				361	}
				362
				363	/*
				364	* Access functions for the disk exceptions, these do the endian conversions.
				365	*/
				366	static struct disk_exception get_exception(struct pstore ps, uint32_t index)
				367	{
				368	BUG_ON(index >= ps->exceptions_per_area);
				369
				370	return ((struct disk_exception *) ps->area) + index;
				371	}
				372
				373	static void read_exception(struct pstore *ps,
				374	uint32_t index, struct disk_exception *result)
				375	{
				376	struct disk_exception *e = get_exception(ps, index);
				377
				378	/* copy it */
				379	result->old_chunk = le64_to_cpu(e->old_chunk);
				380	result->new_chunk = le64_to_cpu(e->new_chunk);
				381	}
				382
				383	static void write_exception(struct pstore *ps,
				384	uint32_t index, struct disk_exception *de)
				385	{
				386	struct disk_exception *e = get_exception(ps, index);
				387
				388	/* copy it */
				389	e->old_chunk = cpu_to_le64(de->old_chunk);
				390	e->new_chunk = cpu_to_le64(de->new_chunk);
				391	}
				392
				393	/*
				394	* Registers the exceptions that are present in the current area.
				395	* 'full' is filled in to indicate if the area has been
				396	* filled.
				397	*/
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	398	static int insert_exceptions(struct pstore *ps,
				399	int (callback)(void callback_context,
				400	chunk_t old, chunk_t new),
				401	void *callback_context,
				402	int *full)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	403	{
				404	int r;
				405	unsigned int i;
				406	struct disk_exception de;
				407
				408	/* presume the area is full */
				409	*full = 1;
				410
				411	for (i = 0; i < ps->exceptions_per_area; i++) {
				412	read_exception(ps, i, &de);
				413
				414	/*
				415	* If the new_chunk is pointing at the start of
				416	* the COW device, where the first metadata area
				417	* is we know that we've hit the end of the
				418	* exceptions. Therefore the area is not full.
				419	*/
				420	if (de.new_chunk == 0LL) {
				421	ps->current_committed = i;
				422	*full = 0;
				423	break;
				424	}
				425
				426	/*
				427	* Keep track of the start of the free chunks.
				428	*/
				429	if (ps->next_free <= de.new_chunk)
				430	ps->next_free = de.new_chunk + 1;
				431
				432	/*
				433	* Otherwise we add the exception to the snapshot.
				434	*/
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	435	r = callback(callback_context, de.old_chunk, de.new_chunk);
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	436	if (r)
				437	return r;
				438	}
				439
				440	return 0;
				441	}
				442
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	443	static int read_exceptions(struct pstore *ps,
				444	int (callback)(void callback_context, chunk_t old,
				445	chunk_t new),
				446	void *callback_context)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	447	{
				448	int r, full = 1;
				449
				450	/*
				451	* Keeping reading chunks and inserting exceptions until
				452	* we find a partially full area.
				453	*/
				454	for (ps->current_area = 0; full; ps->current_area++) {
				455	r = area_io(ps, READ);
				456	if (r)
				457	return r;
				458
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	459	r = insert_exceptions(ps, callback, callback_context, &full);
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	460	if (r)
				461	return r;
				462	}
				463
				464	ps->current_area--;
				465
				466	return 0;
				467	}
				468
				469	static struct pstore get_info(struct dm_exception_store store)
				470	{
				471	return (struct pstore *) store->context;
				472	}
				473
				474	static void persistent_fraction_full(struct dm_exception_store *store,
				475	sector_t numerator, sector_t denominator)
				476	{
				477	numerator = get_info(store)->next_free store->snap->chunk_size;
				478	*denominator = get_dev_size(store->snap->cow->bdev);
				479	}
				480
				481	static void persistent_destroy(struct dm_exception_store *store)
				482	{
				483	struct pstore *ps = get_info(store);
				484
				485	destroy_workqueue(ps->metadata_wq);
				486	dm_io_client_destroy(ps->io_client);
				487	vfree(ps->callbacks);
				488	free_area(ps);
				489	kfree(ps);
				490	}
				491
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	492	static int persistent_read_metadata(struct dm_exception_store *store,
				493	int (callback)(void callback_context,
				494	chunk_t old, chunk_t new),
				495	void *callback_context)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	496	{
				497	int r, uninitialized_var(new_snapshot);
				498	struct pstore *ps = get_info(store);
				499
				500	/*
				501	* Read the snapshot header.
				502	*/
				503	r = read_header(ps, &new_snapshot);
				504	if (r)
				505	return r;
				506
				507	/*
				508	* Now we know correct chunk_size, complete the initialisation.
				509	*/
				510	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
				511	sizeof(struct disk_exception);
				512	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				513	sizeof(*ps->callbacks));
				514	if (!ps->callbacks)
				515	return -ENOMEM;
				516
				517	/*
				518	* Do we need to setup a new snapshot ?
				519	*/
				520	if (new_snapshot) {
				521	r = write_header(ps);
				522	if (r) {
				523	DMWARN("write_header failed");
				524	return r;
				525	}
				526
				527	ps->current_area = 0;
				528	zero_memory_area(ps);
				529	r = zero_disk_area(ps, 0);
				530	if (r) {
				531	DMWARN("zero_disk_area(0) failed");
				532	return r;
				533	}
				534	} else {
				535	/*
				536	* Sanity checks.
				537	*/
				538	if (ps->version != SNAPSHOT_DISK_VERSION) {
				539	DMWARN("unable to handle snapshot disk version %d",
				540	ps->version);
				541	return -EINVAL;
				542	}
				543
				544	/*
				545	* Metadata are valid, but snapshot is invalidated
				546	*/
				547	if (!ps->valid)
				548	return 1;
				549
				550	/*
				551	* Read the metadata.
				552	*/
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	553	r = read_exceptions(ps, callback, callback_context);
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	554	if (r)
				555	return r;
				556	}
				557
				558	return 0;
				559	}
				560
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	561	static int persistent_prepare_exception(struct dm_exception_store *store,
				562	struct dm_snap_exception *e)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	563	{
				564	struct pstore *ps = get_info(store);
				565	uint32_t stride;
				566	chunk_t next_free;
				567	sector_t size = get_dev_size(store->snap->cow->bdev);
				568
				569	/* Is there enough room ? */
				570	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
				571	return -ENOSPC;
				572
				573	e->new_chunk = ps->next_free;
				574
				575	/*
				576	* Move onto the next free pending, making sure to take
				577	* into account the location of the metadata chunks.
				578	*/
				579	stride = (ps->exceptions_per_area + 1);
				580	next_free = ++ps->next_free;
				581	if (sector_div(next_free, stride) == 1)
				582	ps->next_free++;
				583
				584	atomic_inc(&ps->pending_count);
				585	return 0;
				586	}
				587
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	588	static void persistent_commit_exception(struct dm_exception_store *store,
				589	struct dm_snap_exception *e,
				590	void (callback) (void , int success),
				591	void *callback_context)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	592	{
				593	unsigned int i;
				594	struct pstore *ps = get_info(store);
				595	struct disk_exception de;
				596	struct commit_callback *cb;
				597
				598	de.old_chunk = e->old_chunk;
				599	de.new_chunk = e->new_chunk;
				600	write_exception(ps, ps->current_committed++, &de);
				601
				602	/*
				603	* Add the callback to the back of the array. This code
				604	* is the only place where the callback array is
				605	* manipulated, and we know that it will never be called
				606	* multiple times concurrently.
				607	*/
				608	cb = ps->callbacks + ps->callback_count++;
				609	cb->callback = callback;
				610	cb->context = callback_context;
				611
				612	/*
				613	* If there are exceptions in flight and we have not yet
				614	* filled this metadata area there's nothing more to do.
				615	*/
				616	if (!atomic_dec_and_test(&ps->pending_count) &&
				617	(ps->current_committed != ps->exceptions_per_area))
				618	return;
				619
				620	/*
				621	* If we completely filled the current area, then wipe the next one.
				622	*/
				623	if ((ps->current_committed == ps->exceptions_per_area) &&
				624	zero_disk_area(ps, ps->current_area + 1))
				625	ps->valid = 0;
				626
				627	/*
				628	* Commit exceptions to disk.
				629	*/
				630	if (ps->valid && area_io(ps, WRITE))
				631	ps->valid = 0;
				632
				633	/*
				634	* Advance to the next area if this one is full.
				635	*/
				636	if (ps->current_committed == ps->exceptions_per_area) {
				637	ps->current_committed = 0;
				638	ps->current_area++;
				639	zero_memory_area(ps);
				640	}
				641
				642	for (i = 0; i < ps->callback_count; i++) {
				643	cb = ps->callbacks + i;
				644	cb->callback(cb->context, ps->valid);
				645	}
				646
				647	ps->callback_count = 0;
				648	}
				649
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	650	static void persistent_drop_snapshot(struct dm_exception_store *store)
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	651	{
				652	struct pstore *ps = get_info(store);
				653
				654	ps->valid = 0;
				655	if (write_header(ps))
				656	DMWARN("write header failed");
				657	}
				658
				659	int dm_create_persistent(struct dm_exception_store *store)
				660	{
				661	struct pstore *ps;
				662
				663	/* allocate the pstore */
				664	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
				665	if (!ps)
				666	return -ENOMEM;
				667
				668	ps->snap = store->snap;
				669	ps->valid = 1;
				670	ps->version = SNAPSHOT_DISK_VERSION;
				671	ps->area = NULL;
				672	ps->next_free = 2; /* skipping the header and first area */
				673	ps->current_committed = 0;
				674
				675	ps->callback_count = 0;
				676	atomic_set(&ps->pending_count, 0);
				677	ps->callbacks = NULL;
				678
				679	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
				680	if (!ps->metadata_wq) {
				681	kfree(ps);
				682	DMERR("couldn't start header metadata update thread");
				683	return -ENOMEM;
				684	}
				685
				686	store->destroy = persistent_destroy;
				687	store->read_metadata = persistent_read_metadata;
Jonathan Brassow	a159c1a	2009-01-06 03:05:19 +0000	[diff] [blame^]	688	store->prepare_exception = persistent_prepare_exception;
				689	store->commit_exception = persistent_commit_exception;
				690	store->drop_snapshot = persistent_drop_snapshot;
Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame]	691	store->fraction_full = persistent_fraction_full;
				692	store->context = ps;
				693
				694	return 0;
				695	}
				696
				697	int dm_persistent_snapshot_init(void)
				698	{
				699	return 0;
				700	}
				701
				702	void dm_persistent_snapshot_exit(void)
				703	{
				704	}