Blame - drivers/md/dm-snap-persistent.c - kernel/hikey-linaro

blob: 57c946c69ee73cd049c849172e40a4de24beccf3 [file] [log] [blame]

Alasdair G Kergon	4db6bfe	2009-01-06 03:05:17 +0000	[diff] [blame^]	1	/*
				2	* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
				3	* Copyright (C) 2006-2008 Red Hat GmbH
				4	*
				5	* This file is released under the GPL.
				6	*/
				7
				8	#include "dm-exception-store.h"
				9	#include "dm-snap.h"
				10
				11	#include <linux/mm.h>
				12	#include <linux/pagemap.h>
				13	#include <linux/vmalloc.h>
				14	#include <linux/slab.h>
				15	#include <linux/dm-io.h>
				16
				17	#define DM_MSG_PREFIX "persistent snapshot"
				18	#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */
				19
				20	/*-----------------------------------------------------------------
				21	* Persistent snapshots, by persistent we mean that the snapshot
				22	* will survive a reboot.
				23	*---------------------------------------------------------------*/
				24
				25	/*
				26	* We need to store a record of which parts of the origin have
				27	* been copied to the snapshot device. The snapshot code
				28	* requires that we copy exception chunks to chunk aligned areas
				29	* of the COW store. It makes sense therefore, to store the
				30	* metadata in chunk size blocks.
				31	*
				32	* There is no backward or forward compatibility implemented,
				33	* snapshots with different disk versions than the kernel will
				34	* not be usable. It is expected that "lvcreate" will blank out
				35	* the start of a fresh COW device before calling the snapshot
				36	* constructor.
				37	*
				38	* The first chunk of the COW device just contains the header.
				39	* After this there is a chunk filled with exception metadata,
				40	* followed by as many exception chunks as can fit in the
				41	* metadata areas.
				42	*
				43	* All on disk structures are in little-endian format. The end
				44	* of the exceptions info is indicated by an exception with a
				45	* new_chunk of 0, which is invalid since it would point to the
				46	* header chunk.
				47	*/
				48
				49	/*
				50	* Magic for persistent snapshots: "SnAp" - Feeble isn't it.
				51	*/
				52	#define SNAP_MAGIC 0x70416e53
				53
				54	/*
				55	* The on-disk version of the metadata.
				56	*/
				57	#define SNAPSHOT_DISK_VERSION 1
				58
				59	struct disk_header {
				60	uint32_t magic;
				61
				62	/*
				63	* Is this snapshot valid. There is no way of recovering
				64	* an invalid snapshot.
				65	*/
				66	uint32_t valid;
				67
				68	/*
				69	* Simple, incrementing version. no backward
				70	* compatibility.
				71	*/
				72	uint32_t version;
				73
				74	/* In sectors */
				75	uint32_t chunk_size;
				76	};
				77
				78	struct disk_exception {
				79	uint64_t old_chunk;
				80	uint64_t new_chunk;
				81	};
				82
				83	struct commit_callback {
				84	void (callback)(void , int success);
				85	void *context;
				86	};
				87
				88	/*
				89	* The top level structure for a persistent exception store.
				90	*/
				91	struct pstore {
				92	struct dm_snapshot snap; / up pointer to my snapshot */
				93	int version;
				94	int valid;
				95	uint32_t exceptions_per_area;
				96
				97	/*
				98	* Now that we have an asynchronous kcopyd there is no
				99	* need for large chunk sizes, so it wont hurt to have a
				100	* whole chunks worth of metadata in memory at once.
				101	*/
				102	void *area;
				103
				104	/*
				105	* An area of zeros used to clear the next area.
				106	*/
				107	void *zero_area;
				108
				109	/*
				110	* Used to keep track of which metadata area the data in
				111	* 'chunk' refers to.
				112	*/
				113	chunk_t current_area;
				114
				115	/*
				116	* The next free chunk for an exception.
				117	*/
				118	chunk_t next_free;
				119
				120	/*
				121	* The index of next free exception in the current
				122	* metadata area.
				123	*/
				124	uint32_t current_committed;
				125
				126	atomic_t pending_count;
				127	uint32_t callback_count;
				128	struct commit_callback *callbacks;
				129	struct dm_io_client *io_client;
				130
				131	struct workqueue_struct *metadata_wq;
				132	};
				133
				134	static unsigned sectors_to_pages(unsigned sectors)
				135	{
				136	return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9);
				137	}
				138
				139	static int alloc_area(struct pstore *ps)
				140	{
				141	int r = -ENOMEM;
				142	size_t len;
				143
				144	len = ps->snap->chunk_size << SECTOR_SHIFT;
				145
				146	/*
				147	* Allocate the chunk_size block of memory that will hold
				148	* a single metadata area.
				149	*/
				150	ps->area = vmalloc(len);
				151	if (!ps->area)
				152	return r;
				153
				154	ps->zero_area = vmalloc(len);
				155	if (!ps->zero_area) {
				156	vfree(ps->area);
				157	return r;
				158	}
				159	memset(ps->zero_area, 0, len);
				160
				161	return 0;
				162	}
				163
				164	static void free_area(struct pstore *ps)
				165	{
				166	vfree(ps->area);
				167	ps->area = NULL;
				168	vfree(ps->zero_area);
				169	ps->zero_area = NULL;
				170	}
				171
				172	struct mdata_req {
				173	struct dm_io_region *where;
				174	struct dm_io_request *io_req;
				175	struct work_struct work;
				176	int result;
				177	};
				178
				179	static void do_metadata(struct work_struct *work)
				180	{
				181	struct mdata_req *req = container_of(work, struct mdata_req, work);
				182
				183	req->result = dm_io(req->io_req, 1, req->where, NULL);
				184	}
				185
				186	/*
				187	* Read or write a chunk aligned and sized block of data from a device.
				188	*/
				189	static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
				190	{
				191	struct dm_io_region where = {
				192	.bdev = ps->snap->cow->bdev,
				193	.sector = ps->snap->chunk_size * chunk,
				194	.count = ps->snap->chunk_size,
				195	};
				196	struct dm_io_request io_req = {
				197	.bi_rw = rw,
				198	.mem.type = DM_IO_VMA,
				199	.mem.ptr.vma = ps->area,
				200	.client = ps->io_client,
				201	.notify.fn = NULL,
				202	};
				203	struct mdata_req req;
				204
				205	if (!metadata)
				206	return dm_io(&io_req, 1, &where, NULL);
				207
				208	req.where = &where;
				209	req.io_req = &io_req;
				210
				211	/*
				212	* Issue the synchronous I/O from a different thread
				213	* to avoid generic_make_request recursion.
				214	*/
				215	INIT_WORK(&req.work, do_metadata);
				216	queue_work(ps->metadata_wq, &req.work);
				217	flush_workqueue(ps->metadata_wq);
				218
				219	return req.result;
				220	}
				221
				222	/*
				223	* Convert a metadata area index to a chunk index.
				224	*/
				225	static chunk_t area_location(struct pstore *ps, chunk_t area)
				226	{
				227	return 1 + ((ps->exceptions_per_area + 1) * area);
				228	}
				229
				230	/*
				231	* Read or write a metadata area. Remembering to skip the first
				232	* chunk which holds the header.
				233	*/
				234	static int area_io(struct pstore *ps, int rw)
				235	{
				236	int r;
				237	chunk_t chunk;
				238
				239	chunk = area_location(ps, ps->current_area);
				240
				241	r = chunk_io(ps, chunk, rw, 0);
				242	if (r)
				243	return r;
				244
				245	return 0;
				246	}
				247
				248	static void zero_memory_area(struct pstore *ps)
				249	{
				250	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
				251	}
				252
				253	static int zero_disk_area(struct pstore *ps, chunk_t area)
				254	{
				255	struct dm_io_region where = {
				256	.bdev = ps->snap->cow->bdev,
				257	.sector = ps->snap->chunk_size * area_location(ps, area),
				258	.count = ps->snap->chunk_size,
				259	};
				260	struct dm_io_request io_req = {
				261	.bi_rw = WRITE,
				262	.mem.type = DM_IO_VMA,
				263	.mem.ptr.vma = ps->zero_area,
				264	.client = ps->io_client,
				265	.notify.fn = NULL,
				266	};
				267
				268	return dm_io(&io_req, 1, &where, NULL);
				269	}
				270
				271	static int read_header(struct pstore ps, int new_snapshot)
				272	{
				273	int r;
				274	struct disk_header *dh;
				275	chunk_t chunk_size;
				276	int chunk_size_supplied = 1;
				277
				278	/*
				279	* Use default chunk size (or hardsect_size, if larger) if none supplied
				280	*/
				281	if (!ps->snap->chunk_size) {
				282	ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
				283	bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
				284	ps->snap->chunk_mask = ps->snap->chunk_size - 1;
				285	ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
				286	chunk_size_supplied = 0;
				287	}
				288
				289	ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
				290	chunk_size));
				291	if (IS_ERR(ps->io_client))
				292	return PTR_ERR(ps->io_client);
				293
				294	r = alloc_area(ps);
				295	if (r)
				296	return r;
				297
				298	r = chunk_io(ps, 0, READ, 1);
				299	if (r)
				300	goto bad;
				301
				302	dh = (struct disk_header *) ps->area;
				303
				304	if (le32_to_cpu(dh->magic) == 0) {
				305	*new_snapshot = 1;
				306	return 0;
				307	}
				308
				309	if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
				310	DMWARN("Invalid or corrupt snapshot");
				311	r = -ENXIO;
				312	goto bad;
				313	}
				314
				315	*new_snapshot = 0;
				316	ps->valid = le32_to_cpu(dh->valid);
				317	ps->version = le32_to_cpu(dh->version);
				318	chunk_size = le32_to_cpu(dh->chunk_size);
				319
				320	if (!chunk_size_supplied \|\| ps->snap->chunk_size == chunk_size)
				321	return 0;
				322
				323	DMWARN("chunk size %llu in device metadata overrides "
				324	"table chunk size of %llu.",
				325	(unsigned long long)chunk_size,
				326	(unsigned long long)ps->snap->chunk_size);
				327
				328	/* We had a bogus chunk_size. Fix stuff up. */
				329	free_area(ps);
				330
				331	ps->snap->chunk_size = chunk_size;
				332	ps->snap->chunk_mask = chunk_size - 1;
				333	ps->snap->chunk_shift = ffs(chunk_size) - 1;
				334
				335	r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
				336	ps->io_client);
				337	if (r)
				338	return r;
				339
				340	r = alloc_area(ps);
				341	return r;
				342
				343	bad:
				344	free_area(ps);
				345	return r;
				346	}
				347
				348	static int write_header(struct pstore *ps)
				349	{
				350	struct disk_header *dh;
				351
				352	memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
				353
				354	dh = (struct disk_header *) ps->area;
				355	dh->magic = cpu_to_le32(SNAP_MAGIC);
				356	dh->valid = cpu_to_le32(ps->valid);
				357	dh->version = cpu_to_le32(ps->version);
				358	dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
				359
				360	return chunk_io(ps, 0, WRITE, 1);
				361	}
				362
				363	/*
				364	* Access functions for the disk exceptions, these do the endian conversions.
				365	*/
				366	static struct disk_exception get_exception(struct pstore ps, uint32_t index)
				367	{
				368	BUG_ON(index >= ps->exceptions_per_area);
				369
				370	return ((struct disk_exception *) ps->area) + index;
				371	}
				372
				373	static void read_exception(struct pstore *ps,
				374	uint32_t index, struct disk_exception *result)
				375	{
				376	struct disk_exception *e = get_exception(ps, index);
				377
				378	/* copy it */
				379	result->old_chunk = le64_to_cpu(e->old_chunk);
				380	result->new_chunk = le64_to_cpu(e->new_chunk);
				381	}
				382
				383	static void write_exception(struct pstore *ps,
				384	uint32_t index, struct disk_exception *de)
				385	{
				386	struct disk_exception *e = get_exception(ps, index);
				387
				388	/* copy it */
				389	e->old_chunk = cpu_to_le64(de->old_chunk);
				390	e->new_chunk = cpu_to_le64(de->new_chunk);
				391	}
				392
				393	/*
				394	* Registers the exceptions that are present in the current area.
				395	* 'full' is filled in to indicate if the area has been
				396	* filled.
				397	*/
				398	static int insert_exceptions(struct pstore ps, int full)
				399	{
				400	int r;
				401	unsigned int i;
				402	struct disk_exception de;
				403
				404	/* presume the area is full */
				405	*full = 1;
				406
				407	for (i = 0; i < ps->exceptions_per_area; i++) {
				408	read_exception(ps, i, &de);
				409
				410	/*
				411	* If the new_chunk is pointing at the start of
				412	* the COW device, where the first metadata area
				413	* is we know that we've hit the end of the
				414	* exceptions. Therefore the area is not full.
				415	*/
				416	if (de.new_chunk == 0LL) {
				417	ps->current_committed = i;
				418	*full = 0;
				419	break;
				420	}
				421
				422	/*
				423	* Keep track of the start of the free chunks.
				424	*/
				425	if (ps->next_free <= de.new_chunk)
				426	ps->next_free = de.new_chunk + 1;
				427
				428	/*
				429	* Otherwise we add the exception to the snapshot.
				430	*/
				431	r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk);
				432	if (r)
				433	return r;
				434	}
				435
				436	return 0;
				437	}
				438
				439	static int read_exceptions(struct pstore *ps)
				440	{
				441	int r, full = 1;
				442
				443	/*
				444	* Keeping reading chunks and inserting exceptions until
				445	* we find a partially full area.
				446	*/
				447	for (ps->current_area = 0; full; ps->current_area++) {
				448	r = area_io(ps, READ);
				449	if (r)
				450	return r;
				451
				452	r = insert_exceptions(ps, &full);
				453	if (r)
				454	return r;
				455	}
				456
				457	ps->current_area--;
				458
				459	return 0;
				460	}
				461
				462	static struct pstore get_info(struct dm_exception_store store)
				463	{
				464	return (struct pstore *) store->context;
				465	}
				466
				467	static void persistent_fraction_full(struct dm_exception_store *store,
				468	sector_t numerator, sector_t denominator)
				469	{
				470	numerator = get_info(store)->next_free store->snap->chunk_size;
				471	*denominator = get_dev_size(store->snap->cow->bdev);
				472	}
				473
				474	static void persistent_destroy(struct dm_exception_store *store)
				475	{
				476	struct pstore *ps = get_info(store);
				477
				478	destroy_workqueue(ps->metadata_wq);
				479	dm_io_client_destroy(ps->io_client);
				480	vfree(ps->callbacks);
				481	free_area(ps);
				482	kfree(ps);
				483	}
				484
				485	static int persistent_read_metadata(struct dm_exception_store *store)
				486	{
				487	int r, uninitialized_var(new_snapshot);
				488	struct pstore *ps = get_info(store);
				489
				490	/*
				491	* Read the snapshot header.
				492	*/
				493	r = read_header(ps, &new_snapshot);
				494	if (r)
				495	return r;
				496
				497	/*
				498	* Now we know correct chunk_size, complete the initialisation.
				499	*/
				500	ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
				501	sizeof(struct disk_exception);
				502	ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
				503	sizeof(*ps->callbacks));
				504	if (!ps->callbacks)
				505	return -ENOMEM;
				506
				507	/*
				508	* Do we need to setup a new snapshot ?
				509	*/
				510	if (new_snapshot) {
				511	r = write_header(ps);
				512	if (r) {
				513	DMWARN("write_header failed");
				514	return r;
				515	}
				516
				517	ps->current_area = 0;
				518	zero_memory_area(ps);
				519	r = zero_disk_area(ps, 0);
				520	if (r) {
				521	DMWARN("zero_disk_area(0) failed");
				522	return r;
				523	}
				524	} else {
				525	/*
				526	* Sanity checks.
				527	*/
				528	if (ps->version != SNAPSHOT_DISK_VERSION) {
				529	DMWARN("unable to handle snapshot disk version %d",
				530	ps->version);
				531	return -EINVAL;
				532	}
				533
				534	/*
				535	* Metadata are valid, but snapshot is invalidated
				536	*/
				537	if (!ps->valid)
				538	return 1;
				539
				540	/*
				541	* Read the metadata.
				542	*/
				543	r = read_exceptions(ps);
				544	if (r)
				545	return r;
				546	}
				547
				548	return 0;
				549	}
				550
				551	static int persistent_prepare(struct dm_exception_store *store,
				552	struct dm_snap_exception *e)
				553	{
				554	struct pstore *ps = get_info(store);
				555	uint32_t stride;
				556	chunk_t next_free;
				557	sector_t size = get_dev_size(store->snap->cow->bdev);
				558
				559	/* Is there enough room ? */
				560	if (size < ((ps->next_free + 1) * store->snap->chunk_size))
				561	return -ENOSPC;
				562
				563	e->new_chunk = ps->next_free;
				564
				565	/*
				566	* Move onto the next free pending, making sure to take
				567	* into account the location of the metadata chunks.
				568	*/
				569	stride = (ps->exceptions_per_area + 1);
				570	next_free = ++ps->next_free;
				571	if (sector_div(next_free, stride) == 1)
				572	ps->next_free++;
				573
				574	atomic_inc(&ps->pending_count);
				575	return 0;
				576	}
				577
				578	static void persistent_commit(struct dm_exception_store *store,
				579	struct dm_snap_exception *e,
				580	void (callback) (void , int success),
				581	void *callback_context)
				582	{
				583	unsigned int i;
				584	struct pstore *ps = get_info(store);
				585	struct disk_exception de;
				586	struct commit_callback *cb;
				587
				588	de.old_chunk = e->old_chunk;
				589	de.new_chunk = e->new_chunk;
				590	write_exception(ps, ps->current_committed++, &de);
				591
				592	/*
				593	* Add the callback to the back of the array. This code
				594	* is the only place where the callback array is
				595	* manipulated, and we know that it will never be called
				596	* multiple times concurrently.
				597	*/
				598	cb = ps->callbacks + ps->callback_count++;
				599	cb->callback = callback;
				600	cb->context = callback_context;
				601
				602	/*
				603	* If there are exceptions in flight and we have not yet
				604	* filled this metadata area there's nothing more to do.
				605	*/
				606	if (!atomic_dec_and_test(&ps->pending_count) &&
				607	(ps->current_committed != ps->exceptions_per_area))
				608	return;
				609
				610	/*
				611	* If we completely filled the current area, then wipe the next one.
				612	*/
				613	if ((ps->current_committed == ps->exceptions_per_area) &&
				614	zero_disk_area(ps, ps->current_area + 1))
				615	ps->valid = 0;
				616
				617	/*
				618	* Commit exceptions to disk.
				619	*/
				620	if (ps->valid && area_io(ps, WRITE))
				621	ps->valid = 0;
				622
				623	/*
				624	* Advance to the next area if this one is full.
				625	*/
				626	if (ps->current_committed == ps->exceptions_per_area) {
				627	ps->current_committed = 0;
				628	ps->current_area++;
				629	zero_memory_area(ps);
				630	}
				631
				632	for (i = 0; i < ps->callback_count; i++) {
				633	cb = ps->callbacks + i;
				634	cb->callback(cb->context, ps->valid);
				635	}
				636
				637	ps->callback_count = 0;
				638	}
				639
				640	static void persistent_drop(struct dm_exception_store *store)
				641	{
				642	struct pstore *ps = get_info(store);
				643
				644	ps->valid = 0;
				645	if (write_header(ps))
				646	DMWARN("write header failed");
				647	}
				648
				649	int dm_create_persistent(struct dm_exception_store *store)
				650	{
				651	struct pstore *ps;
				652
				653	/* allocate the pstore */
				654	ps = kmalloc(sizeof(*ps), GFP_KERNEL);
				655	if (!ps)
				656	return -ENOMEM;
				657
				658	ps->snap = store->snap;
				659	ps->valid = 1;
				660	ps->version = SNAPSHOT_DISK_VERSION;
				661	ps->area = NULL;
				662	ps->next_free = 2; /* skipping the header and first area */
				663	ps->current_committed = 0;
				664
				665	ps->callback_count = 0;
				666	atomic_set(&ps->pending_count, 0);
				667	ps->callbacks = NULL;
				668
				669	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
				670	if (!ps->metadata_wq) {
				671	kfree(ps);
				672	DMERR("couldn't start header metadata update thread");
				673	return -ENOMEM;
				674	}
				675
				676	store->destroy = persistent_destroy;
				677	store->read_metadata = persistent_read_metadata;
				678	store->prepare_exception = persistent_prepare;
				679	store->commit_exception = persistent_commit;
				680	store->drop_snapshot = persistent_drop;
				681	store->fraction_full = persistent_fraction_full;
				682	store->context = ps;
				683
				684	return 0;
				685	}
				686
				687	int dm_persistent_snapshot_init(void)
				688	{
				689	return 0;
				690	}
				691
				692	void dm_persistent_snapshot_exit(void)
				693	{
				694	}