blob: 1e5284ef65fa47313d4a6609526b6dfecf00615e [file] [log] [blame]
/*
 *  linux/drivers/block/loop.c
 *
 *  Written by Theodore Ts'o, 3/29/93
 *
 * Copyright 1993 by Theodore Ts'o.  Redistribution of this file is
 * permitted under the GNU General Public License.
 *
 * DES encryption plus some minor changes by Werner Almesberger, 30-MAY-1993
 * more DES encryption plus IDEA encryption by Nicholas J. Leon, June 20, 1996
 *
 * Modularized and updated for 1.1.16 kernel - Mitch Dsouza 28th May 1994
 * Adapted for 1.3.59 kernel - Andries Brouwer, 1 Feb 1996
 *
 * Fixed do_loop_request() re-entrancy - Vincent.Renardias@waw.com Mar 20, 1997
 *
 * Added devfs support - Richard Gooch <rgooch@atnf.csiro.au> 16-Jan-1998
 *
 * Handle sparse backing files correctly - Kenn Humborg, Jun 28, 1998
 *
 * Loadable modules and other fixes by AK, 1998
 *
 * Make real block number available to downstream transfer functions, enables
 * CBC (and relatives) mode encryption requiring unique IVs per data block.
 * Reed H. Petty, rhp@draper.net
 *
 * Maximum number of loop devices now dynamic via max_loop module parameter.
 * Russell Kroll <rkroll@exploits.org> 19990701
 *
 * Maximum number of loop devices when compiled-in now selectable by passing
 * max_loop=<1-255> to the kernel on boot.
 * Erik I. Bolsø, <eriki@himolde.no>, Oct 31, 1999
 *
 * Completely rewrite request handling to be make_request_fn style and
 * non blocking, pushing work to a helper thread. Lots of fixes from
 * Al Viro too.
 * Jens Axboe <axboe@suse.de>, Nov 2000
 *
 * Support up to 256 loop devices
 * Heinz Mauelshagen <mge@sistina.com>, Feb 2002
 *
 * Support for falling back on the write file operation when the address space
 * operations write_begin is not available on the backing filesystem.
 * Anton Altaparmakov, 16 Feb 2005
 *
 * Still To Fix:
 * - Advisory locking is ignored here.
 * - Should use an own CAP_* category instead of CAP_SYS_ADMIN
 *
 */
51
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/module.h>
53#include <linux/moduleparam.h>
54#include <linux/sched.h>
55#include <linux/fs.h>
56#include <linux/file.h>
57#include <linux/stat.h>
58#include <linux/errno.h>
59#include <linux/major.h>
60#include <linux/wait.h>
61#include <linux/blkdev.h>
62#include <linux/blkpg.h>
63#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <linux/swap.h>
65#include <linux/slab.h>
66#include <linux/loop.h>
David Howells863d5b822006-08-29 19:06:14 +010067#include <linux/compat.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070068#include <linux/suspend.h>
Rafael J. Wysocki83144182007-07-17 04:03:35 -070069#include <linux/freezer.h>
Arnd Bergmann2a48fc02010-06-02 14:28:52 +020070#include <linux/mutex.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070071#include <linux/writeback.h>
72#include <linux/buffer_head.h> /* for invalidate_bdev() */
73#include <linux/completion.h>
74#include <linux/highmem.h>
Serge E. Hallyn6c997912006-09-29 01:59:11 -070075#include <linux/kthread.h>
Jens Axboed6b29d72007-06-04 09:59:47 +020076#include <linux/splice.h>
Milan Brozee862732010-08-23 15:16:00 +020077#include <linux/sysfs.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070078
79#include <asm/uaccess.h>
80
/* NOTE(review): loop_mutex has no user in this part of the file - confirm its scope. */
static DEFINE_MUTEX(loop_mutex);

/* List searched by loop_attr_show(); it is walked under loop_devices_mutex. */
static LIST_HEAD(loop_devices);
static DEFINE_MUTEX(loop_devices_mutex);

/* A positive max_part makes loop_change_fd() request a partition re-read. */
static int max_part;
static int part_shift;
87
Linus Torvalds1da177e2005-04-16 15:20:36 -070088/*
89 * Transfer functions
90 */
91static int transfer_none(struct loop_device *lo, int cmd,
92 struct page *raw_page, unsigned raw_off,
93 struct page *loop_page, unsigned loop_off,
94 int size, sector_t real_block)
95{
96 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
97 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
98
99 if (cmd == READ)
100 memcpy(loop_buf, raw_buf, size);
101 else
102 memcpy(raw_buf, loop_buf, size);
103
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104 kunmap_atomic(loop_buf, KM_USER1);
Peter Zijlstra61ecdb802010-10-26 14:21:47 -0700105 kunmap_atomic(raw_buf, KM_USER0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106 cond_resched();
107 return 0;
108}
109
110static int transfer_xor(struct loop_device *lo, int cmd,
111 struct page *raw_page, unsigned raw_off,
112 struct page *loop_page, unsigned loop_off,
113 int size, sector_t real_block)
114{
115 char *raw_buf = kmap_atomic(raw_page, KM_USER0) + raw_off;
116 char *loop_buf = kmap_atomic(loop_page, KM_USER1) + loop_off;
117 char *in, *out, *key;
118 int i, keysize;
119
120 if (cmd == READ) {
121 in = raw_buf;
122 out = loop_buf;
123 } else {
124 in = loop_buf;
125 out = raw_buf;
126 }
127
128 key = lo->lo_encrypt_key;
129 keysize = lo->lo_encrypt_key_size;
130 for (i = 0; i < size; i++)
131 *out++ = *in++ ^ key[(i & 511) % keysize];
132
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133 kunmap_atomic(loop_buf, KM_USER1);
Peter Zijlstra61ecdb802010-10-26 14:21:47 -0700134 kunmap_atomic(raw_buf, KM_USER0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700135 cond_resched();
136 return 0;
137}
138
139static int xor_init(struct loop_device *lo, const struct loop_info64 *info)
140{
141 if (unlikely(info->lo_encrypt_key_size <= 0))
142 return -EINVAL;
143 return 0;
144}
145
146static struct loop_func_table none_funcs = {
147 .number = LO_CRYPT_NONE,
148 .transfer = transfer_none,
149};
150
151static struct loop_func_table xor_funcs = {
152 .number = LO_CRYPT_XOR,
153 .transfer = transfer_xor,
154 .init = xor_init
155};
156
157/* xfer_funcs[0] is special - its release function is never called */
158static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
159 &none_funcs,
160 &xor_funcs
161};
162
163static loff_t get_loop_size(struct loop_device *lo, struct file *file)
164{
165 loff_t size, offset, loopsize;
166
167 /* Compute loopsize in bytes */
168 size = i_size_read(file->f_mapping->host);
169 offset = lo->lo_offset;
170 loopsize = size - offset;
171 if (lo->lo_sizelimit > 0 && lo->lo_sizelimit < loopsize)
172 loopsize = lo->lo_sizelimit;
173
174 /*
175 * Unfortunately, if we want to do I/O on the device,
176 * the number of 512-byte sectors has to fit into a sector_t.
177 */
178 return loopsize >> 9;
179}
180
181static int
182figure_loop_size(struct loop_device *lo)
183{
184 loff_t size = get_loop_size(lo, lo->lo_backing_file);
185 sector_t x = (sector_t)size;
186
187 if (unlikely((loff_t)x != size))
188 return -EFBIG;
189
Ken Chen73285082007-05-08 00:28:20 -0700190 set_capacity(lo->lo_disk, x);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700191 return 0;
192}
193
194static inline int
195lo_do_transfer(struct loop_device *lo, int cmd,
196 struct page *rpage, unsigned roffs,
197 struct page *lpage, unsigned loffs,
198 int size, sector_t rblock)
199{
200 if (unlikely(!lo->transfer))
201 return 0;
202
203 return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
204}
205
206/**
207 * do_lo_send_aops - helper for writing data to a loop device
208 *
209 * This is the fast version for backing filesystems which implement the address
Nick Pigginafddba42007-10-16 01:25:01 -0700210 * space operations write_begin and write_end.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 */
212static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
Al Viro511de732007-10-08 12:10:13 -0400213 loff_t pos, struct page *unused)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700214{
215 struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
216 struct address_space *mapping = file->f_mapping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217 pgoff_t index;
218 unsigned offset, bv_offs;
Zach Brown994fc28c2005-12-15 14:28:17 -0800219 int len, ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700220
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800221 mutex_lock(&mapping->host->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700222 index = pos >> PAGE_CACHE_SHIFT;
223 offset = pos & ((pgoff_t)PAGE_CACHE_SIZE - 1);
224 bv_offs = bvec->bv_offset;
225 len = bvec->bv_len;
226 while (len > 0) {
227 sector_t IV;
Nick Pigginafddba42007-10-16 01:25:01 -0700228 unsigned size, copied;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229 int transfer_result;
Nick Pigginafddba42007-10-16 01:25:01 -0700230 struct page *page;
231 void *fsdata;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232
233 IV = ((sector_t)index << (PAGE_CACHE_SHIFT - 9))+(offset >> 9);
234 size = PAGE_CACHE_SIZE - offset;
235 if (size > len)
236 size = len;
Nick Pigginafddba42007-10-16 01:25:01 -0700237
238 ret = pagecache_write_begin(file, mapping, pos, size, 0,
239 &page, &fsdata);
240 if (ret)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 goto fail;
Nick Pigginafddba42007-10-16 01:25:01 -0700242
Nikanth Karthikesan02246c42010-04-08 21:39:31 +0200243 file_update_time(file);
244
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245 transfer_result = lo_do_transfer(lo, WRITE, page, offset,
246 bvec->bv_page, bv_offs, size, IV);
Nick Pigginafddba42007-10-16 01:25:01 -0700247 copied = size;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 if (unlikely(transfer_result))
Nick Pigginafddba42007-10-16 01:25:01 -0700249 copied = 0;
250
251 ret = pagecache_write_end(file, mapping, pos, size, copied,
252 page, fsdata);
Dmitry Monakhov8268f5a2007-10-16 01:25:02 -0700253 if (ret < 0 || ret != copied)
Nick Pigginafddba42007-10-16 01:25:01 -0700254 goto fail;
Nick Pigginafddba42007-10-16 01:25:01 -0700255
256 if (unlikely(transfer_result))
257 goto fail;
258
259 bv_offs += copied;
260 len -= copied;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 offset = 0;
262 index++;
Nick Pigginafddba42007-10-16 01:25:01 -0700263 pos += copied;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 }
Zach Brown994fc28c2005-12-15 14:28:17 -0800265 ret = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266out:
Jes Sorensen1b1dcc12006-01-09 15:59:24 -0800267 mutex_unlock(&mapping->host->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269fail:
270 ret = -1;
271 goto out;
272}
273
274/**
275 * __do_lo_send_write - helper for writing data to a loop device
276 *
277 * This helper just factors out common code between do_lo_send_direct_write()
278 * and do_lo_send_write().
279 */
Arjan van de Ven858119e2006-01-14 13:20:43 -0800280static int __do_lo_send_write(struct file *file,
Al Viro98ae6ccd2006-10-10 22:45:07 +0100281 u8 *buf, const int len, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700282{
283 ssize_t bw;
284 mm_segment_t old_fs = get_fs();
285
286 set_fs(get_ds());
287 bw = file->f_op->write(file, buf, len, &pos);
288 set_fs(old_fs);
289 if (likely(bw == len))
290 return 0;
291 printk(KERN_ERR "loop: Write error at byte offset %llu, length %i.\n",
292 (unsigned long long)pos, len);
293 if (bw >= 0)
294 bw = -EIO;
295 return bw;
296}
297
298/**
299 * do_lo_send_direct_write - helper for writing data to a loop device
300 *
301 * This is the fast, non-transforming version for backing filesystems which do
Nick Pigginafddba42007-10-16 01:25:01 -0700302 * not implement the address space operations write_begin and write_end.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 * It uses the write file operation which should be present on all writeable
304 * filesystems.
305 */
306static int do_lo_send_direct_write(struct loop_device *lo,
Al Viro511de732007-10-08 12:10:13 -0400307 struct bio_vec *bvec, loff_t pos, struct page *page)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
309 ssize_t bw = __do_lo_send_write(lo->lo_backing_file,
Al Viro98ae6ccd2006-10-10 22:45:07 +0100310 kmap(bvec->bv_page) + bvec->bv_offset,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311 bvec->bv_len, pos);
312 kunmap(bvec->bv_page);
313 cond_resched();
314 return bw;
315}
316
317/**
318 * do_lo_send_write - helper for writing data to a loop device
319 *
320 * This is the slow, transforming version for filesystems which do not
Nick Pigginafddba42007-10-16 01:25:01 -0700321 * implement the address space operations write_begin and write_end. It
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322 * uses the write file operation which should be present on all writeable
323 * filesystems.
324 *
325 * Using fops->write is slower than using aops->{prepare,commit}_write in the
326 * transforming case because we need to double buffer the data as we cannot do
327 * the transformations in place as we do not have direct access to the
328 * destination pages of the backing file.
329 */
330static int do_lo_send_write(struct loop_device *lo, struct bio_vec *bvec,
Al Viro511de732007-10-08 12:10:13 -0400331 loff_t pos, struct page *page)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332{
333 int ret = lo_do_transfer(lo, WRITE, page, 0, bvec->bv_page,
334 bvec->bv_offset, bvec->bv_len, pos >> 9);
335 if (likely(!ret))
336 return __do_lo_send_write(lo->lo_backing_file,
Al Viro98ae6ccd2006-10-10 22:45:07 +0100337 page_address(page), bvec->bv_len,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700338 pos);
339 printk(KERN_ERR "loop: Transfer error at byte offset %llu, "
340 "length %i.\n", (unsigned long long)pos, bvec->bv_len);
341 if (ret > 0)
342 ret = -EIO;
343 return ret;
344}
345
Al Viro511de732007-10-08 12:10:13 -0400346static int lo_send(struct loop_device *lo, struct bio *bio, loff_t pos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347{
Al Viro511de732007-10-08 12:10:13 -0400348 int (*do_lo_send)(struct loop_device *, struct bio_vec *, loff_t,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700349 struct page *page);
350 struct bio_vec *bvec;
351 struct page *page = NULL;
352 int i, ret = 0;
353
354 do_lo_send = do_lo_send_aops;
355 if (!(lo->lo_flags & LO_FLAGS_USE_AOPS)) {
356 do_lo_send = do_lo_send_direct_write;
357 if (lo->transfer != transfer_none) {
358 page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
359 if (unlikely(!page))
360 goto fail;
361 kmap(page);
362 do_lo_send = do_lo_send_write;
363 }
364 }
365 bio_for_each_segment(bvec, bio, i) {
Al Viro511de732007-10-08 12:10:13 -0400366 ret = do_lo_send(lo, bvec, pos, page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 if (ret < 0)
368 break;
369 pos += bvec->bv_len;
370 }
371 if (page) {
372 kunmap(page);
373 __free_page(page);
374 }
375out:
376 return ret;
377fail:
378 printk(KERN_ERR "loop: Failed to allocate temporary page for write.\n");
379 ret = -ENOMEM;
380 goto out;
381}
382
/* Per-segment read state handed to lo_splice_actor() through sd->u.data. */
struct lo_read_data {
	struct loop_device *lo;	/* device being read */
	struct page *page;	/* destination page (the segment's bv_page) */
	unsigned offset;	/* write position in @page, advanced per chunk */
	int bsize;		/* upper bound on the bytes moved per actor call */
};
389
390static int
Jens Axboefd582142007-06-12 21:20:37 +0200391lo_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
392 struct splice_desc *sd)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393{
Jens Axboefd582142007-06-12 21:20:37 +0200394 struct lo_read_data *p = sd->u.data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 struct loop_device *lo = p->lo;
Jens Axboefd582142007-06-12 21:20:37 +0200396 struct page *page = buf->page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 sector_t IV;
Roel Kluina3941ec2009-03-05 08:03:53 +0100398 int size, ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700399
Jens Axboecac36bb02007-06-14 13:10:48 +0200400 ret = buf->ops->confirm(pipe, buf);
Jens Axboefd582142007-06-12 21:20:37 +0200401 if (unlikely(ret))
402 return ret;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403
Jens Axboefd582142007-06-12 21:20:37 +0200404 IV = ((sector_t) page->index << (PAGE_CACHE_SHIFT - 9)) +
405 (buf->offset >> 9);
406 size = sd->len;
407 if (size > p->bsize)
408 size = p->bsize;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409
Jens Axboefd582142007-06-12 21:20:37 +0200410 if (lo_do_transfer(lo, READ, page, buf->offset, p->page, p->offset, size, IV)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 printk(KERN_ERR "loop: transfer error block %ld\n",
412 page->index);
Jens Axboefd582142007-06-12 21:20:37 +0200413 size = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 }
415
416 flush_dcache_page(p->page);
417
Jens Axboefd582142007-06-12 21:20:37 +0200418 if (size > 0)
419 p->offset += size;
420
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421 return size;
422}
423
424static int
Jens Axboefd582142007-06-12 21:20:37 +0200425lo_direct_splice_actor(struct pipe_inode_info *pipe, struct splice_desc *sd)
426{
427 return __splice_from_pipe(pipe, sd, lo_splice_actor);
428}
429
430static int
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431do_lo_receive(struct loop_device *lo,
432 struct bio_vec *bvec, int bsize, loff_t pos)
433{
434 struct lo_read_data cookie;
Jens Axboefd582142007-06-12 21:20:37 +0200435 struct splice_desc sd;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 struct file *file;
Jens Axboefd582142007-06-12 21:20:37 +0200437 long retval;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438
439 cookie.lo = lo;
440 cookie.page = bvec->bv_page;
441 cookie.offset = bvec->bv_offset;
442 cookie.bsize = bsize;
Jens Axboefd582142007-06-12 21:20:37 +0200443
444 sd.len = 0;
445 sd.total_len = bvec->bv_len;
446 sd.flags = 0;
447 sd.pos = pos;
448 sd.u.data = &cookie;
449
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 file = lo->lo_backing_file;
Jens Axboefd582142007-06-12 21:20:37 +0200451 retval = splice_direct_to_actor(file, &sd, lo_direct_splice_actor);
452
453 if (retval < 0)
454 return retval;
455
456 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700457}
458
459static int
460lo_receive(struct loop_device *lo, struct bio *bio, int bsize, loff_t pos)
461{
462 struct bio_vec *bvec;
463 int i, ret = 0;
464
465 bio_for_each_segment(bvec, bio, i) {
466 ret = do_lo_receive(lo, bvec, bsize, pos);
467 if (ret < 0)
468 break;
469 pos += bvec->bv_len;
470 }
471 return ret;
472}
473
474static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
475{
476 loff_t pos;
477 int ret;
478
479 pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100480
481 if (bio_rw(bio) == WRITE) {
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100482 struct file *file = lo->lo_backing_file;
483
Tejun Heo6259f282010-09-03 11:56:17 +0200484 /* REQ_HARDBARRIER is deprecated */
485 if (bio->bi_rw & REQ_HARDBARRIER) {
486 ret = -EOPNOTSUPP;
487 goto out;
488 }
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100489
Tejun Heo6259f282010-09-03 11:56:17 +0200490 if (bio->bi_rw & REQ_FLUSH) {
Christoph Hellwig8018ab02010-03-22 17:32:25 +0100491 ret = vfs_fsync(file, 0);
Tejun Heo6259f282010-09-03 11:56:17 +0200492 if (unlikely(ret && ret != -EINVAL)) {
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100493 ret = -EIO;
494 goto out;
495 }
496 }
497
Al Viro511de732007-10-08 12:10:13 -0400498 ret = lo_send(lo, bio, pos);
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100499
Tejun Heo6259f282010-09-03 11:56:17 +0200500 if ((bio->bi_rw & REQ_FUA) && !ret) {
Christoph Hellwig8018ab02010-03-22 17:32:25 +0100501 ret = vfs_fsync(file, 0);
Tejun Heo6259f282010-09-03 11:56:17 +0200502 if (unlikely(ret && ret != -EINVAL))
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100503 ret = -EIO;
504 }
505 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506 ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100507
508out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509 return ret;
510}
511
512/*
513 * Add bio to back of pending list
514 */
515static void loop_add_bio(struct loop_device *lo, struct bio *bio)
516{
Akinobu Mitae6863072009-04-17 08:41:21 +0200517 bio_list_add(&lo->lo_bio_list, bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518}
519
520/*
521 * Grab first pending buffer
522 */
523static struct bio *loop_get_bio(struct loop_device *lo)
524{
Akinobu Mitae6863072009-04-17 08:41:21 +0200525 return bio_list_pop(&lo->lo_bio_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526}
527
Jens Axboe165125e2007-07-24 09:28:11 +0200528static int loop_make_request(struct request_queue *q, struct bio *old_bio)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529{
530 struct loop_device *lo = q->queuedata;
531 int rw = bio_rw(old_bio);
532
Nick Piggin35a82d12005-06-23 00:09:06 -0700533 if (rw == READA)
534 rw = READ;
535
536 BUG_ON(!lo || (rw != READ && rw != WRITE));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537
538 spin_lock_irq(&lo->lo_lock);
539 if (lo->lo_state != Lo_bound)
Nick Piggin35a82d12005-06-23 00:09:06 -0700540 goto out;
541 if (unlikely(rw == WRITE && (lo->lo_flags & LO_FLAGS_READ_ONLY)))
542 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 loop_add_bio(lo, old_bio);
Serge E. Hallyn6c997912006-09-29 01:59:11 -0700544 wake_up(&lo->lo_event);
Nick Piggin35a82d12005-06-23 00:09:06 -0700545 spin_unlock_irq(&lo->lo_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546 return 0;
Nick Piggin35a82d12005-06-23 00:09:06 -0700547
Linus Torvalds1da177e2005-04-16 15:20:36 -0700548out:
Nick Piggin35a82d12005-06-23 00:09:06 -0700549 spin_unlock_irq(&lo->lo_lock);
NeilBrown6712ecf2007-09-27 12:47:43 +0200550 bio_io_error(old_bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552}
553
554/*
555 * kick off io on the underlying address space
556 */
Jens Axboe165125e2007-07-24 09:28:11 +0200557static void loop_unplug(struct request_queue *q)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558{
559 struct loop_device *lo = q->queuedata;
560
Nick Piggin75ad23b2008-04-29 14:48:33 +0200561 queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 blk_run_address_space(lo->lo_backing_file->f_mapping);
563}
564
565struct switch_request {
566 struct file *file;
567 struct completion wait;
568};
569
570static void do_loop_switch(struct loop_device *, struct switch_request *);
571
572static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
573{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700574 if (unlikely(!bio->bi_bdev)) {
575 do_loop_switch(lo, bio->bi_private);
576 bio_put(bio);
577 } else {
Nick Piggin35a82d12005-06-23 00:09:06 -0700578 int ret = do_bio_filebacked(lo, bio);
NeilBrown6712ecf2007-09-27 12:47:43 +0200579 bio_endio(bio, ret);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700580 }
581}
582
583/*
584 * worker thread that handles reads/writes to file backed loop devices,
585 * to avoid blocking in our make_request_fn. it also does loop decrypting
586 * on reads for block backed loop, as that is too heavy to do from
587 * b_end_io context where irqs may be disabled.
Serge E. Hallyn6c997912006-09-29 01:59:11 -0700588 *
589 * Loop explanation: loop_clr_fd() sets lo_state to Lo_rundown before
590 * calling kthread_stop(). Therefore once kthread_should_stop() is
591 * true, make_request will not place any more requests. Therefore
592 * once kthread_should_stop() is true and lo_bio is NULL, we are
593 * done with the loop.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700594 */
595static int loop_thread(void *data)
596{
597 struct loop_device *lo = data;
598 struct bio *bio;
599
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600 set_user_nice(current, -20);
601
Akinobu Mitae6863072009-04-17 08:41:21 +0200602 while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603
Serge E. Hallyn6c997912006-09-29 01:59:11 -0700604 wait_event_interruptible(lo->lo_event,
Akinobu Mitae6863072009-04-17 08:41:21 +0200605 !bio_list_empty(&lo->lo_bio_list) ||
606 kthread_should_stop());
Linus Torvalds09c0dc62006-06-26 11:55:42 -0700607
Akinobu Mitae6863072009-04-17 08:41:21 +0200608 if (bio_list_empty(&lo->lo_bio_list))
Nick Piggin35a82d12005-06-23 00:09:06 -0700609 continue;
Nick Piggin35a82d12005-06-23 00:09:06 -0700610 spin_lock_irq(&lo->lo_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611 bio = loop_get_bio(lo);
Nick Piggin35a82d12005-06-23 00:09:06 -0700612 spin_unlock_irq(&lo->lo_lock);
613
614 BUG_ON(!bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 loop_handle_bio(lo, bio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700616 }
617
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618 return 0;
619}
620
621/*
622 * loop_switch performs the hard work of switching a backing store.
623 * First it needs to flush existing IO, it does this by sending a magic
624 * BIO down the pipe. The completion of this BIO does the actual switch.
625 */
626static int loop_switch(struct loop_device *lo, struct file *file)
627{
628 struct switch_request w;
Jens Axboea24eab12008-01-11 10:14:40 +0100629 struct bio *bio = bio_alloc(GFP_KERNEL, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 if (!bio)
631 return -ENOMEM;
632 init_completion(&w.wait);
633 w.file = file;
634 bio->bi_private = &w;
635 bio->bi_bdev = NULL;
636 loop_make_request(lo->lo_queue, bio);
637 wait_for_completion(&w.wait);
638 return 0;
639}
640
641/*
Milan Broz14f27932008-12-12 14:48:27 +0100642 * Helper to flush the IOs in loop, but keeping loop thread running
643 */
644static int loop_flush(struct loop_device *lo)
645{
646 /* loop not yet configured, no running thread, nothing to flush */
647 if (!lo->lo_thread)
648 return 0;
649
650 return loop_switch(lo, NULL);
651}
652
653/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 * Do the actual switch; called from the BIO completion routine
655 */
656static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
657{
658 struct file *file = p->file;
659 struct file *old_file = lo->lo_backing_file;
Milan Broz14f27932008-12-12 14:48:27 +0100660 struct address_space *mapping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
Milan Broz14f27932008-12-12 14:48:27 +0100662 /* if no new file, only flush of queued bios requested */
663 if (!file)
664 goto out;
665
666 mapping = file->f_mapping;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
668 lo->lo_backing_file = file;
Theodore Ts'oba52de12006-09-27 01:50:49 -0700669 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
670 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 lo->old_gfp_mask = mapping_gfp_mask(mapping);
672 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
Milan Broz14f27932008-12-12 14:48:27 +0100673out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700674 complete(&p->wait);
675}
676
677
678/*
679 * loop_change_fd switched the backing store of a loopback device to
680 * a new file. This is useful for operating system installers to free up
681 * the original file and in High Availability environments to switch to
682 * an alternative location for the content in case of server meltdown.
683 * This can only work if the loop device is used read-only, and if the
684 * new backing store is the same size and type as the old backing store.
685 */
Al Virobb214882008-03-02 09:29:48 -0500686static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
687 unsigned int arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688{
689 struct file *file, *old_file;
690 struct inode *inode;
691 int error;
692
693 error = -ENXIO;
694 if (lo->lo_state != Lo_bound)
695 goto out;
696
697 /* the loop device has to be read-only */
698 error = -EINVAL;
699 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
700 goto out;
701
702 error = -EBADF;
703 file = fget(arg);
704 if (!file)
705 goto out;
706
707 inode = file->f_mapping->host;
708 old_file = lo->lo_backing_file;
709
710 error = -EINVAL;
711
712 if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
713 goto out_putf;
714
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 /* size of the new backing store needs to be the same */
716 if (get_loop_size(lo, file) != get_loop_size(lo, old_file))
717 goto out_putf;
718
719 /* and ... switch */
720 error = loop_switch(lo, file);
721 if (error)
722 goto out_putf;
723
724 fput(old_file);
Laurent Vivier476a4812008-03-26 12:11:53 +0100725 if (max_part > 0)
726 ioctl_by_bdev(bdev, BLKRRPART, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 return 0;
728
729 out_putf:
730 fput(file);
731 out:
732 return error;
733}
734
735static inline int is_loop_device(struct file *file)
736{
737 struct inode *i = file->f_mapping->host;
738
739 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
740}
741
Milan Brozee862732010-08-23 15:16:00 +0200742/* loop sysfs attributes */
743
744static ssize_t loop_attr_show(struct device *dev, char *page,
745 ssize_t (*callback)(struct loop_device *, char *))
746{
747 struct loop_device *l, *lo = NULL;
748
749 mutex_lock(&loop_devices_mutex);
750 list_for_each_entry(l, &loop_devices, lo_list)
751 if (disk_to_dev(l->lo_disk) == dev) {
752 lo = l;
753 break;
754 }
755 mutex_unlock(&loop_devices_mutex);
756
757 return lo ? callback(lo, page) : -EIO;
758}
759
/*
 * LOOP_ATTR_RO(name) declares loop_attr_<name>_show(), defines a device
 * attribute show handler that routes to it through loop_attr_show(), and
 * defines the world-readable device attribute loop_attr_<name>.
 */
#define LOOP_ATTR_RO(_name)						\
static ssize_t loop_attr_##_name##_show(struct loop_device *, char *);	\
static ssize_t loop_attr_do_show_##_name(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	return loop_attr_show(d, b, loop_attr_##_name##_show);		\
}									\
static struct device_attribute loop_attr_##_name =			\
	__ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
769
770static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
771{
772 ssize_t ret;
773 char *p = NULL;
774
775 mutex_lock(&lo->lo_ctl_mutex);
776 if (lo->lo_backing_file)
777 p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1);
778 mutex_unlock(&lo->lo_ctl_mutex);
779
780 if (IS_ERR_OR_NULL(p))
781 ret = PTR_ERR(p);
782 else {
783 ret = strlen(p);
784 memmove(buf, p, ret);
785 buf[ret++] = '\n';
786 buf[ret] = 0;
787 }
788
789 return ret;
790}
791
792static ssize_t loop_attr_offset_show(struct loop_device *lo, char *buf)
793{
794 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_offset);
795}
796
797static ssize_t loop_attr_sizelimit_show(struct loop_device *lo, char *buf)
798{
799 return sprintf(buf, "%llu\n", (unsigned long long)lo->lo_sizelimit);
800}
801
802static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf)
803{
804 int autoclear = (lo->lo_flags & LO_FLAGS_AUTOCLEAR);
805
806 return sprintf(buf, "%s\n", autoclear ? "1" : "0");
807}
808
/* Instantiate the four read-only attributes exported for each device. */
LOOP_ATTR_RO(backing_file);
LOOP_ATTR_RO(offset);
LOOP_ATTR_RO(sizelimit);
LOOP_ATTR_RO(autoclear);
813
814static struct attribute *loop_attrs[] = {
815 &loop_attr_backing_file.attr,
816 &loop_attr_offset.attr,
817 &loop_attr_sizelimit.attr,
818 &loop_attr_autoclear.attr,
819 NULL,
820};
821
822static struct attribute_group loop_attribute_group = {
823 .name = "loop",
824 .attrs= loop_attrs,
825};
826
827static int loop_sysfs_init(struct loop_device *lo)
828{
829 return sysfs_create_group(&disk_to_dev(lo->lo_disk)->kobj,
830 &loop_attribute_group);
831}
832
833static void loop_sysfs_exit(struct loop_device *lo)
834{
835 sysfs_remove_group(&disk_to_dev(lo->lo_disk)->kobj,
836 &loop_attribute_group);
837}
838
Al Virobb214882008-03-02 09:29:48 -0500839static int loop_set_fd(struct loop_device *lo, fmode_t mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 struct block_device *bdev, unsigned int arg)
841{
842 struct file *file, *f;
843 struct inode *inode;
844 struct address_space *mapping;
845 unsigned lo_blocksize;
846 int lo_flags = 0;
847 int error;
848 loff_t size;
849
850 /* This is safe, since we have a reference from open(). */
851 __module_get(THIS_MODULE);
852
853 error = -EBADF;
854 file = fget(arg);
855 if (!file)
856 goto out;
857
858 error = -EBUSY;
859 if (lo->lo_state != Lo_unbound)
860 goto out_putf;
861
862 /* Avoid recursion */
863 f = file;
864 while (is_loop_device(f)) {
865 struct loop_device *l;
866
Al Virobb214882008-03-02 09:29:48 -0500867 if (f->f_mapping->host->i_bdev == bdev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868 goto out_putf;
869
870 l = f->f_mapping->host->i_bdev->bd_disk->private_data;
871 if (l->lo_state == Lo_unbound) {
872 error = -EINVAL;
873 goto out_putf;
874 }
875 f = l->lo_backing_file;
876 }
877
878 mapping = file->f_mapping;
879 inode = mapping->host;
880
881 if (!(file->f_mode & FMODE_WRITE))
882 lo_flags |= LO_FLAGS_READ_ONLY;
883
884 error = -EINVAL;
885 if (S_ISREG(inode->i_mode) || S_ISBLK(inode->i_mode)) {
Christoph Hellwigf5e54d62006-06-28 04:26:44 -0700886 const struct address_space_operations *aops = mapping->a_ops;
Miklos Szeredi68181732009-05-07 15:37:36 +0200887
Nick Piggin4e02ed42008-10-29 14:00:55 -0700888 if (aops->write_begin)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 lo_flags |= LO_FLAGS_USE_AOPS;
890 if (!(lo_flags & LO_FLAGS_USE_AOPS) && !file->f_op->write)
891 lo_flags |= LO_FLAGS_READ_ONLY;
892
Theodore Ts'oba52de12006-09-27 01:50:49 -0700893 lo_blocksize = S_ISBLK(inode->i_mode) ?
894 inode->i_bdev->bd_block_size : PAGE_SIZE;
895
Linus Torvalds1da177e2005-04-16 15:20:36 -0700896 error = 0;
897 } else {
898 goto out_putf;
899 }
900
901 size = get_loop_size(lo, file);
902
903 if ((loff_t)(sector_t)size != size) {
904 error = -EFBIG;
905 goto out_putf;
906 }
907
Al Virobb214882008-03-02 09:29:48 -0500908 if (!(mode & FMODE_WRITE))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700909 lo_flags |= LO_FLAGS_READ_ONLY;
910
911 set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
912
913 lo->lo_blocksize = lo_blocksize;
914 lo->lo_device = bdev;
915 lo->lo_flags = lo_flags;
916 lo->lo_backing_file = file;
Constantine Sapuntzakiseefe85e2006-06-23 02:06:08 -0700917 lo->transfer = transfer_none;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 lo->ioctl = NULL;
919 lo->lo_sizelimit = 0;
920 lo->old_gfp_mask = mapping_gfp_mask(mapping);
921 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
922
Akinobu Mitae6863072009-04-17 08:41:21 +0200923 bio_list_init(&lo->lo_bio_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924
925 /*
926 * set queue make_request_fn, and add limits based on lower level
927 * device
928 */
929 blk_queue_make_request(lo->lo_queue, loop_make_request);
930 lo->lo_queue->queuedata = lo;
931 lo->lo_queue->unplug_fn = loop_unplug;
932
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100933 if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
Tejun Heo4913efe2010-09-03 11:56:16 +0200934 blk_queue_flush(lo->lo_queue, REQ_FLUSH);
Nikanth Karthikesan68db1962009-03-24 12:29:54 +0100935
Ken Chen73285082007-05-08 00:28:20 -0700936 set_capacity(lo->lo_disk, size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937 bd_set_size(bdev, size << 9);
Milan Brozee862732010-08-23 15:16:00 +0200938 loop_sysfs_init(lo);
David Zeuthenc3473c62010-05-03 14:08:59 +0200939 /* let user-space know about the new size */
940 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
942 set_blocksize(bdev, lo_blocksize);
943
Serge E. Hallyn6c997912006-09-29 01:59:11 -0700944 lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
945 lo->lo_number);
946 if (IS_ERR(lo->lo_thread)) {
947 error = PTR_ERR(lo->lo_thread);
Serge E. Hallyna7422bf2006-09-29 02:01:18 -0700948 goto out_clr;
Serge E. Hallyn6c997912006-09-29 01:59:11 -0700949 }
950 lo->lo_state = Lo_bound;
951 wake_up_process(lo->lo_thread);
Laurent Vivier476a4812008-03-26 12:11:53 +0100952 if (max_part > 0)
953 ioctl_by_bdev(bdev, BLKRRPART, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700954 return 0;
955
Serge E. Hallyna7422bf2006-09-29 02:01:18 -0700956out_clr:
Milan Brozee862732010-08-23 15:16:00 +0200957 loop_sysfs_exit(lo);
Serge E. Hallyna7422bf2006-09-29 02:01:18 -0700958 lo->lo_thread = NULL;
959 lo->lo_device = NULL;
960 lo->lo_backing_file = NULL;
961 lo->lo_flags = 0;
Ken Chen73285082007-05-08 00:28:20 -0700962 set_capacity(lo->lo_disk, 0);
Peter Zijlstraf98393a2007-05-06 14:49:54 -0700963 invalidate_bdev(bdev);
Serge E. Hallyna7422bf2006-09-29 02:01:18 -0700964 bd_set_size(bdev, 0);
David Zeuthenc3473c62010-05-03 14:08:59 +0200965 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
Serge E. Hallyna7422bf2006-09-29 02:01:18 -0700966 mapping_set_gfp_mask(mapping, lo->old_gfp_mask);
967 lo->lo_state = Lo_unbound;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700968 out_putf:
969 fput(file);
970 out:
971 /* This is safe: open() is still holding a reference. */
972 module_put(THIS_MODULE);
973 return error;
974}
975
976static int
977loop_release_xfer(struct loop_device *lo)
978{
979 int err = 0;
980 struct loop_func_table *xfer = lo->lo_encryption;
981
982 if (xfer) {
983 if (xfer->release)
984 err = xfer->release(lo);
985 lo->transfer = NULL;
986 lo->lo_encryption = NULL;
987 module_put(xfer->owner);
988 }
989 return err;
990}
991
992static int
993loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
994 const struct loop_info64 *i)
995{
996 int err = 0;
997
998 if (xfer) {
999 struct module *owner = xfer->owner;
1000
1001 if (!try_module_get(owner))
1002 return -EINVAL;
1003 if (xfer->init)
1004 err = xfer->init(lo, i);
1005 if (err)
1006 module_put(owner);
1007 else
1008 lo->lo_encryption = xfer;
1009 }
1010 return err;
1011}
1012
1013static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev)
1014{
1015 struct file *filp = lo->lo_backing_file;
Al Virob4e3ca12005-10-21 03:22:34 -04001016 gfp_t gfp = lo->old_gfp_mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017
1018 if (lo->lo_state != Lo_bound)
1019 return -ENXIO;
1020
1021 if (lo->lo_refcnt > 1) /* we needed one fd for the ioctl */
1022 return -EBUSY;
1023
1024 if (filp == NULL)
1025 return -EINVAL;
1026
1027 spin_lock_irq(&lo->lo_lock);
1028 lo->lo_state = Lo_rundown;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 spin_unlock_irq(&lo->lo_lock);
1030
Serge E. Hallyn6c997912006-09-29 01:59:11 -07001031 kthread_stop(lo->lo_thread);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
Milan Broz8ae30b82008-12-12 14:50:49 +01001033 lo->lo_queue->unplug_fn = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 lo->lo_backing_file = NULL;
1035
1036 loop_release_xfer(lo);
1037 lo->transfer = NULL;
1038 lo->ioctl = NULL;
1039 lo->lo_device = NULL;
1040 lo->lo_encryption = NULL;
1041 lo->lo_offset = 0;
1042 lo->lo_sizelimit = 0;
1043 lo->lo_encrypt_key_size = 0;
1044 lo->lo_flags = 0;
Serge E. Hallyn6c997912006-09-29 01:59:11 -07001045 lo->lo_thread = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046 memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE);
1047 memset(lo->lo_crypt_name, 0, LO_NAME_SIZE);
1048 memset(lo->lo_file_name, 0, LO_NAME_SIZE);
Al Virobb214882008-03-02 09:29:48 -05001049 if (bdev)
1050 invalidate_bdev(bdev);
Ken Chen73285082007-05-08 00:28:20 -07001051 set_capacity(lo->lo_disk, 0);
Milan Broz51a0bb02010-10-27 19:51:30 -06001052 loop_sysfs_exit(lo);
David Zeuthenc3473c62010-05-03 14:08:59 +02001053 if (bdev) {
Al Virobb214882008-03-02 09:29:48 -05001054 bd_set_size(bdev, 0);
David Zeuthenc3473c62010-05-03 14:08:59 +02001055 /* let user-space know about this change */
1056 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
1057 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 mapping_set_gfp_mask(filp->f_mapping, gfp);
1059 lo->lo_state = Lo_unbound;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001060 /* This is safe: open() is still holding a reference. */
1061 module_put(THIS_MODULE);
Alexey Dobriyancf6e6932009-10-26 16:49:55 -07001062 if (max_part > 0 && bdev)
Laurent Vivier476a4812008-03-26 12:11:53 +01001063 ioctl_by_bdev(bdev, BLKRRPART, 0);
Nikanth Karthikesanf028f3b2009-03-24 12:33:41 +01001064 mutex_unlock(&lo->lo_ctl_mutex);
1065 /*
1066 * Need not hold lo_ctl_mutex to fput backing file.
1067 * Calling fput holding lo_ctl_mutex triggers a circular
1068 * lock dependency possibility warning as fput can take
1069 * bd_mutex which is usually taken before lo_ctl_mutex.
1070 */
1071 fput(filp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001072 return 0;
1073}
1074
1075static int
1076loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
1077{
1078 int err;
1079 struct loop_func_table *xfer;
David Howellsb0fafa82008-11-14 10:38:41 +11001080 uid_t uid = current_uid();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081
David Howellsb0fafa82008-11-14 10:38:41 +11001082 if (lo->lo_encrypt_key_size &&
1083 lo->lo_key_owner != uid &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084 !capable(CAP_SYS_ADMIN))
1085 return -EPERM;
1086 if (lo->lo_state != Lo_bound)
1087 return -ENXIO;
1088 if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
1089 return -EINVAL;
1090
1091 err = loop_release_xfer(lo);
1092 if (err)
1093 return err;
1094
1095 if (info->lo_encrypt_type) {
1096 unsigned int type = info->lo_encrypt_type;
1097
1098 if (type >= MAX_LO_CRYPT)
1099 return -EINVAL;
1100 xfer = xfer_funcs[type];
1101 if (xfer == NULL)
1102 return -EINVAL;
1103 } else
1104 xfer = NULL;
1105
1106 err = loop_init_xfer(lo, xfer, info);
1107 if (err)
1108 return err;
1109
1110 if (lo->lo_offset != info->lo_offset ||
1111 lo->lo_sizelimit != info->lo_sizelimit) {
1112 lo->lo_offset = info->lo_offset;
1113 lo->lo_sizelimit = info->lo_sizelimit;
1114 if (figure_loop_size(lo))
1115 return -EFBIG;
1116 }
1117
1118 memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
1119 memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
1120 lo->lo_file_name[LO_NAME_SIZE-1] = 0;
1121 lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
1122
1123 if (!xfer)
1124 xfer = &none_funcs;
1125 lo->transfer = xfer->transfer;
1126 lo->ioctl = xfer->ioctl;
1127
David Woodhouse96c58652008-02-06 01:36:27 -08001128 if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
1129 (info->lo_flags & LO_FLAGS_AUTOCLEAR))
1130 lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
1131
Linus Torvalds1da177e2005-04-16 15:20:36 -07001132 lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
1133 lo->lo_init[0] = info->lo_init[0];
1134 lo->lo_init[1] = info->lo_init[1];
1135 if (info->lo_encrypt_key_size) {
1136 memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
1137 info->lo_encrypt_key_size);
David Howellsb0fafa82008-11-14 10:38:41 +11001138 lo->lo_key_owner = uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139 }
1140
1141 return 0;
1142}
1143
1144static int
1145loop_get_status(struct loop_device *lo, struct loop_info64 *info)
1146{
1147 struct file *file = lo->lo_backing_file;
1148 struct kstat stat;
1149 int error;
1150
1151 if (lo->lo_state != Lo_bound)
1152 return -ENXIO;
Josef Sipek6c648be2006-12-08 02:36:55 -08001153 error = vfs_getattr(file->f_path.mnt, file->f_path.dentry, &stat);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 if (error)
1155 return error;
1156 memset(info, 0, sizeof(*info));
1157 info->lo_number = lo->lo_number;
1158 info->lo_device = huge_encode_dev(stat.dev);
1159 info->lo_inode = stat.ino;
1160 info->lo_rdevice = huge_encode_dev(lo->lo_device ? stat.rdev : stat.dev);
1161 info->lo_offset = lo->lo_offset;
1162 info->lo_sizelimit = lo->lo_sizelimit;
1163 info->lo_flags = lo->lo_flags;
1164 memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE);
1165 memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
1166 info->lo_encrypt_type =
1167 lo->lo_encryption ? lo->lo_encryption->number : 0;
1168 if (lo->lo_encrypt_key_size && capable(CAP_SYS_ADMIN)) {
1169 info->lo_encrypt_key_size = lo->lo_encrypt_key_size;
1170 memcpy(info->lo_encrypt_key, lo->lo_encrypt_key,
1171 lo->lo_encrypt_key_size);
1172 }
1173 return 0;
1174}
1175
1176static void
1177loop_info64_from_old(const struct loop_info *info, struct loop_info64 *info64)
1178{
1179 memset(info64, 0, sizeof(*info64));
1180 info64->lo_number = info->lo_number;
1181 info64->lo_device = info->lo_device;
1182 info64->lo_inode = info->lo_inode;
1183 info64->lo_rdevice = info->lo_rdevice;
1184 info64->lo_offset = info->lo_offset;
1185 info64->lo_sizelimit = 0;
1186 info64->lo_encrypt_type = info->lo_encrypt_type;
1187 info64->lo_encrypt_key_size = info->lo_encrypt_key_size;
1188 info64->lo_flags = info->lo_flags;
1189 info64->lo_init[0] = info->lo_init[0];
1190 info64->lo_init[1] = info->lo_init[1];
1191 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1192 memcpy(info64->lo_crypt_name, info->lo_name, LO_NAME_SIZE);
1193 else
1194 memcpy(info64->lo_file_name, info->lo_name, LO_NAME_SIZE);
1195 memcpy(info64->lo_encrypt_key, info->lo_encrypt_key, LO_KEY_SIZE);
1196}
1197
1198static int
1199loop_info64_to_old(const struct loop_info64 *info64, struct loop_info *info)
1200{
1201 memset(info, 0, sizeof(*info));
1202 info->lo_number = info64->lo_number;
1203 info->lo_device = info64->lo_device;
1204 info->lo_inode = info64->lo_inode;
1205 info->lo_rdevice = info64->lo_rdevice;
1206 info->lo_offset = info64->lo_offset;
1207 info->lo_encrypt_type = info64->lo_encrypt_type;
1208 info->lo_encrypt_key_size = info64->lo_encrypt_key_size;
1209 info->lo_flags = info64->lo_flags;
1210 info->lo_init[0] = info64->lo_init[0];
1211 info->lo_init[1] = info64->lo_init[1];
1212 if (info->lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1213 memcpy(info->lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1214 else
1215 memcpy(info->lo_name, info64->lo_file_name, LO_NAME_SIZE);
1216 memcpy(info->lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1217
1218 /* error in case values were truncated */
1219 if (info->lo_device != info64->lo_device ||
1220 info->lo_rdevice != info64->lo_rdevice ||
1221 info->lo_inode != info64->lo_inode ||
1222 info->lo_offset != info64->lo_offset)
1223 return -EOVERFLOW;
1224
1225 return 0;
1226}
1227
1228static int
1229loop_set_status_old(struct loop_device *lo, const struct loop_info __user *arg)
1230{
1231 struct loop_info info;
1232 struct loop_info64 info64;
1233
1234 if (copy_from_user(&info, arg, sizeof (struct loop_info)))
1235 return -EFAULT;
1236 loop_info64_from_old(&info, &info64);
1237 return loop_set_status(lo, &info64);
1238}
1239
1240static int
1241loop_set_status64(struct loop_device *lo, const struct loop_info64 __user *arg)
1242{
1243 struct loop_info64 info64;
1244
1245 if (copy_from_user(&info64, arg, sizeof (struct loop_info64)))
1246 return -EFAULT;
1247 return loop_set_status(lo, &info64);
1248}
1249
1250static int
1251loop_get_status_old(struct loop_device *lo, struct loop_info __user *arg) {
1252 struct loop_info info;
1253 struct loop_info64 info64;
1254 int err = 0;
1255
1256 if (!arg)
1257 err = -EINVAL;
1258 if (!err)
1259 err = loop_get_status(lo, &info64);
1260 if (!err)
1261 err = loop_info64_to_old(&info64, &info);
1262 if (!err && copy_to_user(arg, &info, sizeof(info)))
1263 err = -EFAULT;
1264
1265 return err;
1266}
1267
1268static int
1269loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
1270 struct loop_info64 info64;
1271 int err = 0;
1272
1273 if (!arg)
1274 err = -EINVAL;
1275 if (!err)
1276 err = loop_get_status(lo, &info64);
1277 if (!err && copy_to_user(arg, &info64, sizeof(info64)))
1278 err = -EFAULT;
1279
1280 return err;
1281}
1282
J. R. Okajima53d66602009-03-31 15:23:43 -07001283static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev)
1284{
1285 int err;
1286 sector_t sec;
1287 loff_t sz;
1288
1289 err = -ENXIO;
1290 if (unlikely(lo->lo_state != Lo_bound))
1291 goto out;
1292 err = figure_loop_size(lo);
1293 if (unlikely(err))
1294 goto out;
1295 sec = get_capacity(lo->lo_disk);
1296 /* the width of sector_t may be narrow for bit-shift */
1297 sz = sec;
1298 sz <<= 9;
1299 mutex_lock(&bdev->bd_mutex);
1300 bd_set_size(bdev, sz);
David Zeuthenc3473c62010-05-03 14:08:59 +02001301 /* let user-space know about the new size */
1302 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
J. R. Okajima53d66602009-03-31 15:23:43 -07001303 mutex_unlock(&bdev->bd_mutex);
1304
1305 out:
1306 return err;
1307}
1308
Al Virobb214882008-03-02 09:29:48 -05001309static int lo_ioctl(struct block_device *bdev, fmode_t mode,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 unsigned int cmd, unsigned long arg)
1311{
Al Virobb214882008-03-02 09:29:48 -05001312 struct loop_device *lo = bdev->bd_disk->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001313 int err;
1314
Nikanth Karthikesanf028f3b2009-03-24 12:33:41 +01001315 mutex_lock_nested(&lo->lo_ctl_mutex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 switch (cmd) {
1317 case LOOP_SET_FD:
Al Virobb214882008-03-02 09:29:48 -05001318 err = loop_set_fd(lo, mode, bdev, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 break;
1320 case LOOP_CHANGE_FD:
Al Virobb214882008-03-02 09:29:48 -05001321 err = loop_change_fd(lo, bdev, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001322 break;
1323 case LOOP_CLR_FD:
Nikanth Karthikesanf028f3b2009-03-24 12:33:41 +01001324 /* loop_clr_fd would have unlocked lo_ctl_mutex on success */
Al Virobb214882008-03-02 09:29:48 -05001325 err = loop_clr_fd(lo, bdev);
Nikanth Karthikesanf028f3b2009-03-24 12:33:41 +01001326 if (!err)
1327 goto out_unlocked;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001328 break;
1329 case LOOP_SET_STATUS:
1330 err = loop_set_status_old(lo, (struct loop_info __user *) arg);
1331 break;
1332 case LOOP_GET_STATUS:
1333 err = loop_get_status_old(lo, (struct loop_info __user *) arg);
1334 break;
1335 case LOOP_SET_STATUS64:
1336 err = loop_set_status64(lo, (struct loop_info64 __user *) arg);
1337 break;
1338 case LOOP_GET_STATUS64:
1339 err = loop_get_status64(lo, (struct loop_info64 __user *) arg);
1340 break;
J. R. Okajima53d66602009-03-31 15:23:43 -07001341 case LOOP_SET_CAPACITY:
1342 err = -EPERM;
1343 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN))
1344 err = loop_set_capacity(lo, bdev);
1345 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 default:
1347 err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
1348 }
Ingo Molnarf85221d2006-03-23 03:00:38 -08001349 mutex_unlock(&lo->lo_ctl_mutex);
Nikanth Karthikesanf028f3b2009-03-24 12:33:41 +01001350
1351out_unlocked:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352 return err;
1353}
1354
David Howells863d5b822006-08-29 19:06:14 +01001355#ifdef CONFIG_COMPAT
1356struct compat_loop_info {
1357 compat_int_t lo_number; /* ioctl r/o */
1358 compat_dev_t lo_device; /* ioctl r/o */
1359 compat_ulong_t lo_inode; /* ioctl r/o */
1360 compat_dev_t lo_rdevice; /* ioctl r/o */
1361 compat_int_t lo_offset;
1362 compat_int_t lo_encrypt_type;
1363 compat_int_t lo_encrypt_key_size; /* ioctl w/o */
1364 compat_int_t lo_flags; /* ioctl r/o */
1365 char lo_name[LO_NAME_SIZE];
1366 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */
1367 compat_ulong_t lo_init[2];
1368 char reserved[4];
1369};
1370
1371/*
1372 * Transfer 32-bit compatibility structure in userspace to 64-bit loop info
1373 * - noinlined to reduce stack space usage in main part of driver
1374 */
1375static noinline int
Al Viroba674cf2006-10-10 22:48:27 +01001376loop_info64_from_compat(const struct compat_loop_info __user *arg,
David Howells863d5b822006-08-29 19:06:14 +01001377 struct loop_info64 *info64)
1378{
1379 struct compat_loop_info info;
1380
1381 if (copy_from_user(&info, arg, sizeof(info)))
1382 return -EFAULT;
1383
1384 memset(info64, 0, sizeof(*info64));
1385 info64->lo_number = info.lo_number;
1386 info64->lo_device = info.lo_device;
1387 info64->lo_inode = info.lo_inode;
1388 info64->lo_rdevice = info.lo_rdevice;
1389 info64->lo_offset = info.lo_offset;
1390 info64->lo_sizelimit = 0;
1391 info64->lo_encrypt_type = info.lo_encrypt_type;
1392 info64->lo_encrypt_key_size = info.lo_encrypt_key_size;
1393 info64->lo_flags = info.lo_flags;
1394 info64->lo_init[0] = info.lo_init[0];
1395 info64->lo_init[1] = info.lo_init[1];
1396 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1397 memcpy(info64->lo_crypt_name, info.lo_name, LO_NAME_SIZE);
1398 else
1399 memcpy(info64->lo_file_name, info.lo_name, LO_NAME_SIZE);
1400 memcpy(info64->lo_encrypt_key, info.lo_encrypt_key, LO_KEY_SIZE);
1401 return 0;
1402}
1403
1404/*
1405 * Transfer 64-bit loop info to 32-bit compatibility structure in userspace
1406 * - noinlined to reduce stack space usage in main part of driver
1407 */
1408static noinline int
1409loop_info64_to_compat(const struct loop_info64 *info64,
1410 struct compat_loop_info __user *arg)
1411{
1412 struct compat_loop_info info;
1413
1414 memset(&info, 0, sizeof(info));
1415 info.lo_number = info64->lo_number;
1416 info.lo_device = info64->lo_device;
1417 info.lo_inode = info64->lo_inode;
1418 info.lo_rdevice = info64->lo_rdevice;
1419 info.lo_offset = info64->lo_offset;
1420 info.lo_encrypt_type = info64->lo_encrypt_type;
1421 info.lo_encrypt_key_size = info64->lo_encrypt_key_size;
1422 info.lo_flags = info64->lo_flags;
1423 info.lo_init[0] = info64->lo_init[0];
1424 info.lo_init[1] = info64->lo_init[1];
1425 if (info.lo_encrypt_type == LO_CRYPT_CRYPTOAPI)
1426 memcpy(info.lo_name, info64->lo_crypt_name, LO_NAME_SIZE);
1427 else
1428 memcpy(info.lo_name, info64->lo_file_name, LO_NAME_SIZE);
1429 memcpy(info.lo_encrypt_key, info64->lo_encrypt_key, LO_KEY_SIZE);
1430
1431 /* error in case values were truncated */
1432 if (info.lo_device != info64->lo_device ||
1433 info.lo_rdevice != info64->lo_rdevice ||
1434 info.lo_inode != info64->lo_inode ||
1435 info.lo_offset != info64->lo_offset ||
1436 info.lo_init[0] != info64->lo_init[0] ||
1437 info.lo_init[1] != info64->lo_init[1])
1438 return -EOVERFLOW;
1439
1440 if (copy_to_user(arg, &info, sizeof(info)))
1441 return -EFAULT;
1442 return 0;
1443}
1444
1445static int
1446loop_set_status_compat(struct loop_device *lo,
1447 const struct compat_loop_info __user *arg)
1448{
1449 struct loop_info64 info64;
1450 int ret;
1451
1452 ret = loop_info64_from_compat(arg, &info64);
1453 if (ret < 0)
1454 return ret;
1455 return loop_set_status(lo, &info64);
1456}
1457
1458static int
1459loop_get_status_compat(struct loop_device *lo,
1460 struct compat_loop_info __user *arg)
1461{
1462 struct loop_info64 info64;
1463 int err = 0;
1464
1465 if (!arg)
1466 err = -EINVAL;
1467 if (!err)
1468 err = loop_get_status(lo, &info64);
1469 if (!err)
1470 err = loop_info64_to_compat(&info64, arg);
1471 return err;
1472}
1473
Al Virobb214882008-03-02 09:29:48 -05001474static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
1475 unsigned int cmd, unsigned long arg)
David Howells863d5b822006-08-29 19:06:14 +01001476{
Al Virobb214882008-03-02 09:29:48 -05001477 struct loop_device *lo = bdev->bd_disk->private_data;
David Howells863d5b822006-08-29 19:06:14 +01001478 int err;
1479
David Howells863d5b822006-08-29 19:06:14 +01001480 switch(cmd) {
1481 case LOOP_SET_STATUS:
1482 mutex_lock(&lo->lo_ctl_mutex);
1483 err = loop_set_status_compat(
1484 lo, (const struct compat_loop_info __user *) arg);
1485 mutex_unlock(&lo->lo_ctl_mutex);
1486 break;
1487 case LOOP_GET_STATUS:
1488 mutex_lock(&lo->lo_ctl_mutex);
1489 err = loop_get_status_compat(
1490 lo, (struct compat_loop_info __user *) arg);
1491 mutex_unlock(&lo->lo_ctl_mutex);
1492 break;
J. R. Okajima53d66602009-03-31 15:23:43 -07001493 case LOOP_SET_CAPACITY:
David Howells863d5b822006-08-29 19:06:14 +01001494 case LOOP_CLR_FD:
1495 case LOOP_GET_STATUS64:
1496 case LOOP_SET_STATUS64:
1497 arg = (unsigned long) compat_ptr(arg);
1498 case LOOP_SET_FD:
1499 case LOOP_CHANGE_FD:
Al Virobb214882008-03-02 09:29:48 -05001500 err = lo_ioctl(bdev, mode, cmd, arg);
David Howells863d5b822006-08-29 19:06:14 +01001501 break;
1502 default:
1503 err = -ENOIOCTLCMD;
1504 break;
1505 }
David Howells863d5b822006-08-29 19:06:14 +01001506 return err;
1507}
1508#endif
1509
Al Virobb214882008-03-02 09:29:48 -05001510static int lo_open(struct block_device *bdev, fmode_t mode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001511{
Al Virobb214882008-03-02 09:29:48 -05001512 struct loop_device *lo = bdev->bd_disk->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001514 mutex_lock(&loop_mutex);
Ingo Molnarf85221d2006-03-23 03:00:38 -08001515 mutex_lock(&lo->lo_ctl_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 lo->lo_refcnt++;
Ingo Molnarf85221d2006-03-23 03:00:38 -08001517 mutex_unlock(&lo->lo_ctl_mutex);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001518 mutex_unlock(&loop_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519
1520 return 0;
1521}
1522
Al Virobb214882008-03-02 09:29:48 -05001523static int lo_release(struct gendisk *disk, fmode_t mode)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001524{
Al Virobb214882008-03-02 09:29:48 -05001525 struct loop_device *lo = disk->private_data;
Alexander Beregalovffcd7dc2009-04-07 13:48:21 +02001526 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001527
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001528 mutex_lock(&loop_mutex);
Ingo Molnarf85221d2006-03-23 03:00:38 -08001529 mutex_lock(&lo->lo_ctl_mutex);
David Woodhouse96c58652008-02-06 01:36:27 -08001530
Milan Broz14f27932008-12-12 14:48:27 +01001531 if (--lo->lo_refcnt)
1532 goto out;
1533
1534 if (lo->lo_flags & LO_FLAGS_AUTOCLEAR) {
1535 /*
1536 * In autoclear mode, stop the loop thread
1537 * and remove configuration after last close.
1538 */
Alexander Beregalovffcd7dc2009-04-07 13:48:21 +02001539 err = loop_clr_fd(lo, NULL);
1540 if (!err)
1541 goto out_unlocked;
Milan Broz14f27932008-12-12 14:48:27 +01001542 } else {
1543 /*
1544 * Otherwise keep thread (if running) and config,
1545 * but flush possible ongoing bios in thread.
1546 */
1547 loop_flush(lo);
1548 }
David Woodhouse96c58652008-02-06 01:36:27 -08001549
Milan Broz14f27932008-12-12 14:48:27 +01001550out:
Ingo Molnarf85221d2006-03-23 03:00:38 -08001551 mutex_unlock(&lo->lo_ctl_mutex);
Alexander Beregalovffcd7dc2009-04-07 13:48:21 +02001552out_unlocked:
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02001553 mutex_unlock(&loop_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001554 return 0;
1555}
1556
Alexey Dobriyan83d5cde2009-09-21 17:01:13 -07001557static const struct block_device_operations lo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001558 .owner = THIS_MODULE,
Al Virobb214882008-03-02 09:29:48 -05001559 .open = lo_open,
1560 .release = lo_release,
1561 .ioctl = lo_ioctl,
David Howells863d5b822006-08-29 19:06:14 +01001562#ifdef CONFIG_COMPAT
Al Virobb214882008-03-02 09:29:48 -05001563 .compat_ioctl = lo_compat_ioctl,
David Howells863d5b822006-08-29 19:06:14 +01001564#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001565};
1566
1567/*
1568 * And now the modules code and kernel interface.
1569 */
Ken Chen73285082007-05-08 00:28:20 -07001570static int max_loop;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571module_param(max_loop, int, 0);
Ken Chena47653f2007-06-08 13:46:44 -07001572MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
Laurent Vivier476a4812008-03-26 12:11:53 +01001573module_param(max_part, int, 0);
1574MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001575MODULE_LICENSE("GPL");
1576MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
1577
1578int loop_register_transfer(struct loop_func_table *funcs)
1579{
1580 unsigned int n = funcs->number;
1581
1582 if (n >= MAX_LO_CRYPT || xfer_funcs[n])
1583 return -EINVAL;
1584 xfer_funcs[n] = funcs;
1585 return 0;
1586}
1587
1588int loop_unregister_transfer(int number)
1589{
1590 unsigned int n = number;
1591 struct loop_device *lo;
1592 struct loop_func_table *xfer;
1593
1594 if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL)
1595 return -EINVAL;
1596
1597 xfer_funcs[n] = NULL;
1598
Ken Chen73285082007-05-08 00:28:20 -07001599 list_for_each_entry(lo, &loop_devices, lo_list) {
Ingo Molnarf85221d2006-03-23 03:00:38 -08001600 mutex_lock(&lo->lo_ctl_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601
1602 if (lo->lo_encryption == xfer)
1603 loop_release_xfer(lo);
1604
Ingo Molnarf85221d2006-03-23 03:00:38 -08001605 mutex_unlock(&lo->lo_ctl_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 }
1607
1608 return 0;
1609}
1610
/* Registration interface exported for transfer modules. */
EXPORT_SYMBOL(loop_register_transfer);
EXPORT_SYMBOL(loop_unregister_transfer);
1613
Ken Chena47653f2007-06-08 13:46:44 -07001614static struct loop_device *loop_alloc(int i)
Ken Chen73285082007-05-08 00:28:20 -07001615{
1616 struct loop_device *lo;
1617 struct gendisk *disk;
1618
1619 lo = kzalloc(sizeof(*lo), GFP_KERNEL);
1620 if (!lo)
1621 goto out;
1622
1623 lo->lo_queue = blk_alloc_queue(GFP_KERNEL);
1624 if (!lo->lo_queue)
1625 goto out_free_dev;
1626
Laurent Vivier476a4812008-03-26 12:11:53 +01001627 disk = lo->lo_disk = alloc_disk(1 << part_shift);
Ken Chen73285082007-05-08 00:28:20 -07001628 if (!disk)
1629 goto out_free_queue;
1630
1631 mutex_init(&lo->lo_ctl_mutex);
1632 lo->lo_number = i;
1633 lo->lo_thread = NULL;
1634 init_waitqueue_head(&lo->lo_event);
1635 spin_lock_init(&lo->lo_lock);
1636 disk->major = LOOP_MAJOR;
Laurent Vivier476a4812008-03-26 12:11:53 +01001637 disk->first_minor = i << part_shift;
Ken Chen73285082007-05-08 00:28:20 -07001638 disk->fops = &lo_fops;
1639 disk->private_data = lo;
1640 disk->queue = lo->lo_queue;
1641 sprintf(disk->disk_name, "loop%d", i);
Ken Chen73285082007-05-08 00:28:20 -07001642 return lo;
1643
1644out_free_queue:
1645 blk_cleanup_queue(lo->lo_queue);
1646out_free_dev:
1647 kfree(lo);
1648out:
Al Viro07002e92007-05-12 16:23:15 -04001649 return NULL;
Ken Chen73285082007-05-08 00:28:20 -07001650}
1651
Ken Chena47653f2007-06-08 13:46:44 -07001652static void loop_free(struct loop_device *lo)
Ken Chen73285082007-05-08 00:28:20 -07001653{
Ken Chen73285082007-05-08 00:28:20 -07001654 blk_cleanup_queue(lo->lo_queue);
1655 put_disk(lo->lo_disk);
1656 list_del(&lo->lo_list);
1657 kfree(lo);
1658}
1659
Ken Chena47653f2007-06-08 13:46:44 -07001660static struct loop_device *loop_init_one(int i)
1661{
1662 struct loop_device *lo;
1663
1664 list_for_each_entry(lo, &loop_devices, lo_list) {
1665 if (lo->lo_number == i)
1666 return lo;
1667 }
1668
1669 lo = loop_alloc(i);
1670 if (lo) {
1671 add_disk(lo->lo_disk);
1672 list_add_tail(&lo->lo_list, &loop_devices);
1673 }
1674 return lo;
1675}
1676
1677static void loop_del_one(struct loop_device *lo)
1678{
1679 del_gendisk(lo->lo_disk);
1680 loop_free(lo);
1681}
1682
Ken Chen73285082007-05-08 00:28:20 -07001683static struct kobject *loop_probe(dev_t dev, int *part, void *data)
1684{
Al Viro705962c2007-05-13 05:52:32 -04001685 struct loop_device *lo;
Al Viro07002e92007-05-12 16:23:15 -04001686 struct kobject *kobj;
Ken Chen73285082007-05-08 00:28:20 -07001687
Al Viro705962c2007-05-13 05:52:32 -04001688 mutex_lock(&loop_devices_mutex);
1689 lo = loop_init_one(dev & MINORMASK);
Al Viro07002e92007-05-12 16:23:15 -04001690 kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM);
Ken Chen73285082007-05-08 00:28:20 -07001691 mutex_unlock(&loop_devices_mutex);
1692
1693 *part = 0;
Al Viro07002e92007-05-12 16:23:15 -04001694 return kobj;
Ken Chen73285082007-05-08 00:28:20 -07001695}
1696
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697static int __init loop_init(void)
1698{
Ken Chena47653f2007-06-08 13:46:44 -07001699 int i, nr;
1700 unsigned long range;
1701 struct loop_device *lo, *next;
1702
1703 /*
1704 * loop module now has a feature to instantiate underlying device
1705 * structure on-demand, provided that there is an access dev node.
1706 * However, this will not work well with user space tool that doesn't
1707 * know about such "feature". In order to not break any existing
1708 * tool, we do the following:
1709 *
1710 * (1) if max_loop is specified, create that many upfront, and this
1711 * also becomes a hard limit.
1712 * (2) if max_loop is not specified, create 8 loop device on module
1713 * load, user can further extend loop device by create dev node
1714 * themselves and have kernel automatically instantiate actual
1715 * device on-demand.
1716 */
Laurent Vivier476a4812008-03-26 12:11:53 +01001717
1718 part_shift = 0;
1719 if (max_part > 0)
1720 part_shift = fls(max_part);
1721
1722 if (max_loop > 1UL << (MINORBITS - part_shift))
Ken Chena47653f2007-06-08 13:46:44 -07001723 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724
Ken Chen73285082007-05-08 00:28:20 -07001725 if (max_loop) {
Ken Chena47653f2007-06-08 13:46:44 -07001726 nr = max_loop;
1727 range = max_loop;
1728 } else {
1729 nr = 8;
Laurent Vivier476a4812008-03-26 12:11:53 +01001730 range = 1UL << (MINORBITS - part_shift);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 }
Ken Chena47653f2007-06-08 13:46:44 -07001732
1733 if (register_blkdev(LOOP_MAJOR, "loop"))
1734 return -EIO;
1735
1736 for (i = 0; i < nr; i++) {
1737 lo = loop_alloc(i);
1738 if (!lo)
1739 goto Enomem;
1740 list_add_tail(&lo->lo_list, &loop_devices);
1741 }
1742
1743 /* point of no return */
1744
1745 list_for_each_entry(lo, &loop_devices, lo_list)
1746 add_disk(lo->lo_disk);
1747
1748 blk_register_region(MKDEV(LOOP_MAJOR, 0), range,
1749 THIS_MODULE, loop_probe, NULL, NULL);
1750
Ken Chen73285082007-05-08 00:28:20 -07001751 printk(KERN_INFO "loop: module loaded\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001752 return 0;
Ken Chena47653f2007-06-08 13:46:44 -07001753
1754Enomem:
1755 printk(KERN_INFO "loop: out of memory\n");
1756
1757 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1758 loop_free(lo);
1759
1760 unregister_blkdev(LOOP_MAJOR, "loop");
1761 return -ENOMEM;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762}
1763
Ken Chen73285082007-05-08 00:28:20 -07001764static void __exit loop_exit(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001765{
Ken Chena47653f2007-06-08 13:46:44 -07001766 unsigned long range;
Ken Chen73285082007-05-08 00:28:20 -07001767 struct loop_device *lo, *next;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001768
Laurent Vivier476a4812008-03-26 12:11:53 +01001769 range = max_loop ? max_loop : 1UL << (MINORBITS - part_shift);
Ken Chena47653f2007-06-08 13:46:44 -07001770
Ken Chen73285082007-05-08 00:28:20 -07001771 list_for_each_entry_safe(lo, next, &loop_devices, lo_list)
1772 loop_del_one(lo);
1773
Ken Chena47653f2007-06-08 13:46:44 -07001774 blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range);
Akinobu Mita00d59402007-07-17 04:03:46 -07001775 unregister_blkdev(LOOP_MAJOR, "loop");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776}
1777
/* Register loop_init()/loop_exit() as the driver's entry and exit points. */
module_init(loop_init);
module_exit(loop_exit);
1780
1781#ifndef MODULE
1782static int __init max_loop_setup(char *str)
1783{
1784 max_loop = simple_strtol(str, NULL, 0);
1785 return 1;
1786}
1787
1788__setup("max_loop=", max_loop_setup);
1789#endif