blob: d9644fd9cc0dfe11902a3539b0c78d7c60875611 [file] [log] [blame]
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */
6
7#include <linux/mm.h>
8#include <linux/file.h>
9#include <linux/poll.h>
10#include <linux/slab.h>
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mount.h>
15#include <linux/pipe_fs_i.h>
16#include <linux/uio.h>
17#include <linux/highmem.h>
Jens Axboe5274f052006-03-30 15:15:30 +020018#include <linux/pagemap.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070019
20#include <asm/uaccess.h>
21#include <asm/ioctls.h>
22
/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */
37
38/* Drop the inode semaphore and wait for a pipe event, atomically */
Ingo Molnar3a326a22006-04-10 15:18:35 +020039void pipe_wait(struct pipe_inode_info *pipe)
Linus Torvalds1da177e2005-04-16 15:20:36 -070040{
41 DEFINE_WAIT(wait);
42
Ingo Molnard79fc0f2005-09-10 00:26:12 -070043 /*
44 * Pipes are system-local resources, so sleeping on them
45 * is considered a noninteractive wait:
46 */
Ingo Molnar341b4462006-04-11 13:57:45 +020047 prepare_to_wait(&pipe->wait, &wait,
48 TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
Ingo Molnar3a326a22006-04-10 15:18:35 +020049 if (pipe->inode)
50 mutex_unlock(&pipe->inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070051 schedule();
Ingo Molnar3a326a22006-04-10 15:18:35 +020052 finish_wait(&pipe->wait, &wait);
53 if (pipe->inode)
54 mutex_lock(&pipe->inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -070055}
56
Arjan van de Ven858119e2006-01-14 13:20:43 -080057static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070058pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len)
59{
60 unsigned long copy;
61
62 while (len > 0) {
63 while (!iov->iov_len)
64 iov++;
65 copy = min_t(unsigned long, len, iov->iov_len);
66
67 if (copy_from_user(to, iov->iov_base, copy))
68 return -EFAULT;
69 to += copy;
70 len -= copy;
71 iov->iov_base += copy;
72 iov->iov_len -= copy;
73 }
74 return 0;
75}
76
Arjan van de Ven858119e2006-01-14 13:20:43 -080077static int
Linus Torvalds1da177e2005-04-16 15:20:36 -070078pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len)
79{
80 unsigned long copy;
81
82 while (len > 0) {
83 while (!iov->iov_len)
84 iov++;
85 copy = min_t(unsigned long, len, iov->iov_len);
86
87 if (copy_to_user(iov->iov_base, from, copy))
88 return -EFAULT;
89 from += copy;
90 len -= copy;
91 iov->iov_base += copy;
92 iov->iov_len -= copy;
93 }
94 return 0;
95}
96
Ingo Molnar341b4462006-04-11 13:57:45 +020097static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
98 struct pipe_buffer *buf)
Linus Torvalds1da177e2005-04-16 15:20:36 -070099{
100 struct page *page = buf->page;
101
Jens Axboe5274f052006-03-30 15:15:30 +0200102 /*
103 * If nobody else uses this page, and we don't already have a
104 * temporary page, let's keep track of it as a one-deep
Ingo Molnar341b4462006-04-11 13:57:45 +0200105 * allocation cache. (Otherwise just release our reference to it)
Jens Axboe5274f052006-03-30 15:15:30 +0200106 */
Ingo Molnar341b4462006-04-11 13:57:45 +0200107 if (page_count(page) == 1 && !pipe->tmp_page)
Ingo Molnar923f4f22006-04-11 13:53:33 +0200108 pipe->tmp_page = page;
Ingo Molnar341b4462006-04-11 13:57:45 +0200109 else
110 page_cache_release(page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111}
112
Jens Axboef84d7512006-05-01 19:59:03 +0200113void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
114 struct pipe_buffer *buf)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700115{
116 return kmap(buf->page);
117}
118
Jens Axboef84d7512006-05-01 19:59:03 +0200119void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
120 struct pipe_buffer *buf)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700121{
122 kunmap(buf->page);
123}
124
Ingo Molnar923f4f22006-04-11 13:53:33 +0200125static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
Jens Axboe5abc97a2006-03-30 15:16:46 +0200126 struct pipe_buffer *buf)
127{
Jens Axboe46e678c2006-04-30 16:36:32 +0200128 struct page *page = buf->page;
129
130 if (page_count(page) == 1) {
Jens Axboe46e678c2006-04-30 16:36:32 +0200131 lock_page(page);
132 return 0;
133 }
134
135 return 1;
Jens Axboe5abc97a2006-03-30 15:16:46 +0200136}
137
Jens Axboef84d7512006-05-01 19:59:03 +0200138void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
Jens Axboe70524492006-04-11 15:51:17 +0200139{
140 page_cache_get(buf->page);
141}
142
/*
 * Generic ->pin(): nothing to do for this buffer type, so always report
 * success (0).  Both arguments are unused.
 */
int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	const int pinned_ok = 0;

	return pinned_ok;
}
147
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148static struct pipe_buf_operations anon_pipe_buf_ops = {
149 .can_merge = 1,
Jens Axboef84d7512006-05-01 19:59:03 +0200150 .map = generic_pipe_buf_map,
151 .unmap = generic_pipe_buf_unmap,
152 .pin = generic_pipe_buf_pin,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 .release = anon_pipe_buf_release,
Jens Axboe5abc97a2006-03-30 15:16:46 +0200154 .steal = anon_pipe_buf_steal,
Jens Axboef84d7512006-05-01 19:59:03 +0200155 .get = generic_pipe_buf_get,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
157
158static ssize_t
159pipe_readv(struct file *filp, const struct iovec *_iov,
160 unsigned long nr_segs, loff_t *ppos)
161{
162 struct inode *inode = filp->f_dentry->d_inode;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200163 struct pipe_inode_info *pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 int do_wakeup;
165 ssize_t ret;
166 struct iovec *iov = (struct iovec *)_iov;
167 size_t total_len;
168
169 total_len = iov_length(iov, nr_segs);
170 /* Null read succeeds. */
171 if (unlikely(total_len == 0))
172 return 0;
173
174 do_wakeup = 0;
175 ret = 0;
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200176 mutex_lock(&inode->i_mutex);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200177 pipe = inode->i_pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700178 for (;;) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200179 int bufs = pipe->nrbufs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180 if (bufs) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200181 int curbuf = pipe->curbuf;
182 struct pipe_buffer *buf = pipe->bufs + curbuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 struct pipe_buf_operations *ops = buf->ops;
184 void *addr;
185 size_t chars = buf->len;
186 int error;
187
188 if (chars > total_len)
189 chars = total_len;
190
Jens Axboef84d7512006-05-01 19:59:03 +0200191 error = ops->pin(pipe, buf);
192 if (error) {
Jens Axboe5274f052006-03-30 15:15:30 +0200193 if (!ret)
Jens Axboef84d7512006-05-01 19:59:03 +0200194 error = ret;
Jens Axboe5274f052006-03-30 15:15:30 +0200195 break;
196 }
Jens Axboef84d7512006-05-01 19:59:03 +0200197
198 addr = ops->map(pipe, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200200 ops->unmap(pipe, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201 if (unlikely(error)) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200202 if (!ret)
203 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204 break;
205 }
206 ret += chars;
207 buf->offset += chars;
208 buf->len -= chars;
209 if (!buf->len) {
210 buf->ops = NULL;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200211 ops->release(pipe, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200213 pipe->curbuf = curbuf;
214 pipe->nrbufs = --bufs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215 do_wakeup = 1;
216 }
217 total_len -= chars;
218 if (!total_len)
219 break; /* common path: read succeeded */
220 }
221 if (bufs) /* More to do? */
222 continue;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200223 if (!pipe->writers)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224 break;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200225 if (!pipe->waiting_writers) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 /* syscall merging: Usually we must not sleep
227 * if O_NONBLOCK is set, or if we got some data.
228 * But if a writer sleeps in kernel space, then
229 * we can wait for that data without violating POSIX.
230 */
231 if (ret)
232 break;
233 if (filp->f_flags & O_NONBLOCK) {
234 ret = -EAGAIN;
235 break;
236 }
237 }
238 if (signal_pending(current)) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200239 if (!ret)
240 ret = -ERESTARTSYS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 break;
242 }
243 if (do_wakeup) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200244 wake_up_interruptible_sync(&pipe->wait);
245 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700246 }
Ingo Molnar923f4f22006-04-11 13:53:33 +0200247 pipe_wait(pipe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 }
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200249 mutex_unlock(&inode->i_mutex);
Ingo Molnar341b4462006-04-11 13:57:45 +0200250
251 /* Signal writers asynchronously that there is more room. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 if (do_wakeup) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200253 wake_up_interruptible(&pipe->wait);
254 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 }
256 if (ret > 0)
257 file_accessed(filp);
258 return ret;
259}
260
261static ssize_t
262pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
263{
264 struct iovec iov = { .iov_base = buf, .iov_len = count };
Ingo Molnar341b4462006-04-11 13:57:45 +0200265
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 return pipe_readv(filp, &iov, 1, ppos);
267}
268
269static ssize_t
270pipe_writev(struct file *filp, const struct iovec *_iov,
271 unsigned long nr_segs, loff_t *ppos)
272{
273 struct inode *inode = filp->f_dentry->d_inode;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200274 struct pipe_inode_info *pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 ssize_t ret;
276 int do_wakeup;
277 struct iovec *iov = (struct iovec *)_iov;
278 size_t total_len;
279 ssize_t chars;
280
281 total_len = iov_length(iov, nr_segs);
282 /* Null write succeeds. */
283 if (unlikely(total_len == 0))
284 return 0;
285
286 do_wakeup = 0;
287 ret = 0;
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200288 mutex_lock(&inode->i_mutex);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200289 pipe = inode->i_pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290
Ingo Molnar923f4f22006-04-11 13:53:33 +0200291 if (!pipe->readers) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 send_sig(SIGPIPE, current, 0);
293 ret = -EPIPE;
294 goto out;
295 }
296
297 /* We try to merge small writes */
298 chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
Ingo Molnar923f4f22006-04-11 13:53:33 +0200299 if (pipe->nrbufs && chars != 0) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200300 int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
301 (PIPE_BUFFERS-1);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200302 struct pipe_buffer *buf = pipe->bufs + lastbuf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 struct pipe_buf_operations *ops = buf->ops;
304 int offset = buf->offset + buf->len;
Ingo Molnar341b4462006-04-11 13:57:45 +0200305
Linus Torvalds1da177e2005-04-16 15:20:36 -0700306 if (ops->can_merge && offset + chars <= PAGE_SIZE) {
Jens Axboe5274f052006-03-30 15:15:30 +0200307 void *addr;
308 int error;
309
Jens Axboef84d7512006-05-01 19:59:03 +0200310 error = ops->pin(pipe, buf);
311 if (error)
Jens Axboe5274f052006-03-30 15:15:30 +0200312 goto out;
Jens Axboef84d7512006-05-01 19:59:03 +0200313
314 addr = ops->map(pipe, buf);
Jens Axboe5274f052006-03-30 15:15:30 +0200315 error = pipe_iov_copy_from_user(offset + addr, iov,
316 chars);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200317 ops->unmap(pipe, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700318 ret = error;
319 do_wakeup = 1;
320 if (error)
321 goto out;
322 buf->len += chars;
323 total_len -= chars;
324 ret = chars;
325 if (!total_len)
326 goto out;
327 }
328 }
329
330 for (;;) {
331 int bufs;
Ingo Molnar341b4462006-04-11 13:57:45 +0200332
Ingo Molnar923f4f22006-04-11 13:53:33 +0200333 if (!pipe->readers) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334 send_sig(SIGPIPE, current, 0);
Ingo Molnar341b4462006-04-11 13:57:45 +0200335 if (!ret)
336 ret = -EPIPE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700337 break;
338 }
Ingo Molnar923f4f22006-04-11 13:53:33 +0200339 bufs = pipe->nrbufs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340 if (bufs < PIPE_BUFFERS) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200341 int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
342 struct pipe_buffer *buf = pipe->bufs + newbuf;
343 struct page *page = pipe->tmp_page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700344 int error;
345
346 if (!page) {
347 page = alloc_page(GFP_HIGHUSER);
348 if (unlikely(!page)) {
349 ret = ret ? : -ENOMEM;
350 break;
351 }
Ingo Molnar923f4f22006-04-11 13:53:33 +0200352 pipe->tmp_page = page;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 }
Ingo Molnar341b4462006-04-11 13:57:45 +0200354 /* Always wake up, even if the copy fails. Otherwise
Linus Torvalds1da177e2005-04-16 15:20:36 -0700355 * we lock up (O_NONBLOCK-)readers that sleep due to
356 * syscall merging.
357 * FIXME! Is this really true?
358 */
359 do_wakeup = 1;
360 chars = PAGE_SIZE;
361 if (chars > total_len)
362 chars = total_len;
363
364 error = pipe_iov_copy_from_user(kmap(page), iov, chars);
365 kunmap(page);
366 if (unlikely(error)) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200367 if (!ret)
368 ret = -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 break;
370 }
371 ret += chars;
372
373 /* Insert it into the buffer array */
374 buf->page = page;
375 buf->ops = &anon_pipe_buf_ops;
376 buf->offset = 0;
377 buf->len = chars;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200378 pipe->nrbufs = ++bufs;
379 pipe->tmp_page = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700380
381 total_len -= chars;
382 if (!total_len)
383 break;
384 }
385 if (bufs < PIPE_BUFFERS)
386 continue;
387 if (filp->f_flags & O_NONBLOCK) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200388 if (!ret)
389 ret = -EAGAIN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 break;
391 }
392 if (signal_pending(current)) {
Ingo Molnar341b4462006-04-11 13:57:45 +0200393 if (!ret)
394 ret = -ERESTARTSYS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395 break;
396 }
397 if (do_wakeup) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200398 wake_up_interruptible_sync(&pipe->wait);
399 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700400 do_wakeup = 0;
401 }
Ingo Molnar923f4f22006-04-11 13:53:33 +0200402 pipe->waiting_writers++;
403 pipe_wait(pipe);
404 pipe->waiting_writers--;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405 }
406out:
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200407 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408 if (do_wakeup) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200409 wake_up_interruptible(&pipe->wait);
410 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 }
412 if (ret > 0)
Christoph Hellwig870f4812006-01-09 20:52:01 -0800413 file_update_time(filp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return ret;
415}
416
417static ssize_t
418pipe_write(struct file *filp, const char __user *buf,
419 size_t count, loff_t *ppos)
420{
421 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
Ingo Molnar341b4462006-04-11 13:57:45 +0200422
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 return pipe_writev(filp, &iov, 1, ppos);
424}
425
426static ssize_t
427bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
428{
429 return -EBADF;
430}
431
432static ssize_t
Ingo Molnar341b4462006-04-11 13:57:45 +0200433bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
434 loff_t *ppos)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435{
436 return -EBADF;
437}
438
439static int
440pipe_ioctl(struct inode *pino, struct file *filp,
441 unsigned int cmd, unsigned long arg)
442{
443 struct inode *inode = filp->f_dentry->d_inode;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200444 struct pipe_inode_info *pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445 int count, buf, nrbufs;
446
447 switch (cmd) {
448 case FIONREAD:
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200449 mutex_lock(&inode->i_mutex);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200450 pipe = inode->i_pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 count = 0;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200452 buf = pipe->curbuf;
453 nrbufs = pipe->nrbufs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 while (--nrbufs >= 0) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200455 count += pipe->bufs[buf].len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 buf = (buf+1) & (PIPE_BUFFERS-1);
457 }
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200458 mutex_unlock(&inode->i_mutex);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200459
Linus Torvalds1da177e2005-04-16 15:20:36 -0700460 return put_user(count, (int __user *)arg);
461 default:
462 return -EINVAL;
463 }
464}
465
466/* No kernel lock held - fine */
467static unsigned int
468pipe_poll(struct file *filp, poll_table *wait)
469{
470 unsigned int mask;
471 struct inode *inode = filp->f_dentry->d_inode;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200472 struct pipe_inode_info *pipe = inode->i_pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 int nrbufs;
474
Ingo Molnar923f4f22006-04-11 13:53:33 +0200475 poll_wait(filp, &pipe->wait, wait);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
477 /* Reading only -- no need for acquiring the semaphore. */
Ingo Molnar923f4f22006-04-11 13:53:33 +0200478 nrbufs = pipe->nrbufs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 mask = 0;
480 if (filp->f_mode & FMODE_READ) {
481 mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200482 if (!pipe->writers && filp->f_version != pipe->w_counter)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700483 mask |= POLLHUP;
484 }
485
486 if (filp->f_mode & FMODE_WRITE) {
487 mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700488 /*
489 * Most Unices do not set POLLERR for FIFOs but on Linux they
490 * behave exactly like pipes for poll().
491 */
Ingo Molnar923f4f22006-04-11 13:53:33 +0200492 if (!pipe->readers)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493 mask |= POLLERR;
494 }
495
496 return mask;
497}
498
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499static int
500pipe_release(struct inode *inode, int decr, int decw)
501{
Ingo Molnar923f4f22006-04-11 13:53:33 +0200502 struct pipe_inode_info *pipe;
503
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200504 mutex_lock(&inode->i_mutex);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200505 pipe = inode->i_pipe;
506 pipe->readers -= decr;
507 pipe->writers -= decw;
Ingo Molnar341b4462006-04-11 13:57:45 +0200508
Ingo Molnar923f4f22006-04-11 13:53:33 +0200509 if (!pipe->readers && !pipe->writers) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700510 free_pipe_info(inode);
511 } else {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200512 wake_up_interruptible(&pipe->wait);
513 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
514 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700515 }
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200516 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700517
518 return 0;
519}
520
521static int
522pipe_read_fasync(int fd, struct file *filp, int on)
523{
524 struct inode *inode = filp->f_dentry->d_inode;
525 int retval;
526
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200527 mutex_lock(&inode->i_mutex);
528 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
529 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530
531 if (retval < 0)
532 return retval;
533
534 return 0;
535}
536
537
538static int
539pipe_write_fasync(int fd, struct file *filp, int on)
540{
541 struct inode *inode = filp->f_dentry->d_inode;
542 int retval;
543
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200544 mutex_lock(&inode->i_mutex);
545 retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
546 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700547
548 if (retval < 0)
549 return retval;
550
551 return 0;
552}
553
554
555static int
556pipe_rdwr_fasync(int fd, struct file *filp, int on)
557{
558 struct inode *inode = filp->f_dentry->d_inode;
Ingo Molnar341b4462006-04-11 13:57:45 +0200559 struct pipe_inode_info *pipe = inode->i_pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700560 int retval;
561
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200562 mutex_lock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563
Ingo Molnar341b4462006-04-11 13:57:45 +0200564 retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700565
566 if (retval >= 0)
Ingo Molnar341b4462006-04-11 13:57:45 +0200567 retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200569 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570
571 if (retval < 0)
572 return retval;
573
574 return 0;
575}
576
577
/*
 * ->release() for the read side: remove the file from the reader
 * fasync list, then drop one reader from the pipe.
 */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_read_fasync(-1, filp, 0);
	rc = pipe_release(inode, 1, 0);

	return rc;
}
584
/*
 * ->release() for the write side: remove the file from the writer
 * fasync list, then drop one writer from the pipe.
 */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	int rc;

	pipe_write_fasync(-1, filp, 0);
	rc = pipe_release(inode, 0, 1);

	return rc;
}
591
592static int
593pipe_rdwr_release(struct inode *inode, struct file *filp)
594{
595 int decr, decw;
596
597 pipe_rdwr_fasync(-1, filp, 0);
598 decr = (filp->f_mode & FMODE_READ) != 0;
599 decw = (filp->f_mode & FMODE_WRITE) != 0;
600 return pipe_release(inode, decr, decw);
601}
602
603static int
604pipe_read_open(struct inode *inode, struct file *filp)
605{
606 /* We could have perhaps used atomic_t, but this and friends
607 below are the only places. So it doesn't seem worthwhile. */
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200608 mutex_lock(&inode->i_mutex);
609 inode->i_pipe->readers++;
610 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700611
612 return 0;
613}
614
615static int
616pipe_write_open(struct inode *inode, struct file *filp)
617{
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200618 mutex_lock(&inode->i_mutex);
619 inode->i_pipe->writers++;
620 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
622 return 0;
623}
624
625static int
626pipe_rdwr_open(struct inode *inode, struct file *filp)
627{
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200628 mutex_lock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629 if (filp->f_mode & FMODE_READ)
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200630 inode->i_pipe->readers++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700631 if (filp->f_mode & FMODE_WRITE)
Ingo Molnar9aeedfc42006-04-11 13:53:10 +0200632 inode->i_pipe->writers++;
633 mutex_unlock(&inode->i_mutex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634
635 return 0;
636}
637
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800642const struct file_operations read_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700643 .llseek = no_llseek,
644 .read = pipe_read,
645 .readv = pipe_readv,
646 .write = bad_pipe_w,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700647 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 .ioctl = pipe_ioctl,
649 .open = pipe_read_open,
650 .release = pipe_read_release,
651 .fasync = pipe_read_fasync,
652};
653
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800654const struct file_operations write_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700655 .llseek = no_llseek,
656 .read = bad_pipe_r,
657 .write = pipe_write,
658 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700659 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700660 .ioctl = pipe_ioctl,
661 .open = pipe_write_open,
662 .release = pipe_write_release,
663 .fasync = pipe_write_fasync,
664};
665
Arjan van de Ven4b6f5d22006-03-28 01:56:42 -0800666const struct file_operations rdwr_fifo_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 .llseek = no_llseek,
668 .read = pipe_read,
669 .readv = pipe_readv,
670 .write = pipe_write,
671 .writev = pipe_writev,
Pekka Enberg5e5d7a22005-09-06 15:17:48 -0700672 .poll = pipe_poll,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 .ioctl = pipe_ioctl,
674 .open = pipe_rdwr_open,
675 .release = pipe_rdwr_release,
676 .fasync = pipe_rdwr_fasync,
677};
678
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800679static struct file_operations read_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 .llseek = no_llseek,
681 .read = pipe_read,
682 .readv = pipe_readv,
683 .write = bad_pipe_w,
684 .poll = pipe_poll,
685 .ioctl = pipe_ioctl,
686 .open = pipe_read_open,
687 .release = pipe_read_release,
688 .fasync = pipe_read_fasync,
689};
690
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800691static struct file_operations write_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 .llseek = no_llseek,
693 .read = bad_pipe_r,
694 .write = pipe_write,
695 .writev = pipe_writev,
696 .poll = pipe_poll,
697 .ioctl = pipe_ioctl,
698 .open = pipe_write_open,
699 .release = pipe_write_release,
700 .fasync = pipe_write_fasync,
701};
702
Linus Torvaldsa19cbd42006-03-08 14:03:09 -0800703static struct file_operations rdwr_pipe_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 .llseek = no_llseek,
705 .read = pipe_read,
706 .readv = pipe_readv,
707 .write = pipe_write,
708 .writev = pipe_writev,
709 .poll = pipe_poll,
710 .ioctl = pipe_ioctl,
711 .open = pipe_rdwr_open,
712 .release = pipe_rdwr_release,
713 .fasync = pipe_rdwr_fasync,
714};
715
Ingo Molnar3a326a22006-04-10 15:18:35 +0200716struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
717{
Ingo Molnar923f4f22006-04-11 13:53:33 +0200718 struct pipe_inode_info *pipe;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200719
Ingo Molnar923f4f22006-04-11 13:53:33 +0200720 pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
721 if (pipe) {
722 init_waitqueue_head(&pipe->wait);
723 pipe->r_counter = pipe->w_counter = 1;
724 pipe->inode = inode;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200725 }
726
Ingo Molnar923f4f22006-04-11 13:53:33 +0200727 return pipe;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200728}
729
Ingo Molnar923f4f22006-04-11 13:53:33 +0200730void __free_pipe_info(struct pipe_inode_info *pipe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731{
732 int i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 for (i = 0; i < PIPE_BUFFERS; i++) {
Ingo Molnar923f4f22006-04-11 13:53:33 +0200735 struct pipe_buffer *buf = pipe->bufs + i;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 if (buf->ops)
Ingo Molnar923f4f22006-04-11 13:53:33 +0200737 buf->ops->release(pipe, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700738 }
Ingo Molnar923f4f22006-04-11 13:53:33 +0200739 if (pipe->tmp_page)
740 __free_page(pipe->tmp_page);
741 kfree(pipe);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742}
743
Jens Axboeb92ce552006-04-11 13:52:07 +0200744void free_pipe_info(struct inode *inode)
745{
746 __free_pipe_info(inode->i_pipe);
747 inode->i_pipe = NULL;
748}
749
Eric Dumazetfa3536c2006-03-26 01:37:24 -0800750static struct vfsmount *pipe_mnt __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751static int pipefs_delete_dentry(struct dentry *dentry)
752{
753 return 1;
754}
Ingo Molnar341b4462006-04-11 13:57:45 +0200755
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756static struct dentry_operations pipefs_dentry_operations = {
757 .d_delete = pipefs_delete_dentry,
758};
759
760static struct inode * get_pipe_inode(void)
761{
762 struct inode *inode = new_inode(pipe_mnt->mnt_sb);
Ingo Molnar923f4f22006-04-11 13:53:33 +0200763 struct pipe_inode_info *pipe;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764
765 if (!inode)
766 goto fail_inode;
767
Ingo Molnar923f4f22006-04-11 13:53:33 +0200768 pipe = alloc_pipe_info(inode);
769 if (!pipe)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700770 goto fail_iput;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200771 inode->i_pipe = pipe;
Ingo Molnar3a326a22006-04-10 15:18:35 +0200772
Ingo Molnar923f4f22006-04-11 13:53:33 +0200773 pipe->readers = pipe->writers = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774 inode->i_fop = &rdwr_pipe_fops;
775
776 /*
777 * Mark the inode dirty from the very beginning,
778 * that way it will never be moved to the dirty
779 * list because "mark_inode_dirty()" will think
780 * that it already _is_ on the dirty list.
781 */
782 inode->i_state = I_DIRTY;
783 inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
784 inode->i_uid = current->fsuid;
785 inode->i_gid = current->fsgid;
786 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
787 inode->i_blksize = PAGE_SIZE;
Ingo Molnar923f4f22006-04-11 13:53:33 +0200788
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 return inode;
790
791fail_iput:
792 iput(inode);
Ingo Molnar341b4462006-04-11 13:57:45 +0200793
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794fail_inode:
795 return NULL;
796}
797
798int do_pipe(int *fd)
799{
800 struct qstr this;
801 char name[32];
802 struct dentry *dentry;
803 struct inode * inode;
804 struct file *f1, *f2;
805 int error;
Ingo Molnar341b4462006-04-11 13:57:45 +0200806 int i, j;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807
808 error = -ENFILE;
809 f1 = get_empty_filp();
810 if (!f1)
811 goto no_files;
812
813 f2 = get_empty_filp();
814 if (!f2)
815 goto close_f1;
816
817 inode = get_pipe_inode();
818 if (!inode)
819 goto close_f12;
820
821 error = get_unused_fd();
822 if (error < 0)
823 goto close_f12_inode;
824 i = error;
825
826 error = get_unused_fd();
827 if (error < 0)
828 goto close_f12_inode_i;
829 j = error;
830
831 error = -ENOMEM;
832 sprintf(name, "[%lu]", inode->i_ino);
833 this.name = name;
834 this.len = strlen(name);
835 this.hash = inode->i_ino; /* will go */
836 dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
837 if (!dentry)
838 goto close_f12_inode_i_j;
Ingo Molnar341b4462006-04-11 13:57:45 +0200839
Linus Torvalds1da177e2005-04-16 15:20:36 -0700840 dentry->d_op = &pipefs_dentry_operations;
841 d_add(dentry, inode);
842 f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
843 f1->f_dentry = f2->f_dentry = dget(dentry);
844 f1->f_mapping = f2->f_mapping = inode->i_mapping;
845
846 /* read file */
847 f1->f_pos = f2->f_pos = 0;
848 f1->f_flags = O_RDONLY;
849 f1->f_op = &read_pipe_fops;
850 f1->f_mode = FMODE_READ;
851 f1->f_version = 0;
852
853 /* write file */
854 f2->f_flags = O_WRONLY;
855 f2->f_op = &write_pipe_fops;
856 f2->f_mode = FMODE_WRITE;
857 f2->f_version = 0;
858
859 fd_install(i, f1);
860 fd_install(j, f2);
861 fd[0] = i;
862 fd[1] = j;
Ingo Molnar341b4462006-04-11 13:57:45 +0200863
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 return 0;
865
866close_f12_inode_i_j:
867 put_unused_fd(j);
868close_f12_inode_i:
869 put_unused_fd(i);
870close_f12_inode:
871 free_pipe_info(inode);
872 iput(inode);
873close_f12:
874 put_filp(f2);
875close_f1:
876 put_filp(f1);
877no_files:
878 return error;
879}
880
/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
887
Ingo Molnar341b4462006-04-11 13:57:45 +0200888static struct super_block *
889pipefs_get_sb(struct file_system_type *fs_type, int flags,
890 const char *dev_name, void *data)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700891{
892 return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
893}
894
895static struct file_system_type pipe_fs_type = {
896 .name = "pipefs",
897 .get_sb = pipefs_get_sb,
898 .kill_sb = kill_anon_super,
899};
900
901static int __init init_pipe_fs(void)
902{
903 int err = register_filesystem(&pipe_fs_type);
Ingo Molnar341b4462006-04-11 13:57:45 +0200904
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905 if (!err) {
906 pipe_mnt = kern_mount(&pipe_fs_type);
907 if (IS_ERR(pipe_mnt)) {
908 err = PTR_ERR(pipe_mnt);
909 unregister_filesystem(&pipe_fs_type);
910 }
911 }
912 return err;
913}
914
915static void __exit exit_pipe_fs(void)
916{
917 unregister_filesystem(&pipe_fs_type);
918 mntput(pipe_mnt);
919}
920
921fs_initcall(init_pipe_fs);
922module_exit(exit_pipe_fs);