blob: 6e2851464f8fa46081dcec3b161e22b7a7da6a55 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
David S. Miller68a5e3d2011-03-11 20:07:33 -0500112#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115#define IP_MAX_MTU 0xFFF0
116
117#define RT_GC_TIMEOUT (300*HZ)
118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700120static int ip_rt_redirect_number __read_mostly = 9;
121static int ip_rt_redirect_load __read_mostly = HZ / 50;
122static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
123static int ip_rt_error_cost __read_mostly = HZ;
124static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
126static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
127static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129/*
130 * Interface to generic destination cache.
131 */
132
133static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800134static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000135static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
137static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700138static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
139 struct sk_buff *skb, u32 mtu);
140static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
141 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700142static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
/* dst_ops .ifdown hook: intentionally a no-op for IPv4 — nothing is cached
 * per-device here that needs tearing down when the device goes away. */
Eric Dumazet72cdd1d2010-11-11 07:14:07 +0000144static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
145 int how)
146{
147}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
/* dst_ops .cow_metrics hook: IPv4 dsts are never expected to need a
 * copy-on-write of their metrics, so reaching this path is a bug —
 * hence the WARN_ON(1) and NULL return. */
David S. Miller62fa8a82011-01-26 20:51:05 -0800149static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
150{
David S. Miller31248732012-07-10 07:08:18 -0700151 WARN_ON(1);
152 return NULL;
David S. Miller62fa8a82011-01-26 20:51:05 -0800153}
154
David S. Millerf894cbf2012-07-02 21:52:24 -0700155static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
156 struct sk_buff *skb,
157 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700158
/* Operations vector wiring the generic dst cache to the IPv4-specific
 * handlers defined in this file (check/mtu/redirect/PMTU update etc.). */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159static struct dst_ops ipv4_dst_ops = {
160 .family = AF_INET,
Harvey Harrison09640e632009-02-01 00:45:17 -0800161 .protocol = cpu_to_be16(ETH_P_IP),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162 .check = ipv4_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800163 .default_advmss = ipv4_default_advmss,
Steffen Klassertebb762f2011-11-23 02:12:51 +0000164 .mtu = ipv4_mtu,
David S. Miller62fa8a82011-01-26 20:51:05 -0800165 .cow_metrics = ipv4_cow_metrics,
David S. Millercaacf052012-07-31 15:06:50 -0700166 .destroy = ipv4_dst_destroy,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 .ifdown = ipv4_dst_ifdown,
168 .negative_advice = ipv4_negative_advice,
169 .link_failure = ipv4_link_failure,
170 .update_pmtu = ip_rt_update_pmtu,
David S. Millere47a1852012-07-11 20:55:47 -0700171 .redirect = ip_do_redirect,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700172 .local_out = __ip_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700173 .neigh_lookup = ipv4_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174};
175
176#define ECN_OR_COST(class) TC_PRIO_##class
177
/* Map of the 4-bit IP TOS field (indexed by tos >> 1, see callers of
 * rt_tos2priority()) to packet-scheduler priority bands; exported for
 * use by other subsystems. */
Philippe De Muyter4839c522007-07-09 15:32:57 -0700178const __u8 ip_tos2prio[16] = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700179 TC_PRIO_BESTEFFORT,
Dan Siemon4a2b9c32011-03-15 13:56:07 +0000180 ECN_OR_COST(BESTEFFORT),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 TC_PRIO_BESTEFFORT,
182 ECN_OR_COST(BESTEFFORT),
183 TC_PRIO_BULK,
184 ECN_OR_COST(BULK),
185 TC_PRIO_BULK,
186 ECN_OR_COST(BULK),
187 TC_PRIO_INTERACTIVE,
188 ECN_OR_COST(INTERACTIVE),
189 TC_PRIO_INTERACTIVE,
190 ECN_OR_COST(INTERACTIVE),
191 TC_PRIO_INTERACTIVE_BULK,
192 ECN_OR_COST(INTERACTIVE_BULK),
193 TC_PRIO_INTERACTIVE_BULK,
194 ECN_OR_COST(INTERACTIVE_BULK)
195};
Amir Vadaid4a96862012-04-04 21:33:28 +0000196EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Eric Dumazet2f970d82006-01-17 02:54:36 -0800198static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Eric Dumazet27f39c73e2010-05-19 22:07:23 +0000199#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201#ifdef CONFIG_PROC_FS
/* /proc/net/rt_cache seq_file implementation.  The routing cache itself
 * was removed, so this iterator yields only the header row: start()
 * returns SEQ_START_TOKEN at pos 0 and next() always ends the walk. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
203{
Eric Dumazet29e75252008-01-31 17:05:09 -0800204 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700205 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800206 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207}
208
/* No entries to iterate: advance pos and terminate immediately. */
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700212 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
/* Nothing to release after a walk. */
215static void rt_cache_seq_stop(struct seq_file *seq, void *v)
216{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217}
218
/* Emit only the legacy column header so existing parsers keep working. */
219static int rt_cache_seq_show(struct seq_file *seq, void *v)
220{
221 if (v == SEQ_START_TOKEN)
222 seq_printf(seq, "%-127s\n",
223 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
224 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
225 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900226 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700227}
228
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700229static const struct seq_operations rt_cache_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700230 .start = rt_cache_seq_start,
231 .next = rt_cache_seq_next,
232 .stop = rt_cache_seq_stop,
233 .show = rt_cache_seq_show,
234};
235
236static int rt_cache_seq_open(struct inode *inode, struct file *file)
237{
David S. Miller89aef892012-07-17 11:00:09 -0700238 return seq_open(file, &rt_cache_seq_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700239}
240
Arjan van de Ven9a321442007-02-12 00:55:35 -0800241static const struct file_operations rt_cache_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242 .owner = THIS_MODULE,
243 .open = rt_cache_seq_open,
244 .read = seq_read,
245 .llseek = seq_lseek,
David S. Miller89aef892012-07-17 11:00:09 -0700246 .release = seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247};
248
249
/* /proc/net/stat/rt_cache: per-CPU routing statistics.
 * start() maps pos 0 to SEQ_START_TOKEN (the header) and pos N to the
 * (N-1)'th possible CPU's rt_cache_stat slot, skipping CPU ids that are
 * not possible. */
250static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
251{
252 int cpu;
253
254 if (*pos == 0)
255 return SEQ_START_TOKEN;
256
Rusty Russell0f23174a2008-12-29 12:23:42 +0000257 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 if (!cpu_possible(cpu))
259 continue;
260 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800261 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700262 }
263 return NULL;
264}
265
/* Advance to the next possible CPU's stat block, or NULL at the end. */
266static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
267{
268 int cpu;
269
Rusty Russell0f23174a2008-12-29 12:23:42 +0000270 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271 if (!cpu_possible(cpu))
272 continue;
273 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800274 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275 }
276 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278}
279
/* Nothing held across the walk, so stop() has no cleanup to do. */
280static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
281{
282
283}
284
/* One line per CPU; the first "entries" column is the global dst count,
 * repeated on every row, followed by that CPU's counters in hex. */
285static int rt_cpu_seq_show(struct seq_file *seq, void *v)
286{
287 struct rt_cache_stat *st = v;
288
289 if (v == SEQ_START_TOKEN) {
Olaf Rempel5bec0032005-04-28 12:16:08 -0700290 seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700291 return 0;
292 }
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900293
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
295 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
Eric Dumazetfc66f952010-10-08 06:37:34 +0000296 dst_entries_get_slow(&ipv4_dst_ops),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700297 st->in_hit,
298 st->in_slow_tot,
299 st->in_slow_mc,
300 st->in_no_route,
301 st->in_brd,
302 st->in_martian_dst,
303 st->in_martian_src,
304
305 st->out_hit,
306 st->out_slow_tot,
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900307 st->out_slow_mc,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308
309 st->gc_total,
310 st->gc_ignored,
311 st->gc_goal_miss,
312 st->gc_dst_overflow,
313 st->in_hlist_search,
314 st->out_hlist_search
315 );
316 return 0;
317}
318
Stephen Hemmingerf6908082007-03-12 14:34:29 -0700319static const struct seq_operations rt_cpu_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320 .start = rt_cpu_seq_start,
321 .next = rt_cpu_seq_next,
322 .stop = rt_cpu_seq_stop,
323 .show = rt_cpu_seq_show,
324};
325
326
327static int rt_cpu_seq_open(struct inode *inode, struct file *file)
328{
329 return seq_open(file, &rt_cpu_seq_ops);
330}
331
Arjan van de Ven9a321442007-02-12 00:55:35 -0800332static const struct file_operations rt_cpu_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700333 .owner = THIS_MODULE,
334 .open = rt_cpu_seq_open,
335 .read = seq_read,
336 .llseek = seq_lseek,
337 .release = seq_release,
338};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
/* /proc/net/rt_acct (CONFIG_IP_ROUTE_CLASSID only): sum the per-CPU
 * ip_rt_acct counters into one 256-entry table (one slot per realm)
 * and write it out as raw binary via seq_write().
 * Uses a temporary kcalloc'd buffer; returns -ENOMEM if that fails. */
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 struct ip_rt_acct *dst, *src;
344 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800345
Alexey Dobriyana661c412009-11-25 15:40:35 -0800346 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
347 if (!dst)
348 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
/* Accumulate every possible CPU's counters into the scratch table. */
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 for_each_possible_cpu(i) {
351 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
352 for (j = 0; j < 256; j++) {
353 dst[j].o_bytes += src[j].o_bytes;
354 dst[j].o_packets += src[j].o_packets;
355 dst[j].i_bytes += src[j].i_bytes;
356 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800357 }
358 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800359
360 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
361 kfree(dst);
362 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800363}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
365static int rt_acct_proc_open(struct inode *inode, struct file *file)
366{
367 return single_open(file, rt_acct_proc_show, NULL);
368}
369
370static const struct file_operations rt_acct_proc_fops = {
371 .owner = THIS_MODULE,
372 .open = rt_acct_proc_open,
373 .read = seq_read,
374 .llseek = seq_lseek,
375 .release = single_release,
376};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800377#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378
/* Per-netns /proc setup: registers /proc/net/rt_cache,
 * /proc/net/stat/rt_cache and (with CONFIG_IP_ROUTE_CLASSID)
 * /proc/net/rt_acct.  Unwinds already-created entries on failure and
 * returns -ENOMEM. */
Denis V. Lunev73b38712008-02-28 20:51:18 -0800379static int __net_init ip_rt_do_proc_init(struct net *net)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800380{
381 struct proc_dir_entry *pde;
382
Gao fengd4beaa62013-02-18 01:34:54 +0000383 pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
384 &rt_cache_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800385 if (!pde)
386 goto err1;
387
Wang Chen77020722008-02-28 14:14:25 -0800388 pde = proc_create("rt_cache", S_IRUGO,
389 net->proc_net_stat, &rt_cpu_seq_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800390 if (!pde)
391 goto err2;
392
Patrick McHardyc7066f72011-01-14 13:36:42 +0100393#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800394 pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800395 if (!pde)
396 goto err3;
397#endif
398 return 0;
399
/* goto-based unwind: remove entries in reverse order of creation. */
Patrick McHardyc7066f72011-01-14 13:36:42 +0100400#ifdef CONFIG_IP_ROUTE_CLASSID
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800401err3:
402 remove_proc_entry("rt_cache", net->proc_net_stat);
403#endif
404err2:
405 remove_proc_entry("rt_cache", net->proc_net);
406err1:
407 return -ENOMEM;
408}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800409
/* Per-netns /proc teardown, mirroring ip_rt_do_proc_init(). */
410static void __net_exit ip_rt_do_proc_exit(struct net *net)
411{
412 remove_proc_entry("rt_cache", net->proc_net_stat);
413 remove_proc_entry("rt_cache", net->proc_net);
Patrick McHardyc7066f72011-01-14 13:36:42 +0100414#ifdef CONFIG_IP_ROUTE_CLASSID
Denis V. Lunev73b38712008-02-28 20:51:18 -0800415 remove_proc_entry("rt_acct", net->proc_net);
Alexey Dobriyan0a931ac2010-01-17 03:32:50 +0000416#endif
Denis V. Lunev73b38712008-02-28 20:51:18 -0800417}
418
419static struct pernet_operations ip_rt_proc_ops __net_initdata = {
420 .init = ip_rt_do_proc_init,
421 .exit = ip_rt_do_proc_exit,
422};
423
424static int __init ip_rt_proc_init(void)
425{
426 return register_pernet_subsys(&ip_rt_proc_ops);
427}
428
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800429#else
/* !CONFIG_PROC_FS stub: nothing to register, always succeeds. */
Denis V. Lunev73b38712008-02-28 20:51:18 -0800430static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800431{
432 return 0;
433}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900435
/* A route is stale when its generation id no longer matches the
 * netns-wide generation counter (bumped by rt_cache_flush()). */
Eric Dumazet4331deb2012-07-25 05:11:23 +0000436static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700437{
Changli Gaod8d1f302010-06-10 23:31:35 -0700438 return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700439}
440
/* Invalidate every cached route in @net at once by bumping the netns
 * route generation id; stale entries are detected lazily via
 * rt_is_expired(). */
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +0000441void rt_cache_flush(struct net *net)
Eric Dumazet29e75252008-01-31 17:05:09 -0800442{
Nicolas Dichtelb42664f2012-09-10 22:09:44 +0000443 rt_genid_bump(net);
Eric Dumazet98376382010-03-08 03:20:00 +0000444}
445
/* dst_ops .neigh_lookup hook: resolve the ARP neighbour for a route.
 * Key selection order: the route's gateway if set, else the packet's
 * destination address (when an skb is supplied), else @daddr as given.
 * Falls back to neigh_create() when no cached entry exists. */
David S. Millerf894cbf2012-07-02 21:52:24 -0700446static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
447 struct sk_buff *skb,
448 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000449{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700450 struct net_device *dev = dst->dev;
451 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500452 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000453 struct neighbour *n;
454
David S. Miller39232972012-01-26 15:22:32 -0500455 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700456 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500457 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700458 else if (skb)
459 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700460
David S. Miller80703d22012-02-15 17:48:35 -0500461 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700462 if (n)
463 return n;
David Miller32092ec2011-07-25 00:01:41 +0000464 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700465}
466
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467/*
468 * Peer allocation may fail only in serious out-of-memory conditions. However
469 * we still can generate some output.
470 * Random ID selection looks a bit dangerous because we have no chances to
471 * select ID being unique in a reasonable period of time.
472 * But broken packet identifier may be better than no packet at all.
473 */
474static void ip_select_fb_ident(struct iphdr *iph)
475{
476 static DEFINE_SPINLOCK(ip_fb_id_lock);
477 static u32 ip_fallback_id;
478 u32 salt;
479
480 spin_lock_bh(&ip_fb_id_lock);
Al Viroe4485152006-09-26 22:15:01 -0700481 salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700482 iph->id = htons(salt & 0xFFFF);
483 ip_fallback_id = salt;
484 spin_unlock_bh(&ip_fb_id_lock);
485}
486
487void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
488{
David S. Miller1d861aa2012-07-10 03:58:16 -0700489 struct net *net = dev_net(dst->dev);
490 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700491
David S. Miller1d861aa2012-07-10 03:58:16 -0700492 peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
493 if (peer) {
494 iph->id = htons(inet_getid(peer, more));
495 inet_putpeer(peer);
496 return;
497 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498
499 ip_select_fb_ident(iph);
500}
Eric Dumazet4bc2f182010-07-09 21:22:10 +0000501EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
/* Core flow-key builder.  When a socket is supplied, its bound device,
 * mark, TOS and protocol override the packet-derived values, so the key
 * matches the one used when the socket's route was originally looked up. */
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200503static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
David S. Miller4895c772012-07-17 04:19:00 -0700504 const struct iphdr *iph,
505 int oif, u8 tos,
506 u8 prot, u32 mark, int flow_flags)
507{
508 if (sk) {
509 const struct inet_sock *inet = inet_sk(sk);
510
511 oif = sk->sk_bound_dev_if;
512 mark = sk->sk_mark;
513 tos = RT_CONN_FLAGS(sk);
514 prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
515 }
516 flowi4_init_output(fl4, oif, mark, tos,
517 RT_SCOPE_UNIVERSE, prot,
518 flow_flags,
519 iph->daddr, iph->saddr, 0, 0);
520}
521
/* Build a flow key from a received skb (addresses, TOS, protocol, mark,
 * incoming device index), optionally refined by @sk via __build_flow_key. */
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200522static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
523 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700524{
525 const struct iphdr *iph = ip_hdr(skb);
526 int oif = skb->dev->ifindex;
527 u8 tos = RT_TOS(iph->tos);
528 u8 prot = iph->protocol;
529 u32 mark = skb->mark;
530
531 __build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
532}
533
/* Build a flow key purely from connected-socket state; honours an
 * IP source-route option (SRR) by using the first-hop address as the
 * flow destination.  inet_opt is read under RCU. */
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200534static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700535{
536 const struct inet_sock *inet = inet_sk(sk);
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200537 const struct ip_options_rcu *inet_opt;
David S. Miller4895c772012-07-17 04:19:00 -0700538 __be32 daddr = inet->inet_daddr;
539
540 rcu_read_lock();
541 inet_opt = rcu_dereference(inet->inet_opt);
542 if (inet_opt && inet_opt->opt.srr)
543 daddr = inet_opt->opt.faddr;
544 flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
545 RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
546 inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
547 inet_sk_flowi_flags(sk),
548 daddr, inet->inet_saddr, 0, 0);
549 rcu_read_unlock();
550}
551
/* Dispatch: prefer the skb (packet context) when present, otherwise
 * derive the key from the socket alone. */
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200552static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
553 const struct sk_buff *skb)
David S. Miller4895c772012-07-17 04:19:00 -0700554{
555 if (skb)
556 build_skb_flow_key(fl4, skb, sk);
557 else
558 build_sk_flow_key(fl4, sk);
559}
560
/* Defer freeing of a cached route until after an RCU grace period, so
 * concurrent lockless readers can finish with it safely. */
David S. Millerc5038a82012-07-31 15:02:02 -0700561static inline void rt_free(struct rtable *rt)
562{
563 call_rcu(&rt->dst.rcu_head, dst_rcu_free);
564}
565
/* Serializes all writers of the per-nexthop fnhe exception tables. */
566static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700567
/* Pick the least-recently-stamped exception in a full hash bucket for
 * reuse, dropping (via RCU) any route cached on it.  Caller holds
 * fnhe_lock and repopulates the returned entry's fields. */
Julian Anastasovaee06da2012-07-18 10:15:35 +0000568static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
David S. Miller4895c772012-07-17 04:19:00 -0700569{
570 struct fib_nh_exception *fnhe, *oldest;
David S. Millerc5038a82012-07-31 15:02:02 -0700571 struct rtable *orig;
David S. Miller4895c772012-07-17 04:19:00 -0700572
573 oldest = rcu_dereference(hash->chain);
574 for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
575 fnhe = rcu_dereference(fnhe->fnhe_next)) {
576 if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
577 oldest = fnhe;
578 }
David S. Millerc5038a82012-07-31 15:02:02 -0700579 orig = rcu_dereference(oldest->fnhe_rth);
580 if (orig) {
581 RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
582 rt_free(orig);
583 }
David S. Miller4895c772012-07-17 04:19:00 -0700584 return oldest;
585}
586
/* Hash a destination address into an fnhe bucket index by folding the
 * high bits down with two shifted XORs, then masking to table size. */
David S. Millerd3a25c92012-07-17 13:23:08 -0700587static inline u32 fnhe_hashfun(__be32 daddr)
588{
589 u32 hval;
590
591 hval = (__force u32) daddr;
592 hval ^= (hval >> 11) ^ (hval >> 22);
593
594 return hval & (FNHE_HASH_SIZE - 1);
595}
596
/* Record a per-destination routing exception (learned redirect gateway
 * and/or PMTU with its expiry) on nexthop @nh for @daddr.
 * Allocates the hash table lazily; updates an existing entry in place,
 * otherwise creates one, recycling the bucket's oldest entry once the
 * chain exceeds FNHE_RECLAIM_DEPTH.  Allocation failures (GFP_ATOMIC)
 * are silently ignored — the exception is simply not recorded.
 * All mutation happens under fnhe_lock. */
Julian Anastasovaee06da2012-07-18 10:15:35 +0000597static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
598 u32 pmtu, unsigned long expires)
David S. Miller4895c772012-07-17 04:19:00 -0700599{
Julian Anastasovaee06da2012-07-18 10:15:35 +0000600 struct fnhe_hash_bucket *hash;
David S. Miller4895c772012-07-17 04:19:00 -0700601 struct fib_nh_exception *fnhe;
602 int depth;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000603 u32 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -0700604
David S. Millerc5038a82012-07-31 15:02:02 -0700605 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000606
/* Lazily allocate the per-nexthop exception hash on first use. */
607 hash = nh->nh_exceptions;
David S. Miller4895c772012-07-17 04:19:00 -0700608 if (!hash) {
Julian Anastasovaee06da2012-07-18 10:15:35 +0000609 hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
David S. Miller4895c772012-07-17 04:19:00 -0700610 if (!hash)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000611 goto out_unlock;
612 nh->nh_exceptions = hash;
David S. Miller4895c772012-07-17 04:19:00 -0700613 }
614
David S. Miller4895c772012-07-17 04:19:00 -0700615 hash += hval;
616
/* Look for an existing exception for this destination, counting
 * chain depth as we go. */
617 depth = 0;
618 for (fnhe = rcu_dereference(hash->chain); fnhe;
619 fnhe = rcu_dereference(fnhe->fnhe_next)) {
620 if (fnhe->fnhe_daddr == daddr)
Julian Anastasovaee06da2012-07-18 10:15:35 +0000621 break;
David S. Miller4895c772012-07-17 04:19:00 -0700622 depth++;
623 }
624
/* Update in place; zero gw/pmtu arguments mean "leave unchanged". */
Julian Anastasovaee06da2012-07-18 10:15:35 +0000625 if (fnhe) {
626 if (gw)
627 fnhe->fnhe_gw = gw;
628 if (pmtu) {
629 fnhe->fnhe_pmtu = pmtu;
630 fnhe->fnhe_expires = expires;
631 }
632 } else {
633 if (depth > FNHE_RECLAIM_DEPTH)
634 fnhe = fnhe_oldest(hash);
635 else {
636 fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
637 if (!fnhe)
638 goto out_unlock;
639
640 fnhe->fnhe_next = hash->chain;
641 rcu_assign_pointer(hash->chain, fnhe);
642 }
643 fnhe->fnhe_daddr = daddr;
644 fnhe->fnhe_gw = gw;
645 fnhe->fnhe_pmtu = pmtu;
646 fnhe->fnhe_expires = expires;
David S. Miller4895c772012-07-17 04:19:00 -0700647 }
David S. Miller4895c772012-07-17 04:19:00 -0700648
David S. Miller4895c772012-07-17 04:19:00 -0700649 fnhe->fnhe_stamp = jiffies;
Julian Anastasovaee06da2012-07-18 10:15:35 +0000650
651out_unlock:
David S. Millerc5038a82012-07-31 15:02:02 -0700652 spin_unlock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +0000653 return;
David S. Miller4895c772012-07-17 04:19:00 -0700654}
655
/* Process an ICMP redirect for route @rt.  Validates the message
 * (redirect code, current gateway match, sanity of the advised gateway)
 * and, if acceptable, records the new gateway as an fnhe exception on
 * the matching FIB nexthop.  With @kill_route, the current dst is also
 * marked DST_OBSOLETE_KILL so lookups stop returning it.  Rejected
 * redirects are optionally logged (rate-limited) as martians. */
David S. Millerceb33202012-07-17 11:31:28 -0700656static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
657 bool kill_route)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658{
David S. Millere47a1852012-07-11 20:55:47 -0700659 __be32 new_gw = icmp_hdr(skb)->un.gateway;
David S. Miller94206122012-07-11 20:38:08 -0700660 __be32 old_gw = ip_hdr(skb)->saddr;
David S. Millere47a1852012-07-11 20:55:47 -0700661 struct net_device *dev = skb->dev;
David S. Millere47a1852012-07-11 20:55:47 -0700662 struct in_device *in_dev;
David S. Miller4895c772012-07-17 04:19:00 -0700663 struct fib_result res;
David S. Millere47a1852012-07-11 20:55:47 -0700664 struct neighbour *n;
Denis V. Lunev317805b2008-02-28 20:50:06 -0800665 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666
/* Only the four defined redirect codes are acted upon. */
David S. Miller94206122012-07-11 20:38:08 -0700667 switch (icmp_hdr(skb)->code & 7) {
668 case ICMP_REDIR_NET:
669 case ICMP_REDIR_NETTOS:
670 case ICMP_REDIR_HOST:
671 case ICMP_REDIR_HOSTTOS:
672 break;
673
674 default:
675 return;
676 }
677
/* The redirect must come from the gateway this route currently uses. */
David S. Millere47a1852012-07-11 20:55:47 -0700678 if (rt->rt_gateway != old_gw)
679 return;
680
681 in_dev = __in_dev_get_rcu(dev);
682 if (!in_dev)
683 return;
684
/* Reject self-redirects, disabled-redirect interfaces, and obviously
 * bogus advised gateways (multicast/limited-broadcast/zeronet). */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900685 net = dev_net(dev);
Joe Perches9d4fb272009-11-23 10:41:23 -0800686 if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
687 ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
688 ipv4_is_zeronet(new_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 goto reject_redirect;
690
/* On ordinary links the new gateway must be on-link (and pass the
 * secure-redirect default-gateway check); on shared media it only
 * needs to be a unicast address. */
691 if (!IN_DEV_SHARED_MEDIA(in_dev)) {
692 if (!inet_addr_onlink(in_dev, new_gw, old_gw))
693 goto reject_redirect;
694 if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
695 goto reject_redirect;
696 } else {
Denis V. Lunev317805b2008-02-28 20:50:06 -0800697 if (inet_addr_type(net, new_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700698 goto reject_redirect;
699 }
700
/* Only honour the redirect once the new gateway's neighbour entry is
 * valid; otherwise just kick off resolution and wait for a retransmit. */
David S. Miller4895c772012-07-17 04:19:00 -0700701 n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
David S. Millere47a1852012-07-11 20:55:47 -0700702 if (n) {
703 if (!(n->nud_state & NUD_VALID)) {
704 neigh_event_send(n, NULL);
705 } else {
David S. Miller4895c772012-07-17 04:19:00 -0700706 if (fib_lookup(net, fl4, &res) == 0) {
707 struct fib_nh *nh = &FIB_RES_NH(res);
David S. Miller4895c772012-07-17 04:19:00 -0700708
Julian Anastasovaee06da2012-07-18 10:15:35 +0000709 update_or_create_fnhe(nh, fl4->daddr, new_gw,
710 0, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700711 }
David S. Millerceb33202012-07-17 11:31:28 -0700712 if (kill_route)
713 rt->dst.obsolete = DST_OBSOLETE_KILL;
David S. Millere47a1852012-07-11 20:55:47 -0700714 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
715 }
716 neigh_release(n);
717 }
718 return;
719
720reject_redirect:
721#ifdef CONFIG_IP_ROUTE_VERBOSE
David S. Miller99ee0382012-07-12 07:40:05 -0700722 if (IN_DEV_LOG_MARTIANS(in_dev)) {
723 const struct iphdr *iph = (const struct iphdr *) skb->data;
724 __be32 daddr = iph->daddr;
725 __be32 saddr = iph->saddr;
726
David S. Millere47a1852012-07-11 20:55:47 -0700727 net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
728 " Advised path = %pI4 -> %pI4\n",
729 &old_gw, dev->name, &new_gw,
730 &saddr, &daddr);
David S. Miller99ee0382012-07-12 07:40:05 -0700731 }
David S. Millere47a1852012-07-11 20:55:47 -0700732#endif
733 ;
734}
735
David S. Miller4895c772012-07-17 04:19:00 -0700736static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
737{
738 struct rtable *rt;
739 struct flowi4 fl4;
740
741 rt = (struct rtable *) dst;
742
743 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700744 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700745}
746
Linus Torvalds1da177e2005-04-16 15:20:36 -0700747static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
748{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800749 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 struct dst_entry *ret = dst;
751
752 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000753 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 ip_rt_put(rt);
755 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700756 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
757 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700758 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759 ret = NULL;
760 }
761 }
762 return ret;
763}
764
765/*
766 * Algorithm:
767 * 1. The first ip_rt_redirect_number redirects are sent
768 * with exponential backoff, then we stop sending them at all,
769 * assuming that the host ignores our redirects.
770 * 2. If we did not see packets requiring redirects
771 * during ip_rt_redirect_silence, we assume that the host
772 * forgot redirected route and start to send redirects again.
773 *
774 * This algorithm is much cheaper and more intelligent than dumb load limiting
775 * in icmp.c.
776 *
777 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
778 * and "frag. need" (breaks PMTU discovery) in icmp.c.
779 */
780
/* Send an ICMP host redirect to the sender of @skb, subject to the
 * per-peer exponential-backoff rate limiting described in the algorithm
 * comment above.  Silently does nothing if the device has TX redirects
 * disabled.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Snapshot device settings under RCU; the lock is dropped before
	 * the potentially heavier icmp_send()/peer work below.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	/* create=1: allocate a peer entry if none exists yet. */
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer state available: send one redirect unthrottled. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log once, exactly when the budget runs out. */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	/* Drop the reference taken by inet_getpeer_v4(). */
	inet_putpeer(peer);
}
843
844static int ip_error(struct sk_buff *skb)
845{
David S. Miller251da412012-06-26 16:27:09 -0700846 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000847 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800848 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700850 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800851 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 int code;
853
David S. Miller251da412012-06-26 16:27:09 -0700854 net = dev_net(rt->dst.dev);
855 if (!IN_DEV_FORWARD(in_dev)) {
856 switch (rt->dst.error) {
857 case EHOSTUNREACH:
858 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
859 break;
860
861 case ENETUNREACH:
862 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
863 break;
864 }
865 goto out;
866 }
867
Changli Gaod8d1f302010-06-10 23:31:35 -0700868 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000869 case EINVAL:
870 default:
871 goto out;
872 case EHOSTUNREACH:
873 code = ICMP_HOST_UNREACH;
874 break;
875 case ENETUNREACH:
876 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700877 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000878 break;
879 case EACCES:
880 code = ICMP_PKT_FILTERED;
881 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700882 }
883
David S. Miller1d861aa2012-07-10 03:58:16 -0700884 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800885
886 send = true;
887 if (peer) {
888 now = jiffies;
889 peer->rate_tokens += now - peer->rate_last;
890 if (peer->rate_tokens > ip_rt_error_burst)
891 peer->rate_tokens = ip_rt_error_burst;
892 peer->rate_last = now;
893 if (peer->rate_tokens >= ip_rt_error_cost)
894 peer->rate_tokens -= ip_rt_error_cost;
895 else
896 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700897 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700898 }
David S. Miller92d86822011-02-04 15:55:25 -0800899 if (send)
900 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700901
902out: kfree_skb(skb);
903 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900904}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700905
/* Record a learned path MTU for the flow @fl4 on route @rt.
 *
 * The value is stored both on the dst (with an expiry) and, via
 * update_or_create_fnhe(), in the FIB nexthop exception table so that
 * newly instantiated routes for this destination see it too.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	/* An administratively locked MTU metric must not be overridden. */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* Reports larger than the device MTU cannot be genuine PMTU. */
	if (dst->dev->mtu < mtu)
		return;

	/* Clamp to the configured floor (ip_rt_min_pmtu sysctl). */
	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (!rt->rt_pmtu) {
		/* No PMTU cached on this dst yet: mark it for replacement so
		 * the next lookup re-binds and picks the fnhe value up.
		 * NOTE(review): relies on ipv4_dst_check() honoring
		 * DST_OBSOLETE_KILL — confirm against that hook.
		 */
		dst->obsolete = DST_OBSOLETE_KILL;
	} else {
		rt->rt_pmtu = mtu;
		/* max() avoids an expiry of 0, which would mean "never". */
		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
	}

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		/* gw=0: this exception carries only a PMTU, not a redirect. */
		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
936
David S. Miller4895c772012-07-17 04:19:00 -0700937static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
938 struct sk_buff *skb, u32 mtu)
939{
940 struct rtable *rt = (struct rtable *) dst;
941 struct flowi4 fl4;
942
943 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000944 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700945}
946
David S. Miller36393392012-06-14 22:21:46 -0700947void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
948 int oif, u32 mark, u8 protocol, int flow_flags)
949{
David S. Miller4895c772012-07-17 04:19:00 -0700950 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700951 struct flowi4 fl4;
952 struct rtable *rt;
953
David S. Miller4895c772012-07-17 04:19:00 -0700954 __build_flow_key(&fl4, NULL, iph, oif,
955 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700956 rt = __ip_route_output_key(net, &fl4);
957 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700958 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700959 ip_rt_put(rt);
960 }
961}
962EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
963
Steffen Klassert9cb3a502013-01-21 01:59:11 +0000964static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -0700965{
David S. Miller4895c772012-07-17 04:19:00 -0700966 const struct iphdr *iph = (const struct iphdr *) skb->data;
967 struct flowi4 fl4;
968 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700969
David S. Miller4895c772012-07-17 04:19:00 -0700970 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
971 rt = __ip_route_output_key(sock_net(sk), &fl4);
972 if (!IS_ERR(rt)) {
973 __ip_rt_update_pmtu(rt, &fl4, mtu);
974 ip_rt_put(rt);
975 }
David S. Miller36393392012-06-14 22:21:46 -0700976}
Steffen Klassert9cb3a502013-01-21 01:59:11 +0000977
/* PMTU update with socket context: try to update the socket's cached
 * route in place, replacing it with a fresh lookup if it has been
 * invalidated.  Falls back to the routeless helper when the socket is
 * owned by user context or has no cached dst.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	bool new = false;		/* true while we hold our own ref on rt */

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	/* Cached dst already invalid: get a fresh route first. */
	if (!__sk_dst_check(sk, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Apply the update to the innermost (path) dst, which may differ
	 * from rt->dst when e.g. xfrm wraps the route.
	 */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	/* The update itself may have killed the dst; re-check and
	 * re-resolve, releasing any route we ourselves created above.
	 */
	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Hand our reference on the fresh route over to the socket. */
	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001025
David S. Millerb42597e2012-07-11 21:25:45 -07001026void ipv4_redirect(struct sk_buff *skb, struct net *net,
1027 int oif, u32 mark, u8 protocol, int flow_flags)
1028{
David S. Miller4895c772012-07-17 04:19:00 -07001029 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001030 struct flowi4 fl4;
1031 struct rtable *rt;
1032
David S. Miller4895c772012-07-17 04:19:00 -07001033 __build_flow_key(&fl4, NULL, iph, oif,
1034 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001035 rt = __ip_route_output_key(net, &fl4);
1036 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001037 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001038 ip_rt_put(rt);
1039 }
1040}
1041EXPORT_SYMBOL_GPL(ipv4_redirect);
1042
1043void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1044{
David S. Miller4895c772012-07-17 04:19:00 -07001045 const struct iphdr *iph = (const struct iphdr *) skb->data;
1046 struct flowi4 fl4;
1047 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001048
David S. Miller4895c772012-07-17 04:19:00 -07001049 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1050 rt = __ip_route_output_key(sock_net(sk), &fl4);
1051 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001052 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001053 ip_rt_put(rt);
1054 }
David S. Millerb42597e2012-07-11 21:25:45 -07001055}
1056EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1057
David S. Millerefbc368d2011-12-01 13:38:59 -05001058static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1059{
1060 struct rtable *rt = (struct rtable *) dst;
1061
David S. Millerceb33202012-07-17 11:31:28 -07001062 /* All IPV4 dsts are created with ->obsolete set to the value
1063 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1064 * into this function always.
1065 *
1066 * When a PMTU/redirect information update invalidates a
1067 * route, this is indicated by setting obsolete to
1068 * DST_OBSOLETE_KILL.
1069 */
1070 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc368d2011-12-01 13:38:59 -05001071 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001072 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073}
1074
/* dst_ops->link_failure hook: tell the sender the host is unreachable
 * and expire the route immediately so it is not reused.
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}
1085
1086static int ip_rt_bug(struct sk_buff *skb)
1087{
Joe Perches91df42b2012-05-15 14:11:54 +00001088 pr_debug("%s: %pI4 -> %pI4, %s\n",
1089 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1090 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001092 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 return 0;
1094}
1095
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by the IP RR, TS and SRR options,
   so it is out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
1104
/* Copy into @addr the source address this host would use on route @rt
 * (for filling in IP RR/TS/SRR options).  For output routes that is the
 * packet's own source; for input routes a reverse FIB lookup decides,
 * falling back to interface address selection.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Reverse lookup keyed on the packet's own addresses. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No route back: pick an address on the device
			 * suitable for reaching the nexthop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	/* memcpy: "addr" may be unaligned inside IP options. */
	memcpy(addr, &src, 4);
}
1137
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge @tag into the route's tclassid: each 16-bit half is taken from
 * @tag only if that half is still unset on the route.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 id = rt->dst.tclassid;

	if (!(id & 0xFFFF))
		id |= tag & 0xFFFF;
	if (!(id & 0xFFFF0000))
		id |= tag & 0xFFFF0000;

	rt->dst.tclassid = id;
}
#endif
1147
David S. Miller0dbaee32010-12-13 12:52:14 -08001148static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1149{
1150 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1151
1152 if (advmss == 0) {
1153 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1154 ip_rt_min_advmss);
1155 if (advmss > 65535 - 40)
1156 advmss = 65535 - 40;
1157 }
1158 return advmss;
1159}
1160
/* dst_ops->mtu hook: effective MTU for this route.
 * Precedence: unexpired learned PMTU, then the RTAX_MTU metric, then
 * the device MTU (clamped to 576 for locked-MTU gatewayed routes per
 * the historical default), capped at IP_MAX_MTU.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* A cached PMTU only counts while its expiry has not passed. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}
1184
David S. Millerf2bb4be2012-07-17 12:20:47 -07001185static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001186{
1187 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1188 struct fib_nh_exception *fnhe;
1189 u32 hval;
1190
David S. Millerf2bb4be2012-07-17 12:20:47 -07001191 if (!hash)
1192 return NULL;
1193
David S. Millerd3a25c92012-07-17 13:23:08 -07001194 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001195
1196 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1197 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001198 if (fnhe->fnhe_daddr == daddr)
1199 return fnhe;
1200 }
1201 return NULL;
1202}
David S. Miller4895c772012-07-17 04:19:00 -07001203
/* Bind route @rt to nexthop exception @fnhe for destination @daddr:
 * copy the exception's cached PMTU/gateway onto the route and publish
 * the route as the exception's cached rtable.  Returns true on success,
 * false if the exception no longer matches @daddr (raced with reuse).
 * Serialized by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
		/* A stale cached route invalidates the learned data too. */
		if (orig && rt_is_expired(orig)) {
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		if (fnhe->fnhe_pmtu) {
			unsigned long expires = fnhe->fnhe_expires;
			unsigned long diff = expires - jiffies;

			/* Only copy the PMTU while it is still valid. */
			if (time_before(jiffies, expires)) {
				rt->rt_pmtu = fnhe->fnhe_pmtu;
				dst_set_expires(&rt->dst, diff);
			}
		}
		if (fnhe->fnhe_gw) {
			/* Redirect-learned gateway overrides the FIB one. */
			rt->rt_flags |= RTCF_REDIRECTED;
			rt->rt_gateway = fnhe->fnhe_gw;
			rt->rt_uses_gateway = 1;
		} else if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		/* Publish @rt, then free the route it replaces. */
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1245
/* Try to install @rt as @nh's cached route: the single input slot for
 * input routes, or this CPU's output slot otherwise.  Returns false if
 * a concurrent update won the cmpxchg race (the route is then NOT
 * cached and the caller must handle it as uncached).
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	/* Lockless publish: only succeed if the slot still holds what we
	 * read above; free the displaced route on success.
	 */
	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1267
/* Global list of routes that live outside the nexthop caches, so that
 * rt_flush_dev() can find and re-home them on device removal.
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

/* Register @rt on the uncached-routes list (paired with removal in
 * ipv4_dst_destroy()).
 */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1277
1278static void ipv4_dst_destroy(struct dst_entry *dst)
1279{
1280 struct rtable *rt = (struct rtable *) dst;
1281
Eric Dumazet78df76a2012-08-24 05:40:47 +00001282 if (!list_empty(&rt->rt_uncached)) {
David S. Millercaacf052012-07-31 15:06:50 -07001283 spin_lock_bh(&rt_uncached_lock);
1284 list_del(&rt->rt_uncached);
1285 spin_unlock_bh(&rt_uncached_lock);
1286 }
1287}
1288
/* Device is going away: re-home every uncached route that still points
 * at @dev onto the namespace's loopback device, transferring the device
 * reference (hold loopback, put @dev) so @dev can be released.
 */
void rt_flush_dev(struct net_device *dev)
{
	/* Unlocked emptiness check is a fast path; the list is walked
	 * under rt_uncached_lock below.
	 */
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1306
Eric Dumazet4331deb2012-07-25 05:11:23 +00001307static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba92012-07-17 12:58:50 -07001308{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001309 return rt &&
1310 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1311 !rt_is_expired(rt);
David S. Millerd2d68ba92012-07-17 12:58:50 -07001312}
1313
/* Finish initializing route @rt from FIB lookup result @res: copy the
 * nexthop gateway and metrics, then try to cache the route in the
 * nexthop exception (@fnhe) or the FIB nexthop; on failure, or when
 * there is no fib_info at all, keep it on the uncached list.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only a link-scope nexthop is a real gateway hop. */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1357
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001358static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001359 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001360{
David S. Millerf5b0a872012-07-19 12:31:33 -07001361 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001362 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001363 (nopolicy ? DST_NOPOLICY : 0) |
1364 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001365}
1366
/* Route an incoming multicast packet: validate the addresses, then
 * build a local-delivery (and/or mroute) rtable and attach it to @skb.
 * Returns 0 on success or a negative errno.
 * Called in an rcu_read_lock() section.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Multicast/broadcast may not appear as a source address. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are only valid for link-local groups
		 * (e.g. IGMP from a host that has no address yet).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		/* Reverse-path check on the source address. */
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast input routes must never be used for output. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		/* We are a member of the group: deliver locally. */
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1438
1439
1440static void ip_handle_martian_source(struct net_device *dev,
1441 struct in_device *in_dev,
1442 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001443 __be32 daddr,
1444 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001445{
1446 RT_CACHE_STAT_INC(in_martian_src);
1447#ifdef CONFIG_IP_ROUTE_VERBOSE
1448 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1449 /*
1450 * RFC1812 recommendation, if source is martian,
1451 * the only hint is MAC header.
1452 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001453 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001454 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001455 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001456 print_hex_dump(KERN_WARNING, "ll header: ",
1457 DUMP_PREFIX_OFFSET, 16, 1,
1458 skb_mac_header(skb),
1459 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001460 }
1461 }
1462#endif
1463}
1464
/* Build (or reuse) the dst entry for a forwarded (RTN_UNICAST) packet.
 *
 * Called in rcu_read_lock() section.
 *
 * Validates the source address against the FIB, decides whether an ICMP
 * redirect should be flagged, and either attaches the cached per-nexthop
 * input route or allocates and initializes a fresh rtable whose
 * input/output hooks are ip_forward/ip_output.
 *
 * Returns 0 on success (skb's dst is set), negative errno on failure.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		/* Caller guaranteed a forwardable result; a NULL output
		 * device here means the FIB handed us something bogus.
		 */
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		/* Source failed reverse-path validation: log and bail. */
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when the route has fib info and no classid tag. */
	do_cache = res->fi && !itag;
	/* Packet would go back out the interface it came in on: candidate
	 * for an ICMP redirect (err > 0 from fib_validate_source appears to
	 * gate this — NOTE(review): confirm the >0 semantics against
	 * fib_validate_source).  Redirected routes are never cached.
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Fast path: reuse the nexthop's cached input route if still valid. */
	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	/* Fill in nexthop/gateway info and, when caching, publish the route. */
	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553
/* Thin wrapper around __mkroute_input: picks a nexthop first when the
 * route is multipath (several nexthops) and multipath support is built in.
 *
 * NOTE(review): fl4 is not referenced in this body; it is presumably kept
 * for signature stability with callers/older multipath code — confirm
 * before removing.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	/* More than one nexthop: let the multipath selector pick one. */
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1568
/*
 * NOTE. We drop all the packets that has local source
 * addresses, because every properly looped back packet
 * must have correct destination already attached by output routine.
 *
 * Such approach solves two big problems:
 * 1. Not simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with 100% of guarantee.
 * called with rcu_read_lock()
 */

/* Slow-path input route resolution: classify daddr/saddr (martian,
 * broadcast, local, forwardable), consult the FIB, and attach an
 * appropriate dst to the skb.  Returns 0 on success, negative errno
 * otherwise.  Control flow is label-driven; the labels at the bottom are
 * shared exits reached from several classification points above.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		/* Destined to this host: still validate the source before
		 * delivering locally.
		 */
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	/* Forwarding path: build/reuse the dst via ip_mkroute_input(). */
	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Broadcast with a zero source (e.g. DHCP discover) skips source
	 * validation; anything else is checked against the FIB.
	 */
	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Local delivery: try the nexthop's cached input route first, and
	 * only cache a new one when there is fib info and no classid tag.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input = ip_local_deliver;
	/* Output through a local-input route is a bug; trap it loudly. */
	rth->dst.output = ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags = flags | RTCF_LOCAL;
	rth->rt_type = res.type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		/* no_route falls through here: deliver to ip_error, which
		 * generates the ICMP unreachable (dst.error carries the
		 * positive errno).
		 */
		rth->dst.input = ip_error;
		rth->dst.error = -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* keep_err variant preserves the errno from fib_validate_source */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1756
David S. Millerc6cffba2012-07-26 11:14:38 +00001757int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1758 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001759{
Eric Dumazet96d36222010-06-02 19:21:31 +00001760 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001761
Eric Dumazet96d36222010-06-02 19:21:31 +00001762 rcu_read_lock();
1763
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 /* Multicast recognition logic is moved from route cache to here.
1765 The problem was that too many Ethernet cards have broken/missing
1766 hardware multicast filters :-( As result the host on multicasting
1767 network acquires a lot of useless route cache entries, sort of
1768 SDR messages from all the world. Now we try to get rid of them.
1769 Really, provided software IP multicast filter is organized
1770 reasonably (at least, hashed), it does not result in a slowdown
1771 comparing with route cache reject entries.
1772 Note, that multicast routers are not affected, because
1773 route cache entry is created eventually.
1774 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001775 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001776 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777
Eric Dumazet96d36222010-06-02 19:21:31 +00001778 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001779 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1780 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 if (our
1782#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001783 ||
1784 (!ipv4_is_local_multicast(daddr) &&
1785 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001787 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001788 int res = ip_route_input_mc(skb, daddr, saddr,
1789 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001790 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001791 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 }
1793 }
1794 rcu_read_unlock();
1795 return -EINVAL;
1796 }
David S. Millerc10237e2012-06-27 17:05:06 -07001797 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001798 rcu_read_unlock();
1799 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800}
David S. Millerc6cffba2012-07-26 11:14:38 +00001801EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802
/* called with rcu_read_lock() */

/* Build (or reuse) the dst entry for an output route that has already
 * been resolved (res) for flow fl4 through dev_out.
 *
 * Classifies the destination (broadcast/multicast/unicast), tries the
 * per-nexthop caches (fnhe exception route or per-cpu output route), and
 * otherwise allocates and initializes a fresh rtable.
 *
 * Returns the rtable on success or an ERR_PTR(-errno) on failure.  On the
 * cached-reuse path a dst reference is taken for the caller.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback source out a non-loopback device is only allowed when
	 * route_localnet is enabled on the output device.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Prefer a per-destination exception route (PMTU/redirect)
		 * over the nexthop's per-cpu cached output route.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else {
			/* FLOWI_FLAG_KNOWN_NH without an on-link gateway:
			 * the per-cpu cache would be wrong for this flow,
			 * so build an uncached route instead.
			 */
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nh->nh_gw &&
				       nh->nh_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			/* Local + non-loopback: ip_mc_output loops a copy
			 * back for local delivery as well as transmitting.
			 */
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	/* Fill in gateway/metrics and publish into the chosen cache slot. */
	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1924
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925/*
1926 * Major route resolver routine.
1927 */
1928
David S. Miller89aef892012-07-17 11:00:09 -07001929struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001932 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001933 unsigned int flags = 0;
1934 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001935 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001936 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937
David S. Miller85b91b02012-07-13 08:21:29 -07001938 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001940 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941
David S. Miller813b3b52011-04-28 14:48:42 -07001942 orig_oif = fl4->flowi4_oif;
1943
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001944 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001945 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1946 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1947 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001948
David S. Miller010c2702011-02-17 15:37:09 -08001949 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001950 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001951 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001952 if (ipv4_is_multicast(fl4->saddr) ||
1953 ipv4_is_lbcast(fl4->saddr) ||
1954 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 goto out;
1956
Linus Torvalds1da177e2005-04-16 15:20:36 -07001957 /* I removed check for oif == dev_out->oif here.
1958 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001959 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1960 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 2. Moreover, we are allowed to send packets with saddr
1962 of another iface. --ANK
1963 */
1964
David S. Miller813b3b52011-04-28 14:48:42 -07001965 if (fl4->flowi4_oif == 0 &&
1966 (ipv4_is_multicast(fl4->daddr) ||
1967 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001968 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001969 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001970 if (dev_out == NULL)
1971 goto out;
1972
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973 /* Special hack: user can direct multicasts
1974 and limited broadcast via necessary interface
1975 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1976 This hack is not just for fun, it allows
1977 vic,vat and friends to work.
1978 They bind socket to loopback, set ttl to zero
1979 and expect that it will work.
1980 From the viewpoint of routing cache they are broken,
1981 because we are not allowed to build multicast path
1982 with loopback source addr (look, routing cache
1983 cannot know, that ttl is zero, so that packet
1984 will not leave this host and route is valid).
1985 Luckily, this hack is good workaround.
1986 */
1987
David S. Miller813b3b52011-04-28 14:48:42 -07001988 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989 goto make_route;
1990 }
Julian Anastasova210d012008-10-01 07:28:28 -07001991
David S. Miller813b3b52011-04-28 14:48:42 -07001992 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07001993 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001994 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07001995 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07001996 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 }
1998
1999
David S. Miller813b3b52011-04-28 14:48:42 -07002000 if (fl4->flowi4_oif) {
2001 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002002 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002003 if (dev_out == NULL)
2004 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002005
2006 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002007 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002008 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002009 goto out;
2010 }
David S. Miller813b3b52011-04-28 14:48:42 -07002011 if (ipv4_is_local_multicast(fl4->daddr) ||
2012 ipv4_is_lbcast(fl4->daddr)) {
2013 if (!fl4->saddr)
2014 fl4->saddr = inet_select_addr(dev_out, 0,
2015 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 goto make_route;
2017 }
David S. Miller813b3b52011-04-28 14:48:42 -07002018 if (fl4->saddr) {
2019 if (ipv4_is_multicast(fl4->daddr))
2020 fl4->saddr = inet_select_addr(dev_out, 0,
2021 fl4->flowi4_scope);
2022 else if (!fl4->daddr)
2023 fl4->saddr = inet_select_addr(dev_out, 0,
2024 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002025 }
2026 }
2027
David S. Miller813b3b52011-04-28 14:48:42 -07002028 if (!fl4->daddr) {
2029 fl4->daddr = fl4->saddr;
2030 if (!fl4->daddr)
2031 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002032 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002033 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002034 res.type = RTN_LOCAL;
2035 flags |= RTCF_LOCAL;
2036 goto make_route;
2037 }
2038
David S. Miller813b3b52011-04-28 14:48:42 -07002039 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002041 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002042 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 /* Apparently, routing tables are wrong. Assume,
2044 that the destination is on link.
2045
2046 WHY? DW.
2047 Because we are allowed to send to iface
2048 even if it has NO routes and NO assigned
2049 addresses. When oif is specified, routing
2050 tables are looked up with only one purpose:
2051 to catch if destination is gatewayed, rather than
2052 direct. Moreover, if MSG_DONTROUTE is set,
2053 we send packet, ignoring both routing tables
2054 and ifaddr state. --ANK
2055
2056
2057 We could make it even if oif is unknown,
2058 likely IPv6, but we do not.
2059 */
2060
David S. Miller813b3b52011-04-28 14:48:42 -07002061 if (fl4->saddr == 0)
2062 fl4->saddr = inet_select_addr(dev_out, 0,
2063 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 res.type = RTN_UNICAST;
2065 goto make_route;
2066 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002067 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 goto out;
2069 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070
2071 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002072 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002073 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002074 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002075 else
David S. Miller813b3b52011-04-28 14:48:42 -07002076 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002077 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002078 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002079 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 flags |= RTCF_LOCAL;
2081 goto make_route;
2082 }
2083
2084#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002085 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002086 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 else
2088#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002089 if (!res.prefixlen &&
2090 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002091 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002092 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093
David S. Miller813b3b52011-04-28 14:48:42 -07002094 if (!fl4->saddr)
2095 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096
Linus Torvalds1da177e2005-04-16 15:20:36 -07002097 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002098 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099
2100
2101make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002102 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103
David S. Miller010c2702011-02-17 15:37:09 -08002104out:
2105 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002106 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002107}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002108EXPORT_SYMBOL_GPL(__ip_route_output_key);
2109
/*
 * dst_ops->check for blackhole routes: always report the entry as
 * stale (NULL) so users revalidate instead of reusing the cached dst.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2114
Steffen Klassertebb762f2011-11-23 02:12:51 +00002115static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002116{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002117 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2118
2119 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002120}
2121
/* dst_ops->update_pmtu for blackhole routes: intentionally a no-op. */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2126
/* dst_ops->redirect for blackhole routes: intentionally a no-op. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2131
/*
 * dst_ops->cow_metrics for blackhole routes: refuse to create a
 * writable metrics copy (returns NULL).
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2137
/*
 * dst_ops used by ipv4_blackhole_route(): the state-changing callbacks
 * (check/update_pmtu/redirect/cow_metrics) are the no-op stubs above,
 * while default_advmss and neigh_lookup reuse the regular IPv4 helpers.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2149
/*
 * Build a "blackhole" copy of @dst_orig: a route whose input/output
 * handlers discard every packet, while still carrying the original
 * route's attributes (device, iif, pmtu, gateway, flags, type).
 *
 * Consumes the caller's reference on @dst_orig.  Returns the new dst
 * on success, ERR_PTR(-ENOMEM) if the allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Both directions drop traffic on a blackhole entry */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Mirror the routing attributes of the original entry */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() here appears to hand the entry to
		 * the dst GC accounting rather than destroy it — confirm
		 * against dst_alloc()/dst_free() semantics of this tree. */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2186
David S. Miller9d6ec932011-03-12 01:12:47 -05002187struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002188 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189{
David S. Miller9d6ec932011-03-12 01:12:47 -05002190 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002191
David S. Millerb23dd4f2011-03-02 14:31:35 -08002192 if (IS_ERR(rt))
2193 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194
David S. Miller56157872011-05-02 14:37:45 -07002195 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002196 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2197 flowi4_to_flowi(flp4),
2198 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002199
David S. Millerb23dd4f2011-03-02 14:31:35 -08002200 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002202EXPORT_SYMBOL_GPL(ip_route_output_flow);
2203
/*
 * Fill @skb with an rtnetlink message describing the route attached to
 * it (skb_rtable(skb)).  @dst/@src are the addresses the lookup was
 * performed with, @fl4 the flow used; @portid/@seq/@event/@flags form
 * the netlink envelope.  @nowait controls whether a multicast route
 * resolution may block in ipmr_get_route().
 *
 * Returns the message length on success, 0 if ipmr queued the reply
 * itself, or -EMSGSIZE when the skb ran out of tailroom (the partial
 * message is cancelled).
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed rtmsg header first, then optional attributes */
	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Only output routes carry a preferred source worth reporting,
	 * and only when it differs from the src the query supplied. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry (jiffies) into a remaining delta;
	 * an already-passed expiry is reported as 0. */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A learned PMTU overrides the MTU metric while it is valid */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Multicast forwarding enabled: let ipmr resolve/report.
		 * err == 0 with !nowait means the reply was queued. */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2313
/*
 * RTM_GETROUTE handler: perform a one-off route lookup on behalf of a
 * userspace query and unicast the result back as an RTM_NEWROUTE
 * message.  If RTA_IIF is given the lookup simulates packet reception
 * on that interface (ip_route_input), otherwise a normal output
 * lookup is done.  Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	/* This skb is both the fake "received packet" for the input
	 * lookup and the reply buffer the answer is built into. */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* All attributes are optional; absent ones default to 0 */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		/* Input-path lookup: pretend the packet arrived on iif */
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2410
/*
 * Netlink dump callback stub: nothing is appended to @skb, so the
 * requester sees an empty dump.  Returning skb->len terminates the
 * dump cleanly.
 */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}
2415
/*
 * Multicast configuration changed on @in_dev: flush the route cache
 * of the device's namespace so stale decisions are dropped.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2420
2421#ifdef CONFIG_SYSCTL
/* Sysctl-tunable GC parameters, exposed via ipv4_route_table below. */
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2426
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002427static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002428 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002429 size_t *lenp, loff_t *ppos)
2430{
2431 if (write) {
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002432 rt_cache_flush((struct net *)__ctl->extra1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002433 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002434 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435
2436 return -EINVAL;
2437}
2438
/*
 * Global net.ipv4.route.* sysctl table (registered once in
 * ip_static_sysctl_init); time-valued entries use the jiffies
 * conversion handlers.
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002549
/*
 * Per-namespace net.ipv4.route.flush entry; template duplicated for
 * each non-init netns in sysctl_route_net_init().  Write-only (0200).
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2559
/*
 * Per-netns setup: register net.ipv4.route sysctls for @net.  The
 * init namespace uses the static table directly; other namespaces get
 * a kmemdup'd private copy (freed in sysctl_route_net_exit).
 * Returns 0 on success, -ENOMEM on allocation/registration failure.
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	/* extra1 carries the owning netns to ipv4_sysctl_rtcache_flush() */
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
2587
2588static __net_exit void sysctl_route_net_exit(struct net *net)
2589{
2590 struct ctl_table *tbl;
2591
2592 tbl = net->ipv4.route_hdr->ctl_table_arg;
2593 unregister_net_sysctl_table(net->ipv4.route_hdr);
2594 BUG_ON(tbl == ipv4_route_flush_table);
2595 kfree(tbl);
2596}
2597
/* pernet hooks for the per-namespace route sysctls above. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002602#endif
2603
/*
 * Per-netns init: start the route generation counter at 0 and seed
 * the random per-device address generation id.  Always succeeds.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2611
/* pernet hook for rt_genid_init (no exit needed). */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2615
David S. Millerc3426b42012-06-09 16:27:05 -07002616static int __net_init ipv4_inetpeer_init(struct net *net)
2617{
2618 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2619
2620 if (!bp)
2621 return -ENOMEM;
2622 inet_peer_base_init(bp);
2623 net->ipv4.peers = bp;
2624 return 0;
2625}
2626
2627static void __net_exit ipv4_inetpeer_exit(struct net *net)
2628{
2629 struct inet_peer_base *bp = net->ipv4.peers;
2630
2631 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002632 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002633 kfree(bp);
2634}
2635
/* pernet hooks for the per-namespace inet_peer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002640
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu routing-class accounting; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644
/*
 * Boot-time initialization of the IPv4 routing layer: allocates the
 * dst slab cache and entry counters, wires up devinet/fib/proc/xfrm,
 * registers the RTM_GETROUTE handler and the pernet subsystems.
 * Allocation failures here are fatal (panic).  Returns 0.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the regular rtable slab */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable dst GC pressure limits */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2688
Al Viroa1bc6eb2008-07-30 06:32:52 -04002689#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
/* Registers the global net.ipv4.route sysctl table for the init netns. */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002698#endif