blob: 7bcffa6ddba371c841912395b8f5d3ae2c910c6a [file] [log] [blame]
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
15
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
37#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <linux/if_arp.h>
39
40#ifdef CONFIG_PROC_FS
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#endif
44
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070055#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070056#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/* Debug level for this file; raise to 3 (or more) to compile tracing in. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing compiled out: both helpers expand to nothing observable. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif

/*
 * When non-zero, the policy route functions clone routes that already
 * have a nexthop (rt6_alloc_clone); when zero they return the matched
 * route itself.
 */
#define CLONE_OFFLINK_ROUTE 0

/*
 * Flags for the "strict" argument of rt6_select()/rt6_score_route():
 *   IFACE     - reject routes whose device does not match the interface
 *   REACHABLE - reject routes whose neighbour check scores zero
 */
#define RT6_SELECT_F_IFACE 0x1
#define RT6_SELECT_F_REACHABLE 0x2
79
Linus Torvalds1da177e2005-04-16 15:20:36 -070080static int ip6_rt_max_size = 4096;
81static int ip6_rt_gc_min_interval = HZ / 2;
82static int ip6_rt_gc_timeout = 60*HZ;
83int ip6_rt_gc_interval = 30*HZ;
84static int ip6_rt_gc_elasticity = 9;
85static int ip6_rt_mtu_expires = 10*60*HZ;
86static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94static int ip6_dst_gc(void);
95
96static int ip6_pkt_discard(struct sk_buff *skb);
97static int ip6_pkt_discard_out(struct sk_buff *skb);
98static void ip6_link_failure(struct sk_buff *skb);
99static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800101#ifdef CONFIG_IPV6_ROUTE_INFO
102static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107#endif
108
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121};
122
123struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141};
142
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Static reject entries available with multiple routing tables.
 * They mirror ip6_null_entry and differ only in the error code
 * (and in the self-referencing .path pointer).
 */

/* Reject entry whose dst error is -EACCES. */
struct rt6_info ip6_prohibit_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EACCES,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_prohibit_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Reject entry whose dst error is -EINVAL. */
struct rt6_info ip6_blk_hole_entry = {
	.u = {
		.dst = {
			.__refcnt	= ATOMIC_INIT(1),
			.__use		= 1,
			.dev		= &loopback_dev,
			.obsolete	= -1,
			.error		= -EINVAL,
			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
			.input		= ip6_pkt_discard,
			.output		= ip6_pkt_discard_out,
			.ops		= &ip6_dst_ops,
			.path		= (struct dst_entry*)&ip6_blk_hole_entry,
		}
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
186
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187/* allocate dst with ip6_dst_ops */
188static __inline__ struct rt6_info *ip6_dst_alloc(void)
189{
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191}
192
193static void ip6_dst_destroy(struct dst_entry *dst)
194{
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202}
203
204static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206{
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217}
218
219static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220{
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223}
224
Thomas Grafc71099a2006-08-04 23:20:06 -0700225static inline int rt6_need_strict(struct in6_addr *daddr)
226{
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229}
230
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700232 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 */
234
235static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238{
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267}
268
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's nexthop neighbour is not in a NUD_VALID state and
 * more than the device's rtr_probe_interval has passed since
 * neigh->updated, stamp neigh->updated with jiffies and send a
 * neighbour solicitation to the solicited-node multicast address of
 * the neighbour.  The interval check is what rate-limits the probes
 * (they MUST NOT exceed one per minute).
 *
 * A NULL rt, or a route without a nexthop, is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int due;

	/* cheap unlocked test first */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (!due)
		return;

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
304
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800306 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800308static int inline rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700317}
318
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800319static int inline rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700329 m = 2;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800330 read_unlock_bh(&neigh->lock);
331 }
332 return m;
333}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700334
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800335static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
337{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800343#ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700346 n = rt6_check_neigh(rt);
347 if (n > 1)
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800348 m |= 16;
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800350 return -1;
351 return m;
352}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
/*
 * Select the best route among the entries at *head that share the
 * metric of the first entry.
 *
 * head:   address of the list head; may be rewritten by the
 *         round-robin step below.
 * oif:    interface index handed to rt6_score_route().
 * strict: RT6_SELECT_F_* flags handed to rt6_score_route().
 *
 * Expired entries are skipped.  Each remaining entry is scored; the
 * highest scorer becomes the match, and rt6_probe() is called on the
 * entry it displaces and on entries that score no higher.
 *
 * Returns the match, or &ip6_null_entry when nothing was usable.
 *
 * NOTE(review): *head is dereferenced in the declarations before the
 * trace call tests head for NULL, so a NULL head is not actually
 * tolerated.
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800354static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356{
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
364
/*
 * Walk while the metric equals the head's metric; also stop if the
 * walk arrives back at rt0 after a non-expired entry has been seen.
 */
365 for (rt = rt0, metric = rt0->rt6i_metric;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800367 rt = rt->u.next) {
368 int m;
369
370 if (rt6_check_expired(rt))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700371 continue;
372
/* last tracks the final non-expired entry visited */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800373 last = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700374
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700377 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800379 if (m > mpri) {
/* probe the previous best (rt6_probe ignores NULL) before replacing it */
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800380 rt6_probe(match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800381 match = rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700382 mpri = m;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800383 } else {
384 rt6_probe(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 }
386 }
387
/*
 * Nothing matched while reachability was required: move rt0 from the
 * head of the list to just after "last", so a later call starts from
 * a different entry.  The function-local spinlock serialises this
 * relinking between concurrent callers of rt6_select().
 */
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
Ingo Molnar34af9462006-06-27 02:53:55 -0700392 static DEFINE_SPINLOCK(lock);
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700393 spin_lock(&lock);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
YOSHIFUJI Hideakic302e6d2006-04-28 15:59:15 -0700397 spin_unlock(&lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700398 }
399
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800403 return (match ? match : &ip6_null_entry);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700404}
405
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received Route Information option.
 *
 * dev:    device the option arrived on; its ifindex keys the route.
 * opt:    start of the option, interpreted as struct route_info.
 * len:    number of bytes available at opt.
 * gwaddr: gateway address used to find or create the route.
 *
 * A lifetime of zero deletes an existing route, a non-zero lifetime
 * creates the route if it is missing or refreshes its preference, and
 * 0xffffffff means the route never expires.
 *
 * Returns 0 on success, or -EINVAL when the option is too short or its
 * length / prefix_len fields are inconsistent.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	u32 lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/*
	 * Sanity check for prefix_len and length: longer prefixes need
	 * a larger option length value.
	 */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		pref = ICMPV6_ROUTER_PREF_MEDIUM;

	/* The lifetime arrives in network byte order: convert to host order. */
	lifetime = ntohl(rinfo->lifetime);
	if (lifetime == 0xffffffff) {
		/* infinity */
	} else if (lifetime > 0x7fffffff/HZ) {
		/* Avoid arithmetic overflow in HZ * lifetime below */
		lifetime = 0x7fffffff/HZ - 1;
	}

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);

	/* Zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (lifetime == 0xffffffff) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		/* drop the reference obtained from the get/add helper */
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
483
/*
 * Walk back up the fib tree after a strict lookup produced
 * ip6_null_entry.
 *
 * Relies on the expanding function providing the locals rt, fn and
 * flags and the labels "restart" and "out".  While climbing through
 * fn->parent:
 *   - a node flagged RTN_TL_ROOT ends the search: a reference is taken
 *     on rt (the null entry) and control jumps to "out";
 *   - a node flagged RTN_RTINFO restarts route selection at that node.
 * If the climb runs off the top (fn becomes NULL) execution simply
 * continues after the macro.
 *
 * NOTE(review): the "out" jump happens with a reference already held;
 * a user whose "out:" label takes its own reference ends up holding
 * two.
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is a single
 * statement wherever it is used.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && (flags & RT6_F_STRICT)) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_TL_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
495
496static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700498{
499 struct fib6_node *fn;
500 struct rt6_info *rt;
501
Thomas Grafc71099a2006-08-04 23:20:06 -0700502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700509out:
510 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700511
512 rt->u.dst.lastuse = jiffies;
Thomas Grafc71099a2006-08-04 23:20:06 -0700513 rt->u.dst.__use++;
514
515 return rt;
516
517}
518
519struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
521{
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
528 },
529 },
530 };
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
533
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
537
538 dst_release(dst);
539
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 return NULL;
541}
542
Thomas Grafc71099a2006-08-04 23:20:06 -0700543/* ip6_ins_rt is called with FREE table->tb6_lock.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 It takes new route entry, the addition fails by any reason the
545 route is freed. In any case, if caller does not hold it, it may
546 be destroyed.
547 */
548
Thomas Graf86872cb2006-08-22 00:01:08 -0700549static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700550{
551 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700552 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700553
Thomas Grafc71099a2006-08-04 23:20:06 -0700554 table = rt->rt6i_table;
555 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700556 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700557 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 return err;
560}
561
Thomas Graf40e22e82006-08-22 00:00:45 -0700562int ip6_ins_rt(struct rt6_info *rt)
563{
Thomas Graf86872cb2006-08-22 00:01:08 -0700564 return __ip6_ins_rt(rt, NULL);
Thomas Graf40e22e82006-08-22 00:00:45 -0700565}
566
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800567static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
568 struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700570 struct rt6_info *rt;
571
572 /*
573 * Clone the route.
574 */
575
576 rt = ip6_rt_copy(ort);
577
578 if (rt) {
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900579 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
580 if (rt->rt6i_dst.plen != 128 &&
581 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
582 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700583 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900584 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700585
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900586 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 rt->rt6i_dst.plen = 128;
588 rt->rt6i_flags |= RTF_CACHE;
589 rt->u.dst.flags |= DST_HOST;
590
591#ifdef CONFIG_IPV6_SUBTREES
592 if (rt->rt6i_src.plen && saddr) {
593 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
594 rt->rt6i_src.plen = 128;
595 }
596#endif
597
598 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
599
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800600 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700601
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800602 return rt;
603}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800605static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
606{
607 struct rt6_info *rt = ip6_rt_copy(ort);
608 if (rt) {
609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
610 rt->rt6i_dst.plen = 128;
611 rt->rt6i_flags |= RTF_CACHE;
612 if (rt->rt6i_flags & RTF_REJECT)
613 rt->u.dst.error = ort->u.dst.error;
614 rt->u.dst.flags |= DST_HOST;
615 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
616 }
617 return rt;
618}
619
/*
 * Per-table lookup callback for received packets (ip6_route_input()).
 *
 * Selects a route for the flow with rt6_select(), keyed on fl->iif.
 * The first pass requires a reachable router (RT6_SELECT_F_REACHABLE);
 * whenever a pass ends at the "out" label with that requirement still
 * set, it is cleared and the lookup is repeated once without it.
 *
 * A selected route that is neither the null entry nor an RTF_CACHE
 * entry is handled outside the table lock: if it has no nexthop and is
 * not RTF_NONEXTHOP, a /128 copy is made with rt6_alloc_cow() and
 * inserted; otherwise, with CLONE_OFFLINK_ROUTE == 0, the route itself
 * is returned.  A failed insert (or failed allocation) is retried via
 * a fresh lookup, up to "attempts" times in total.
 *
 * Returns a route with a reference held and lastuse/__use updated;
 * this may be &ip6_null_entry.
 *
 * NOTE(review): BACKTRACK() takes a reference before jumping to
 * "out", and "out" takes another one (possibly after a second pass),
 * so the null entry's refcount can be bumped more than once on that
 * path - confirm this is intended.
 */
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700620static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
621 struct flowi *fl, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700622{
623 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800624 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700625 int strict = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800627 int err;
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800628 int reachable = RT6_SELECT_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
Thomas Grafc71099a2006-08-04 23:20:06 -0700630 if (flags & RT6_F_STRICT)
631 strict = RT6_SELECT_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
/* relookup: re-entered with the table lock dropped */
633relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700634 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
/* restart_2: re-entered with the table lock still held */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800636restart_2:
Thomas Grafc71099a2006-08-04 23:20:06 -0700637 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
/* restart: BACKTRACK() jumps here after moving fn to a parent node */
639restart:
Thomas Grafc71099a2006-08-04 23:20:06 -0700640 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641 BACKTRACK();
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800642 if (rt == &ip6_null_entry ||
643 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef0442006-03-20 17:01:24 -0800644 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645
/* pin the route before releasing the lock */
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800646 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700647 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800648
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800649 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800650 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800651 else {
652#if CLONE_OFFLINK_ROUTE
653 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
654#else
655 goto out2;
656#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 }
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800658
/* swap the reference from the original route to the copy (or the null entry) */
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800659 dst_release(&rt->u.dst);
660 rt = nrt ? : &ip6_null_entry;
661
662 dst_hold(&rt->u.dst);
663 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700664 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800665 if (!err)
666 goto out2;
667 }
668
669 if (--attempts <= 0)
670 goto out2;
671
672 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700673 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800674 * released someone could insert this route. Relookup.
675 */
676 dst_release(&rt->u.dst);
677 goto relookup;
678
679out:
/* one extra pass without the reachability requirement */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800680 if (reachable) {
681 reachable = 0;
682 goto restart_2;
683 }
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800684 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700685 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686out2:
687 rt->u.dst.lastuse = jiffies;
688 rt->u.dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700689
690 return rt;
691}
692
693void ip6_route_input(struct sk_buff *skb)
694{
695 struct ipv6hdr *iph = skb->nh.ipv6h;
696 struct flowi fl = {
697 .iif = skb->dev->ifindex,
698 .nl_u = {
699 .ip6_u = {
700 .daddr = iph->daddr,
701 .saddr = iph->saddr,
702 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
703 },
704 },
705 .proto = iph->nexthdr,
706 };
707 int flags = 0;
708
709 if (rt6_need_strict(&iph->daddr))
710 flags |= RT6_F_STRICT;
711
712 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
713}
714
/*
 * Per-table lookup callback for locally generated traffic
 * (ip6_route_output()).
 *
 * Same algorithm as ip6_pol_route_input(), except that route
 * selection is keyed on fl->oif instead of fl->iif: a first pass
 * requiring a reachable router, one fallback pass without that
 * requirement, and - for routes that are not yet cache entries -
 * creation and insertion of a /128 copy outside the table lock, with
 * up to "attempts" lookups in total when the insert fails.
 *
 * Returns a route with a reference held and lastuse/__use updated;
 * this may be &ip6_null_entry.
 *
 * NOTE(review): as in the input variant, BACKTRACK() already holds a
 * reference when it jumps to "out", where another one is taken.
 */
715static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
716 struct flowi *fl, int flags)
717{
718 struct fib6_node *fn;
719 struct rt6_info *rt, *nrt;
720 int strict = 0;
721 int attempts = 3;
722 int err;
723 int reachable = RT6_SELECT_F_REACHABLE;
724
725 if (flags & RT6_F_STRICT)
726 strict = RT6_SELECT_F_IFACE;
727
/* relookup: re-entered with the table lock dropped */
728relookup:
729 read_lock_bh(&table->tb6_lock);
730
/* restart_2: re-entered with the table lock still held */
731restart_2:
732 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
733
/* restart: BACKTRACK() jumps here after moving fn to a parent node */
734restart:
735 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
736 BACKTRACK();
737 if (rt == &ip6_null_entry ||
738 rt->rt6i_flags & RTF_CACHE)
739 goto out;
740
/* pin the route before releasing the lock */
741 dst_hold(&rt->u.dst);
742 read_unlock_bh(&table->tb6_lock);
743
744 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
745 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
746 else {
747#if CLONE_OFFLINK_ROUTE
748 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
749#else
750 goto out2;
751#endif
752 }
753
/* swap the reference from the original route to the copy (or the null entry) */
754 dst_release(&rt->u.dst);
755 rt = nrt ? : &ip6_null_entry;
756
757 dst_hold(&rt->u.dst);
758 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700759 err = ip6_ins_rt(nrt);
Thomas Grafc71099a2006-08-04 23:20:06 -0700760 if (!err)
761 goto out2;
762 }
763
764 if (--attempts <= 0)
765 goto out2;
766
767 /*
768 * Race condition! In the gap, when table->tb6_lock was
769 * released someone could insert this route. Relookup.
770 */
771 dst_release(&rt->u.dst);
772 goto relookup;
773
774out:
/* one extra pass without the reachability requirement */
775 if (reachable) {
776 reachable = 0;
777 goto restart_2;
778 }
779 dst_hold(&rt->u.dst);
780 read_unlock_bh(&table->tb6_lock);
781out2:
782 rt->u.dst.lastuse = jiffies;
783 rt->u.dst.__use++;
784 return rt;
785}
786
787struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
788{
789 int flags = 0;
790
791 if (rt6_need_strict(&fl->fl6_dst))
792 flags |= RT6_F_STRICT;
793
794 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795}
796
797
798/*
799 * Destination cache support functions
800 */
801
802static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
803{
804 struct rt6_info *rt;
805
806 rt = (struct rt6_info *) dst;
807
808 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
809 return dst;
810
811 return NULL;
812}
813
814static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
815{
816 struct rt6_info *rt = (struct rt6_info *) dst;
817
818 if (rt) {
819 if (rt->rt6i_flags & RTF_CACHE)
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700820 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 else
822 dst_release(dst);
823 }
824 return NULL;
825}
826
827static void ip6_link_failure(struct sk_buff *skb)
828{
829 struct rt6_info *rt;
830
831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
832
833 rt = (struct rt6_info *) skb->dst;
834 if (rt) {
835 if (rt->rt6i_flags&RTF_CACHE) {
836 dst_set_expires(&rt->u.dst, 0);
837 rt->rt6i_flags |= RTF_EXPIRES;
838 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
839 rt->rt6i_node->fn_sernum = -1;
840 }
841}
842
843static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
844{
845 struct rt6_info *rt6 = (struct rt6_info*)dst;
846
847 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
848 rt6->rt6i_flags |= RTF_MODIFIED;
849 if (mtu < IPV6_MIN_MTU) {
850 mtu = IPV6_MIN_MTU;
851 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
852 }
853 dst->metrics[RTAX_MTU-1] = mtu;
Tom Tucker8d717402006-07-30 20:43:36 -0700854 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700855 }
856}
857
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858static int ipv6_get_mtu(struct net_device *dev);
859
860static inline unsigned int ipv6_advmss(unsigned int mtu)
861{
862 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
863
864 if (mtu < ip6_rt_min_advmss)
865 mtu = ip6_rt_min_advmss;
866
867 /*
868 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
869 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
870 * IPV6_MAXPLEN is also valid and means: "any MSS,
871 * rely only on pmtu discovery"
872 */
873 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
874 mtu = IPV6_MAXPLEN;
875 return mtu;
876}
877
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700878static struct dst_entry *ndisc_dst_gc_list;
Adrian Bunk8ce11e62006-08-07 21:50:48 -0700879static DEFINE_SPINLOCK(ndisc_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700880
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
882 struct neighbour *neigh,
883 struct in6_addr *addr,
884 int (*output)(struct sk_buff *))
885{
886 struct rt6_info *rt;
887 struct inet6_dev *idev = in6_dev_get(dev);
888
889 if (unlikely(idev == NULL))
890 return NULL;
891
892 rt = ip6_dst_alloc();
893 if (unlikely(rt == NULL)) {
894 in6_dev_put(idev);
895 goto out;
896 }
897
898 dev_hold(dev);
899 if (neigh)
900 neigh_hold(neigh);
901 else
902 neigh = ndisc_get_neigh(dev, addr);
903
904 rt->rt6i_dev = dev;
905 rt->rt6i_idev = idev;
906 rt->rt6i_nexthop = neigh;
907 atomic_set(&rt->u.dst.__refcnt, 1);
908 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
909 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
910 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
911 rt->u.dst.output = output;
912
913#if 0 /* there's no chance to use these for ndisc */
914 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
915 ? DST_HOST
916 : 0;
917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
918 rt->rt6i_dst.plen = 128;
919#endif
920
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700921 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 rt->u.dst.next = ndisc_dst_gc_list;
923 ndisc_dst_gc_list = &rt->u.dst;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700924 spin_unlock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700925
926 fib6_force_start_gc();
927
928out:
929 return (struct dst_entry *)rt;
930}
931
932int ndisc_dst_gc(int *more)
933{
934 struct dst_entry *dst, *next, **pprev;
935 int freed;
936
937 next = NULL;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700938 freed = 0;
939
940 spin_lock_bh(&ndisc_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941 pprev = &ndisc_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700942
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 while ((dst = *pprev) != NULL) {
944 if (!atomic_read(&dst->__refcnt)) {
945 *pprev = dst->next;
946 dst_free(dst);
947 freed++;
948 } else {
949 pprev = &dst->next;
950 (*more)++;
951 }
952 }
953
Thomas Graf5d0bbee2006-08-04 03:37:36 -0700954 spin_unlock_bh(&ndisc_lock);
955
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 return freed;
957}
958
959static int ip6_dst_gc(void)
960{
961 static unsigned expire = 30*HZ;
962 static unsigned long last_gc;
963 unsigned long now = jiffies;
964
965 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
966 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
967 goto out;
968
969 expire++;
970 fib6_run_gc(expire);
971 last_gc = now;
972 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
973 expire = ip6_rt_gc_timeout>>1;
974
975out:
976 expire -= expire>>ip6_rt_gc_elasticity;
977 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
978}
979
980/* Clean host part of a prefix. Not necessary in radix tree,
981 but results in cleaner routing tables.
982
983 Remove it only when all the things will work!
984 */
985
986static int ipv6_get_mtu(struct net_device *dev)
987{
988 int mtu = IPV6_MIN_MTU;
989 struct inet6_dev *idev;
990
991 idev = in6_dev_get(dev);
992 if (idev) {
993 mtu = idev->cnf.mtu6;
994 in6_dev_put(idev);
995 }
996 return mtu;
997}
998
999int ipv6_get_hoplimit(struct net_device *dev)
1000{
1001 int hoplimit = ipv6_devconf.hop_limit;
1002 struct inet6_dev *idev;
1003
1004 idev = in6_dev_get(dev);
1005 if (idev) {
1006 hoplimit = idev->cnf.hop_limit;
1007 in6_dev_put(idev);
1008 }
1009 return hoplimit;
1010}
1011
1012/*
1013 *
1014 */
1015
Thomas Graf86872cb2006-08-22 00:01:08 -07001016int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017{
1018 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001022 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023 int addr_type;
1024
Thomas Graf86872cb2006-08-22 00:01:08 -07001025 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 return -EINVAL;
1027#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001028 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 return -EINVAL;
1030#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001031 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032 err = -ENODEV;
Thomas Graf86872cb2006-08-22 00:01:08 -07001033 dev = dev_get_by_index(cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 if (!dev)
1035 goto out;
1036 idev = in6_dev_get(dev);
1037 if (!idev)
1038 goto out;
1039 }
1040
Thomas Graf86872cb2006-08-22 00:01:08 -07001041 if (cfg->fc_metric == 0)
1042 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043
Thomas Graf86872cb2006-08-22 00:01:08 -07001044 table = fib6_new_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001045 if (table == NULL) {
1046 err = -ENOBUFS;
1047 goto out;
1048 }
1049
Linus Torvalds1da177e2005-04-16 15:20:36 -07001050 rt = ip6_dst_alloc();
1051
1052 if (rt == NULL) {
1053 err = -ENOMEM;
1054 goto out;
1055 }
1056
1057 rt->u.dst.obsolete = -1;
Thomas Graf86872cb2006-08-22 00:01:08 -07001058 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059
Thomas Graf86872cb2006-08-22 00:01:08 -07001060 if (cfg->fc_protocol == RTPROT_UNSPEC)
1061 cfg->fc_protocol = RTPROT_BOOT;
1062 rt->rt6i_protocol = cfg->fc_protocol;
1063
1064 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001065
1066 if (addr_type & IPV6_ADDR_MULTICAST)
1067 rt->u.dst.input = ip6_mc_input;
1068 else
1069 rt->u.dst.input = ip6_forward;
1070
1071 rt->u.dst.output = ip6_output;
1072
Thomas Graf86872cb2006-08-22 00:01:08 -07001073 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1074 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001075 if (rt->rt6i_dst.plen == 128)
1076 rt->u.dst.flags = DST_HOST;
1077
1078#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001079 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1080 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081#endif
1082
Thomas Graf86872cb2006-08-22 00:01:08 -07001083 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001084
1085 /* We cannot add true routes via loopback here,
1086 they would result in kernel looping; promote them to reject routes
1087 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001088 if ((cfg->fc_flags & RTF_REJECT) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1090 /* hold loopback dev/idev if we haven't done so. */
1091 if (dev != &loopback_dev) {
1092 if (dev) {
1093 dev_put(dev);
1094 in6_dev_put(idev);
1095 }
1096 dev = &loopback_dev;
1097 dev_hold(dev);
1098 idev = in6_dev_get(dev);
1099 if (!idev) {
1100 err = -ENODEV;
1101 goto out;
1102 }
1103 }
1104 rt->u.dst.output = ip6_pkt_discard_out;
1105 rt->u.dst.input = ip6_pkt_discard;
1106 rt->u.dst.error = -ENETUNREACH;
1107 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1108 goto install_route;
1109 }
1110
Thomas Graf86872cb2006-08-22 00:01:08 -07001111 if (cfg->fc_flags & RTF_GATEWAY) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 struct in6_addr *gw_addr;
1113 int gwa_type;
1114
Thomas Graf86872cb2006-08-22 00:01:08 -07001115 gw_addr = &cfg->fc_gateway;
1116 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117 gwa_type = ipv6_addr_type(gw_addr);
1118
1119 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1120 struct rt6_info *grt;
1121
1122 /* IPv6 strictly inhibits using not link-local
1123 addresses as nexthop address.
1124 Otherwise, router will not able to send redirects.
1125 It is very good, but in some (rare!) circumstances
1126 (SIT, PtP, NBMA NOARP links) it is handy to allow
1127 some exceptions. --ANK
1128 */
1129 err = -EINVAL;
1130 if (!(gwa_type&IPV6_ADDR_UNICAST))
1131 goto out;
1132
Thomas Graf86872cb2006-08-22 00:01:08 -07001133 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001134
1135 err = -EHOSTUNREACH;
1136 if (grt == NULL)
1137 goto out;
1138 if (dev) {
1139 if (dev != grt->rt6i_dev) {
1140 dst_release(&grt->u.dst);
1141 goto out;
1142 }
1143 } else {
1144 dev = grt->rt6i_dev;
1145 idev = grt->rt6i_idev;
1146 dev_hold(dev);
1147 in6_dev_hold(grt->rt6i_idev);
1148 }
1149 if (!(grt->rt6i_flags&RTF_GATEWAY))
1150 err = 0;
1151 dst_release(&grt->u.dst);
1152
1153 if (err)
1154 goto out;
1155 }
1156 err = -EINVAL;
1157 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1158 goto out;
1159 }
1160
1161 err = -ENODEV;
1162 if (dev == NULL)
1163 goto out;
1164
Thomas Graf86872cb2006-08-22 00:01:08 -07001165 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1167 if (IS_ERR(rt->rt6i_nexthop)) {
1168 err = PTR_ERR(rt->rt6i_nexthop);
1169 rt->rt6i_nexthop = NULL;
1170 goto out;
1171 }
1172 }
1173
Thomas Graf86872cb2006-08-22 00:01:08 -07001174 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175
1176install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001177 if (cfg->fc_mx) {
1178 struct nlattr *nla;
1179 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001180
Thomas Graf86872cb2006-08-22 00:01:08 -07001181 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1182 int type = nla->nla_type;
1183
1184 if (type) {
1185 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001186 err = -EINVAL;
1187 goto out;
1188 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001189
1190 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001192 }
1193 }
1194
1195 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1196 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1197 if (!rt->u.dst.metrics[RTAX_MTU-1])
1198 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1199 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1200 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1201 rt->u.dst.dev = dev;
1202 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001203 rt->rt6i_table = table;
Thomas Graf86872cb2006-08-22 00:01:08 -07001204 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001205
1206out:
1207 if (dev)
1208 dev_put(dev);
1209 if (idev)
1210 in6_dev_put(idev);
1211 if (rt)
1212 dst_free((struct dst_entry *) rt);
1213 return err;
1214}
1215
Thomas Graf86872cb2006-08-22 00:01:08 -07001216static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217{
1218 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001219 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001220
Patrick McHardy6c813a72006-08-06 22:22:47 -07001221 if (rt == &ip6_null_entry)
1222 return -ENOENT;
1223
Thomas Grafc71099a2006-08-04 23:20:06 -07001224 table = rt->rt6i_table;
1225 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001226
Thomas Graf86872cb2006-08-22 00:01:08 -07001227 err = fib6_del(rt, info);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001228 dst_release(&rt->u.dst);
1229
Thomas Grafc71099a2006-08-04 23:20:06 -07001230 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001231
1232 return err;
1233}
1234
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001235int ip6_del_rt(struct rt6_info *rt)
1236{
Thomas Graf86872cb2006-08-22 00:01:08 -07001237 return __ip6_del_rt(rt, NULL);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001238}
1239
Thomas Graf86872cb2006-08-22 00:01:08 -07001240static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241{
Thomas Grafc71099a2006-08-04 23:20:06 -07001242 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001243 struct fib6_node *fn;
1244 struct rt6_info *rt;
1245 int err = -ESRCH;
1246
Thomas Graf86872cb2006-08-22 00:01:08 -07001247 table = fib6_get_table(cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001248 if (table == NULL)
1249 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001250
Thomas Grafc71099a2006-08-04 23:20:06 -07001251 read_lock_bh(&table->tb6_lock);
1252
1253 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001254 &cfg->fc_dst, cfg->fc_dst_len,
1255 &cfg->fc_src, cfg->fc_src_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001256
1257 if (fn) {
1258 for (rt = fn->leaf; rt; rt = rt->u.next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001259 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001260 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001261 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001263 if (cfg->fc_flags & RTF_GATEWAY &&
1264 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001265 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001266 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 continue;
1268 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001269 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001270
Thomas Graf86872cb2006-08-22 00:01:08 -07001271 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001272 }
1273 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001274 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001275
1276 return err;
1277}
1278
1279/*
1280 * Handle redirects
1281 */
1282void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1283 struct neighbour *neigh, u8 *lladdr, int on_link)
1284{
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001285 struct rt6_info *rt, *nrt = NULL;
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001286 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001287 struct fib6_table *table;
Tom Tucker8d717402006-07-30 20:43:36 -07001288 struct netevent_redirect netevent;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001289
Thomas Grafc71099a2006-08-04 23:20:06 -07001290 /* TODO: Very lazy, might need to check all tables */
1291 table = fib6_get_table(RT6_TABLE_MAIN);
1292 if (table == NULL)
1293 return;
1294
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001296 * Get the "current" route for this destination and
1297 * check if the redirect has come from approriate router.
1298 *
1299 * RFC 2461 specifies that redirects should only be
1300 * accepted if they come from the nexthop to the target.
1301 * Due to the way the routes are chosen, this notion
1302 * is a bit fuzzy and one might need to check all possible
1303 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001304 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305
Thomas Grafc71099a2006-08-04 23:20:06 -07001306 read_lock_bh(&table->tb6_lock);
1307 fn = fib6_lookup(&table->tb6_root, dest, NULL);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001308restart:
1309 for (rt = fn->leaf; rt; rt = rt->u.next) {
1310 /*
1311 * Current route is on-link; redirect is always invalid.
1312 *
1313 * Seems, previous statement is not true. It could
1314 * be node, which looks for us as on-link (f.e. proxy ndisc)
1315 * But then router serving it might decide, that we should
1316 * know truth 8)8) --ANK (980726).
1317 */
1318 if (rt6_check_expired(rt))
1319 continue;
1320 if (!(rt->rt6i_flags & RTF_GATEWAY))
1321 continue;
1322 if (neigh->dev != rt->rt6i_dev)
1323 continue;
1324 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1325 continue;
1326 break;
1327 }
1328 if (rt)
1329 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001330 else if (rt6_need_strict(dest)) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001331 while ((fn = fn->parent) != NULL) {
1332 if (fn->fn_flags & RTN_ROOT)
1333 break;
1334 if (fn->fn_flags & RTN_RTINFO)
1335 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001336 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001337 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001338 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001339
1340 if (!rt) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001341 if (net_ratelimit())
1342 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1343 "for redirect target\n");
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001344 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001345 }
1346
Linus Torvalds1da177e2005-04-16 15:20:36 -07001347 /*
1348 * We have finally decided to accept it.
1349 */
1350
1351 neigh_update(neigh, lladdr, NUD_STALE,
1352 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1353 NEIGH_UPDATE_F_OVERRIDE|
1354 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1355 NEIGH_UPDATE_F_ISROUTER))
1356 );
1357
1358 /*
1359 * Redirect received -> path was valid.
1360 * Look, redirects are sent only in response to data packets,
1361 * so that this nexthop apparently is reachable. --ANK
1362 */
1363 dst_confirm(&rt->u.dst);
1364
1365 /* Duplicate redirect: silently ignore. */
1366 if (neigh == rt->u.dst.neighbour)
1367 goto out;
1368
1369 nrt = ip6_rt_copy(rt);
1370 if (nrt == NULL)
1371 goto out;
1372
1373 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1374 if (on_link)
1375 nrt->rt6i_flags &= ~RTF_GATEWAY;
1376
1377 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1378 nrt->rt6i_dst.plen = 128;
1379 nrt->u.dst.flags |= DST_HOST;
1380
1381 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1382 nrt->rt6i_nexthop = neigh_clone(neigh);
1383 /* Reset pmtu, it may be better */
1384 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1385 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1386
Thomas Graf40e22e82006-08-22 00:00:45 -07001387 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 goto out;
1389
Tom Tucker8d717402006-07-30 20:43:36 -07001390 netevent.old = &rt->u.dst;
1391 netevent.new = &nrt->u.dst;
1392 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1393
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001395 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396 return;
1397 }
1398
1399out:
1400 dst_release(&rt->u.dst);
1401 return;
1402}
1403
1404/*
1405 * Handle ICMP "packet too big" messages
1406 * i.e. Path MTU discovery
1407 */
1408
1409void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1410 struct net_device *dev, u32 pmtu)
1411{
1412 struct rt6_info *rt, *nrt;
1413 int allfrag = 0;
1414
1415 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1416 if (rt == NULL)
1417 return;
1418
1419 if (pmtu >= dst_mtu(&rt->u.dst))
1420 goto out;
1421
1422 if (pmtu < IPV6_MIN_MTU) {
1423 /*
1424 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1425 * MTU (1280) and a fragment header should always be included
1426 * after a node receiving Too Big message reporting PMTU is
1427 * less than the IPv6 Minimum Link MTU.
1428 */
1429 pmtu = IPV6_MIN_MTU;
1430 allfrag = 1;
1431 }
1432
1433 /* New mtu received -> path was valid.
1434 They are sent only in response to data packets,
1435 so that this nexthop apparently is reachable. --ANK
1436 */
1437 dst_confirm(&rt->u.dst);
1438
1439 /* Host route. If it is static, it would be better
1440 not to override it, but add new one, so that
1441 when cache entry will expire old pmtu
1442 would return automatically.
1443 */
1444 if (rt->rt6i_flags & RTF_CACHE) {
1445 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1446 if (allfrag)
1447 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1448 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1449 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1450 goto out;
1451 }
1452
1453 /* Network route.
1454 Two cases are possible:
1455 1. It is connected route. Action: COW
1456 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1457 */
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001458 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001459 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001460 else
1461 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001462
YOSHIFUJI Hideakid5315b502006-03-20 16:58:48 -08001463 if (nrt) {
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001464 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1465 if (allfrag)
1466 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1467
1468 /* According to RFC 1981, detecting PMTU increase shouldn't be
1469 * happened within 5 mins, the recommended timer is 10 mins.
1470 * Here this route expiration time is set to ip6_rt_mtu_expires
1471 * which is 10 mins. After 10 mins the decreased pmtu is expired
1472 * and detecting PMTU increase will be automatically happened.
1473 */
1474 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1475 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1476
Thomas Graf40e22e82006-08-22 00:00:45 -07001477 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001478 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479out:
1480 dst_release(&rt->u.dst);
1481}
1482
1483/*
1484 * Misc support functions
1485 */
1486
1487static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1488{
1489 struct rt6_info *rt = ip6_dst_alloc();
1490
1491 if (rt) {
1492 rt->u.dst.input = ort->u.dst.input;
1493 rt->u.dst.output = ort->u.dst.output;
1494
1495 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1496 rt->u.dst.dev = ort->u.dst.dev;
1497 if (rt->u.dst.dev)
1498 dev_hold(rt->u.dst.dev);
1499 rt->rt6i_idev = ort->rt6i_idev;
1500 if (rt->rt6i_idev)
1501 in6_dev_hold(rt->rt6i_idev);
1502 rt->u.dst.lastuse = jiffies;
1503 rt->rt6i_expires = 0;
1504
1505 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1506 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1507 rt->rt6i_metric = 0;
1508
1509 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1510#ifdef CONFIG_IPV6_SUBTREES
1511 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1512#endif
Thomas Grafc71099a2006-08-04 23:20:06 -07001513 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 }
1515 return rt;
1516}
1517
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001518#ifdef CONFIG_IPV6_ROUTE_INFO
1519static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1520 struct in6_addr *gwaddr, int ifindex)
1521{
1522 struct fib6_node *fn;
1523 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001524 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001525
Thomas Grafc71099a2006-08-04 23:20:06 -07001526 table = fib6_get_table(RT6_TABLE_INFO);
1527 if (table == NULL)
1528 return NULL;
1529
1530 write_lock_bh(&table->tb6_lock);
1531 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001532 if (!fn)
1533 goto out;
1534
1535 for (rt = fn->leaf; rt; rt = rt->u.next) {
1536 if (rt->rt6i_dev->ifindex != ifindex)
1537 continue;
1538 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1539 continue;
1540 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1541 continue;
1542 dst_hold(&rt->u.dst);
1543 break;
1544 }
1545out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001546 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001547 return rt;
1548}
1549
1550static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1551 struct in6_addr *gwaddr, int ifindex,
1552 unsigned pref)
1553{
Thomas Graf86872cb2006-08-22 00:01:08 -07001554 struct fib6_config cfg = {
1555 .fc_table = RT6_TABLE_INFO,
1556 .fc_metric = 1024,
1557 .fc_ifindex = ifindex,
1558 .fc_dst_len = prefixlen,
1559 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1560 RTF_UP | RTF_PREF(pref),
1561 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001562
Thomas Graf86872cb2006-08-22 00:01:08 -07001563 ipv6_addr_copy(&cfg.fc_dst, prefix);
1564 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1565
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001566 /* We should treat it as a default route if prefix length is 0. */
1567 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001568 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001569
Thomas Graf86872cb2006-08-22 00:01:08 -07001570 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001571
1572 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1573}
1574#endif
1575
Linus Torvalds1da177e2005-04-16 15:20:36 -07001576struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1577{
1578 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001579 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580
Thomas Grafc71099a2006-08-04 23:20:06 -07001581 table = fib6_get_table(RT6_TABLE_DFLT);
1582 if (table == NULL)
1583 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584
Thomas Grafc71099a2006-08-04 23:20:06 -07001585 write_lock_bh(&table->tb6_lock);
1586 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001587 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001588 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1590 break;
1591 }
1592 if (rt)
1593 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001594 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001595 return rt;
1596}
1597
1598struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001599 struct net_device *dev,
1600 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601{
Thomas Graf86872cb2006-08-22 00:01:08 -07001602 struct fib6_config cfg = {
1603 .fc_table = RT6_TABLE_DFLT,
1604 .fc_metric = 1024,
1605 .fc_ifindex = dev->ifindex,
1606 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1607 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1608 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609
Thomas Graf86872cb2006-08-22 00:01:08 -07001610 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611
Thomas Graf86872cb2006-08-22 00:01:08 -07001612 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613
Linus Torvalds1da177e2005-04-16 15:20:36 -07001614 return rt6_get_dflt_router(gwaddr, dev);
1615}
1616
1617void rt6_purge_dflt_routers(void)
1618{
1619 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001620 struct fib6_table *table;
1621
1622 /* NOTE: Keep consistent with rt6_get_dflt_router */
1623 table = fib6_get_table(RT6_TABLE_DFLT);
1624 if (table == NULL)
1625 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626
1627restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001628 read_lock_bh(&table->tb6_lock);
1629 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1631 dst_hold(&rt->u.dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001632 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001633 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 goto restart;
1635 }
1636 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001637 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638}
1639
Thomas Graf86872cb2006-08-22 00:01:08 -07001640static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1641 struct fib6_config *cfg)
1642{
1643 memset(cfg, 0, sizeof(*cfg));
1644
1645 cfg->fc_table = RT6_TABLE_MAIN;
1646 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1647 cfg->fc_metric = rtmsg->rtmsg_metric;
1648 cfg->fc_expires = rtmsg->rtmsg_info;
1649 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1650 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1651 cfg->fc_flags = rtmsg->rtmsg_flags;
1652
1653 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1654 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1655 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1656}
1657
Linus Torvalds1da177e2005-04-16 15:20:36 -07001658int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1659{
Thomas Graf86872cb2006-08-22 00:01:08 -07001660 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001661 struct in6_rtmsg rtmsg;
1662 int err;
1663
1664 switch(cmd) {
1665 case SIOCADDRT: /* Add a route */
1666 case SIOCDELRT: /* Delete a route */
1667 if (!capable(CAP_NET_ADMIN))
1668 return -EPERM;
1669 err = copy_from_user(&rtmsg, arg,
1670 sizeof(struct in6_rtmsg));
1671 if (err)
1672 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001673
1674 rtmsg_to_fib6_config(&rtmsg, &cfg);
1675
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 rtnl_lock();
1677 switch (cmd) {
1678 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001679 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 break;
1681 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001682 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 break;
1684 default:
1685 err = -EINVAL;
1686 }
1687 rtnl_unlock();
1688
1689 return err;
1690 };
1691
1692 return -EINVAL;
1693}
1694
1695/*
1696 * Drop the packet on the floor
1697 */
1698
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001699static int ip6_pkt_discard(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700{
Lv Liangying76d0cc12006-08-29 00:00:47 -07001701 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1702 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1703 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1704
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1706 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1707 kfree_skb(skb);
1708 return 0;
1709}
1710
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03001711static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001712{
1713 skb->dev = skb->dst->dev;
1714 return ip6_pkt_discard(skb);
1715}
1716
1717/*
1718 * Allocate a dst for local (unicast / anycast) address.
1719 */
1720
1721struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1722 const struct in6_addr *addr,
1723 int anycast)
1724{
1725 struct rt6_info *rt = ip6_dst_alloc();
1726
1727 if (rt == NULL)
1728 return ERR_PTR(-ENOMEM);
1729
1730 dev_hold(&loopback_dev);
1731 in6_dev_hold(idev);
1732
1733 rt->u.dst.flags = DST_HOST;
1734 rt->u.dst.input = ip6_input;
1735 rt->u.dst.output = ip6_output;
1736 rt->rt6i_dev = &loopback_dev;
1737 rt->rt6i_idev = idev;
1738 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1739 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1740 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1741 rt->u.dst.obsolete = -1;
1742
1743 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09001744 if (anycast)
1745 rt->rt6i_flags |= RTF_ANYCAST;
1746 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 rt->rt6i_flags |= RTF_LOCAL;
1748 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1749 if (rt->rt6i_nexthop == NULL) {
1750 dst_free((struct dst_entry *) rt);
1751 return ERR_PTR(-ENOMEM);
1752 }
1753
1754 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1755 rt->rt6i_dst.plen = 128;
Thomas Grafc71099a2006-08-04 23:20:06 -07001756 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757
1758 atomic_set(&rt->u.dst.__refcnt, 1);
1759
1760 return rt;
1761}
1762
1763static int fib6_ifdown(struct rt6_info *rt, void *arg)
1764{
1765 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1766 rt != &ip6_null_entry) {
1767 RT6_TRACE("deleted by ifdown %p\n", rt);
1768 return -1;
1769 }
1770 return 0;
1771}
1772
1773void rt6_ifdown(struct net_device *dev)
1774{
Thomas Grafc71099a2006-08-04 23:20:06 -07001775 fib6_clean_all(fib6_ifdown, 0, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776}
1777
/* Argument bundle passed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
1783
1784static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1785{
1786 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1787 struct inet6_dev *idev;
1788
1789 /* In IPv6 pmtu discovery is not optional,
1790 so that RTAX_MTU lock cannot disable it.
1791 We still use this lock to block changes
1792 caused by addrconf/ndisc.
1793 */
1794
1795 idev = __in6_dev_get(arg->dev);
1796 if (idev == NULL)
1797 return 0;
1798
1799 /* For administrative MTU increase, there is no way to discover
1800 IPv6 PMTU increase, so PMTU increase should be updated here.
1801 Since RFC 1981 doesn't include administrative MTU increase
1802 update PMTU increase is a MUST. (i.e. jumbo frame)
1803 */
1804 /*
1805 If new MTU is less than route PMTU, this new MTU will be the
1806 lowest MTU in the path, update the route PMTU to reflect PMTU
1807 decreases; if new MTU is greater than route PMTU, and the
1808 old MTU is the lowest MTU in the path, update the route PMTU
1809 to reflect the increase. In this case if the other nodes' MTU
1810 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1811 PMTU discouvery.
1812 */
1813 if (rt->rt6i_dev == arg->dev &&
1814 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1815 (dst_mtu(&rt->u.dst) > arg->mtu ||
1816 (dst_mtu(&rt->u.dst) < arg->mtu &&
1817 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1818 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1819 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1820 return 0;
1821}
1822
1823void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1824{
Thomas Grafc71099a2006-08-04 23:20:06 -07001825 struct rt6_mtu_change_arg arg = {
1826 .dev = dev,
1827 .mtu = mtu,
1828 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829
Thomas Grafc71099a2006-08-04 23:20:06 -07001830 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831}
1832
Thomas Graf86872cb2006-08-22 00:01:08 -07001833static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = {
1834 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) },
1835 [RTA_OIF] = { .type = NLA_U32 },
1836 [RTA_PRIORITY] = { .type = NLA_U32 },
1837 [RTA_METRICS] = { .type = NLA_NESTED },
1838};
1839
1840static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1841 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001842{
Thomas Graf86872cb2006-08-22 00:01:08 -07001843 struct rtmsg *rtm;
1844 struct nlattr *tb[RTA_MAX+1];
1845 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001846
Thomas Graf86872cb2006-08-22 00:01:08 -07001847 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1848 if (err < 0)
1849 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850
Thomas Graf86872cb2006-08-22 00:01:08 -07001851 err = -EINVAL;
1852 rtm = nlmsg_data(nlh);
1853 memset(cfg, 0, sizeof(*cfg));
1854
1855 cfg->fc_table = rtm->rtm_table;
1856 cfg->fc_dst_len = rtm->rtm_dst_len;
1857 cfg->fc_src_len = rtm->rtm_src_len;
1858 cfg->fc_flags = RTF_UP;
1859 cfg->fc_protocol = rtm->rtm_protocol;
1860
1861 if (rtm->rtm_type == RTN_UNREACHABLE)
1862 cfg->fc_flags |= RTF_REJECT;
1863
1864 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1865 cfg->fc_nlinfo.nlh = nlh;
1866
1867 if (tb[RTA_GATEWAY]) {
1868 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1869 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001870 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001871
1872 if (tb[RTA_DST]) {
1873 int plen = (rtm->rtm_dst_len + 7) >> 3;
1874
1875 if (nla_len(tb[RTA_DST]) < plen)
1876 goto errout;
1877
1878 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001879 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001880
1881 if (tb[RTA_SRC]) {
1882 int plen = (rtm->rtm_src_len + 7) >> 3;
1883
1884 if (nla_len(tb[RTA_SRC]) < plen)
1885 goto errout;
1886
1887 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001888 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001889
1890 if (tb[RTA_OIF])
1891 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
1892
1893 if (tb[RTA_PRIORITY])
1894 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
1895
1896 if (tb[RTA_METRICS]) {
1897 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
1898 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001900
1901 if (tb[RTA_TABLE])
1902 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
1903
1904 err = 0;
1905errout:
1906 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907}
1908
1909int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1910{
Thomas Graf86872cb2006-08-22 00:01:08 -07001911 struct fib6_config cfg;
1912 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001913
Thomas Graf86872cb2006-08-22 00:01:08 -07001914 err = rtm_to_fib6_config(skb, nlh, &cfg);
1915 if (err < 0)
1916 return err;
1917
1918 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919}
1920
1921int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1922{
Thomas Graf86872cb2006-08-22 00:01:08 -07001923 struct fib6_config cfg;
1924 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001925
Thomas Graf86872cb2006-08-22 00:01:08 -07001926 err = rtm_to_fib6_config(skb, nlh, &cfg);
1927 if (err < 0)
1928 return err;
1929
1930 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931}
1932
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07001934 struct in6_addr *dst, struct in6_addr *src,
1935 int iif, int type, u32 pid, u32 seq,
1936 int prefix, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001937{
1938 struct rtmsg *rtm;
1939 struct nlmsghdr *nlh;
1940 unsigned char *b = skb->tail;
1941 struct rta_cacheinfo ci;
Patrick McHardy9e762a42006-08-10 23:09:48 -07001942 u32 table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943
1944 if (prefix) { /* user wants prefix routes only */
1945 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1946 /* success since this is not a prefix route */
1947 return 1;
1948 }
1949 }
1950
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -07001951 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 rtm = NLMSG_DATA(nlh);
1953 rtm->rtm_family = AF_INET6;
1954 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1955 rtm->rtm_src_len = rt->rt6i_src.plen;
1956 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07001957 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07001958 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07001959 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07001960 table = RT6_TABLE_UNSPEC;
1961 rtm->rtm_table = table;
1962 RTA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 if (rt->rt6i_flags&RTF_REJECT)
1964 rtm->rtm_type = RTN_UNREACHABLE;
1965 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1966 rtm->rtm_type = RTN_LOCAL;
1967 else
1968 rtm->rtm_type = RTN_UNICAST;
1969 rtm->rtm_flags = 0;
1970 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1971 rtm->rtm_protocol = rt->rt6i_protocol;
1972 if (rt->rt6i_flags&RTF_DYNAMIC)
1973 rtm->rtm_protocol = RTPROT_REDIRECT;
1974 else if (rt->rt6i_flags & RTF_ADDRCONF)
1975 rtm->rtm_protocol = RTPROT_KERNEL;
1976 else if (rt->rt6i_flags&RTF_DEFAULT)
1977 rtm->rtm_protocol = RTPROT_RA;
1978
1979 if (rt->rt6i_flags&RTF_CACHE)
1980 rtm->rtm_flags |= RTM_F_CLONED;
1981
1982 if (dst) {
1983 RTA_PUT(skb, RTA_DST, 16, dst);
1984 rtm->rtm_dst_len = 128;
1985 } else if (rtm->rtm_dst_len)
1986 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1987#ifdef CONFIG_IPV6_SUBTREES
1988 if (src) {
1989 RTA_PUT(skb, RTA_SRC, 16, src);
1990 rtm->rtm_src_len = 128;
1991 } else if (rtm->rtm_src_len)
1992 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1993#endif
1994 if (iif)
1995 RTA_PUT(skb, RTA_IIF, 4, &iif);
1996 else if (dst) {
1997 struct in6_addr saddr_buf;
1998 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1999 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2000 }
2001 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2002 goto rtattr_failure;
2003 if (rt->u.dst.neighbour)
2004 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2005 if (rt->u.dst.dev)
2006 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
2007 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
2008 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
2009 if (rt->rt6i_expires)
2010 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
2011 else
2012 ci.rta_expires = 0;
2013 ci.rta_used = rt->u.dst.__use;
2014 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
2015 ci.rta_error = rt->u.dst.error;
2016 ci.rta_id = 0;
2017 ci.rta_ts = 0;
2018 ci.rta_tsage = 0;
2019 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
2020 nlh->nlmsg_len = skb->tail - b;
2021 return skb->len;
2022
2023nlmsg_failure:
2024rtattr_failure:
2025 skb_trim(skb, b - skb->data);
2026 return -1;
2027}
2028
Patrick McHardy1b43af52006-08-10 23:11:17 -07002029int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002030{
2031 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2032 int prefix;
2033
2034 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
2035 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
2036 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2037 } else
2038 prefix = 0;
2039
2040 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2041 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002042 prefix, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043}
2044
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2046{
2047 struct rtattr **rta = arg;
2048 int iif = 0;
2049 int err = -ENOBUFS;
2050 struct sk_buff *skb;
2051 struct flowi fl;
2052 struct rt6_info *rt;
2053
2054 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2055 if (skb == NULL)
2056 goto out;
2057
2058 /* Reserve room for dummy headers, this skb can pass
2059 through good chunk of routing engine.
2060 */
2061 skb->mac.raw = skb->data;
2062 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2063
2064 memset(&fl, 0, sizeof(fl));
2065 if (rta[RTA_SRC-1])
2066 ipv6_addr_copy(&fl.fl6_src,
2067 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2068 if (rta[RTA_DST-1])
2069 ipv6_addr_copy(&fl.fl6_dst,
2070 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2071
2072 if (rta[RTA_IIF-1])
2073 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2074
2075 if (iif) {
2076 struct net_device *dev;
2077 dev = __dev_get_by_index(iif);
2078 if (!dev) {
2079 err = -ENODEV;
2080 goto out_free;
2081 }
2082 }
2083
2084 fl.oif = 0;
2085 if (rta[RTA_OIF-1])
2086 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2087
2088 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2089
2090 skb->dst = &rt->u.dst;
2091
2092 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2093 err = rt6_fill_node(skb, rt,
2094 &fl.fl6_dst, &fl.fl6_src,
2095 iif,
2096 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002097 nlh->nlmsg_seq, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002098 if (err < 0) {
2099 err = -EMSGSIZE;
2100 goto out_free;
2101 }
2102
Thomas Graf2942e902006-08-15 00:30:25 -07002103 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104out:
2105 return err;
2106out_free:
2107 kfree_skb(skb);
2108 goto out;
2109}
2110
Thomas Graf86872cb2006-08-22 00:01:08 -07002111void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112{
2113 struct sk_buff *skb;
Thomas Graf86872cb2006-08-22 00:01:08 -07002114 u32 pid = 0, seq = 0;
2115 struct nlmsghdr *nlh = NULL;
Thomas Graf21713eb2006-08-15 00:35:24 -07002116 int payload = sizeof(struct rtmsg) + 256;
2117 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118
Thomas Graf86872cb2006-08-22 00:01:08 -07002119 if (info) {
2120 pid = info->pid;
2121 nlh = info->nlh;
2122 if (nlh)
2123 seq = nlh->nlmsg_seq;
2124 }
2125
Thomas Graf21713eb2006-08-15 00:35:24 -07002126 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2127 if (skb == NULL)
2128 goto errout;
2129
2130 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2131 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 kfree_skb(skb);
Thomas Graf21713eb2006-08-15 00:35:24 -07002133 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134 }
Thomas Graf21713eb2006-08-15 00:35:24 -07002135
2136 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2137errout:
2138 if (err < 0)
2139 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140}
2141
2142/*
2143 * /proc
2144 */
2145
2146#ifdef CONFIG_PROC_FS
2147
/*
 * Nominal length in bytes of one formatted route record; used to turn
 * a byte offset into a number of records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* State shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer */
	int offset;	/* byte offset requested by the reader */
	int length;	/* number of bytes requested by the reader */
	int skip;	/* records skipped so far */
	int len;	/* bytes written to buffer so far */
};
2158
2159static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2160{
2161 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2162 int i;
2163
2164 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2165 arg->skip++;
2166 return 0;
2167 }
2168
2169 if (arg->len >= arg->length)
2170 return 0;
2171
2172 for (i=0; i<16; i++) {
2173 sprintf(arg->buffer + arg->len, "%02x",
2174 rt->rt6i_dst.addr.s6_addr[i]);
2175 arg->len += 2;
2176 }
2177 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2178 rt->rt6i_dst.plen);
2179
2180#ifdef CONFIG_IPV6_SUBTREES
2181 for (i=0; i<16; i++) {
2182 sprintf(arg->buffer + arg->len, "%02x",
2183 rt->rt6i_src.addr.s6_addr[i]);
2184 arg->len += 2;
2185 }
2186 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2187 rt->rt6i_src.plen);
2188#else
2189 sprintf(arg->buffer + arg->len,
2190 "00000000000000000000000000000000 00 ");
2191 arg->len += 36;
2192#endif
2193
2194 if (rt->rt6i_nexthop) {
2195 for (i=0; i<16; i++) {
2196 sprintf(arg->buffer + arg->len, "%02x",
2197 rt->rt6i_nexthop->primary_key[i]);
2198 arg->len += 2;
2199 }
2200 } else {
2201 sprintf(arg->buffer + arg->len,
2202 "00000000000000000000000000000000");
2203 arg->len += 32;
2204 }
2205 arg->len += sprintf(arg->buffer + arg->len,
2206 " %08x %08x %08x %08x %8s\n",
2207 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2208 rt->u.dst.__use, rt->rt6i_flags,
2209 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2210 return 0;
2211}
2212
2213static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2214{
Thomas Grafc71099a2006-08-04 23:20:06 -07002215 struct rt6_proc_arg arg = {
2216 .buffer = buffer,
2217 .offset = offset,
2218 .length = length,
2219 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220
Thomas Grafc71099a2006-08-04 23:20:06 -07002221 fib6_clean_all(rt6_info_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002222
2223 *start = buffer;
2224 if (offset)
2225 *start += offset % RT6_INFO_LEN;
2226
2227 arg.len -= offset % RT6_INFO_LEN;
2228
2229 if (arg.len > length)
2230 arg.len = length;
2231 if (arg.len < 0)
2232 arg.len = 0;
2233
2234 return arg.len;
2235}
2236
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2238{
2239 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2240 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2241 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2242 rt6_stats.fib_rt_cache,
2243 atomic_read(&ip6_dst_ops.entries),
2244 rt6_stats.fib_discarded_routes);
2245
2246 return 0;
2247}
2248
2249static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2250{
2251 return single_open(file, rt6_stats_seq_show, NULL);
2252}
2253
2254static struct file_operations rt6_stats_seq_fops = {
2255 .owner = THIS_MODULE,
2256 .open = rt6_stats_seq_open,
2257 .read = seq_read,
2258 .llseek = seq_lseek,
2259 .release = single_release,
2260};
2261#endif /* CONFIG_PROC_FS */
2262
2263#ifdef CONFIG_SYSCTL
2264
2265static int flush_delay;
2266
2267static
2268int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2269 void __user *buffer, size_t *lenp, loff_t *ppos)
2270{
2271 if (write) {
2272 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2273 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2274 return 0;
2275 } else
2276 return -EINVAL;
2277}
2278
2279ctl_table ipv6_route_table[] = {
2280 {
2281 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2282 .procname = "flush",
2283 .data = &flush_delay,
2284 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002285 .mode = 0200,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286 .proc_handler = &ipv6_sysctl_rtcache_flush
2287 },
2288 {
2289 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2290 .procname = "gc_thresh",
2291 .data = &ip6_dst_ops.gc_thresh,
2292 .maxlen = sizeof(int),
2293 .mode = 0644,
2294 .proc_handler = &proc_dointvec,
2295 },
2296 {
2297 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2298 .procname = "max_size",
2299 .data = &ip6_rt_max_size,
2300 .maxlen = sizeof(int),
2301 .mode = 0644,
2302 .proc_handler = &proc_dointvec,
2303 },
2304 {
2305 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2306 .procname = "gc_min_interval",
2307 .data = &ip6_rt_gc_min_interval,
2308 .maxlen = sizeof(int),
2309 .mode = 0644,
2310 .proc_handler = &proc_dointvec_jiffies,
2311 .strategy = &sysctl_jiffies,
2312 },
2313 {
2314 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2315 .procname = "gc_timeout",
2316 .data = &ip6_rt_gc_timeout,
2317 .maxlen = sizeof(int),
2318 .mode = 0644,
2319 .proc_handler = &proc_dointvec_jiffies,
2320 .strategy = &sysctl_jiffies,
2321 },
2322 {
2323 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2324 .procname = "gc_interval",
2325 .data = &ip6_rt_gc_interval,
2326 .maxlen = sizeof(int),
2327 .mode = 0644,
2328 .proc_handler = &proc_dointvec_jiffies,
2329 .strategy = &sysctl_jiffies,
2330 },
2331 {
2332 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2333 .procname = "gc_elasticity",
2334 .data = &ip6_rt_gc_elasticity,
2335 .maxlen = sizeof(int),
2336 .mode = 0644,
2337 .proc_handler = &proc_dointvec_jiffies,
2338 .strategy = &sysctl_jiffies,
2339 },
2340 {
2341 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2342 .procname = "mtu_expires",
2343 .data = &ip6_rt_mtu_expires,
2344 .maxlen = sizeof(int),
2345 .mode = 0644,
2346 .proc_handler = &proc_dointvec_jiffies,
2347 .strategy = &sysctl_jiffies,
2348 },
2349 {
2350 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2351 .procname = "min_adv_mss",
2352 .data = &ip6_rt_min_advmss,
2353 .maxlen = sizeof(int),
2354 .mode = 0644,
2355 .proc_handler = &proc_dointvec_jiffies,
2356 .strategy = &sysctl_jiffies,
2357 },
2358 {
2359 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2360 .procname = "gc_min_interval_ms",
2361 .data = &ip6_rt_gc_min_interval,
2362 .maxlen = sizeof(int),
2363 .mode = 0644,
2364 .proc_handler = &proc_dointvec_ms_jiffies,
2365 .strategy = &sysctl_ms_jiffies,
2366 },
2367 { .ctl_name = 0 }
2368};
2369
2370#endif
2371
2372void __init ip6_route_init(void)
2373{
2374 struct proc_dir_entry *p;
2375
2376 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2377 sizeof(struct rt6_info),
2378 0, SLAB_HWCACHE_ALIGN,
2379 NULL, NULL);
2380 if (!ip6_dst_ops.kmem_cachep)
2381 panic("cannot create ip6_dst_cache");
2382
2383 fib6_init();
2384#ifdef CONFIG_PROC_FS
2385 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2386 if (p)
2387 p->owner = THIS_MODULE;
2388
2389 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2390#endif
2391#ifdef CONFIG_XFRM
2392 xfrm6_init();
2393#endif
Thomas Graf101367c2006-08-04 03:39:02 -07002394#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2395 fib6_rules_init();
2396#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002397}
2398
2399void ip6_route_cleanup(void)
2400{
Thomas Graf101367c2006-08-04 03:39:02 -07002401#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2402 fib6_rules_cleanup();
2403#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07002404#ifdef CONFIG_PROC_FS
2405 proc_net_remove("ipv6_route");
2406 proc_net_remove("rt6_stats");
2407#endif
2408#ifdef CONFIG_XFRM
2409 xfrm6_fini();
2410#endif
2411 rt6_ifdown(NULL);
2412 fib6_gc_cleanup();
2413 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2414}