blob: af8ee7b9d4467927a7e7af2f0f47e6d5b3ff0f51 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09006 * Pedro Roque <roque@di.fc.ul.pt>
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
YOSHIFUJI Hideakic0bece92006-08-23 17:23:25 -070023 * Ville Nuorvala
24 * Fixed routing subtrees.
Linus Torvalds1da177e2005-04-16 15:20:36 -070025 */
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/errno.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040029#include <linux/export.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090038#include <linux/mroute6.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include <linux/if_arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070041#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
Daniel Lezcano5b7c9312008-03-03 23:28:58 -080043#include <linux/nsproxy.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090044#include <linux/slab.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020045#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -070056#include <net/netevent.h>
Thomas Graf21713eb2006-08-15 00:35:24 -070057#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
65/* Set to 3 to get tracing. */
66#define RT6_DEBUG 2
67
68#if RT6_DEBUG >= 3
69#define RDBG(x) printk x
70#define RT6_TRACE(x...) printk(KERN_DEBUG x)
71#else
72#define RDBG(x)
73#define RT6_TRACE(x...) do { ; } while (0)
74#endif
75
Eric Dumazet21efcfa2011-07-19 20:18:36 +000076static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -070078static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -080079static unsigned int ip6_default_advmss(const struct dst_entry *dst);
David S. Millerd33e4552010-12-14 13:01:14 -080080static unsigned int ip6_default_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -070081static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
Daniel Lezcano569d3642008-01-18 03:56:57 -080085static int ip6_dst_gc(struct dst_ops *ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -070086
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080092#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080093static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000094 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -080096 unsigned pref);
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -080097static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +000098 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800100#endif
101
David S. Miller06582542011-01-27 14:58:42 -0800102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
David S. Miller06582542011-01-27 14:58:42 -0800111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
David S. Millerd3aaeb32011-07-18 00:40:17 -0700135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -0800140static struct dst_ops ip6_dst_ops_template = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800142 .protocol = cpu_to_be16(ETH_P_IPV6),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
David S. Miller0dbaee32010-12-13 12:52:14 -0800146 .default_advmss = ip6_default_advmss,
David S. Millerd33e4552010-12-14 13:01:14 -0800147 .default_mtu = ip6_default_mtu,
David S. Miller06582542011-01-27 14:58:42 -0800148 .cow_metrics = ipv6_cow_metrics,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
Herbert Xu1ac06e02008-05-20 14:32:14 -0700154 .local_out = __ip6_local_out,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700155 .neigh_lookup = ip6_neigh_lookup,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700156};
157
/*
 * default_mtu handler for the blackhole dst_ops: always reports 0,
 * regardless of @dst.
 */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int blackhole_mtu = 0;

	return blackhole_mtu;
}
162
David S. Miller14e50e52007-05-24 18:17:54 -0700163static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164{
165}
166
Held Bernhard0972ddb2011-04-24 22:07:32 +0000167static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 unsigned long old)
169{
170 return NULL;
171}
172
David S. Miller14e50e52007-05-24 18:17:54 -0700173static struct dst_ops ip6_dst_blackhole_ops = {
174 .family = AF_INET6,
Harvey Harrison09640e62009-02-01 00:45:17 -0800175 .protocol = cpu_to_be16(ETH_P_IPV6),
David S. Miller14e50e52007-05-24 18:17:54 -0700176 .destroy = ip6_dst_destroy,
177 .check = ip6_dst_check,
Roland Dreierec831ea2011-01-31 13:16:00 -0800178 .default_mtu = ip6_blackhole_default_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -0800179 .default_advmss = ip6_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -0700180 .update_pmtu = ip6_rt_blackhole_update_pmtu,
Held Bernhard0972ddb2011-04-24 22:07:32 +0000181 .cow_metrics = ip6_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -0700182 .neigh_lookup = ip6_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -0700183};
184
David S. Miller62fa8a82011-01-26 20:51:05 -0800185static const u32 ip6_template_metrics[RTAX_MAX] = {
186 [RTAX_HOPLIMIT - 1] = 255,
187};
188
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800189static struct rt6_info ip6_null_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -ENETUNREACH,
Changli Gaod8d1f302010-06-10 23:31:35 -0700195 .input = ip6_pkt_discard,
196 .output = ip6_pkt_discard_out,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700199 .rt6i_protocol = RTPROT_KERNEL,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
Thomas Graf101367c2006-08-04 03:39:02 -0700204#ifdef CONFIG_IPV6_MULTIPLE_TABLES
205
David S. Miller6723ab52006-10-18 21:20:57 -0700206static int ip6_pkt_prohibit(struct sk_buff *skb);
207static int ip6_pkt_prohibit_out(struct sk_buff *skb);
David S. Miller6723ab52006-10-18 21:20:57 -0700208
Adrian Bunk280a34c2008-04-21 02:29:32 -0700209static struct rt6_info ip6_prohibit_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -EACCES,
Changli Gaod8d1f302010-06-10 23:31:35 -0700215 .input = ip6_pkt_prohibit,
216 .output = ip6_pkt_prohibit_out,
Thomas Graf101367c2006-08-04 03:39:02 -0700217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700219 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
Daniel Lezcanobdb32892008-03-04 13:48:10 -0800224static struct rt6_info ip6_blk_hole_entry_template = {
Changli Gaod8d1f302010-06-10 23:31:35 -0700225 .dst = {
226 .__refcnt = ATOMIC_INIT(1),
227 .__use = 1,
228 .obsolete = -1,
229 .error = -EINVAL,
Changli Gaod8d1f302010-06-10 23:31:35 -0700230 .input = dst_discard,
231 .output = dst_discard,
Thomas Graf101367c2006-08-04 03:39:02 -0700232 },
233 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
Jean-Mickael Guerin4f724272009-05-20 17:38:59 -0700234 .rt6i_protocol = RTPROT_KERNEL,
Thomas Graf101367c2006-08-04 03:39:02 -0700235 .rt6i_metric = ~(u32) 0,
236 .rt6i_ref = ATOMIC_INIT(1),
237};
238
239#endif
240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241/* allocate dst with ip6_dst_ops */
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700242static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
David S. Miller957c6652011-06-24 15:25:00 -0700243 struct net_device *dev,
244 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245{
David S. Miller957c6652011-06-24 15:25:00 -0700246 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
David S. Millercf911662011-04-28 14:31:47 -0700247
Madalin Bucurfbe58182011-09-26 07:04:56 +0000248 if (rt != NULL)
249 memset(&rt->rt6i_table, 0,
250 sizeof(*rt) - sizeof(struct dst_entry));
David S. Millercf911662011-04-28 14:31:47 -0700251
252 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253}
254
255static void ip6_dst_destroy(struct dst_entry *dst)
256{
257 struct rt6_info *rt = (struct rt6_info *)dst;
258 struct inet6_dev *idev = rt->rt6i_idev;
David S. Millerb3419362010-11-30 12:27:11 -0800259 struct inet_peer *peer = rt->rt6i_peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
Yan, Zheng8e2ec632011-09-05 21:34:30 +0000261 if (!(rt->dst.flags & DST_HOST))
262 dst_destroy_metrics_generic(dst);
263
Linus Torvalds1da177e2005-04-16 15:20:36 -0700264 if (idev != NULL) {
265 rt->rt6i_idev = NULL;
266 in6_dev_put(idev);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900267 }
David S. Millerb3419362010-11-30 12:27:11 -0800268 if (peer) {
David S. Millerb3419362010-11-30 12:27:11 -0800269 rt->rt6i_peer = NULL;
270 inet_putpeer(peer);
271 }
272}
273
David S. Miller6431cbc2011-02-07 20:38:06 -0800274static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275
276static u32 rt6_peer_genid(void)
277{
278 return atomic_read(&__rt6_peer_genid);
279}
280
David S. Millerb3419362010-11-30 12:27:11 -0800281void rt6_bind_peer(struct rt6_info *rt, int create)
282{
283 struct inet_peer *peer;
284
David S. Millerb3419362010-11-30 12:27:11 -0800285 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 inet_putpeer(peer);
David S. Miller6431cbc2011-02-07 20:38:06 -0800288 else
289 rt->rt6i_peer_genid = rt6_peer_genid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700290}
291
292static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 int how)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800297 struct net_device *loopback_dev =
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900298 dev_net(dev)->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700299
Denis V. Lunev5a3e55d2007-12-07 00:38:10 -0800300 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 struct inet6_dev *loopback_idev =
302 in6_dev_get(loopback_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700303 if (loopback_idev != NULL) {
304 rt->rt6i_idev = loopback_idev;
305 in6_dev_put(idev);
306 }
307 }
308}
309
310static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000312 return (rt->rt6i_flags & RTF_EXPIRES) &&
313 time_after(jiffies, rt->rt6i_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700314}
315
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000316static inline int rt6_need_strict(const struct in6_addr *daddr)
Thomas Grafc71099a2006-08-04 23:20:06 -0700317{
Eric Dumazeta02cec22010-09-22 20:43:57 +0000318 return ipv6_addr_type(daddr) &
319 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
Thomas Grafc71099a2006-08-04 23:20:06 -0700320}
321
Linus Torvalds1da177e2005-04-16 15:20:36 -0700322/*
Thomas Grafc71099a2006-08-04 23:20:06 -0700323 * Route lookup. Any table->tb6_lock is implied.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 */
325
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800326static inline struct rt6_info *rt6_device_match(struct net *net,
327 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000328 const struct in6_addr *saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329 int oif,
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700330 int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331{
332 struct rt6_info *local = NULL;
333 struct rt6_info *sprt;
334
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900335 if (!oif && ipv6_addr_any(saddr))
336 goto out;
337
Changli Gaod8d1f302010-06-10 23:31:35 -0700338 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900339 struct net_device *dev = sprt->rt6i_dev;
340
341 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700342 if (dev->ifindex == oif)
343 return sprt;
344 if (dev->flags & IFF_LOOPBACK) {
345 if (sprt->rt6i_idev == NULL ||
346 sprt->rt6i_idev->dev->ifindex != oif) {
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700347 if (flags & RT6_LOOKUP_F_IFACE && oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 continue;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900349 if (local && (!oif ||
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 local->rt6i_idev->dev->ifindex == oif))
351 continue;
352 }
353 local = sprt;
354 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900355 } else {
356 if (ipv6_chk_addr(net, saddr, dev,
357 flags & RT6_LOOKUP_F_IFACE))
358 return sprt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700359 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900360 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700361
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900362 if (oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 if (local)
364 return local;
365
YOSHIFUJI Hideakid4208952008-06-27 20:14:54 -0700366 if (flags & RT6_LOOKUP_F_IFACE)
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800367 return net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 }
YOSHIFUJI Hideakidd3abc42008-07-02 18:30:18 +0900369out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 return rt;
371}
372
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800373#ifdef CONFIG_IPV6_ROUTER_PREF
374static void rt6_probe(struct rt6_info *rt)
375{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000376 struct neighbour *neigh;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800377 /*
378 * Okay, this does not seem to be appropriate
379 * for now, however, we need to check if it
380 * is really so; aka Router Reachability Probing.
381 *
382 * Router Reachability Probe MUST be rate-limited
383 * to no more than one per minute.
384 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000385 rcu_read_lock();
386 neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800387 if (!neigh || (neigh->nud_state & NUD_VALID))
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000388 goto out;
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800389 read_lock_bh(&neigh->lock);
390 if (!(neigh->nud_state & NUD_VALID) &&
YOSHIFUJI Hideaki52e16352006-03-20 17:05:47 -0800391 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800392 struct in6_addr mcaddr;
393 struct in6_addr *target;
394
395 neigh->updated = jiffies;
396 read_unlock_bh(&neigh->lock);
397
398 target = (struct in6_addr *)&neigh->primary_key;
399 addrconf_addr_solict_mult(target, &mcaddr);
400 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000401 } else {
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800402 read_unlock_bh(&neigh->lock);
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000403 }
404out:
405 rcu_read_unlock();
YOSHIFUJI Hideaki27097252006-03-20 17:05:13 -0800406}
407#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no probing: empty stub. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
411#endif
412
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413/*
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800414 * Default Router Selection (RFC 2461 6.3.6)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 */
Dave Jonesb6f99a22007-03-22 12:27:49 -0700416static inline int rt6_check_dev(struct rt6_info *rt, int oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800418 struct net_device *dev = rt->rt6i_dev;
David S. Miller161980f2007-04-06 11:42:27 -0700419 if (!oif || dev->ifindex == oif)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800420 return 2;
David S. Miller161980f2007-04-06 11:42:27 -0700421 if ((dev->flags & IFF_LOOPBACK) &&
422 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 return 1;
424 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425}
426
Dave Jonesb6f99a22007-03-22 12:27:49 -0700427static inline int rt6_check_neigh(struct rt6_info *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428{
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000429 struct neighbour *neigh;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800430 int m;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000431
432 rcu_read_lock();
433 neigh = dst_get_neighbour(&rt->dst);
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700434 if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 !(rt->rt6i_flags & RTF_GATEWAY))
436 m = 1;
437 else if (neigh) {
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800438 read_lock_bh(&neigh->lock);
439 if (neigh->nud_state & NUD_VALID)
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700440 m = 2;
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800441#ifdef CONFIG_IPV6_ROUTER_PREF
442 else if (neigh->nud_state & NUD_FAILED)
443 m = 0;
444#endif
445 else
YOSHIFUJI Hideakiea73ee22006-11-06 09:45:44 -0800446 m = 1;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800447 read_unlock_bh(&neigh->lock);
YOSHIFUJI Hideaki398bcbe2008-01-19 00:35:16 -0800448 } else
449 m = 0;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000450 rcu_read_unlock();
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800451 return m;
452}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800454static int rt6_score_route(struct rt6_info *rt, int oif,
455 int strict)
456{
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700457 int m, n;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900458
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700459 m = rt6_check_dev(rt, oif);
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700460 if (!m && (strict & RT6_LOOKUP_F_IFACE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800461 return -1;
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -0800462#ifdef CONFIG_IPV6_ROUTER_PREF
463 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464#endif
YOSHIFUJI Hideaki4d0c5912006-05-26 13:23:41 -0700465 n = rt6_check_neigh(rt);
YOSHIFUJI Hideaki557e92e2006-11-06 09:45:45 -0800466 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800467 return -1;
468 return m;
469}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470
David S. Millerf11e6652007-03-24 20:36:25 -0700471static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 int *mpri, struct rt6_info *match)
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800473{
David S. Millerf11e6652007-03-24 20:36:25 -0700474 int m;
475
476 if (rt6_check_expired(rt))
477 goto out;
478
479 m = rt6_score_route(rt, oif, strict);
480 if (m < 0)
481 goto out;
482
483 if (m > *mpri) {
484 if (strict & RT6_LOOKUP_F_REACHABLE)
485 rt6_probe(match);
486 *mpri = m;
487 match = rt;
488 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 rt6_probe(rt);
490 }
491
492out:
493 return match;
494}
495
496static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 struct rt6_info *rr_head,
498 u32 metric, int oif, int strict)
499{
500 struct rt6_info *rt, *match;
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800501 int mpri = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
David S. Millerf11e6652007-03-24 20:36:25 -0700503 match = NULL;
504 for (rt = rr_head; rt && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700505 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700506 match = find_match(rt, oif, strict, &mpri, match);
507 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
Changli Gaod8d1f302010-06-10 23:31:35 -0700508 rt = rt->dst.rt6_next)
David S. Millerf11e6652007-03-24 20:36:25 -0700509 match = find_match(rt, oif, strict, &mpri, match);
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800510
David S. Millerf11e6652007-03-24 20:36:25 -0700511 return match;
512}
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800513
David S. Millerf11e6652007-03-24 20:36:25 -0700514static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515{
516 struct rt6_info *match, *rt0;
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800517 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
David S. Millerf11e6652007-03-24 20:36:25 -0700519 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800520 __func__, fn->leaf, oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521
David S. Millerf11e6652007-03-24 20:36:25 -0700522 rt0 = fn->rr_ptr;
523 if (!rt0)
524 fn->rr_ptr = rt0 = fn->leaf;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525
David S. Millerf11e6652007-03-24 20:36:25 -0700526 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800528 if (!match &&
David S. Millerf11e6652007-03-24 20:36:25 -0700529 (strict & RT6_LOOKUP_F_REACHABLE)) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700530 struct rt6_info *next = rt0->dst.rt6_next;
David S. Millerf11e6652007-03-24 20:36:25 -0700531
YOSHIFUJI Hideaki554cfb72006-03-20 17:00:26 -0800532 /* no entries matched; do round-robin */
David S. Millerf11e6652007-03-24 20:36:25 -0700533 if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 next = fn->leaf;
535
536 if (next != rt0)
537 fn->rr_ptr = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 }
539
David S. Millerf11e6652007-03-24 20:36:25 -0700540 RT6_TRACE("%s() => %p\n",
Harvey Harrison0dc47872008-03-05 20:47:47 -0800541 __func__, match);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700542
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900543 net = dev_net(rt0->rt6i_dev);
Eric Dumazeta02cec22010-09-22 20:43:57 +0000544 return match ? match : net->ipv6.ip6_null_entry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700545}
546
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800547#ifdef CONFIG_IPV6_ROUTE_INFO
548int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000549 const struct in6_addr *gwaddr)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800550{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900551 struct net *net = dev_net(dev);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800552 struct route_info *rinfo = (struct route_info *) opt;
553 struct in6_addr prefix_buf, *prefix;
554 unsigned int pref;
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900555 unsigned long lifetime;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800556 struct rt6_info *rt;
557
558 if (len < sizeof(struct route_info)) {
559 return -EINVAL;
560 }
561
562 /* Sanity check for prefix_len and length */
563 if (rinfo->length > 3) {
564 return -EINVAL;
565 } else if (rinfo->prefix_len > 128) {
566 return -EINVAL;
567 } else if (rinfo->prefix_len > 64) {
568 if (rinfo->length < 2) {
569 return -EINVAL;
570 }
571 } else if (rinfo->prefix_len > 0) {
572 if (rinfo->length < 1) {
573 return -EINVAL;
574 }
575 }
576
577 pref = rinfo->route_pref;
578 if (pref == ICMPV6_ROUTER_PREF_INVALID)
Jens Rosenboom3933fc92009-09-10 06:25:11 +0000579 return -EINVAL;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800580
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900581 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800582
583 if (rinfo->length == 3)
584 prefix = (struct in6_addr *)rinfo->prefix;
585 else {
586 /* this function is safe */
587 ipv6_addr_prefix(&prefix_buf,
588 (struct in6_addr *)rinfo->prefix,
589 rinfo->prefix_len);
590 prefix = &prefix_buf;
591 }
592
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800593 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
594 dev->ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800595
596 if (rt && !lifetime) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -0700597 ip6_del_rt(rt);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800598 rt = NULL;
599 }
600
601 if (!rt && lifetime)
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -0800602 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800603 pref);
604 else if (rt)
605 rt->rt6i_flags = RTF_ROUTEINFO |
606 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
607
608 if (rt) {
YOSHIFUJI Hideaki4bed72e2008-05-27 17:37:49 +0900609 if (!addrconf_finite_timeout(lifetime)) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800610 rt->rt6i_flags &= ~RTF_EXPIRES;
611 } else {
612 rt->rt6i_expires = jiffies + HZ * lifetime;
613 rt->rt6i_flags |= RTF_EXPIRES;
614 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700615 dst_release(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -0800616 }
617 return 0;
618}
619#endif
620
/*
 * BACKTRACK(__net, saddr) - climb the fib tree after a failed match.
 *
 * Does nothing unless the current route is the namespace's null entry.
 * Otherwise it walks from the current node towards the root: at each
 * step it moves to the parent, or -- when the parent has a subtree other
 * than the node we came from -- to the result of a source-address lookup
 * in that subtree.  Reaching a node flagged RTN_TL_ROOT jumps to "out";
 * reaching a node flagged RTN_RTINFO jumps to "restart".
 *
 * The macro is not self-contained.  The expansion site must provide:
 *   - a local "struct rt6_info *rt" holding the current result,
 *   - a local "struct fib6_node *fn" holding the current node (updated
 *     by the macro),
 *   - the labels "out" and "restart".
 *
 * Both arguments are parenthesised so that any pointer-valued expression
 * can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn),	\
						 NULL, (saddr));	\
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
Thomas Grafc71099a2006-08-04 23:20:06 -0700638
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800639static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500641 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642{
643 struct fib6_node *fn;
644 struct rt6_info *rt;
645
Thomas Grafc71099a2006-08-04 23:20:06 -0700646 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -0500647 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700648restart:
649 rt = fn->leaf;
David S. Miller4c9483b2011-03-12 16:22:43 -0500650 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 BACKTRACK(net, &fl6->saddr);
Thomas Grafc71099a2006-08-04 23:20:06 -0700652out:
Changli Gaod8d1f302010-06-10 23:31:35 -0700653 dst_use(&rt->dst, jiffies);
Thomas Grafc71099a2006-08-04 23:20:06 -0700654 read_unlock_bh(&table->tb6_lock);
Thomas Grafc71099a2006-08-04 23:20:06 -0700655 return rt;
656
657}
658
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +0900659struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 const struct in6_addr *saddr, int oif, int strict)
Thomas Grafc71099a2006-08-04 23:20:06 -0700661{
David S. Miller4c9483b2011-03-12 16:22:43 -0500662 struct flowi6 fl6 = {
663 .flowi6_oif = oif,
664 .daddr = *daddr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700665 };
666 struct dst_entry *dst;
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700667 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
Thomas Grafc71099a2006-08-04 23:20:06 -0700668
Thomas Grafadaa70b2006-10-13 15:01:03 -0700669 if (saddr) {
David S. Miller4c9483b2011-03-12 16:22:43 -0500670 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
Thomas Grafadaa70b2006-10-13 15:01:03 -0700671 flags |= RT6_LOOKUP_F_HAS_SADDR;
672 }
673
David S. Miller4c9483b2011-03-12 16:22:43 -0500674 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
Thomas Grafc71099a2006-08-04 23:20:06 -0700675 if (dst->error == 0)
676 return (struct rt6_info *) dst;
677
678 dst_release(dst);
679
Linus Torvalds1da177e2005-04-16 15:20:36 -0700680 return NULL;
681}
682
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900683EXPORT_SYMBOL(rt6_lookup);
684
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold the route,
   it may be destroyed.
 */
690
Thomas Graf86872cb2006-08-22 00:01:08 -0700691static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692{
693 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -0700694 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695
Thomas Grafc71099a2006-08-04 23:20:06 -0700696 table = rt->rt6i_table;
697 write_lock_bh(&table->tb6_lock);
Thomas Graf86872cb2006-08-22 00:01:08 -0700698 err = fib6_add(&table->tb6_root, rt, info);
Thomas Grafc71099a2006-08-04 23:20:06 -0700699 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700
701 return err;
702}
703
Thomas Graf40e22e82006-08-22 00:00:45 -0700704int ip6_ins_rt(struct rt6_info *rt)
705{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800706 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900707 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800708 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -0800709 return __ip6_ins_rt(rt, &info);
Thomas Graf40e22e82006-08-22 00:00:45 -0700710}
711
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000712static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 const struct in6_addr *daddr,
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000714 const struct in6_addr *saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 struct rt6_info *rt;
717
718 /*
719 * Clone the route.
720 */
721
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000722 rt = ip6_rt_copy(ort, daddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723
724 if (rt) {
David S. Miller14deae42009-01-04 16:04:39 -0800725 struct neighbour *neigh;
726 int attempts = !in_softirq();
727
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900728 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 if (rt->rt6i_dst.plen != 128 &&
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000730 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900731 rt->rt6i_flags |= RTF_ANYCAST;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +0900733 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735 rt->rt6i_flags |= RTF_CACHE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
737#ifdef CONFIG_IPV6_SUBTREES
738 if (rt->rt6i_src.plen && saddr) {
739 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
740 rt->rt6i_src.plen = 128;
741 }
742#endif
743
David S. Miller14deae42009-01-04 16:04:39 -0800744 retry:
745 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 if (IS_ERR(neigh)) {
747 struct net *net = dev_net(rt->rt6i_dev);
748 int saved_rt_min_interval =
749 net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 int saved_rt_elasticity =
751 net->ipv6.sysctl.ip6_rt_gc_elasticity;
752
753 if (attempts-- > 0) {
754 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756
Alexey Dobriyan86393e52009-08-29 01:34:49 +0000757 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
David S. Miller14deae42009-01-04 16:04:39 -0800758
759 net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 saved_rt_elasticity;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 saved_rt_min_interval;
763 goto retry;
764 }
765
766 if (net_ratelimit())
767 printk(KERN_WARNING
Ulrich Weber7e1b33e2010-09-27 15:02:18 -0700768 "ipv6: Neighbour table overflow.\n");
Changli Gaod8d1f302010-06-10 23:31:35 -0700769 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -0800770 return NULL;
771 }
David S. Miller69cce1d2011-07-17 23:09:49 -0700772 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800774 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700775
YOSHIFUJI Hideaki95a9a5b2006-03-20 16:55:51 -0800776 return rt;
777}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000779static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 const struct in6_addr *daddr)
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800781{
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000782 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800784 if (rt) {
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800785 rt->rt6i_flags |= RTF_CACHE;
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000786 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
YOSHIFUJI Hideaki299d9932006-03-20 16:58:32 -0800787 }
788 return rt;
789}
790
/*
 * ip6_pol_route - look up a route in @table for the flow @fl6
 * @net:   network namespace (supplies devconf_all and ip6_null_entry)
 * @table: FIB table to search
 * @oif:   interface index handed to rt6_select()
 * @fl6:   flow; daddr/saddr drive the lookup
 * @flags: RT6_LOOKUP_F_* flags; only RT6_LOOKUP_F_IFACE is used here
 *
 * Every return path takes a reference on the returned route with
 * dst_hold() and updates its lastuse/__use counters.  The result may be
 * net->ipv6.ip6_null_entry.
 *
 * NOTE(review): BACKTRACK() is a macro defined outside this block; the
 * otherwise unreferenced "restart" label suggests it jumps to "restart"
 * and "out" - confirm against its definition.
 */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800791static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
David S. Miller4c9483b2011-03-12 16:22:43 -0500792 struct flowi6 *fl6, int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700793{
794 struct fib6_node *fn;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800795 struct rt6_info *rt, *nrt;
Thomas Grafc71099a2006-08-04 23:20:06 -0700796 int strict = 0;
/* Upper bound on relookups after a failed clone/insert. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 int attempts = 3;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800798 int err;
/* Ask for reachable routers first, unless forwarding is enabled. */
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -0700799 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700801 strict |= flags & RT6_LOOKUP_F_IFACE;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700802
803relookup:
Thomas Grafc71099a2006-08-04 23:20:06 -0700804 read_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800806restart_2:
David S. Miller4c9483b2011-03-12 16:22:43 -0500807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808
809restart:
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700810 rt = rt6_select(fn, oif, strict | reachable);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800811
David S. Miller4c9483b2011-03-12 16:22:43 -0500812 BACKTRACK(net, &fl6->saddr);
/* Null entry or an existing RTF_CACHE route: nothing to clone. */
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800813 if (rt == net->ipv6.ip6_null_entry ||
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800814 rt->rt6i_flags & RTF_CACHE)
YOSHIFUJI Hideaki1ddef042006-03-20 17:01:24 -0800815 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816
/* Pin rt so it stays valid once the table lock is dropped. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700817 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700818 read_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideakifb9de912006-03-20 16:59:08 -0800819
/*
 * No neighbour attached and not RTF_NONEXTHOP: make a COW copy.
 * Otherwise a non-DST_HOST route is cloned, and a DST_HOST route is
 * returned as is (still holding the reference taken above).
 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +0000820 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
David S. Miller4c9483b2011-03-12 16:22:43 -0500821 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800822 else if (!(rt->dst.flags & DST_HOST))
David S. Miller4c9483b2011-03-12 16:22:43 -0500823 nrt = rt6_alloc_clone(rt, &fl6->daddr);
David S. Miller7343ff32011-03-09 19:55:25 -0800824 else
825 goto out2;
YOSHIFUJI Hideakie40cf352006-03-20 16:59:27 -0800826
/* Swap the reference over to the copy, or to the null entry if the copy failed. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700827 dst_release(&rt->dst);
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800828 rt = nrt ? : net->ipv6.ip6_null_entry;
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800829
Changli Gaod8d1f302010-06-10 23:31:35 -0700830 dst_hold(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800831 if (nrt) {
Thomas Graf40e22e82006-08-22 00:00:45 -0700832 err = ip6_ins_rt(nrt);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800833 if (!err)
834 goto out2;
835 }
836
/* Out of retries: return whatever rt currently refers to. */
837 if (--attempts <= 0)
838 goto out2;
839
840 /*
Thomas Grafc71099a2006-08-04 23:20:06 -0700841 * Race condition! In the gap, when table->tb6_lock was
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800842 * released someone could insert this route. Relookup.
843 */
Changli Gaod8d1f302010-06-10 23:31:35 -0700844 dst_release(&rt->dst);
YOSHIFUJI Hideaki519fbd82006-03-20 17:00:05 -0800845 goto relookup;
846
847out:
/*
 * Reached with the table lock still held.  If the reachable-only
 * pass was active, clear it and redo the lookup once without it.
 */
YOSHIFUJI Hideaki8238dd02006-03-20 17:04:35 -0800848 if (reachable) {
849 reachable = 0;
850 goto restart_2;
851 }
Changli Gaod8d1f302010-06-10 23:31:35 -0700852 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -0700853 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854out2:
/* Usage accounting on the route being returned. */
Changli Gaod8d1f302010-06-10 23:31:35 -0700855 rt->dst.lastuse = jiffies;
856 rt->dst.__use++;
Thomas Grafc71099a2006-08-04 23:20:06 -0700857
858 return rt;
859}
860
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800861static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500862 struct flowi6 *fl6, int flags)
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700863{
David S. Miller4c9483b2011-03-12 16:22:43 -0500864 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
Pavel Emelyanov4acad722007-10-15 13:02:51 -0700865}
866
Thomas Grafc71099a2006-08-04 23:20:06 -0700867void ip6_route_input(struct sk_buff *skb)
868{
Eric Dumazetb71d1d42011-04-22 04:53:02 +0000869 const struct ipv6hdr *iph = ipv6_hdr(skb);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +0900870 struct net *net = dev_net(skb->dev);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700871 int flags = RT6_LOOKUP_F_HAS_SADDR;
David S. Miller4c9483b2011-03-12 16:22:43 -0500872 struct flowi6 fl6 = {
873 .flowi6_iif = skb->dev->ifindex,
874 .daddr = iph->daddr,
875 .saddr = iph->saddr,
876 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 .flowi6_mark = skb->mark,
878 .flowi6_proto = iph->nexthdr,
Thomas Grafc71099a2006-08-04 23:20:06 -0700879 };
Thomas Grafadaa70b2006-10-13 15:01:03 -0700880
Thomas Goff1d6e55f2009-01-27 22:39:59 -0800881 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
Thomas Grafadaa70b2006-10-13 15:01:03 -0700882 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700883
David S. Miller4c9483b2011-03-12 16:22:43 -0500884 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
Thomas Grafc71099a2006-08-04 23:20:06 -0700885}
886
Daniel Lezcano8ed67782008-03-04 13:48:30 -0800887static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -0500888 struct flowi6 *fl6, int flags)
Thomas Grafc71099a2006-08-04 23:20:06 -0700889{
David S. Miller4c9483b2011-03-12 16:22:43 -0500890 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
Thomas Grafc71099a2006-08-04 23:20:06 -0700891}
892
Florian Westphal9c7a4f92011-03-22 19:17:36 -0700893struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
David S. Miller4c9483b2011-03-12 16:22:43 -0500894 struct flowi6 *fl6)
Thomas Grafc71099a2006-08-04 23:20:06 -0700895{
896 int flags = 0;
897
David S. Miller4c9483b2011-03-12 16:22:43 -0500898 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
YOSHIFUJI Hideaki77d16f42006-08-23 17:25:05 -0700899 flags |= RT6_LOOKUP_F_IFACE;
Thomas Grafc71099a2006-08-04 23:20:06 -0700900
David S. Miller4c9483b2011-03-12 16:22:43 -0500901 if (!ipv6_addr_any(&fl6->saddr))
Thomas Grafadaa70b2006-10-13 15:01:03 -0700902 flags |= RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideaki / 吉藤英明0c9a2ac2010-03-07 00:14:44 +0000903 else if (sk)
904 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
Thomas Grafadaa70b2006-10-13 15:01:03 -0700905
David S. Miller4c9483b2011-03-12 16:22:43 -0500906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700907}
908
YOSHIFUJI Hideaki71590392007-02-22 22:05:40 +0900909EXPORT_SYMBOL(ip6_route_output);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910
David S. Miller2774c132011-03-01 14:59:04 -0800911struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -0700912{
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700913 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
David S. Miller14e50e52007-05-24 18:17:54 -0700914 struct dst_entry *new = NULL;
915
David S. Miller5c1e6aa2011-04-28 14:13:38 -0700916 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
David S. Miller14e50e52007-05-24 18:17:54 -0700917 if (rt) {
David S. Millercf911662011-04-28 14:31:47 -0700918 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919
Changli Gaod8d1f302010-06-10 23:31:35 -0700920 new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -0700921
David S. Miller14e50e52007-05-24 18:17:54 -0700922 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -0800923 new->input = dst_discard;
924 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -0700925
Eric Dumazet21efcfa2011-07-19 20:18:36 +0000926 if (dst_metrics_read_only(&ort->dst))
927 new->_metrics = ort->dst._metrics;
928 else
929 dst_copy_metrics(new, &ort->dst);
David S. Miller14e50e52007-05-24 18:17:54 -0700930 rt->rt6i_idev = ort->rt6i_idev;
931 if (rt->rt6i_idev)
932 in6_dev_hold(rt->rt6i_idev);
933 rt->rt6i_expires = 0;
934
935 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
936 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 rt->rt6i_metric = 0;
938
939 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940#ifdef CONFIG_IPV6_SUBTREES
941 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942#endif
943
944 dst_free(new);
945 }
946
David S. Miller69ead7a2011-03-01 14:45:33 -0800947 dst_release(dst_orig);
948 return new ? new : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -0700949}
David S. Miller14e50e52007-05-24 18:17:54 -0700950
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951/*
952 * Destination cache support functions
953 */
954
955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956{
957 struct rt6_info *rt;
958
959 rt = (struct rt6_info *) dst;
960
David S. Miller6431cbc2011-02-07 20:38:06 -0800961 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 if (!rt->rt6i_peer)
964 rt6_bind_peer(rt, 0);
965 rt->rt6i_peer_genid = rt6_peer_genid();
966 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 return dst;
David S. Miller6431cbc2011-02-07 20:38:06 -0800968 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700969 return NULL;
970}
971
972static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973{
974 struct rt6_info *rt = (struct rt6_info *) dst;
975
976 if (rt) {
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000977 if (rt->rt6i_flags & RTF_CACHE) {
978 if (rt6_check_expired(rt)) {
979 ip6_del_rt(rt);
980 dst = NULL;
981 }
982 } else {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 dst_release(dst);
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000984 dst = NULL;
985 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 }
YOSHIFUJI Hideaki / 吉藤英明54c1a852010-03-28 07:15:45 +0000987 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988}
989
990static void ip6_link_failure(struct sk_buff *skb)
991{
992 struct rt6_info *rt;
993
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +0000994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700995
Eric Dumazetadf30902009-06-02 05:19:30 +0000996 rt = (struct rt6_info *) skb_dst(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700997 if (rt) {
998 if (rt->rt6i_flags&RTF_CACHE) {
Changli Gaod8d1f302010-06-10 23:31:35 -0700999 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 rt->rt6i_flags |= RTF_EXPIRES;
1001 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 rt->rt6i_node->fn_sernum = -1;
1003 }
1004}
1005
1006static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007{
1008 struct rt6_info *rt6 = (struct rt6_info*)dst;
1009
1010 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 rt6->rt6i_flags |= RTF_MODIFIED;
1012 if (mtu < IPV6_MIN_MTU) {
David S. Millerdefb3512010-12-08 21:16:57 -08001013 u32 features = dst_metric(dst, RTAX_FEATURES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001014 mtu = IPV6_MIN_MTU;
David S. Millerdefb3512010-12-08 21:16:57 -08001015 features |= RTAX_FEATURE_ALLFRAG;
1016 dst_metric_set(dst, RTAX_FEATURES, features);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 }
David S. Millerdefb3512010-12-08 21:16:57 -08001018 dst_metric_set(dst, RTAX_MTU, mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001019 }
1020}
1021
David S. Miller0dbaee32010-12-13 12:52:14 -08001022static unsigned int ip6_default_advmss(const struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001023{
David S. Miller0dbaee32010-12-13 12:52:14 -08001024 struct net_device *dev = dst->dev;
1025 unsigned int mtu = dst_mtu(dst);
1026 struct net *net = dev_net(dev);
1027
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029
Daniel Lezcano55786892008-03-04 13:47:47 -08001030 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
1033 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001034 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 * IPV6_MAXPLEN is also valid and means: "any MSS,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037 * rely only on pmtu discovery"
1038 */
1039 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 mtu = IPV6_MAXPLEN;
1041 return mtu;
1042}
1043
David S. Millerd33e4552010-12-14 13:01:14 -08001044static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045{
1046 unsigned int mtu = IPV6_MIN_MTU;
1047 struct inet6_dev *idev;
1048
1049 rcu_read_lock();
1050 idev = __in6_dev_get(dst->dev);
1051 if (idev)
1052 mtu = idev->cnf.mtu6;
1053 rcu_read_unlock();
1054
1055 return mtu;
1056}
1057
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001058static struct dst_entry *icmp6_dst_gc_list;
1059static DEFINE_SPINLOCK(icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001060
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001061struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062 struct neighbour *neigh,
YOSHIFUJI Hideaki9acd9f32008-04-10 15:42:10 +09001063 const struct in6_addr *addr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001064{
1065 struct rt6_info *rt;
1066 struct inet6_dev *idev = in6_dev_get(dev);
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001067 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068
1069 if (unlikely(idev == NULL))
1070 return NULL;
1071
David S. Miller957c6652011-06-24 15:25:00 -07001072 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 if (unlikely(rt == NULL)) {
1074 in6_dev_put(idev);
1075 goto out;
1076 }
1077
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 if (neigh)
1079 neigh_hold(neigh);
David S. Miller14deae42009-01-04 16:04:39 -08001080 else {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001081 neigh = ndisc_get_neigh(dev, addr);
David S. Miller14deae42009-01-04 16:04:39 -08001082 if (IS_ERR(neigh))
1083 neigh = NULL;
1084 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001085
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001086 rt->dst.flags |= DST_HOST;
1087 rt->dst.output = ip6_output;
David S. Miller69cce1d2011-07-17 23:09:49 -07001088 dst_set_neighbour(&rt->dst, neigh);
Changli Gaod8d1f302010-06-10 23:31:35 -07001089 atomic_set(&rt->dst.__refcnt, 1);
David S. Millerdefb3512010-12-08 21:16:57 -08001090 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001091
1092 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1093 rt->rt6i_dst.plen = 128;
1094 rt->rt6i_idev = idev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001095
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001096 spin_lock_bh(&icmp6_dst_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001097 rt->dst.next = icmp6_dst_gc_list;
1098 icmp6_dst_gc_list = &rt->dst;
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001099 spin_unlock_bh(&icmp6_dst_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001100
Daniel Lezcano55786892008-03-04 13:47:47 -08001101 fib6_force_start_gc(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001102
1103out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001104 return &rt->dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105}
1106
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001107int icmp6_dst_gc(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108{
Hagen Paul Pfeifere9476e92011-02-25 05:45:19 +00001109 struct dst_entry *dst, **pprev;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001110 int more = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001111
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001112 spin_lock_bh(&icmp6_dst_lock);
1113 pprev = &icmp6_dst_gc_list;
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001114
Linus Torvalds1da177e2005-04-16 15:20:36 -07001115 while ((dst = *pprev) != NULL) {
1116 if (!atomic_read(&dst->__refcnt)) {
1117 *pprev = dst->next;
1118 dst_free(dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 } else {
1120 pprev = &dst->next;
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001121 ++more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001122 }
1123 }
1124
YOSHIFUJI Hideaki3b009442007-12-06 16:11:48 -08001125 spin_unlock_bh(&icmp6_dst_lock);
Thomas Graf5d0bbee2006-08-04 03:37:36 -07001126
Stephen Hemminger3d0f24a2008-07-22 14:35:50 -07001127 return more;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001128}
1129
David S. Miller1e493d12008-09-10 17:27:15 -07001130static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1131 void *arg)
1132{
1133 struct dst_entry *dst, **pprev;
1134
1135 spin_lock_bh(&icmp6_dst_lock);
1136 pprev = &icmp6_dst_gc_list;
1137 while ((dst = *pprev) != NULL) {
1138 struct rt6_info *rt = (struct rt6_info *) dst;
1139 if (func(rt, arg)) {
1140 *pprev = dst->next;
1141 dst_free(dst);
1142 } else {
1143 pprev = &dst->next;
1144 }
1145 }
1146 spin_unlock_bh(&icmp6_dst_lock);
1147}
1148
Daniel Lezcano569d3642008-01-18 03:56:57 -08001149static int ip6_dst_gc(struct dst_ops *ops)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 unsigned long now = jiffies;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00001152 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001153 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1154 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1155 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1156 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1157 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001158 int entries;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
Eric Dumazetfc66f952010-10-08 06:37:34 +00001160 entries = dst_entries_get_fast(ops);
Daniel Lezcano7019b782008-03-04 13:50:14 -08001161 if (time_after(rt_last_gc + rt_min_interval, now) &&
Eric Dumazetfc66f952010-10-08 06:37:34 +00001162 entries <= rt_max_size)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001163 goto out;
1164
Benjamin Thery6891a342008-03-04 13:49:47 -08001165 net->ipv6.ip6_rt_gc_expire++;
1166 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1167 net->ipv6.ip6_rt_last_gc = now;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001168 entries = dst_entries_get_slow(ops);
1169 if (entries < ops->gc_thresh)
Daniel Lezcano7019b782008-03-04 13:50:14 -08001170 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171out:
Daniel Lezcano7019b782008-03-04 13:50:14 -08001172 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
Eric Dumazetfc66f952010-10-08 06:37:34 +00001173 return entries > rt_max_size;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001174}
1175
1176/* Clean host part of a prefix. Not necessary in radix tree,
1177 but results in cleaner routing tables.
1178
1179 Remove it only when all the things will work!
1180 */
1181
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001182int ip6_dst_hoplimit(struct dst_entry *dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183{
David S. Miller5170ae82010-12-12 21:35:57 -08001184 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
David S. Millera02e4b72010-12-12 21:39:02 -08001185 if (hoplimit == 0) {
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001186 struct net_device *dev = dst->dev;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001187 struct inet6_dev *idev;
1188
1189 rcu_read_lock();
1190 idev = __in6_dev_get(dev);
1191 if (idev)
YOSHIFUJI Hideaki6b75d092008-03-10 06:00:30 -04001192 hoplimit = idev->cnf.hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001193 else
YOSHIFUJI Hideaki53b79972008-07-19 22:35:03 -07001194 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
Eric Dumazetc68f24c2010-06-14 04:46:20 +00001195 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001196 }
1197 return hoplimit;
1198}
David S. Millerabbf46a2010-12-12 21:14:46 -08001199EXPORT_SYMBOL(ip6_dst_hoplimit);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001200
1201/*
1202 *
1203 */
1204
Thomas Graf86872cb2006-08-22 00:01:08 -07001205int ip6_route_add(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001206{
1207 int err;
Daniel Lezcano55786892008-03-04 13:47:47 -08001208 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001209 struct rt6_info *rt = NULL;
1210 struct net_device *dev = NULL;
1211 struct inet6_dev *idev = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001212 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001213 int addr_type;
1214
Thomas Graf86872cb2006-08-22 00:01:08 -07001215 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001216 return -EINVAL;
1217#ifndef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001218 if (cfg->fc_src_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001219 return -EINVAL;
1220#endif
Thomas Graf86872cb2006-08-22 00:01:08 -07001221 if (cfg->fc_ifindex) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001222 err = -ENODEV;
Daniel Lezcano55786892008-03-04 13:47:47 -08001223 dev = dev_get_by_index(net, cfg->fc_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 if (!dev)
1225 goto out;
1226 idev = in6_dev_get(dev);
1227 if (!idev)
1228 goto out;
1229 }
1230
Thomas Graf86872cb2006-08-22 00:01:08 -07001231 if (cfg->fc_metric == 0)
1232 cfg->fc_metric = IP6_RT_PRIO_USER;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001233
Daniel Lezcano55786892008-03-04 13:47:47 -08001234 table = fib6_new_table(net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001235 if (table == NULL) {
1236 err = -ENOBUFS;
1237 goto out;
1238 }
1239
David S. Miller957c6652011-06-24 15:25:00 -07001240 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001241
1242 if (rt == NULL) {
1243 err = -ENOMEM;
1244 goto out;
1245 }
1246
Changli Gaod8d1f302010-06-10 23:31:35 -07001247 rt->dst.obsolete = -1;
YOSHIFUJI Hideaki6f704992008-05-19 16:56:11 -07001248 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1249 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1250 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001251
Thomas Graf86872cb2006-08-22 00:01:08 -07001252 if (cfg->fc_protocol == RTPROT_UNSPEC)
1253 cfg->fc_protocol = RTPROT_BOOT;
1254 rt->rt6i_protocol = cfg->fc_protocol;
1255
1256 addr_type = ipv6_addr_type(&cfg->fc_dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001257
1258 if (addr_type & IPV6_ADDR_MULTICAST)
Changli Gaod8d1f302010-06-10 23:31:35 -07001259 rt->dst.input = ip6_mc_input;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001260 else if (cfg->fc_flags & RTF_LOCAL)
1261 rt->dst.input = ip6_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001262 else
Changli Gaod8d1f302010-06-10 23:31:35 -07001263 rt->dst.input = ip6_forward;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001264
Changli Gaod8d1f302010-06-10 23:31:35 -07001265 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001266
Thomas Graf86872cb2006-08-22 00:01:08 -07001267 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1268 rt->rt6i_dst.plen = cfg->fc_dst_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001269 if (rt->rt6i_dst.plen == 128)
David S. Miller11d53b42011-06-24 15:23:34 -07001270 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001271
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001272 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1273 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1274 if (!metrics) {
1275 err = -ENOMEM;
1276 goto out;
1277 }
1278 dst_init_metrics(&rt->dst, metrics, 0);
1279 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001280#ifdef CONFIG_IPV6_SUBTREES
Thomas Graf86872cb2006-08-22 00:01:08 -07001281 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1282 rt->rt6i_src.plen = cfg->fc_src_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001283#endif
1284
Thomas Graf86872cb2006-08-22 00:01:08 -07001285 rt->rt6i_metric = cfg->fc_metric;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001286
1287 /* We cannot add true routes via loopback here,
1288 they would result in kernel looping; promote them to reject routes
1289 */
Thomas Graf86872cb2006-08-22 00:01:08 -07001290 if ((cfg->fc_flags & RTF_REJECT) ||
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00001291 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1292 && !(cfg->fc_flags&RTF_LOCAL))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001293 /* hold loopback dev/idev if we haven't done so. */
Daniel Lezcano55786892008-03-04 13:47:47 -08001294 if (dev != net->loopback_dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001295 if (dev) {
1296 dev_put(dev);
1297 in6_dev_put(idev);
1298 }
Daniel Lezcano55786892008-03-04 13:47:47 -08001299 dev = net->loopback_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001300 dev_hold(dev);
1301 idev = in6_dev_get(dev);
1302 if (!idev) {
1303 err = -ENODEV;
1304 goto out;
1305 }
1306 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001307 rt->dst.output = ip6_pkt_discard_out;
1308 rt->dst.input = ip6_pkt_discard;
1309 rt->dst.error = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1311 goto install_route;
1312 }
1313
Thomas Graf86872cb2006-08-22 00:01:08 -07001314 if (cfg->fc_flags & RTF_GATEWAY) {
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001315 const struct in6_addr *gw_addr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316 int gwa_type;
1317
Thomas Graf86872cb2006-08-22 00:01:08 -07001318 gw_addr = &cfg->fc_gateway;
1319 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001320 gwa_type = ipv6_addr_type(gw_addr);
1321
1322 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1323 struct rt6_info *grt;
1324
1325 /* IPv6 strictly inhibits using not link-local
1326 addresses as nexthop address.
1327 Otherwise, router will not able to send redirects.
1328 It is very good, but in some (rare!) circumstances
1329 (SIT, PtP, NBMA NOARP links) it is handy to allow
1330 some exceptions. --ANK
1331 */
1332 err = -EINVAL;
1333 if (!(gwa_type&IPV6_ADDR_UNICAST))
1334 goto out;
1335
Daniel Lezcano55786892008-03-04 13:47:47 -08001336 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001337
1338 err = -EHOSTUNREACH;
1339 if (grt == NULL)
1340 goto out;
1341 if (dev) {
1342 if (dev != grt->rt6i_dev) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001343 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001344 goto out;
1345 }
1346 } else {
1347 dev = grt->rt6i_dev;
1348 idev = grt->rt6i_idev;
1349 dev_hold(dev);
1350 in6_dev_hold(grt->rt6i_idev);
1351 }
1352 if (!(grt->rt6i_flags&RTF_GATEWAY))
1353 err = 0;
Changli Gaod8d1f302010-06-10 23:31:35 -07001354 dst_release(&grt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001355
1356 if (err)
1357 goto out;
1358 }
1359 err = -EINVAL;
1360 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1361 goto out;
1362 }
1363
1364 err = -ENODEV;
1365 if (dev == NULL)
1366 goto out;
1367
Daniel Walterc3968a82011-04-13 21:10:57 +00001368 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1369 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1370 err = -EINVAL;
1371 goto out;
1372 }
1373 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1374 rt->rt6i_prefsrc.plen = 128;
1375 } else
1376 rt->rt6i_prefsrc.plen = 0;
1377
Thomas Graf86872cb2006-08-22 00:01:08 -07001378 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
David S. Miller69cce1d2011-07-17 23:09:49 -07001379 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1380 if (IS_ERR(n)) {
1381 err = PTR_ERR(n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001382 goto out;
1383 }
David S. Miller69cce1d2011-07-17 23:09:49 -07001384 dst_set_neighbour(&rt->dst, n);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001385 }
1386
Thomas Graf86872cb2006-08-22 00:01:08 -07001387 rt->rt6i_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388
1389install_route:
Thomas Graf86872cb2006-08-22 00:01:08 -07001390 if (cfg->fc_mx) {
1391 struct nlattr *nla;
1392 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001393
Thomas Graf86872cb2006-08-22 00:01:08 -07001394 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +02001395 int type = nla_type(nla);
Thomas Graf86872cb2006-08-22 00:01:08 -07001396
1397 if (type) {
1398 if (type > RTAX_MAX) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 err = -EINVAL;
1400 goto out;
1401 }
Thomas Graf86872cb2006-08-22 00:01:08 -07001402
David S. Millerdefb3512010-12-08 21:16:57 -08001403 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 }
1406 }
1407
Changli Gaod8d1f302010-06-10 23:31:35 -07001408 rt->dst.dev = dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001409 rt->rt6i_idev = idev;
Thomas Grafc71099a2006-08-04 23:20:06 -07001410 rt->rt6i_table = table;
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001411
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001412 cfg->fc_nlinfo.nl_net = dev_net(dev);
Daniel Lezcano63152fc2008-03-03 23:31:11 -08001413
Thomas Graf86872cb2006-08-22 00:01:08 -07001414 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001415
1416out:
1417 if (dev)
1418 dev_put(dev);
1419 if (idev)
1420 in6_dev_put(idev);
1421 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001422 dst_free(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 return err;
1424}
1425
Thomas Graf86872cb2006-08-22 00:01:08 -07001426static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427{
1428 int err;
Thomas Grafc71099a2006-08-04 23:20:06 -07001429 struct fib6_table *table;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001430 struct net *net = dev_net(rt->rt6i_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001432 if (rt == net->ipv6.ip6_null_entry)
Patrick McHardy6c813a72006-08-06 22:22:47 -07001433 return -ENOENT;
1434
Thomas Grafc71099a2006-08-04 23:20:06 -07001435 table = rt->rt6i_table;
1436 write_lock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437
Thomas Graf86872cb2006-08-22 00:01:08 -07001438 err = fib6_del(rt, info);
Changli Gaod8d1f302010-06-10 23:31:35 -07001439 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440
Thomas Grafc71099a2006-08-04 23:20:06 -07001441 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442
1443 return err;
1444}
1445
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001446int ip6_del_rt(struct rt6_info *rt)
1447{
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001448 struct nl_info info = {
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001449 .nl_net = dev_net(rt->rt6i_dev),
Denis V. Lunev4d1169c2008-01-10 03:26:13 -08001450 };
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08001451 return __ip6_del_rt(rt, &info);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001452}
1453
Thomas Graf86872cb2006-08-22 00:01:08 -07001454static int ip6_route_del(struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455{
Thomas Grafc71099a2006-08-04 23:20:06 -07001456 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001457 struct fib6_node *fn;
1458 struct rt6_info *rt;
1459 int err = -ESRCH;
1460
Daniel Lezcano55786892008-03-04 13:47:47 -08001461 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
Thomas Grafc71099a2006-08-04 23:20:06 -07001462 if (table == NULL)
1463 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464
Thomas Grafc71099a2006-08-04 23:20:06 -07001465 read_lock_bh(&table->tb6_lock);
1466
1467 fn = fib6_locate(&table->tb6_root,
Thomas Graf86872cb2006-08-22 00:01:08 -07001468 &cfg->fc_dst, cfg->fc_dst_len,
1469 &cfg->fc_src, cfg->fc_src_len);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001470
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 if (fn) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001472 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
Thomas Graf86872cb2006-08-22 00:01:08 -07001473 if (cfg->fc_ifindex &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474 (rt->rt6i_dev == NULL ||
Thomas Graf86872cb2006-08-22 00:01:08 -07001475 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001476 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001477 if (cfg->fc_flags & RTF_GATEWAY &&
1478 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 continue;
Thomas Graf86872cb2006-08-22 00:01:08 -07001480 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001482 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001483 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484
Thomas Graf86872cb2006-08-22 00:01:08 -07001485 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486 }
1487 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001488 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489
1490 return err;
1491}
1492
1493/*
1494 * Handle redirects
1495 */
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001496struct ip6rd_flowi {
David S. Miller4c9483b2011-03-12 16:22:43 -05001497 struct flowi6 fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001498 struct in6_addr gateway;
1499};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001500
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001501static struct rt6_info *__ip6_route_redirect(struct net *net,
1502 struct fib6_table *table,
David S. Miller4c9483b2011-03-12 16:22:43 -05001503 struct flowi6 *fl6,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001504 int flags)
1505{
David S. Miller4c9483b2011-03-12 16:22:43 -05001506 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001507 struct rt6_info *rt;
1508 struct fib6_node *fn;
Thomas Grafc71099a2006-08-04 23:20:06 -07001509
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 /*
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001511 * Get the "current" route for this destination and
1512 * check if the redirect has come from approriate router.
1513 *
1514 * RFC 2461 specifies that redirects should only be
1515 * accepted if they come from the nexthop to the target.
1516 * Due to the way the routes are chosen, this notion
1517 * is a bit fuzzy and one might need to check all possible
1518 * routes.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001519 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001520
Thomas Grafc71099a2006-08-04 23:20:06 -07001521 read_lock_bh(&table->tb6_lock);
David S. Miller4c9483b2011-03-12 16:22:43 -05001522 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001523restart:
Changli Gaod8d1f302010-06-10 23:31:35 -07001524 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001525 /*
1526 * Current route is on-link; redirect is always invalid.
1527 *
1528 * Seems, previous statement is not true. It could
1529 * be node, which looks for us as on-link (f.e. proxy ndisc)
1530 * But then router serving it might decide, that we should
1531 * know truth 8)8) --ANK (980726).
1532 */
1533 if (rt6_check_expired(rt))
1534 continue;
1535 if (!(rt->rt6i_flags & RTF_GATEWAY))
1536 continue;
David S. Miller4c9483b2011-03-12 16:22:43 -05001537 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001538 continue;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001539 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001540 continue;
1541 break;
1542 }
YOSHIFUJI Hideakie843b9e2006-03-20 17:07:49 -08001543
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001544 if (!rt)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001545 rt = net->ipv6.ip6_null_entry;
David S. Miller4c9483b2011-03-12 16:22:43 -05001546 BACKTRACK(net, &fl6->saddr);
YOSHIFUJI Hideakicb15d9c2006-08-23 17:23:11 -07001547out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001548 dst_hold(&rt->dst);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001549
1550 read_unlock_bh(&table->tb6_lock);
1551
1552 return rt;
1553};
1554
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001555static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1556 const struct in6_addr *src,
1557 const struct in6_addr *gateway,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001558 struct net_device *dev)
1559{
Thomas Grafadaa70b2006-10-13 15:01:03 -07001560 int flags = RT6_LOOKUP_F_HAS_SADDR;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001561 struct net *net = dev_net(dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001562 struct ip6rd_flowi rdfl = {
David S. Miller4c9483b2011-03-12 16:22:43 -05001563 .fl6 = {
1564 .flowi6_oif = dev->ifindex,
1565 .daddr = *dest,
1566 .saddr = *src,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001567 },
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001568 };
Thomas Grafadaa70b2006-10-13 15:01:03 -07001569
Brian Haley86c36ce2009-10-07 13:58:01 -07001570 ipv6_addr_copy(&rdfl.gateway, gateway);
1571
Thomas Grafadaa70b2006-10-13 15:01:03 -07001572 if (rt6_need_strict(dest))
1573 flags |= RT6_LOOKUP_F_IFACE;
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001574
David S. Miller4c9483b2011-03-12 16:22:43 -05001575 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
Daniel Lezcano58f09b72008-03-03 23:25:27 -08001576 flags, __ip6_route_redirect);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001577}
1578
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001579void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1580 const struct in6_addr *saddr,
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001581 struct neighbour *neigh, u8 *lladdr, int on_link)
1582{
1583 struct rt6_info *rt, *nrt = NULL;
1584 struct netevent_redirect netevent;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001585 struct net *net = dev_net(neigh->dev);
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001586
1587 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1588
Daniel Lezcano8ed67782008-03-04 13:48:30 -08001589 if (rt == net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590 if (net_ratelimit())
1591 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1592 "for redirect target\n");
YOSHIFUJI Hideakia6279452006-08-23 17:18:26 -07001593 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 }
1595
Linus Torvalds1da177e2005-04-16 15:20:36 -07001596 /*
1597 * We have finally decided to accept it.
1598 */
1599
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001600 neigh_update(neigh, lladdr, NUD_STALE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001601 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1602 NEIGH_UPDATE_F_OVERRIDE|
1603 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1604 NEIGH_UPDATE_F_ISROUTER))
1605 );
1606
1607 /*
1608 * Redirect received -> path was valid.
1609 * Look, redirects are sent only in response to data packets,
1610 * so that this nexthop apparently is reachable. --ANK
1611 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001612 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613
1614 /* Duplicate redirect: silently ignore. */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001615 if (neigh == dst_get_neighbour_raw(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 goto out;
1617
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001618 nrt = ip6_rt_copy(rt, dest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 if (nrt == NULL)
1620 goto out;
1621
1622 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1623 if (on_link)
1624 nrt->rt6i_flags &= ~RTF_GATEWAY;
1625
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
David S. Miller69cce1d2011-07-17 23:09:49 -07001627 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628
Thomas Graf40e22e82006-08-22 00:00:45 -07001629 if (ip6_ins_rt(nrt))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001630 goto out;
1631
Changli Gaod8d1f302010-06-10 23:31:35 -07001632 netevent.old = &rt->dst;
1633 netevent.new = &nrt->dst;
Tom Tucker8d717402006-07-30 20:43:36 -07001634 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1635
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636 if (rt->rt6i_flags&RTF_CACHE) {
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001637 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001638 return;
1639 }
1640
1641out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001642 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643}
1644
1645/*
1646 * Handle ICMP "packet too big" messages
1647 * i.e. Path MTU discovery
1648 */
1649
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001650static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001651 struct net *net, u32 pmtu, int ifindex)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652{
1653 struct rt6_info *rt, *nrt;
1654 int allfrag = 0;
Andrey Vagind3052b52010-12-11 15:20:11 +00001655again:
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001656 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001657 if (rt == NULL)
1658 return;
1659
Andrey Vagind3052b52010-12-11 15:20:11 +00001660 if (rt6_check_expired(rt)) {
1661 ip6_del_rt(rt);
1662 goto again;
1663 }
1664
Changli Gaod8d1f302010-06-10 23:31:35 -07001665 if (pmtu >= dst_mtu(&rt->dst))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001666 goto out;
1667
1668 if (pmtu < IPV6_MIN_MTU) {
1669 /*
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001670 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671 * MTU (1280) and a fragment header should always be included
1672 * after a node receiving Too Big message reporting PMTU is
1673 * less than the IPv6 Minimum Link MTU.
1674 */
1675 pmtu = IPV6_MIN_MTU;
1676 allfrag = 1;
1677 }
1678
1679 /* New mtu received -> path was valid.
1680 They are sent only in response to data packets,
1681 so that this nexthop apparently is reachable. --ANK
1682 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001683 dst_confirm(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684
1685 /* Host route. If it is static, it would be better
1686 not to override it, but add new one, so that
1687 when cache entry will expire old pmtu
1688 would return automatically.
1689 */
1690 if (rt->rt6i_flags & RTF_CACHE) {
David S. Millerdefb3512010-12-08 21:16:57 -08001691 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1692 if (allfrag) {
1693 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1694 features |= RTAX_FEATURE_ALLFRAG;
1695 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1696 }
Changli Gaod8d1f302010-06-10 23:31:35 -07001697 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1699 goto out;
1700 }
1701
1702 /* Network route.
1703 Two cases are possible:
1704 1. It is connected route. Action: COW
1705 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1706 */
Eric Dumazetf2c31e32011-07-29 19:00:53 +00001707 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001708 nrt = rt6_alloc_cow(rt, daddr, saddr);
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001709 else
1710 nrt = rt6_alloc_clone(rt, daddr);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001711
YOSHIFUJI Hideakid5315b52006-03-20 16:58:48 -08001712 if (nrt) {
David S. Millerdefb3512010-12-08 21:16:57 -08001713 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1714 if (allfrag) {
1715 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1716 features |= RTAX_FEATURE_ALLFRAG;
1717 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1718 }
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001719
1720 /* According to RFC 1981, detecting PMTU increase shouldn't be
1721 * happened within 5 mins, the recommended timer is 10 mins.
1722 * Here this route expiration time is set to ip6_rt_mtu_expires
1723 * which is 10 mins. After 10 mins the decreased pmtu is expired
1724 * and detecting PMTU increase will be automatically happened.
1725 */
Changli Gaod8d1f302010-06-10 23:31:35 -07001726 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
YOSHIFUJI Hideakia1e78362006-03-20 16:56:32 -08001727 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1728
Thomas Graf40e22e82006-08-22 00:00:45 -07001729 ip6_ins_rt(nrt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731out:
Changli Gaod8d1f302010-06-10 23:31:35 -07001732 dst_release(&rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733}
1734
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001735void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
Maciej Żenczykowskiae878ae2010-10-03 14:49:00 -07001736 struct net_device *dev, u32 pmtu)
1737{
1738 struct net *net = dev_net(dev);
1739
1740 /*
1741 * RFC 1981 states that a node "MUST reduce the size of the packets it
1742 * is sending along the path" that caused the Packet Too Big message.
1743 * Since it's not possible in the general case to determine which
1744 * interface was used to send the original packet, we update the MTU
1745 * on the interface that will be used to send future packets. We also
1746 * update the MTU on the interface that received the Packet Too Big in
1747 * case the original packet was forced out that interface with
1748 * SO_BINDTODEVICE or similar. This is the next best thing to the
1749 * correct behaviour, which would be to update the MTU on all
1750 * interfaces.
1751 */
1752 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1753 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1754}
1755
Linus Torvalds1da177e2005-04-16 15:20:36 -07001756/*
1757 * Misc support functions
1758 */
1759
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001760static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1761 const struct in6_addr *dest)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001763 struct net *net = dev_net(ort->rt6i_dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001764 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07001765 ort->dst.dev, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766
1767 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001768 rt->dst.input = ort->dst.input;
1769 rt->dst.output = ort->dst.output;
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001770 rt->dst.flags |= DST_HOST;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771
Eric Dumazet21efcfa2011-07-19 20:18:36 +00001772 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
Yan, Zheng8e2ec632011-09-05 21:34:30 +00001773 rt->rt6i_dst.plen = 128;
David S. Millerdefb3512010-12-08 21:16:57 -08001774 dst_copy_metrics(&rt->dst, &ort->dst);
Changli Gaod8d1f302010-06-10 23:31:35 -07001775 rt->dst.error = ort->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776 rt->rt6i_idev = ort->rt6i_idev;
1777 if (rt->rt6i_idev)
1778 in6_dev_hold(rt->rt6i_idev);
Changli Gaod8d1f302010-06-10 23:31:35 -07001779 rt->dst.lastuse = jiffies;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780 rt->rt6i_expires = 0;
1781
1782 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1783 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1784 rt->rt6i_metric = 0;
1785
Linus Torvalds1da177e2005-04-16 15:20:36 -07001786#ifdef CONFIG_IPV6_SUBTREES
1787 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1788#endif
Florian Westphal0f6c6392011-05-20 11:27:24 +00001789 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
Thomas Grafc71099a2006-08-04 23:20:06 -07001790 rt->rt6i_table = ort->rt6i_table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 }
1792 return rt;
1793}
1794
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001795#ifdef CONFIG_IPV6_ROUTE_INFO
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001796static struct rt6_info *rt6_get_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001797 const struct in6_addr *prefix, int prefixlen,
1798 const struct in6_addr *gwaddr, int ifindex)
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001799{
1800 struct fib6_node *fn;
1801 struct rt6_info *rt = NULL;
Thomas Grafc71099a2006-08-04 23:20:06 -07001802 struct fib6_table *table;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001803
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001804 table = fib6_get_table(net, RT6_TABLE_INFO);
Thomas Grafc71099a2006-08-04 23:20:06 -07001805 if (table == NULL)
1806 return NULL;
1807
1808 write_lock_bh(&table->tb6_lock);
1809 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001810 if (!fn)
1811 goto out;
1812
Changli Gaod8d1f302010-06-10 23:31:35 -07001813 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001814 if (rt->rt6i_dev->ifindex != ifindex)
1815 continue;
1816 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1817 continue;
1818 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1819 continue;
Changli Gaod8d1f302010-06-10 23:31:35 -07001820 dst_hold(&rt->dst);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001821 break;
1822 }
1823out:
Thomas Grafc71099a2006-08-04 23:20:06 -07001824 write_unlock_bh(&table->tb6_lock);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001825 return rt;
1826}
1827
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001828static struct rt6_info *rt6_add_route_info(struct net *net,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001829 const struct in6_addr *prefix, int prefixlen,
1830 const struct in6_addr *gwaddr, int ifindex,
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001831 unsigned pref)
1832{
Thomas Graf86872cb2006-08-22 00:01:08 -07001833 struct fib6_config cfg = {
1834 .fc_table = RT6_TABLE_INFO,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001835 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001836 .fc_ifindex = ifindex,
1837 .fc_dst_len = prefixlen,
1838 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1839 RTF_UP | RTF_PREF(pref),
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001840 .fc_nlinfo.pid = 0,
1841 .fc_nlinfo.nlh = NULL,
1842 .fc_nlinfo.nl_net = net,
Thomas Graf86872cb2006-08-22 00:01:08 -07001843 };
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001844
Thomas Graf86872cb2006-08-22 00:01:08 -07001845 ipv6_addr_copy(&cfg.fc_dst, prefix);
1846 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1847
YOSHIFUJI Hideakie317da92006-03-20 17:06:42 -08001848 /* We should treat it as a default route if prefix length is 0. */
1849 if (!prefixlen)
Thomas Graf86872cb2006-08-22 00:01:08 -07001850 cfg.fc_flags |= RTF_DEFAULT;
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001851
Thomas Graf86872cb2006-08-22 00:01:08 -07001852 ip6_route_add(&cfg);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001853
Daniel Lezcanoefa2cea2008-03-04 13:46:48 -08001854 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
YOSHIFUJI Hideaki70ceb4f2006-03-20 17:06:24 -08001855}
1856#endif
1857
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001858struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09001859{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001861 struct fib6_table *table;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001863 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001864 if (table == NULL)
1865 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866
Thomas Grafc71099a2006-08-04 23:20:06 -07001867 write_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001868 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 if (dev == rt->rt6i_dev &&
YOSHIFUJI Hideaki045927f2006-03-20 17:00:48 -08001870 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1872 break;
1873 }
1874 if (rt)
Changli Gaod8d1f302010-06-10 23:31:35 -07001875 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001876 write_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001877 return rt;
1878}
1879
Eric Dumazetb71d1d42011-04-22 04:53:02 +00001880struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
YOSHIFUJI Hideakiebacaaa2006-03-20 17:04:53 -08001881 struct net_device *dev,
1882 unsigned int pref)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883{
Thomas Graf86872cb2006-08-22 00:01:08 -07001884 struct fib6_config cfg = {
1885 .fc_table = RT6_TABLE_DFLT,
Rami Rosen238fc7e2008-02-09 23:43:11 -08001886 .fc_metric = IP6_RT_PRIO_USER,
Thomas Graf86872cb2006-08-22 00:01:08 -07001887 .fc_ifindex = dev->ifindex,
1888 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1889 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
Daniel Lezcano55786892008-03-04 13:47:47 -08001890 .fc_nlinfo.pid = 0,
1891 .fc_nlinfo.nlh = NULL,
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09001892 .fc_nlinfo.nl_net = dev_net(dev),
Thomas Graf86872cb2006-08-22 00:01:08 -07001893 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894
Thomas Graf86872cb2006-08-22 00:01:08 -07001895 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896
Thomas Graf86872cb2006-08-22 00:01:08 -07001897 ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001898
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 return rt6_get_dflt_router(gwaddr, dev);
1900}
1901
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001902void rt6_purge_dflt_routers(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903{
1904 struct rt6_info *rt;
Thomas Grafc71099a2006-08-04 23:20:06 -07001905 struct fib6_table *table;
1906
1907 /* NOTE: Keep consistent with rt6_get_dflt_router */
Daniel Lezcano7b4da532008-03-04 13:47:14 -08001908 table = fib6_get_table(net, RT6_TABLE_DFLT);
Thomas Grafc71099a2006-08-04 23:20:06 -07001909 if (table == NULL)
1910 return;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911
1912restart:
Thomas Grafc71099a2006-08-04 23:20:06 -07001913 read_lock_bh(&table->tb6_lock);
Changli Gaod8d1f302010-06-10 23:31:35 -07001914 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001916 dst_hold(&rt->dst);
Thomas Grafc71099a2006-08-04 23:20:06 -07001917 read_unlock_bh(&table->tb6_lock);
Thomas Grafe0a1ad732006-08-22 00:00:21 -07001918 ip6_del_rt(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919 goto restart;
1920 }
1921 }
Thomas Grafc71099a2006-08-04 23:20:06 -07001922 read_unlock_bh(&table->tb6_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001923}
1924
Daniel Lezcano55786892008-03-04 13:47:47 -08001925static void rtmsg_to_fib6_config(struct net *net,
1926 struct in6_rtmsg *rtmsg,
Thomas Graf86872cb2006-08-22 00:01:08 -07001927 struct fib6_config *cfg)
1928{
1929 memset(cfg, 0, sizeof(*cfg));
1930
1931 cfg->fc_table = RT6_TABLE_MAIN;
1932 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1933 cfg->fc_metric = rtmsg->rtmsg_metric;
1934 cfg->fc_expires = rtmsg->rtmsg_info;
1935 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1936 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1937 cfg->fc_flags = rtmsg->rtmsg_flags;
1938
Daniel Lezcano55786892008-03-04 13:47:47 -08001939 cfg->fc_nlinfo.nl_net = net;
Benjamin Theryf1243c22008-02-26 18:10:03 -08001940
Thomas Graf86872cb2006-08-22 00:01:08 -07001941 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1942 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1943 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1944}
1945
Daniel Lezcano55786892008-03-04 13:47:47 -08001946int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947{
Thomas Graf86872cb2006-08-22 00:01:08 -07001948 struct fib6_config cfg;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949 struct in6_rtmsg rtmsg;
1950 int err;
1951
1952 switch(cmd) {
1953 case SIOCADDRT: /* Add a route */
1954 case SIOCDELRT: /* Delete a route */
1955 if (!capable(CAP_NET_ADMIN))
1956 return -EPERM;
1957 err = copy_from_user(&rtmsg, arg,
1958 sizeof(struct in6_rtmsg));
1959 if (err)
1960 return -EFAULT;
Thomas Graf86872cb2006-08-22 00:01:08 -07001961
Daniel Lezcano55786892008-03-04 13:47:47 -08001962 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
Thomas Graf86872cb2006-08-22 00:01:08 -07001963
Linus Torvalds1da177e2005-04-16 15:20:36 -07001964 rtnl_lock();
1965 switch (cmd) {
1966 case SIOCADDRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001967 err = ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 break;
1969 case SIOCDELRT:
Thomas Graf86872cb2006-08-22 00:01:08 -07001970 err = ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001971 break;
1972 default:
1973 err = -EINVAL;
1974 }
1975 rtnl_unlock();
1976
1977 return err;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -07001978 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979
1980 return -EINVAL;
1981}
1982
1983/*
1984 * Drop the packet on the floor
1985 */
1986
Brian Haleyd5fdd6b2009-06-23 04:31:07 -07001987static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001988{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001989 int type;
Eric Dumazetadf30902009-06-02 05:19:30 +00001990 struct dst_entry *dst = skb_dst(skb);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001991 switch (ipstats_mib_noroutes) {
1992 case IPSTATS_MIB_INNOROUTES:
Arnaldo Carvalho de Melo0660e032007-04-25 17:54:47 -07001993 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
Ulrich Weber45bb0062010-02-25 23:28:58 +00001994 if (type == IPV6_ADDR_ANY) {
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07001995 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1996 IPSTATS_MIB_INADDRERRORS);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07001997 break;
1998 }
1999 /* FALLTHROUGH */
2000 case IPSTATS_MIB_OUTNOROUTES:
Denis V. Lunev3bd653c2008-10-08 10:54:51 -07002001 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2002 ipstats_mib_noroutes);
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002003 break;
2004 }
Alexey Dobriyan3ffe5332010-02-18 08:25:24 +00002005 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 kfree_skb(skb);
2007 return 0;
2008}
2009
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002010static int ip6_pkt_discard(struct sk_buff *skb)
2011{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002012 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002013}
2014
Arnaldo Carvalho de Melo20380732005-08-16 02:18:02 -03002015static int ip6_pkt_discard_out(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016{
Eric Dumazetadf30902009-06-02 05:19:30 +00002017 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002018 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002019}
2020
David S. Miller6723ab52006-10-18 21:20:57 -07002021#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2022
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002023static int ip6_pkt_prohibit(struct sk_buff *skb)
2024{
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002025 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002026}
2027
2028static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2029{
Eric Dumazetadf30902009-06-02 05:19:30 +00002030 skb->dev = skb_dst(skb)->dev;
YOSHIFUJI Hideaki612f09e2007-04-13 16:18:02 -07002031 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Thomas Graf9ce8ade2006-10-18 20:46:54 -07002032}
2033
David S. Miller6723ab52006-10-18 21:20:57 -07002034#endif
2035
Linus Torvalds1da177e2005-04-16 15:20:36 -07002036/*
2037 * Allocate a dst for local (unicast / anycast) address.
2038 */
2039
2040struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2041 const struct in6_addr *addr,
2042 int anycast)
2043{
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002044 struct net *net = dev_net(idev->dev);
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002045 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
David S. Miller957c6652011-06-24 15:25:00 -07002046 net->loopback_dev, 0);
David S. Miller14deae42009-01-04 16:04:39 -08002047 struct neighbour *neigh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048
Ben Greear40385652010-11-08 12:33:48 +00002049 if (rt == NULL) {
2050 if (net_ratelimit())
2051 pr_warning("IPv6: Maximum number of routes reached,"
2052 " consider increasing route/max_size.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 return ERR_PTR(-ENOMEM);
Ben Greear40385652010-11-08 12:33:48 +00002054 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 in6_dev_hold(idev);
2057
David S. Miller11d53b42011-06-24 15:23:34 -07002058 rt->dst.flags |= DST_HOST;
Changli Gaod8d1f302010-06-10 23:31:35 -07002059 rt->dst.input = ip6_input;
2060 rt->dst.output = ip6_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061 rt->rt6i_idev = idev;
Changli Gaod8d1f302010-06-10 23:31:35 -07002062 rt->dst.obsolete = -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002063
2064 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
YOSHIFUJI Hideaki58c4fb82005-12-21 22:56:42 +09002065 if (anycast)
2066 rt->rt6i_flags |= RTF_ANYCAST;
2067 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07002068 rt->rt6i_flags |= RTF_LOCAL;
David S. Miller14deae42009-01-04 16:04:39 -08002069 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2070 if (IS_ERR(neigh)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002071 dst_free(&rt->dst);
David S. Miller14deae42009-01-04 16:04:39 -08002072
David S. Miller29546a62011-03-03 12:10:37 -08002073 return ERR_CAST(neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 }
David S. Miller69cce1d2011-07-17 23:09:49 -07002075 dst_set_neighbour(&rt->dst, neigh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076
2077 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2078 rt->rt6i_dst.plen = 128;
Daniel Lezcano55786892008-03-04 13:47:47 -08002079 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080
Changli Gaod8d1f302010-06-10 23:31:35 -07002081 atomic_set(&rt->dst.__refcnt, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002082
2083 return rt;
2084}
2085
Daniel Walterc3968a82011-04-13 21:10:57 +00002086int ip6_route_get_saddr(struct net *net,
2087 struct rt6_info *rt,
Eric Dumazetb71d1d42011-04-22 04:53:02 +00002088 const struct in6_addr *daddr,
Daniel Walterc3968a82011-04-13 21:10:57 +00002089 unsigned int prefs,
2090 struct in6_addr *saddr)
2091{
2092 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2093 int err = 0;
2094 if (rt->rt6i_prefsrc.plen)
2095 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2096 else
2097 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2098 daddr, prefs, saddr);
2099 return err;
2100}
2101
2102/* remove deleted ip from prefsrc entries */
/* Walker argument used when an address is removed (see fib6_remove_prefsrc). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches every route */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared with each route's prefsrc */
};
2108
2109static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2110{
2111 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2112 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2113 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2114
2115 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2116 rt != net->ipv6.ip6_null_entry &&
2117 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2118 /* remove prefsrc entry */
2119 rt->rt6i_prefsrc.plen = 0;
2120 }
2121 return 0;
2122}
2123
2124void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2125{
2126 struct net *net = dev_net(ifp->idev->dev);
2127 struct arg_dev_net_ip adni = {
2128 .dev = ifp->idev->dev,
2129 .net = net,
2130 .addr = &ifp->addr,
2131 };
2132 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2133}
2134
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches every route */
	struct net *net;	/* namespace whose null entry is never removed */
};
2139
Linus Torvalds1da177e2005-04-16 15:20:36 -07002140static int fib6_ifdown(struct rt6_info *rt, void *arg)
2141{
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002142 const struct arg_dev_net *adn = arg;
2143 const struct net_device *dev = adn->dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002144
stephen hemmingerbc3ef662010-12-16 17:42:40 +00002145 if ((rt->rt6i_dev == dev || dev == NULL) &&
2146 rt != adn->net->ipv6.ip6_null_entry) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 RT6_TRACE("deleted by ifdown %p\n", rt);
2148 return -1;
2149 }
2150 return 0;
2151}
2152
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002153void rt6_ifdown(struct net *net, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002155 struct arg_dev_net adn = {
2156 .dev = dev,
2157 .net = net,
2158 };
2159
2160 fib6_clean_all(net, fib6_ifdown, 0, &adn);
David S. Miller1e493d12008-09-10 17:27:15 -07002161 icmp6_clean_all(fib6_ifdown, &adn);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162}
2163
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU */
};
2169
2170static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2171{
2172 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2173 struct inet6_dev *idev;
2174
2175 /* In IPv6 pmtu discovery is not optional,
2176 so that RTAX_MTU lock cannot disable it.
2177 We still use this lock to block changes
2178 caused by addrconf/ndisc.
2179 */
2180
2181 idev = __in6_dev_get(arg->dev);
2182 if (idev == NULL)
2183 return 0;
2184
2185 /* For administrative MTU increase, there is no way to discover
2186 IPv6 PMTU increase, so PMTU increase should be updated here.
2187 Since RFC 1981 doesn't include administrative MTU increase
2188 update PMTU increase is a MUST. (i.e. jumbo frame)
2189 */
2190 /*
2191 If new MTU is less than route PMTU, this new MTU will be the
2192 lowest MTU in the path, update the route PMTU to reflect PMTU
2193 decreases; if new MTU is greater than route PMTU, and the
2194 old MTU is the lowest MTU in the path, update the route PMTU
2195 to reflect the increase. In this case if the other nodes' MTU
2196 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2197 PMTU discouvery.
2198 */
2199 if (rt->rt6i_dev == arg->dev &&
Changli Gaod8d1f302010-06-10 23:31:35 -07002200 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2201 (dst_mtu(&rt->dst) >= arg->mtu ||
2202 (dst_mtu(&rt->dst) < arg->mtu &&
2203 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
David S. Millerdefb3512010-12-08 21:16:57 -08002204 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
Simon Arlott566cfd82007-07-26 00:09:55 -07002205 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002206 return 0;
2207}
2208
2209void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2210{
Thomas Grafc71099a2006-08-04 23:20:06 -07002211 struct rt6_mtu_change_arg arg = {
2212 .dev = dev,
2213 .mtu = mtu,
2214 };
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002216 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217}
2218
Patrick McHardyef7c79e2007-06-05 12:38:30 -07002219static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
Thomas Graf5176f912006-08-26 20:13:18 -07002220 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
Thomas Graf86872cb2006-08-22 00:01:08 -07002221 [RTA_OIF] = { .type = NLA_U32 },
Thomas Grafab364a62006-08-22 00:01:47 -07002222 [RTA_IIF] = { .type = NLA_U32 },
Thomas Graf86872cb2006-08-22 00:01:08 -07002223 [RTA_PRIORITY] = { .type = NLA_U32 },
2224 [RTA_METRICS] = { .type = NLA_NESTED },
2225};
2226
2227static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2228 struct fib6_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229{
Thomas Graf86872cb2006-08-22 00:01:08 -07002230 struct rtmsg *rtm;
2231 struct nlattr *tb[RTA_MAX+1];
2232 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233
Thomas Graf86872cb2006-08-22 00:01:08 -07002234 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2235 if (err < 0)
2236 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002237
Thomas Graf86872cb2006-08-22 00:01:08 -07002238 err = -EINVAL;
2239 rtm = nlmsg_data(nlh);
2240 memset(cfg, 0, sizeof(*cfg));
2241
2242 cfg->fc_table = rtm->rtm_table;
2243 cfg->fc_dst_len = rtm->rtm_dst_len;
2244 cfg->fc_src_len = rtm->rtm_src_len;
2245 cfg->fc_flags = RTF_UP;
2246 cfg->fc_protocol = rtm->rtm_protocol;
2247
2248 if (rtm->rtm_type == RTN_UNREACHABLE)
2249 cfg->fc_flags |= RTF_REJECT;
2250
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002251 if (rtm->rtm_type == RTN_LOCAL)
2252 cfg->fc_flags |= RTF_LOCAL;
2253
Thomas Graf86872cb2006-08-22 00:01:08 -07002254 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2255 cfg->fc_nlinfo.nlh = nlh;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002256 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
Thomas Graf86872cb2006-08-22 00:01:08 -07002257
2258 if (tb[RTA_GATEWAY]) {
2259 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2260 cfg->fc_flags |= RTF_GATEWAY;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002262
2263 if (tb[RTA_DST]) {
2264 int plen = (rtm->rtm_dst_len + 7) >> 3;
2265
2266 if (nla_len(tb[RTA_DST]) < plen)
2267 goto errout;
2268
2269 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002270 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002271
2272 if (tb[RTA_SRC]) {
2273 int plen = (rtm->rtm_src_len + 7) >> 3;
2274
2275 if (nla_len(tb[RTA_SRC]) < plen)
2276 goto errout;
2277
2278 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002280
Daniel Walterc3968a82011-04-13 21:10:57 +00002281 if (tb[RTA_PREFSRC])
2282 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2283
Thomas Graf86872cb2006-08-22 00:01:08 -07002284 if (tb[RTA_OIF])
2285 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2286
2287 if (tb[RTA_PRIORITY])
2288 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2289
2290 if (tb[RTA_METRICS]) {
2291 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2292 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 }
Thomas Graf86872cb2006-08-22 00:01:08 -07002294
2295 if (tb[RTA_TABLE])
2296 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2297
2298 err = 0;
2299errout:
2300 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002301}
2302
Thomas Grafc127ea22007-03-22 11:58:32 -07002303static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304{
Thomas Graf86872cb2006-08-22 00:01:08 -07002305 struct fib6_config cfg;
2306 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307
Thomas Graf86872cb2006-08-22 00:01:08 -07002308 err = rtm_to_fib6_config(skb, nlh, &cfg);
2309 if (err < 0)
2310 return err;
2311
2312 return ip6_route_del(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002313}
2314
Thomas Grafc127ea22007-03-22 11:58:32 -07002315static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316{
Thomas Graf86872cb2006-08-22 00:01:08 -07002317 struct fib6_config cfg;
2318 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319
Thomas Graf86872cb2006-08-22 00:01:08 -07002320 err = rtm_to_fib6_config(skb, nlh, &cfg);
2321 if (err < 0)
2322 return err;
2323
2324 return ip6_route_add(&cfg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325}
2326
Thomas Graf339bf982006-11-10 14:10:15 -08002327static inline size_t rt6_nlmsg_size(void)
2328{
2329 return NLMSG_ALIGN(sizeof(struct rtmsg))
2330 + nla_total_size(16) /* RTA_SRC */
2331 + nla_total_size(16) /* RTA_DST */
2332 + nla_total_size(16) /* RTA_GATEWAY */
2333 + nla_total_size(16) /* RTA_PREFSRC */
2334 + nla_total_size(4) /* RTA_TABLE */
2335 + nla_total_size(4) /* RTA_IIF */
2336 + nla_total_size(4) /* RTA_OIF */
2337 + nla_total_size(4) /* RTA_PRIORITY */
Noriaki TAKAMIYA6a2b9ce2007-01-23 22:09:41 -08002338 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
Thomas Graf339bf982006-11-10 14:10:15 -08002339 + nla_total_size(sizeof(struct rta_cacheinfo));
2340}
2341
Brian Haley191cd582008-08-14 15:33:21 -07002342static int rt6_fill_node(struct net *net,
2343 struct sk_buff *skb, struct rt6_info *rt,
Jamal Hadi Salim0d51aa82005-06-21 13:51:04 -07002344 struct in6_addr *dst, struct in6_addr *src,
2345 int iif, int type, u32 pid, u32 seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002346 int prefix, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347{
2348 struct rtmsg *rtm;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002349 struct nlmsghdr *nlh;
Thomas Grafe3703b32006-11-27 09:27:07 -08002350 long expires;
Patrick McHardy9e762a42006-08-10 23:09:48 -07002351 u32 table;
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002352 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002353
2354 if (prefix) { /* user wants prefix routes only */
2355 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2356 /* success since this is not a prefix route */
2357 return 1;
2358 }
2359 }
2360
Thomas Graf2d7202b2006-08-22 00:01:27 -07002361 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2362 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002363 return -EMSGSIZE;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002364
2365 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002366 rtm->rtm_family = AF_INET6;
2367 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2368 rtm->rtm_src_len = rt->rt6i_src.plen;
2369 rtm->rtm_tos = 0;
Thomas Grafc71099a2006-08-04 23:20:06 -07002370 if (rt->rt6i_table)
Patrick McHardy9e762a42006-08-10 23:09:48 -07002371 table = rt->rt6i_table->tb6_id;
Thomas Grafc71099a2006-08-04 23:20:06 -07002372 else
Patrick McHardy9e762a42006-08-10 23:09:48 -07002373 table = RT6_TABLE_UNSPEC;
2374 rtm->rtm_table = table;
Thomas Graf2d7202b2006-08-22 00:01:27 -07002375 NLA_PUT_U32(skb, RTA_TABLE, table);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 if (rt->rt6i_flags&RTF_REJECT)
2377 rtm->rtm_type = RTN_UNREACHABLE;
Maciej Żenczykowskiab79ad12010-09-27 00:07:02 +00002378 else if (rt->rt6i_flags&RTF_LOCAL)
2379 rtm->rtm_type = RTN_LOCAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002380 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2381 rtm->rtm_type = RTN_LOCAL;
2382 else
2383 rtm->rtm_type = RTN_UNICAST;
2384 rtm->rtm_flags = 0;
2385 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2386 rtm->rtm_protocol = rt->rt6i_protocol;
2387 if (rt->rt6i_flags&RTF_DYNAMIC)
2388 rtm->rtm_protocol = RTPROT_REDIRECT;
2389 else if (rt->rt6i_flags & RTF_ADDRCONF)
2390 rtm->rtm_protocol = RTPROT_KERNEL;
2391 else if (rt->rt6i_flags&RTF_DEFAULT)
2392 rtm->rtm_protocol = RTPROT_RA;
2393
2394 if (rt->rt6i_flags&RTF_CACHE)
2395 rtm->rtm_flags |= RTM_F_CLONED;
2396
2397 if (dst) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002398 NLA_PUT(skb, RTA_DST, 16, dst);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002399 rtm->rtm_dst_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002400 } else if (rtm->rtm_dst_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002401 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002402#ifdef CONFIG_IPV6_SUBTREES
2403 if (src) {
Thomas Graf2d7202b2006-08-22 00:01:27 -07002404 NLA_PUT(skb, RTA_SRC, 16, src);
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002405 rtm->rtm_src_len = 128;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002406 } else if (rtm->rtm_src_len)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002407 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002408#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002409 if (iif) {
2410#ifdef CONFIG_IPV6_MROUTE
2411 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
Benjamin Thery8229efd2008-12-10 16:30:15 -08002412 int err = ip6mr_get_route(net, skb, rtm, nowait);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002413 if (err <= 0) {
2414 if (!nowait) {
2415 if (err == 0)
2416 return 0;
2417 goto nla_put_failure;
2418 } else {
2419 if (err == -EMSGSIZE)
2420 goto nla_put_failure;
2421 }
2422 }
2423 } else
2424#endif
2425 NLA_PUT_U32(skb, RTA_IIF, iif);
2426 } else if (dst) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002427 struct in6_addr saddr_buf;
Daniel Walterc3968a82011-04-13 21:10:57 +00002428 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002429 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002430 }
Thomas Graf2d7202b2006-08-22 00:01:27 -07002431
Daniel Walterc3968a82011-04-13 21:10:57 +00002432 if (rt->rt6i_prefsrc.plen) {
2433 struct in6_addr saddr_buf;
2434 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2435 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2436 }
2437
David S. Millerdefb3512010-12-08 21:16:57 -08002438 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002439 goto nla_put_failure;
2440
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002441 rcu_read_lock();
2442 n = dst_get_neighbour(&rt->dst);
2443 if (n)
2444 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2445 rcu_read_unlock();
Thomas Graf2d7202b2006-08-22 00:01:27 -07002446
Changli Gaod8d1f302010-06-10 23:31:35 -07002447 if (rt->dst.dev)
Thomas Graf2d7202b2006-08-22 00:01:27 -07002448 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2449
2450 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
Thomas Grafe3703b32006-11-27 09:27:07 -08002451
YOSHIFUJI Hideaki36e3dea2008-05-13 02:52:55 +09002452 if (!(rt->rt6i_flags & RTF_EXPIRES))
2453 expires = 0;
2454 else if (rt->rt6i_expires - jiffies < INT_MAX)
2455 expires = rt->rt6i_expires - jiffies;
2456 else
2457 expires = INT_MAX;
YOSHIFUJI Hideaki69cdf8f2008-05-19 16:55:13 -07002458
Changli Gaod8d1f302010-06-10 23:31:35 -07002459 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2460 expires, rt->dst.error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002461 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462
Thomas Graf2d7202b2006-08-22 00:01:27 -07002463 return nlmsg_end(skb, nlh);
2464
2465nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002466 nlmsg_cancel(skb, nlh);
2467 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002468}
2469
Patrick McHardy1b43af52006-08-10 23:11:17 -07002470int rt6_dump_route(struct rt6_info *rt, void *p_arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471{
2472 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2473 int prefix;
2474
Thomas Graf2d7202b2006-08-22 00:01:27 -07002475 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2476 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2478 } else
2479 prefix = 0;
2480
Brian Haley191cd582008-08-14 15:33:21 -07002481 return rt6_fill_node(arg->net,
2482 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002483 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002484 prefix, 0, NLM_F_MULTI);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485}
2486
Thomas Grafc127ea22007-03-22 11:58:32 -07002487static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002489 struct net *net = sock_net(in_skb->sk);
Thomas Grafab364a62006-08-22 00:01:47 -07002490 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002491 struct rt6_info *rt;
Thomas Grafab364a62006-08-22 00:01:47 -07002492 struct sk_buff *skb;
2493 struct rtmsg *rtm;
David S. Miller4c9483b2011-03-12 16:22:43 -05002494 struct flowi6 fl6;
Thomas Grafab364a62006-08-22 00:01:47 -07002495 int err, iif = 0;
2496
2497 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2498 if (err < 0)
2499 goto errout;
2500
2501 err = -EINVAL;
David S. Miller4c9483b2011-03-12 16:22:43 -05002502 memset(&fl6, 0, sizeof(fl6));
Thomas Grafab364a62006-08-22 00:01:47 -07002503
2504 if (tb[RTA_SRC]) {
2505 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2506 goto errout;
2507
David S. Miller4c9483b2011-03-12 16:22:43 -05002508 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
Thomas Grafab364a62006-08-22 00:01:47 -07002509 }
2510
2511 if (tb[RTA_DST]) {
2512 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2513 goto errout;
2514
David S. Miller4c9483b2011-03-12 16:22:43 -05002515 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
Thomas Grafab364a62006-08-22 00:01:47 -07002516 }
2517
2518 if (tb[RTA_IIF])
2519 iif = nla_get_u32(tb[RTA_IIF]);
2520
2521 if (tb[RTA_OIF])
David S. Miller4c9483b2011-03-12 16:22:43 -05002522 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
Thomas Grafab364a62006-08-22 00:01:47 -07002523
2524 if (iif) {
2525 struct net_device *dev;
Daniel Lezcano55786892008-03-04 13:47:47 -08002526 dev = __dev_get_by_index(net, iif);
Thomas Grafab364a62006-08-22 00:01:47 -07002527 if (!dev) {
2528 err = -ENODEV;
2529 goto errout;
2530 }
2531 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002532
2533 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Thomas Grafab364a62006-08-22 00:01:47 -07002534 if (skb == NULL) {
2535 err = -ENOBUFS;
2536 goto errout;
2537 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002538
2539 /* Reserve room for dummy headers, this skb can pass
2540 through good chunk of routing engine.
2541 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002542 skb_reset_mac_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002543 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2544
David S. Miller4c9483b2011-03-12 16:22:43 -05002545 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
Changli Gaod8d1f302010-06-10 23:31:35 -07002546 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002547
David S. Miller4c9483b2011-03-12 16:22:43 -05002548 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002550 nlh->nlmsg_seq, 0, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002551 if (err < 0) {
Thomas Grafab364a62006-08-22 00:01:47 -07002552 kfree_skb(skb);
2553 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 }
2555
Daniel Lezcano55786892008-03-04 13:47:47 -08002556 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
Thomas Grafab364a62006-08-22 00:01:47 -07002557errout:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559}
2560
Thomas Graf86872cb2006-08-22 00:01:08 -07002561void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562{
2563 struct sk_buff *skb;
Daniel Lezcano55786892008-03-04 13:47:47 -08002564 struct net *net = info->nl_net;
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002565 u32 seq;
2566 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002567
Denis V. Lunev528c4ce2007-12-13 09:45:12 -08002568 err = -ENOBUFS;
2569 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
Thomas Graf86872cb2006-08-22 00:01:08 -07002570
Thomas Graf339bf982006-11-10 14:10:15 -08002571 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
Thomas Graf21713eb2006-08-15 00:35:24 -07002572 if (skb == NULL)
2573 goto errout;
2574
Brian Haley191cd582008-08-14 15:33:21 -07002575 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09002576 event, info->pid, seq, 0, 0, 0);
Patrick McHardy26932562007-01-31 23:16:40 -08002577 if (err < 0) {
2578 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2579 WARN_ON(err == -EMSGSIZE);
2580 kfree_skb(skb);
2581 goto errout;
2582 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -08002583 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2584 info->nlh, gfp_any());
2585 return;
Thomas Graf21713eb2006-08-15 00:35:24 -07002586errout:
2587 if (err < 0)
Daniel Lezcano55786892008-03-04 13:47:47 -08002588 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589}
2590
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002591static int ip6_route_dev_notify(struct notifier_block *this,
2592 unsigned long event, void *data)
2593{
2594 struct net_device *dev = (struct net_device *)data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002595 struct net *net = dev_net(dev);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002596
2597 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002598 net->ipv6.ip6_null_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002599 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2600#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002601 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002602 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002603 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002604 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2605#endif
2606 }
2607
2608 return NOTIFY_OK;
2609}
2610
Linus Torvalds1da177e2005-04-16 15:20:36 -07002611/*
2612 * /proc
2613 */
2614
2615#ifdef CONFIG_PROC_FS
2616
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): no user of this struct is visible in this part of the
 * file (the handlers below take a seq_file); confirm it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2625
2626static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2627{
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002628 struct seq_file *m = p_arg;
David S. Miller69cce1d2011-07-17 23:09:49 -07002629 struct neighbour *n;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002630
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002631 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002632
2633#ifdef CONFIG_IPV6_SUBTREES
Harvey Harrison4b7a4272008-10-29 12:50:24 -07002634 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635#else
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002636 seq_puts(m, "00000000000000000000000000000000 00 ");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002637#endif
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002638 rcu_read_lock();
David S. Miller69cce1d2011-07-17 23:09:49 -07002639 n = dst_get_neighbour(&rt->dst);
2640 if (n) {
2641 seq_printf(m, "%pi6", n->primary_key);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642 } else {
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002643 seq_puts(m, "00000000000000000000000000000000");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644 }
Eric Dumazetf2c31e32011-07-29 19:00:53 +00002645 rcu_read_unlock();
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002646 seq_printf(m, " %08x %08x %08x %08x %8s\n",
Changli Gaod8d1f302010-06-10 23:31:35 -07002647 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2648 rt->dst.__use, rt->rt6i_flags,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002649 rt->rt6i_dev ? rt->rt6i_dev->name : "");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002650 return 0;
2651}
2652
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002653static int ipv6_route_show(struct seq_file *m, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002654{
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002655 struct net *net = (struct net *)m->private;
2656 fib6_clean_all(net, rt6_info_route, 0, m);
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002657 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002658}
2659
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002660static int ipv6_route_open(struct inode *inode, struct file *file)
2661{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002662 return single_open_net(inode, file, ipv6_route_show);
Daniel Lezcanof3db4852008-03-03 23:27:06 -08002663}
2664
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002665static const struct file_operations ipv6_route_proc_fops = {
2666 .owner = THIS_MODULE,
2667 .open = ipv6_route_open,
2668 .read = seq_read,
2669 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002670 .release = single_release_net,
Alexey Dobriyan33120b32007-11-06 05:27:11 -08002671};
2672
Linus Torvalds1da177e2005-04-16 15:20:36 -07002673static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2674{
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002675 struct net *net = (struct net *)seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002676 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002677 net->ipv6.rt6_stats->fib_nodes,
2678 net->ipv6.rt6_stats->fib_route_nodes,
2679 net->ipv6.rt6_stats->fib_rt_alloc,
2680 net->ipv6.rt6_stats->fib_rt_entries,
2681 net->ipv6.rt6_stats->fib_rt_cache,
Eric Dumazetfc66f952010-10-08 06:37:34 +00002682 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002683 net->ipv6.rt6_stats->fib_discarded_routes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002684
2685 return 0;
2686}
2687
2688static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2689{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002690 return single_open_net(inode, file, rt6_stats_seq_show);
Daniel Lezcano69ddb802008-03-04 13:46:23 -08002691}
2692
Arjan van de Ven9a321442007-02-12 00:55:35 -08002693static const struct file_operations rt6_stats_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002694 .owner = THIS_MODULE,
2695 .open = rt6_stats_seq_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002698 .release = single_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002699};
2700#endif /* CONFIG_PROC_FS */
2701
2702#ifdef CONFIG_SYSCTL
2703
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704static
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002705int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002706 void __user *buffer, size_t *lenp, loff_t *ppos)
2707{
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002708 struct net *net;
2709 int delay;
2710 if (!write)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002711 return -EINVAL;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002712
2713 net = (struct net *)ctl->extra1;
2714 delay = net->ipv6.sysctl.flush_delay;
2715 proc_dointvec(ctl, write, buffer, lenp, ppos);
2716 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2717 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002718}
2719
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002720ctl_table ipv6_route_table_template[] = {
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +09002721 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002722 .procname = "flush",
Daniel Lezcano49905092008-01-10 03:01:01 -08002723 .data = &init_net.ipv6.sysctl.flush_delay,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 .maxlen = sizeof(int),
Dave Jones89c8b3a12005-04-28 12:11:49 -07002725 .mode = 0200,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002726 .proc_handler = ipv6_sysctl_rtcache_flush
Linus Torvalds1da177e2005-04-16 15:20:36 -07002727 },
2728 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002729 .procname = "gc_thresh",
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002730 .data = &ip6_dst_ops_template.gc_thresh,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002731 .maxlen = sizeof(int),
2732 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002733 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002734 },
2735 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 .procname = "max_size",
Daniel Lezcano49905092008-01-10 03:01:01 -08002737 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002738 .maxlen = sizeof(int),
2739 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002740 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741 },
2742 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002743 .procname = "gc_min_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002744 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002745 .maxlen = sizeof(int),
2746 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002747 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002748 },
2749 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750 .procname = "gc_timeout",
Daniel Lezcano49905092008-01-10 03:01:01 -08002751 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752 .maxlen = sizeof(int),
2753 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002754 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002755 },
2756 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002757 .procname = "gc_interval",
Daniel Lezcano49905092008-01-10 03:01:01 -08002758 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002759 .maxlen = sizeof(int),
2760 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002761 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002762 },
2763 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002764 .procname = "gc_elasticity",
Daniel Lezcano49905092008-01-10 03:01:01 -08002765 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002766 .maxlen = sizeof(int),
2767 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002768 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002769 },
2770 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002771 .procname = "mtu_expires",
Daniel Lezcano49905092008-01-10 03:01:01 -08002772 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002773 .maxlen = sizeof(int),
2774 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002775 .proc_handler = proc_dointvec_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002776 },
2777 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002778 .procname = "min_adv_mss",
Daniel Lezcano49905092008-01-10 03:01:01 -08002779 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002780 .maxlen = sizeof(int),
2781 .mode = 0644,
Min Zhangf3d3f612010-08-14 22:42:51 -07002782 .proc_handler = proc_dointvec,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002783 },
2784 {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002785 .procname = "gc_min_interval_ms",
Daniel Lezcano49905092008-01-10 03:01:01 -08002786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002787 .maxlen = sizeof(int),
2788 .mode = 0644,
Alexey Dobriyan6d9f2392008-11-03 18:21:05 -08002789 .proc_handler = proc_dointvec_ms_jiffies,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002790 },
Eric W. Biedermanf8572d82009-11-05 13:32:03 -08002791 { }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002792};
2793
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002794struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002795{
2796 struct ctl_table *table;
2797
2798 table = kmemdup(ipv6_route_table_template,
2799 sizeof(ipv6_route_table_template),
2800 GFP_KERNEL);
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002801
2802 if (table) {
2803 table[0].data = &net->ipv6.sysctl.flush_delay;
Lucian Adrian Grijincuc486da32011-02-24 19:48:03 +00002804 table[0].extra1 = net;
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002805 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002806 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2807 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2808 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2809 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2810 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2811 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2812 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
Alexey Dobriyan9c69fab2009-12-18 20:11:03 -08002813 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
YOSHIFUJI Hideaki5ee09102008-02-28 00:24:28 +09002814 }
2815
Daniel Lezcano760f2d02008-01-10 02:53:43 -08002816 return table;
2817}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002818#endif
2819
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002820static int __net_init ip6_route_net_init(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002821{
Pavel Emelyanov633d4242008-04-21 14:25:23 -07002822 int ret = -ENOMEM;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002823
Alexey Dobriyan86393e52009-08-29 01:34:49 +00002824 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2825 sizeof(net->ipv6.ip6_dst_ops));
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002826
Eric Dumazetfc66f952010-10-08 06:37:34 +00002827 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2828 goto out_ip6_dst_ops;
2829
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002830 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2831 sizeof(*net->ipv6.ip6_null_entry),
2832 GFP_KERNEL);
2833 if (!net->ipv6.ip6_null_entry)
Eric Dumazetfc66f952010-10-08 06:37:34 +00002834 goto out_ip6_dst_entries;
Changli Gaod8d1f302010-06-10 23:31:35 -07002835 net->ipv6.ip6_null_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002836 (struct dst_entry *)net->ipv6.ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002837 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002838 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2839 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002840
2841#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2842 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2843 sizeof(*net->ipv6.ip6_prohibit_entry),
2844 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002845 if (!net->ipv6.ip6_prohibit_entry)
2846 goto out_ip6_null_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002847 net->ipv6.ip6_prohibit_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002848 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002849 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002850 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2851 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002852
2853 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2854 sizeof(*net->ipv6.ip6_blk_hole_entry),
2855 GFP_KERNEL);
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002856 if (!net->ipv6.ip6_blk_hole_entry)
2857 goto out_ip6_prohibit_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002858 net->ipv6.ip6_blk_hole_entry->dst.path =
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002859 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
Changli Gaod8d1f302010-06-10 23:31:35 -07002860 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
David S. Miller62fa8a82011-01-26 20:51:05 -08002861 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2862 ip6_template_metrics, true);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002863#endif
2864
Peter Zijlstrab339a47c2008-10-07 14:15:00 -07002865 net->ipv6.sysctl.flush_delay = 0;
2866 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2867 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2868 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2869 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2870 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2871 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2872 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2873
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002874#ifdef CONFIG_PROC_FS
2875 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2876 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2877#endif
Benjamin Thery6891a342008-03-04 13:49:47 -08002878 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2879
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002880 ret = 0;
2881out:
2882 return ret;
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002883
Peter Zijlstra68fffc62008-10-07 14:12:10 -07002884#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2885out_ip6_prohibit_entry:
2886 kfree(net->ipv6.ip6_prohibit_entry);
2887out_ip6_null_entry:
2888 kfree(net->ipv6.ip6_null_entry);
2889#endif
Eric Dumazetfc66f952010-10-08 06:37:34 +00002890out_ip6_dst_entries:
2891 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002892out_ip6_dst_ops:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002893 goto out;
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002894}
2895
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +00002896static void __net_exit ip6_route_net_exit(struct net *net)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002897{
2898#ifdef CONFIG_PROC_FS
2899 proc_net_remove(net, "ipv6_route");
2900 proc_net_remove(net, "rt6_stats");
2901#endif
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002902 kfree(net->ipv6.ip6_null_entry);
2903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904 kfree(net->ipv6.ip6_prohibit_entry);
2905 kfree(net->ipv6.ip6_blk_hole_entry);
2906#endif
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002907 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002908}
2909
2910static struct pernet_operations ip6_route_net_ops = {
2911 .init = ip6_route_net_init,
2912 .exit = ip6_route_net_exit,
2913};
2914
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002915static struct notifier_block ip6_route_dev_notifier = {
2916 .notifier_call = ip6_route_dev_notify,
2917 .priority = 0,
2918};
2919
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002920int __init ip6_route_init(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002921{
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002922 int ret;
2923
Daniel Lezcano9a7ec3a2008-03-04 13:48:53 -08002924 ret = -ENOMEM;
2925 ip6_dst_ops_template.kmem_cachep =
2926 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2927 SLAB_HWCACHE_ALIGN, NULL);
2928 if (!ip6_dst_ops_template.kmem_cachep)
Fernando Carrijoc19a28e2009-01-07 18:09:08 -08002929 goto out;
David S. Miller14e50e52007-05-24 18:17:54 -07002930
Eric Dumazetfc66f952010-10-08 06:37:34 +00002931 ret = dst_entries_init(&ip6_dst_blackhole_ops);
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002932 if (ret)
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002933 goto out_kmem_cache;
Daniel Lezcanobdb32892008-03-04 13:48:10 -08002934
Eric Dumazetfc66f952010-10-08 06:37:34 +00002935 ret = register_pernet_subsys(&ip6_route_net_ops);
2936 if (ret)
2937 goto out_dst_entries;
2938
Arnaud Ebalard5dc121e2008-10-01 02:37:56 -07002939 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2940
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002941 /* Registering of the loopback is done before this portion of code,
2942 * the loopback reference in rt6_info will not be taken, do it
2943 * manually for init_net */
Changli Gaod8d1f302010-06-10 23:31:35 -07002944 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002945 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2946 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
Changli Gaod8d1f302010-06-10 23:31:35 -07002947 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002948 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
Changli Gaod8d1f302010-06-10 23:31:35 -07002949 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002950 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2951 #endif
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002952 ret = fib6_init();
2953 if (ret)
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002954 goto out_register_subsys;
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002955
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002956 ret = xfrm6_init();
2957 if (ret)
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002958 goto out_fib6_init;
Daniel Lezcanoc35b7e72007-12-08 00:14:11 -08002959
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002960 ret = fib6_rules_init();
2961 if (ret)
2962 goto xfrm6_init;
Daniel Lezcano7e5449c2007-12-08 00:14:54 -08002963
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002964 ret = -ENOBUFS;
Greg Rosec7ac8672011-06-10 01:27:09 +00002965 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2966 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2967 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002968 goto fib6_rules_init;
2969
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002970 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
Daniel Lezcanocdb18762008-03-04 13:45:33 -08002971 if (ret)
2972 goto fib6_rules_init;
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002973
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002974out:
2975 return ret;
2976
2977fib6_rules_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002978 fib6_rules_cleanup();
2979xfrm6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002980 xfrm6_fini();
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002981out_fib6_init:
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002982 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002983out_register_subsys:
2984 unregister_pernet_subsys(&ip6_route_net_ops);
Eric Dumazetfc66f952010-10-08 06:37:34 +00002985out_dst_entries:
2986 dst_entries_destroy(&ip6_dst_blackhole_ops);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002987out_kmem_cache:
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08002988 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Daniel Lezcano433d49c2007-12-07 00:43:48 -08002989 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002990}
2991
2992void ip6_route_cleanup(void)
2993{
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002994 unregister_netdevice_notifier(&ip6_route_dev_notifier);
Thomas Graf101367c2006-08-04 03:39:02 -07002995 fib6_rules_cleanup();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002996 xfrm6_fini();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002997 fib6_gc_cleanup();
Daniel Lezcano8ed67782008-03-04 13:48:30 -08002998 unregister_pernet_subsys(&ip6_route_net_ops);
Xiaotian Feng41bb78b2010-11-02 16:11:05 +00002999 dst_entries_destroy(&ip6_dst_blackhole_ops);
Benjamin Theryf2fc6a52008-03-04 13:49:23 -08003000 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003001}