Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /* Generic nexthop implementation
3 : *
4 : * Copyright (c) 2017-19 Cumulus Networks
5 : * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
6 : */
7 :
8 : #include <linux/nexthop.h>
9 : #include <linux/rtnetlink.h>
10 : #include <linux/slab.h>
11 : #include <net/arp.h>
12 : #include <net/ipv6_stubs.h>
13 : #include <net/lwtunnel.h>
14 : #include <net/ndisc.h>
15 : #include <net/nexthop.h>
16 : #include <net/route.h>
17 : #include <net/sock.h>
18 :
/* Forward declaration: remove_nexthop() is called by the group-entry
 * removal code that appears before its definition in this file.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);

/* Width (in bits) and bucket count of the per-netns device hash that
 * nexthop_devhash_add() inserts into; see nh_dev_hashfn().
 */
#define NH_DEV_HASHBITS  8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
24 :
25 : static const struct nla_policy rtm_nh_policy_new[] = {
26 : [NHA_ID] = { .type = NLA_U32 },
27 : [NHA_GROUP] = { .type = NLA_BINARY },
28 : [NHA_GROUP_TYPE] = { .type = NLA_U16 },
29 : [NHA_BLACKHOLE] = { .type = NLA_FLAG },
30 : [NHA_OIF] = { .type = NLA_U32 },
31 : [NHA_GATEWAY] = { .type = NLA_BINARY },
32 : [NHA_ENCAP_TYPE] = { .type = NLA_U16 },
33 : [NHA_ENCAP] = { .type = NLA_NESTED },
34 : [NHA_FDB] = { .type = NLA_FLAG },
35 : };
36 :
37 : static const struct nla_policy rtm_nh_policy_get[] = {
38 : [NHA_ID] = { .type = NLA_U32 },
39 : };
40 :
41 : static const struct nla_policy rtm_nh_policy_dump[] = {
42 : [NHA_OIF] = { .type = NLA_U32 },
43 : [NHA_GROUPS] = { .type = NLA_FLAG },
44 : [NHA_MASTER] = { .type = NLA_U32 },
45 : [NHA_FDB] = { .type = NLA_FLAG },
46 : };
47 :
48 0 : static bool nexthop_notifiers_is_empty(struct net *net)
49 : {
50 0 : return !net->nexthop.notifier_chain.head;
51 : }
52 :
53 : static void
54 0 : __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
55 : const struct nexthop *nh)
56 : {
57 0 : struct nh_info *nhi = rtnl_dereference(nh->nh_info);
58 :
59 0 : nh_info->dev = nhi->fib_nhc.nhc_dev;
60 0 : nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
61 0 : if (nh_info->gw_family == AF_INET)
62 0 : nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
63 0 : else if (nh_info->gw_family == AF_INET6)
64 0 : nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
65 :
66 0 : nh_info->is_reject = nhi->reject_nh;
67 0 : nh_info->is_fdb = nhi->fdb_nh;
68 0 : nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
69 0 : }
70 :
71 0 : static int nh_notifier_single_info_init(struct nh_notifier_info *info,
72 : const struct nexthop *nh)
73 : {
74 0 : info->type = NH_NOTIFIER_INFO_TYPE_SINGLE;
75 0 : info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
76 0 : if (!info->nh)
77 : return -ENOMEM;
78 :
79 0 : __nh_notifier_single_info_init(info->nh, nh);
80 :
81 0 : return 0;
82 : }
83 :
84 0 : static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
85 : {
86 0 : kfree(info->nh);
87 0 : }
88 :
89 0 : static int nh_notifier_mp_info_init(struct nh_notifier_info *info,
90 : struct nh_group *nhg)
91 : {
92 0 : u16 num_nh = nhg->num_nh;
93 0 : int i;
94 :
95 0 : info->type = NH_NOTIFIER_INFO_TYPE_GRP;
96 0 : info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
97 : GFP_KERNEL);
98 0 : if (!info->nh_grp)
99 : return -ENOMEM;
100 :
101 0 : info->nh_grp->num_nh = num_nh;
102 0 : info->nh_grp->is_fdb = nhg->fdb_nh;
103 :
104 0 : for (i = 0; i < num_nh; i++) {
105 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
106 :
107 0 : info->nh_grp->nh_entries[i].id = nhge->nh->id;
108 0 : info->nh_grp->nh_entries[i].weight = nhge->weight;
109 0 : __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
110 0 : nhge->nh);
111 : }
112 :
113 : return 0;
114 : }
115 :
116 0 : static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
117 : const struct nexthop *nh)
118 : {
119 0 : struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
120 :
121 0 : if (nhg->mpath)
122 0 : return nh_notifier_mp_info_init(info, nhg);
123 : return -EINVAL;
124 : }
125 :
126 0 : static void nh_notifier_grp_info_fini(struct nh_notifier_info *info,
127 : const struct nexthop *nh)
128 : {
129 0 : struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
130 :
131 0 : if (nhg->mpath)
132 0 : kfree(info->nh_grp);
133 0 : }
134 :
135 0 : static int nh_notifier_info_init(struct nh_notifier_info *info,
136 : const struct nexthop *nh)
137 : {
138 0 : info->id = nh->id;
139 :
140 0 : if (nh->is_group)
141 0 : return nh_notifier_grp_info_init(info, nh);
142 : else
143 0 : return nh_notifier_single_info_init(info, nh);
144 : }
145 :
146 0 : static void nh_notifier_info_fini(struct nh_notifier_info *info,
147 : const struct nexthop *nh)
148 : {
149 0 : if (nh->is_group)
150 0 : nh_notifier_grp_info_fini(info, nh);
151 : else
152 0 : nh_notifier_single_info_fini(info);
153 0 : }
154 :
155 0 : static int call_nexthop_notifiers(struct net *net,
156 : enum nexthop_event_type event_type,
157 : struct nexthop *nh,
158 : struct netlink_ext_ack *extack)
159 : {
160 0 : struct nh_notifier_info info = {
161 : .net = net,
162 : .extack = extack,
163 : };
164 0 : int err;
165 :
166 0 : ASSERT_RTNL();
167 :
168 0 : if (nexthop_notifiers_is_empty(net))
169 : return 0;
170 :
171 0 : err = nh_notifier_info_init(&info, nh);
172 0 : if (err) {
173 0 : NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
174 0 : return err;
175 : }
176 :
177 0 : err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
178 : event_type, &info);
179 0 : nh_notifier_info_fini(&info, nh);
180 :
181 0 : return notifier_to_errno(err);
182 : }
183 :
184 0 : static int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
185 : enum nexthop_event_type event_type,
186 : struct nexthop *nh,
187 : struct netlink_ext_ack *extack)
188 : {
189 0 : struct nh_notifier_info info = {
190 : .net = net,
191 : .extack = extack,
192 : };
193 0 : int err;
194 :
195 0 : err = nh_notifier_info_init(&info, nh);
196 0 : if (err)
197 : return err;
198 :
199 0 : err = nb->notifier_call(nb, event_type, &info);
200 0 : nh_notifier_info_fini(&info, nh);
201 :
202 0 : return notifier_to_errno(err);
203 : }
204 :
205 0 : static unsigned int nh_dev_hashfn(unsigned int val)
206 : {
207 0 : unsigned int mask = NH_DEV_HASHSIZE - 1;
208 :
209 0 : return (val ^
210 0 : (val >> NH_DEV_HASHBITS) ^
211 0 : (val >> (NH_DEV_HASHBITS * 2))) & mask;
212 : }
213 :
214 0 : static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
215 : {
216 0 : struct net_device *dev = nhi->fib_nhc.nhc_dev;
217 0 : struct hlist_head *head;
218 0 : unsigned int hash;
219 :
220 0 : WARN_ON(!dev);
221 :
222 0 : hash = nh_dev_hashfn(dev->ifindex);
223 0 : head = &net->nexthop.devhash[hash];
224 0 : hlist_add_head(&nhi->dev_hash, head);
225 0 : }
226 :
227 0 : static void nexthop_free_group(struct nexthop *nh)
228 : {
229 0 : struct nh_group *nhg;
230 0 : int i;
231 :
232 0 : nhg = rcu_dereference_raw(nh->nh_grp);
233 0 : for (i = 0; i < nhg->num_nh; ++i) {
234 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
235 :
236 0 : WARN_ON(!list_empty(&nhge->nh_list));
237 0 : nexthop_put(nhge->nh);
238 : }
239 :
240 0 : WARN_ON(nhg->spare == nhg);
241 :
242 0 : kfree(nhg->spare);
243 0 : kfree(nhg);
244 0 : }
245 :
246 0 : static void nexthop_free_single(struct nexthop *nh)
247 : {
248 0 : struct nh_info *nhi;
249 :
250 0 : nhi = rcu_dereference_raw(nh->nh_info);
251 0 : switch (nhi->family) {
252 0 : case AF_INET:
253 0 : fib_nh_release(nh->net, &nhi->fib_nh);
254 0 : break;
255 0 : case AF_INET6:
256 0 : ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
257 0 : break;
258 : }
259 0 : kfree(nhi);
260 0 : }
261 :
262 0 : void nexthop_free_rcu(struct rcu_head *head)
263 : {
264 0 : struct nexthop *nh = container_of(head, struct nexthop, rcu);
265 :
266 0 : if (nh->is_group)
267 0 : nexthop_free_group(nh);
268 : else
269 0 : nexthop_free_single(nh);
270 :
271 0 : kfree(nh);
272 0 : }
273 : EXPORT_SYMBOL_GPL(nexthop_free_rcu);
274 :
275 0 : static struct nexthop *nexthop_alloc(void)
276 : {
277 0 : struct nexthop *nh;
278 :
279 0 : nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
280 0 : if (nh) {
281 0 : INIT_LIST_HEAD(&nh->fi_list);
282 0 : INIT_LIST_HEAD(&nh->f6i_list);
283 0 : INIT_LIST_HEAD(&nh->grp_list);
284 0 : INIT_LIST_HEAD(&nh->fdb_list);
285 : }
286 0 : return nh;
287 : }
288 :
289 0 : static struct nh_group *nexthop_grp_alloc(u16 num_nh)
290 : {
291 0 : struct nh_group *nhg;
292 :
293 0 : nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
294 0 : if (nhg)
295 0 : nhg->num_nh = num_nh;
296 :
297 0 : return nhg;
298 : }
299 :
300 0 : static void nh_base_seq_inc(struct net *net)
301 : {
302 0 : while (++net->nexthop.seq == 0)
303 0 : ;
304 : }
305 :
306 : /* no reference taken; rcu lock or rtnl must be held */
307 0 : struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
308 : {
309 0 : struct rb_node **pp, *parent = NULL, *next;
310 :
311 0 : pp = &net->nexthop.rb_root.rb_node;
312 0 : while (1) {
313 0 : struct nexthop *nh;
314 :
315 0 : next = rcu_dereference_raw(*pp);
316 0 : if (!next)
317 : break;
318 0 : parent = next;
319 :
320 0 : nh = rb_entry(parent, struct nexthop, rb_node);
321 0 : if (id < nh->id)
322 0 : pp = &next->rb_left;
323 0 : else if (id > nh->id)
324 0 : pp = &next->rb_right;
325 : else
326 0 : return nh;
327 : }
328 : return NULL;
329 : }
330 : EXPORT_SYMBOL_GPL(nexthop_find_by_id);
331 :
332 : /* used for auto id allocation; called with rtnl held */
333 0 : static u32 nh_find_unused_id(struct net *net)
334 : {
335 0 : u32 id_start = net->nexthop.last_id_allocated;
336 :
337 0 : while (1) {
338 0 : net->nexthop.last_id_allocated++;
339 0 : if (net->nexthop.last_id_allocated == id_start)
340 : break;
341 :
342 0 : if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
343 0 : return net->nexthop.last_id_allocated;
344 : }
345 : return 0;
346 : }
347 :
348 0 : static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
349 : {
350 0 : struct nexthop_grp *p;
351 0 : size_t len = nhg->num_nh * sizeof(*p);
352 0 : struct nlattr *nla;
353 0 : u16 group_type = 0;
354 0 : int i;
355 :
356 0 : if (nhg->mpath)
357 : group_type = NEXTHOP_GRP_TYPE_MPATH;
358 :
359 0 : if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
360 0 : goto nla_put_failure;
361 :
362 0 : nla = nla_reserve(skb, NHA_GROUP, len);
363 0 : if (!nla)
364 0 : goto nla_put_failure;
365 :
366 0 : p = nla_data(nla);
367 0 : for (i = 0; i < nhg->num_nh; ++i) {
368 0 : p->id = nhg->nh_entries[i].nh->id;
369 0 : p->weight = nhg->nh_entries[i].weight - 1;
370 0 : p += 1;
371 : }
372 :
373 : return 0;
374 :
375 : nla_put_failure:
376 : return -EMSGSIZE;
377 : }
378 :
379 0 : static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
380 : int event, u32 portid, u32 seq, unsigned int nlflags)
381 : {
382 0 : struct fib6_nh *fib6_nh;
383 0 : struct fib_nh *fib_nh;
384 0 : struct nlmsghdr *nlh;
385 0 : struct nh_info *nhi;
386 0 : struct nhmsg *nhm;
387 :
388 0 : nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
389 0 : if (!nlh)
390 : return -EMSGSIZE;
391 :
392 0 : nhm = nlmsg_data(nlh);
393 0 : nhm->nh_family = AF_UNSPEC;
394 0 : nhm->nh_flags = nh->nh_flags;
395 0 : nhm->nh_protocol = nh->protocol;
396 0 : nhm->nh_scope = 0;
397 0 : nhm->resvd = 0;
398 :
399 0 : if (nla_put_u32(skb, NHA_ID, nh->id))
400 0 : goto nla_put_failure;
401 :
402 0 : if (nh->is_group) {
403 0 : struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
404 :
405 0 : if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
406 0 : goto nla_put_failure;
407 0 : if (nla_put_nh_group(skb, nhg))
408 0 : goto nla_put_failure;
409 0 : goto out;
410 : }
411 :
412 0 : nhi = rtnl_dereference(nh->nh_info);
413 0 : nhm->nh_family = nhi->family;
414 0 : if (nhi->reject_nh) {
415 0 : if (nla_put_flag(skb, NHA_BLACKHOLE))
416 0 : goto nla_put_failure;
417 0 : goto out;
418 0 : } else if (nhi->fdb_nh) {
419 0 : if (nla_put_flag(skb, NHA_FDB))
420 0 : goto nla_put_failure;
421 : } else {
422 0 : const struct net_device *dev;
423 :
424 0 : dev = nhi->fib_nhc.nhc_dev;
425 0 : if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
426 0 : goto nla_put_failure;
427 : }
428 :
429 0 : nhm->nh_scope = nhi->fib_nhc.nhc_scope;
430 0 : switch (nhi->family) {
431 0 : case AF_INET:
432 0 : fib_nh = &nhi->fib_nh;
433 0 : if (fib_nh->fib_nh_gw_family &&
434 0 : nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
435 0 : goto nla_put_failure;
436 : break;
437 :
438 0 : case AF_INET6:
439 0 : fib6_nh = &nhi->fib6_nh;
440 0 : if (fib6_nh->fib_nh_gw_family &&
441 0 : nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
442 0 : goto nla_put_failure;
443 : break;
444 : }
445 :
446 0 : if (nhi->fib_nhc.nhc_lwtstate &&
447 0 : lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
448 : NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
449 : goto nla_put_failure;
450 :
451 0 : out:
452 0 : nlmsg_end(skb, nlh);
453 0 : return 0;
454 :
455 0 : nla_put_failure:
456 0 : nlmsg_cancel(skb, nlh);
457 0 : return -EMSGSIZE;
458 : }
459 :
460 0 : static size_t nh_nlmsg_size_grp(struct nexthop *nh)
461 : {
462 0 : struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
463 0 : size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
464 :
465 0 : return nla_total_size(sz) +
466 0 : nla_total_size(2); /* NHA_GROUP_TYPE */
467 : }
468 :
469 0 : static size_t nh_nlmsg_size_single(struct nexthop *nh)
470 : {
471 0 : struct nh_info *nhi = rtnl_dereference(nh->nh_info);
472 0 : size_t sz;
473 :
474 : /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
475 : * are mutually exclusive
476 : */
477 0 : sz = nla_total_size(4); /* NHA_OIF */
478 :
479 0 : switch (nhi->family) {
480 0 : case AF_INET:
481 0 : if (nhi->fib_nh.fib_nh_gw_family)
482 0 : sz += nla_total_size(4); /* NHA_GATEWAY */
483 : break;
484 :
485 0 : case AF_INET6:
486 : /* NHA_GATEWAY */
487 0 : if (nhi->fib6_nh.fib_nh_gw_family)
488 0 : sz += nla_total_size(sizeof(const struct in6_addr));
489 : break;
490 : }
491 :
492 0 : if (nhi->fib_nhc.nhc_lwtstate) {
493 0 : sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
494 0 : sz += nla_total_size(2); /* NHA_ENCAP_TYPE */
495 : }
496 :
497 0 : return sz;
498 : }
499 :
500 0 : static size_t nh_nlmsg_size(struct nexthop *nh)
501 : {
502 0 : size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
503 :
504 0 : sz += nla_total_size(4); /* NHA_ID */
505 :
506 0 : if (nh->is_group)
507 0 : sz += nh_nlmsg_size_grp(nh);
508 : else
509 0 : sz += nh_nlmsg_size_single(nh);
510 :
511 0 : return sz;
512 : }
513 :
514 0 : static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
515 : {
516 0 : unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
517 0 : u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
518 0 : struct sk_buff *skb;
519 0 : int err = -ENOBUFS;
520 :
521 0 : skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
522 0 : if (!skb)
523 0 : goto errout;
524 :
525 0 : err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
526 0 : if (err < 0) {
527 : /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
528 0 : WARN_ON(err == -EMSGSIZE);
529 0 : kfree_skb(skb);
530 0 : goto errout;
531 : }
532 :
533 0 : rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
534 : info->nlh, gfp_any());
535 0 : return;
536 0 : errout:
537 0 : if (err < 0)
538 0 : rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
539 : }
540 :
541 0 : static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
542 : bool *is_fdb, struct netlink_ext_ack *extack)
543 : {
544 0 : if (nh->is_group) {
545 0 : struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
546 :
547 : /* nested multipath (group within a group) is not
548 : * supported
549 : */
550 0 : if (nhg->mpath) {
551 0 : NL_SET_ERR_MSG(extack,
552 : "Multipath group can not be a nexthop within a group");
553 0 : return false;
554 : }
555 0 : *is_fdb = nhg->fdb_nh;
556 : } else {
557 0 : struct nh_info *nhi = rtnl_dereference(nh->nh_info);
558 :
559 0 : if (nhi->reject_nh && npaths > 1) {
560 0 : NL_SET_ERR_MSG(extack,
561 : "Blackhole nexthop can not be used in a group with more than 1 path");
562 0 : return false;
563 : }
564 0 : *is_fdb = nhi->fdb_nh;
565 : }
566 :
567 : return true;
568 : }
569 :
570 0 : static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
571 : struct netlink_ext_ack *extack)
572 : {
573 0 : struct nh_info *nhi;
574 :
575 0 : nhi = rtnl_dereference(nh->nh_info);
576 :
577 0 : if (!nhi->fdb_nh) {
578 0 : NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
579 0 : return -EINVAL;
580 : }
581 :
582 0 : if (*nh_family == AF_UNSPEC) {
583 0 : *nh_family = nhi->family;
584 0 : } else if (*nh_family != nhi->family) {
585 0 : NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
586 0 : return -EINVAL;
587 : }
588 :
589 : return 0;
590 : }
591 :
592 0 : static int nh_check_attr_group(struct net *net,
593 : struct nlattr *tb[], size_t tb_size,
594 : struct netlink_ext_ack *extack)
595 : {
596 0 : unsigned int len = nla_len(tb[NHA_GROUP]);
597 0 : u8 nh_family = AF_UNSPEC;
598 0 : struct nexthop_grp *nhg;
599 0 : unsigned int i, j;
600 0 : u8 nhg_fdb = 0;
601 :
602 0 : if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
603 0 : NL_SET_ERR_MSG(extack,
604 : "Invalid length for nexthop group attribute");
605 0 : return -EINVAL;
606 : }
607 :
608 : /* convert len to number of nexthop ids */
609 0 : len /= sizeof(*nhg);
610 :
611 0 : nhg = nla_data(tb[NHA_GROUP]);
612 0 : for (i = 0; i < len; ++i) {
613 0 : if (nhg[i].resvd1 || nhg[i].resvd2) {
614 0 : NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
615 0 : return -EINVAL;
616 : }
617 0 : if (nhg[i].weight > 254) {
618 0 : NL_SET_ERR_MSG(extack, "Invalid value for weight");
619 0 : return -EINVAL;
620 : }
621 0 : for (j = i + 1; j < len; ++j) {
622 0 : if (nhg[i].id == nhg[j].id) {
623 0 : NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
624 0 : return -EINVAL;
625 : }
626 : }
627 : }
628 :
629 0 : if (tb[NHA_FDB])
630 0 : nhg_fdb = 1;
631 0 : nhg = nla_data(tb[NHA_GROUP]);
632 0 : for (i = 0; i < len; ++i) {
633 0 : struct nexthop *nh;
634 0 : bool is_fdb_nh;
635 :
636 0 : nh = nexthop_find_by_id(net, nhg[i].id);
637 0 : if (!nh) {
638 0 : NL_SET_ERR_MSG(extack, "Invalid nexthop id");
639 0 : return -EINVAL;
640 : }
641 0 : if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
642 : return -EINVAL;
643 :
644 0 : if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
645 : return -EINVAL;
646 :
647 0 : if (!nhg_fdb && is_fdb_nh) {
648 0 : NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
649 0 : return -EINVAL;
650 : }
651 : }
652 0 : for (i = NHA_GROUP_TYPE + 1; i < tb_size; ++i) {
653 0 : if (!tb[i])
654 0 : continue;
655 0 : if (i == NHA_FDB)
656 0 : continue;
657 0 : NL_SET_ERR_MSG(extack,
658 : "No other attributes can be set in nexthop groups");
659 : return -EINVAL;
660 : }
661 :
662 : return 0;
663 : }
664 :
665 0 : static bool ipv6_good_nh(const struct fib6_nh *nh)
666 : {
667 0 : int state = NUD_REACHABLE;
668 0 : struct neighbour *n;
669 :
670 0 : rcu_read_lock_bh();
671 :
672 0 : n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
673 0 : if (n)
674 0 : state = n->nud_state;
675 :
676 0 : rcu_read_unlock_bh();
677 :
678 0 : return !!(state & NUD_VALID);
679 : }
680 :
681 0 : static bool ipv4_good_nh(const struct fib_nh *nh)
682 : {
683 0 : int state = NUD_REACHABLE;
684 0 : struct neighbour *n;
685 :
686 0 : rcu_read_lock_bh();
687 :
688 0 : n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
689 0 : (__force u32)nh->fib_nh_gw4);
690 0 : if (n)
691 0 : state = n->nud_state;
692 :
693 0 : rcu_read_unlock_bh();
694 :
695 0 : return !!(state & NUD_VALID);
696 : }
697 :
698 0 : static struct nexthop *nexthop_select_path_mp(struct nh_group *nhg, int hash)
699 : {
700 0 : struct nexthop *rc = NULL;
701 0 : int i;
702 :
703 0 : for (i = 0; i < nhg->num_nh; ++i) {
704 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
705 0 : struct nh_info *nhi;
706 :
707 0 : if (hash > atomic_read(&nhge->mpath.upper_bound))
708 0 : continue;
709 :
710 0 : nhi = rcu_dereference(nhge->nh->nh_info);
711 0 : if (nhi->fdb_nh)
712 0 : return nhge->nh;
713 :
714 : /* nexthops always check if it is good and does
715 : * not rely on a sysctl for this behavior
716 : */
717 0 : switch (nhi->family) {
718 0 : case AF_INET:
719 0 : if (ipv4_good_nh(&nhi->fib_nh))
720 0 : return nhge->nh;
721 : break;
722 0 : case AF_INET6:
723 0 : if (ipv6_good_nh(&nhi->fib6_nh))
724 0 : return nhge->nh;
725 : break;
726 : }
727 :
728 0 : if (!rc)
729 0 : rc = nhge->nh;
730 : }
731 :
732 : return rc;
733 : }
734 :
735 0 : struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
736 : {
737 0 : struct nh_group *nhg;
738 :
739 0 : if (!nh->is_group)
740 : return nh;
741 :
742 0 : nhg = rcu_dereference(nh->nh_grp);
743 0 : if (nhg->mpath)
744 0 : return nexthop_select_path_mp(nhg, hash);
745 :
746 : /* Unreachable. */
747 : return NULL;
748 : }
749 : EXPORT_SYMBOL_GPL(nexthop_select_path);
750 :
751 0 : int nexthop_for_each_fib6_nh(struct nexthop *nh,
752 : int (*cb)(struct fib6_nh *nh, void *arg),
753 : void *arg)
754 : {
755 0 : struct nh_info *nhi;
756 0 : int err;
757 :
758 0 : if (nh->is_group) {
759 0 : struct nh_group *nhg;
760 0 : int i;
761 :
762 0 : nhg = rcu_dereference_rtnl(nh->nh_grp);
763 0 : for (i = 0; i < nhg->num_nh; i++) {
764 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
765 :
766 0 : nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
767 0 : err = cb(&nhi->fib6_nh, arg);
768 0 : if (err)
769 0 : return err;
770 : }
771 : } else {
772 0 : nhi = rcu_dereference_rtnl(nh->nh_info);
773 0 : err = cb(&nhi->fib6_nh, arg);
774 0 : if (err)
775 0 : return err;
776 : }
777 :
778 : return 0;
779 : }
780 : EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
781 :
782 0 : static int check_src_addr(const struct in6_addr *saddr,
783 : struct netlink_ext_ack *extack)
784 : {
785 0 : if (!ipv6_addr_any(saddr)) {
786 0 : NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
787 0 : return -EINVAL;
788 : }
789 : return 0;
790 : }
791 :
792 0 : int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
793 : struct netlink_ext_ack *extack)
794 : {
795 0 : struct nh_info *nhi;
796 0 : bool is_fdb_nh;
797 :
798 : /* fib6_src is unique to a fib6_info and limits the ability to cache
799 : * routes in fib6_nh within a nexthop that is potentially shared
800 : * across multiple fib entries. If the config wants to use source
801 : * routing it can not use nexthop objects. mlxsw also does not allow
802 : * fib6_src on routes.
803 : */
804 0 : if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
805 : return -EINVAL;
806 :
807 0 : if (nh->is_group) {
808 0 : struct nh_group *nhg;
809 :
810 0 : nhg = rtnl_dereference(nh->nh_grp);
811 0 : if (nhg->has_v4)
812 0 : goto no_v4_nh;
813 0 : is_fdb_nh = nhg->fdb_nh;
814 : } else {
815 0 : nhi = rtnl_dereference(nh->nh_info);
816 0 : if (nhi->family == AF_INET)
817 0 : goto no_v4_nh;
818 0 : is_fdb_nh = nhi->fdb_nh;
819 : }
820 :
821 0 : if (is_fdb_nh) {
822 0 : NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
823 0 : return -EINVAL;
824 : }
825 :
826 : return 0;
827 0 : no_v4_nh:
828 0 : NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
829 : return -EINVAL;
830 : }
831 : EXPORT_SYMBOL_GPL(fib6_check_nexthop);
832 :
833 : /* if existing nexthop has ipv6 routes linked to it, need
834 : * to verify this new spec works with ipv6
835 : */
836 0 : static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
837 : struct netlink_ext_ack *extack)
838 : {
839 0 : struct fib6_info *f6i;
840 :
841 0 : if (list_empty(&old->f6i_list))
842 : return 0;
843 :
844 0 : list_for_each_entry(f6i, &old->f6i_list, nh_list) {
845 0 : if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
846 : return -EINVAL;
847 : }
848 :
849 0 : return fib6_check_nexthop(new, NULL, extack);
850 : }
851 :
852 0 : static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
853 : struct netlink_ext_ack *extack)
854 : {
855 0 : if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
856 0 : NL_SET_ERR_MSG(extack,
857 : "Route with host scope can not have a gateway");
858 0 : return -EINVAL;
859 : }
860 :
861 0 : if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
862 0 : NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
863 0 : return -EINVAL;
864 : }
865 :
866 : return 0;
867 : }
868 :
869 : /* Invoked by fib add code to verify nexthop by id is ok with
870 : * config for prefix; parts of fib_check_nh not done when nexthop
871 : * object is used.
872 : */
873 0 : int fib_check_nexthop(struct nexthop *nh, u8 scope,
874 : struct netlink_ext_ack *extack)
875 : {
876 0 : struct nh_info *nhi;
877 0 : int err = 0;
878 :
879 0 : if (nh->is_group) {
880 0 : struct nh_group *nhg;
881 :
882 0 : nhg = rtnl_dereference(nh->nh_grp);
883 0 : if (nhg->fdb_nh) {
884 0 : NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
885 0 : err = -EINVAL;
886 0 : goto out;
887 : }
888 :
889 0 : if (scope == RT_SCOPE_HOST) {
890 0 : NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
891 0 : err = -EINVAL;
892 0 : goto out;
893 : }
894 :
895 : /* all nexthops in a group have the same scope */
896 0 : nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
897 0 : err = nexthop_check_scope(nhi, scope, extack);
898 : } else {
899 0 : nhi = rtnl_dereference(nh->nh_info);
900 0 : if (nhi->fdb_nh) {
901 0 : NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
902 0 : err = -EINVAL;
903 0 : goto out;
904 : }
905 0 : err = nexthop_check_scope(nhi, scope, extack);
906 : }
907 :
908 0 : out:
909 0 : return err;
910 : }
911 :
912 0 : static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
913 : struct netlink_ext_ack *extack)
914 : {
915 0 : struct fib_info *fi;
916 :
917 0 : list_for_each_entry(fi, &old->fi_list, nh_list) {
918 0 : int err;
919 :
920 0 : err = fib_check_nexthop(new, fi->fib_scope, extack);
921 0 : if (err)
922 0 : return err;
923 : }
924 : return 0;
925 : }
926 :
927 0 : static void nh_group_rebalance(struct nh_group *nhg)
928 : {
929 0 : int total = 0;
930 0 : int w = 0;
931 0 : int i;
932 :
933 0 : for (i = 0; i < nhg->num_nh; ++i)
934 0 : total += nhg->nh_entries[i].weight;
935 :
936 0 : for (i = 0; i < nhg->num_nh; ++i) {
937 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
938 0 : int upper_bound;
939 :
940 0 : w += nhge->weight;
941 0 : upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
942 0 : atomic_set(&nhge->mpath.upper_bound, upper_bound);
943 : }
944 0 : }
945 :
946 0 : static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
947 : struct nl_info *nlinfo)
948 : {
949 0 : struct nh_grp_entry *nhges, *new_nhges;
950 0 : struct nexthop *nhp = nhge->nh_parent;
951 0 : struct netlink_ext_ack extack;
952 0 : struct nexthop *nh = nhge->nh;
953 0 : struct nh_group *nhg, *newg;
954 0 : int i, j, err;
955 :
956 0 : WARN_ON(!nh);
957 :
958 0 : nhg = rtnl_dereference(nhp->nh_grp);
959 0 : newg = nhg->spare;
960 :
961 : /* last entry, keep it visible and remove the parent */
962 0 : if (nhg->num_nh == 1) {
963 0 : remove_nexthop(net, nhp, nlinfo);
964 0 : return;
965 : }
966 :
967 0 : newg->has_v4 = false;
968 0 : newg->mpath = nhg->mpath;
969 0 : newg->fdb_nh = nhg->fdb_nh;
970 0 : newg->num_nh = nhg->num_nh;
971 :
972 : /* copy old entries to new except the one getting removed */
973 0 : nhges = nhg->nh_entries;
974 0 : new_nhges = newg->nh_entries;
975 0 : for (i = 0, j = 0; i < nhg->num_nh; ++i) {
976 0 : struct nh_info *nhi;
977 :
978 : /* current nexthop getting removed */
979 0 : if (nhg->nh_entries[i].nh == nh) {
980 0 : newg->num_nh--;
981 0 : continue;
982 : }
983 :
984 0 : nhi = rtnl_dereference(nhges[i].nh->nh_info);
985 0 : if (nhi->family == AF_INET)
986 0 : newg->has_v4 = true;
987 :
988 0 : list_del(&nhges[i].nh_list);
989 0 : new_nhges[j].nh_parent = nhges[i].nh_parent;
990 0 : new_nhges[j].nh = nhges[i].nh;
991 0 : new_nhges[j].weight = nhges[i].weight;
992 0 : list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
993 0 : j++;
994 : }
995 :
996 0 : nh_group_rebalance(newg);
997 0 : rcu_assign_pointer(nhp->nh_grp, newg);
998 :
999 0 : list_del(&nhge->nh_list);
1000 0 : nexthop_put(nhge->nh);
1001 :
1002 0 : err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, &extack);
1003 0 : if (err)
1004 0 : pr_err("%s\n", extack._msg);
1005 :
1006 0 : if (nlinfo)
1007 0 : nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
1008 : }
1009 :
1010 0 : static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
1011 : struct nl_info *nlinfo)
1012 : {
1013 0 : struct nh_grp_entry *nhge, *tmp;
1014 :
1015 0 : list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
1016 0 : remove_nh_grp_entry(net, nhge, nlinfo);
1017 :
1018 : /* make sure all see the newly published array before releasing rtnl */
1019 0 : synchronize_net();
1020 0 : }
1021 :
1022 0 : static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
1023 : {
1024 0 : struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
1025 0 : int i, num_nh = nhg->num_nh;
1026 :
1027 0 : for (i = 0; i < num_nh; ++i) {
1028 0 : struct nh_grp_entry *nhge = &nhg->nh_entries[i];
1029 :
1030 0 : if (WARN_ON(!nhge->nh))
1031 0 : continue;
1032 :
1033 0 : list_del_init(&nhge->nh_list);
1034 : }
1035 0 : }
1036 :
1037 : /* not called for nexthop replace */
1038 0 : static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
1039 : {
1040 0 : struct fib6_info *f6i, *tmp;
1041 0 : bool do_flush = false;
1042 0 : struct fib_info *fi;
1043 :
1044 0 : list_for_each_entry(fi, &nh->fi_list, nh_list) {
1045 0 : fi->fib_flags |= RTNH_F_DEAD;
1046 0 : do_flush = true;
1047 : }
1048 0 : if (do_flush)
1049 0 : fib_flush(net);
1050 :
1051 : /* ip6_del_rt removes the entry from this list hence the _safe */
1052 0 : list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
1053 : /* __ip6_del_rt does a release, so do a hold here */
1054 0 : fib6_info_hold(f6i);
1055 0 : ipv6_stub->ip6_del_rt(net, f6i,
1056 0 : !net->ipv4.sysctl_nexthop_compat_mode);
1057 : }
1058 0 : }
1059 :
1060 0 : static void __remove_nexthop(struct net *net, struct nexthop *nh,
1061 : struct nl_info *nlinfo)
1062 : {
1063 0 : __remove_nexthop_fib(net, nh);
1064 :
1065 0 : if (nh->is_group) {
1066 0 : remove_nexthop_group(nh, nlinfo);
1067 : } else {
1068 0 : struct nh_info *nhi;
1069 :
1070 0 : nhi = rtnl_dereference(nh->nh_info);
1071 0 : if (nhi->fib_nhc.nhc_dev)
1072 0 : hlist_del(&nhi->dev_hash);
1073 :
1074 0 : remove_nexthop_from_groups(net, nh, nlinfo);
1075 : }
1076 0 : }
1077 :
1078 0 : static void remove_nexthop(struct net *net, struct nexthop *nh,
1079 : struct nl_info *nlinfo)
1080 : {
1081 0 : call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);
1082 :
1083 : /* remove from the tree */
1084 0 : rb_erase(&nh->rb_node, &net->nexthop.rb_root);
1085 :
1086 0 : if (nlinfo)
1087 0 : nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
1088 :
1089 0 : __remove_nexthop(net, nh, nlinfo);
1090 0 : nh_base_seq_inc(net);
1091 :
1092 0 : nexthop_put(nh);
1093 0 : }
1094 :
1095 : /* if any FIB entries reference this nexthop, any dst entries
1096 : * need to be regenerated
1097 : */
1098 0 : static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
1099 : {
1100 0 : struct fib6_info *f6i;
1101 :
1102 0 : if (!list_empty(&nh->fi_list))
1103 0 : rt_cache_flush(net);
1104 :
1105 0 : list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1106 0 : ipv6_stub->fib6_update_sernum(net, f6i);
1107 0 : }
1108 :
1109 0 : static int replace_nexthop_grp(struct net *net, struct nexthop *old,
1110 : struct nexthop *new,
1111 : struct netlink_ext_ack *extack)
1112 : {
1113 0 : struct nh_group *oldg, *newg;
1114 0 : int i, err;
1115 :
1116 0 : if (!new->is_group) {
1117 0 : NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
1118 0 : return -EINVAL;
1119 : }
1120 :
1121 0 : err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1122 0 : if (err)
1123 : return err;
1124 :
1125 0 : oldg = rtnl_dereference(old->nh_grp);
1126 0 : newg = rtnl_dereference(new->nh_grp);
1127 :
1128 : /* update parents - used by nexthop code for cleanup */
1129 0 : for (i = 0; i < newg->num_nh; i++)
1130 0 : newg->nh_entries[i].nh_parent = old;
1131 :
1132 0 : rcu_assign_pointer(old->nh_grp, newg);
1133 :
1134 0 : for (i = 0; i < oldg->num_nh; i++)
1135 0 : oldg->nh_entries[i].nh_parent = new;
1136 :
1137 0 : rcu_assign_pointer(new->nh_grp, oldg);
1138 :
1139 0 : return 0;
1140 : }
1141 :
1142 0 : static void nh_group_v4_update(struct nh_group *nhg)
1143 : {
1144 0 : struct nh_grp_entry *nhges;
1145 0 : bool has_v4 = false;
1146 0 : int i;
1147 :
1148 0 : nhges = nhg->nh_entries;
1149 0 : for (i = 0; i < nhg->num_nh; i++) {
1150 0 : struct nh_info *nhi;
1151 :
1152 0 : nhi = rtnl_dereference(nhges[i].nh->nh_info);
1153 0 : if (nhi->family == AF_INET)
1154 0 : has_v4 = true;
1155 : }
1156 0 : nhg->has_v4 = has_v4;
1157 0 : }
1158 :
1159 0 : static int replace_nexthop_single(struct net *net, struct nexthop *old,
1160 : struct nexthop *new,
1161 : struct netlink_ext_ack *extack)
1162 : {
1163 0 : u8 old_protocol, old_nh_flags;
1164 0 : struct nh_info *oldi, *newi;
1165 0 : struct nh_grp_entry *nhge;
1166 0 : int err;
1167 :
1168 0 : if (new->is_group) {
1169 0 : NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
1170 0 : return -EINVAL;
1171 : }
1172 :
1173 0 : err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
1174 0 : if (err)
1175 : return err;
1176 :
1177 : /* Hardware flags were set on 'old' as 'new' is not in the red-black
1178 : * tree. Therefore, inherit the flags from 'old' to 'new'.
1179 : */
1180 0 : new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);
1181 :
1182 0 : oldi = rtnl_dereference(old->nh_info);
1183 0 : newi = rtnl_dereference(new->nh_info);
1184 :
1185 0 : newi->nh_parent = old;
1186 0 : oldi->nh_parent = new;
1187 :
1188 0 : old_protocol = old->protocol;
1189 0 : old_nh_flags = old->nh_flags;
1190 :
1191 0 : old->protocol = new->protocol;
1192 0 : old->nh_flags = new->nh_flags;
1193 :
1194 0 : rcu_assign_pointer(old->nh_info, newi);
1195 0 : rcu_assign_pointer(new->nh_info, oldi);
1196 :
1197 : /* Send a replace notification for all the groups using the nexthop. */
1198 0 : list_for_each_entry(nhge, &old->grp_list, nh_list) {
1199 0 : struct nexthop *nhp = nhge->nh_parent;
1200 :
1201 0 : err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp,
1202 : extack);
1203 0 : if (err)
1204 0 : goto err_notify;
1205 : }
1206 :
1207 : /* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
1208 : * update IPv4 indication in all the groups using the nexthop.
1209 : */
1210 0 : if (oldi->family == AF_INET && newi->family == AF_INET6) {
1211 0 : list_for_each_entry(nhge, &old->grp_list, nh_list) {
1212 0 : struct nexthop *nhp = nhge->nh_parent;
1213 0 : struct nh_group *nhg;
1214 :
1215 0 : nhg = rtnl_dereference(nhp->nh_grp);
1216 0 : nh_group_v4_update(nhg);
1217 : }
1218 : }
1219 :
1220 : return 0;
1221 :
1222 0 : err_notify:
1223 0 : rcu_assign_pointer(new->nh_info, newi);
1224 0 : rcu_assign_pointer(old->nh_info, oldi);
1225 0 : old->nh_flags = old_nh_flags;
1226 0 : old->protocol = old_protocol;
1227 0 : oldi->nh_parent = old;
1228 0 : newi->nh_parent = new;
1229 0 : list_for_each_entry_continue_reverse(nhge, &old->grp_list, nh_list) {
1230 0 : struct nexthop *nhp = nhge->nh_parent;
1231 :
1232 0 : call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, nhp, extack);
1233 : }
1234 0 : call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, old, extack);
1235 0 : return err;
1236 : }
1237 :
1238 0 : static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
1239 : struct nl_info *info)
1240 : {
1241 0 : struct fib6_info *f6i;
1242 :
1243 0 : if (!list_empty(&nh->fi_list)) {
1244 0 : struct fib_info *fi;
1245 :
1246 : /* expectation is a few fib_info per nexthop and then
1247 : * a lot of routes per fib_info. So mark the fib_info
1248 : * and then walk the fib tables once
1249 : */
1250 0 : list_for_each_entry(fi, &nh->fi_list, nh_list)
1251 0 : fi->nh_updated = true;
1252 :
1253 0 : fib_info_notify_update(net, info);
1254 :
1255 0 : list_for_each_entry(fi, &nh->fi_list, nh_list)
1256 0 : fi->nh_updated = false;
1257 : }
1258 :
1259 0 : list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1260 0 : ipv6_stub->fib6_rt_update(net, f6i, info);
1261 0 : }
1262 :
1263 : /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
1264 : * linked to this nexthop and for all groups that the nexthop
1265 : * is a member of
1266 : */
1267 0 : static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
1268 : struct nl_info *info)
1269 : {
1270 0 : struct nh_grp_entry *nhge;
1271 :
1272 0 : __nexthop_replace_notify(net, nh, info);
1273 :
1274 0 : list_for_each_entry(nhge, &nh->grp_list, nh_list)
1275 0 : __nexthop_replace_notify(net, nhge->nh_parent, info);
1276 0 : }
1277 :
1278 0 : static int replace_nexthop(struct net *net, struct nexthop *old,
1279 : struct nexthop *new, struct netlink_ext_ack *extack)
1280 : {
1281 0 : bool new_is_reject = false;
1282 0 : struct nh_grp_entry *nhge;
1283 0 : int err;
1284 :
1285 : /* check that existing FIB entries are ok with the
1286 : * new nexthop definition
1287 : */
1288 0 : err = fib_check_nh_list(old, new, extack);
1289 0 : if (err)
1290 : return err;
1291 :
1292 0 : err = fib6_check_nh_list(old, new, extack);
1293 0 : if (err)
1294 : return err;
1295 :
1296 0 : if (!new->is_group) {
1297 0 : struct nh_info *nhi = rtnl_dereference(new->nh_info);
1298 :
1299 0 : new_is_reject = nhi->reject_nh;
1300 : }
1301 :
1302 0 : list_for_each_entry(nhge, &old->grp_list, nh_list) {
1303 : /* if new nexthop is a blackhole, any groups using this
1304 : * nexthop cannot have more than 1 path
1305 : */
1306 0 : if (new_is_reject &&
1307 0 : nexthop_num_path(nhge->nh_parent) > 1) {
1308 0 : NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
1309 0 : return -EINVAL;
1310 : }
1311 :
1312 0 : err = fib_check_nh_list(nhge->nh_parent, new, extack);
1313 0 : if (err)
1314 0 : return err;
1315 :
1316 0 : err = fib6_check_nh_list(nhge->nh_parent, new, extack);
1317 0 : if (err)
1318 0 : return err;
1319 : }
1320 :
1321 0 : if (old->is_group)
1322 0 : err = replace_nexthop_grp(net, old, new, extack);
1323 : else
1324 0 : err = replace_nexthop_single(net, old, new, extack);
1325 :
1326 0 : if (!err) {
1327 0 : nh_rt_cache_flush(net, old);
1328 :
1329 0 : __remove_nexthop(net, new, NULL);
1330 0 : nexthop_put(new);
1331 : }
1332 :
1333 : return err;
1334 : }
1335 :
1336 : /* called with rtnl_lock held */
1337 0 : static int insert_nexthop(struct net *net, struct nexthop *new_nh,
1338 : struct nh_config *cfg, struct netlink_ext_ack *extack)
1339 : {
1340 0 : struct rb_node **pp, *parent = NULL, *next;
1341 0 : struct rb_root *root = &net->nexthop.rb_root;
1342 0 : bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
1343 0 : bool create = !!(cfg->nlflags & NLM_F_CREATE);
1344 0 : u32 new_id = new_nh->id;
1345 0 : int replace_notify = 0;
1346 0 : int rc = -EEXIST;
1347 :
1348 0 : pp = &root->rb_node;
1349 0 : while (1) {
1350 0 : struct nexthop *nh;
1351 :
1352 0 : next = *pp;
1353 0 : if (!next)
1354 : break;
1355 :
1356 0 : parent = next;
1357 :
1358 0 : nh = rb_entry(parent, struct nexthop, rb_node);
1359 0 : if (new_id < nh->id) {
1360 0 : pp = &next->rb_left;
1361 0 : } else if (new_id > nh->id) {
1362 0 : pp = &next->rb_right;
1363 0 : } else if (replace) {
1364 0 : rc = replace_nexthop(net, nh, new_nh, extack);
1365 0 : if (!rc) {
1366 0 : new_nh = nh; /* send notification with old nh */
1367 0 : replace_notify = 1;
1368 : }
1369 0 : goto out;
1370 : } else {
1371 : /* id already exists and not a replace */
1372 0 : goto out;
1373 : }
1374 : }
1375 :
1376 0 : if (replace && !create) {
1377 0 : NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
1378 0 : rc = -ENOENT;
1379 0 : goto out;
1380 : }
1381 :
1382 0 : rb_link_node_rcu(&new_nh->rb_node, parent, pp);
1383 0 : rb_insert_color(&new_nh->rb_node, root);
1384 :
1385 0 : rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
1386 0 : if (rc)
1387 0 : rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
1388 :
1389 0 : out:
1390 0 : if (!rc) {
1391 0 : nh_base_seq_inc(net);
1392 0 : nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
1393 0 : if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
1394 0 : nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
1395 : }
1396 :
1397 0 : return rc;
1398 : }
1399 :
1400 : /* rtnl */
1401 : /* remove all nexthops tied to a device being deleted */
1402 0 : static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
1403 : {
1404 0 : unsigned int hash = nh_dev_hashfn(dev->ifindex);
1405 0 : struct net *net = dev_net(dev);
1406 0 : struct hlist_head *head = &net->nexthop.devhash[hash];
1407 0 : struct hlist_node *n;
1408 0 : struct nh_info *nhi;
1409 :
1410 0 : hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
1411 0 : if (nhi->fib_nhc.nhc_dev != dev)
1412 0 : continue;
1413 :
1414 0 : if (nhi->reject_nh &&
1415 0 : (event == NETDEV_DOWN || event == NETDEV_CHANGE))
1416 0 : continue;
1417 :
1418 0 : remove_nexthop(net, nhi->nh_parent, NULL);
1419 : }
1420 0 : }
1421 :
1422 : /* rtnl; called when net namespace is deleted */
1423 0 : static void flush_all_nexthops(struct net *net)
1424 : {
1425 0 : struct rb_root *root = &net->nexthop.rb_root;
1426 0 : struct rb_node *node;
1427 0 : struct nexthop *nh;
1428 :
1429 0 : while ((node = rb_first(root))) {
1430 0 : nh = rb_entry(node, struct nexthop, rb_node);
1431 0 : remove_nexthop(net, nh, NULL);
1432 0 : cond_resched();
1433 : }
1434 0 : }
1435 :
1436 0 : static struct nexthop *nexthop_create_group(struct net *net,
1437 : struct nh_config *cfg)
1438 : {
1439 0 : struct nlattr *grps_attr = cfg->nh_grp;
1440 0 : struct nexthop_grp *entry = nla_data(grps_attr);
1441 0 : u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
1442 0 : struct nh_group *nhg;
1443 0 : struct nexthop *nh;
1444 0 : int i;
1445 :
1446 0 : if (WARN_ON(!num_nh))
1447 0 : return ERR_PTR(-EINVAL);
1448 :
1449 0 : nh = nexthop_alloc();
1450 0 : if (!nh)
1451 0 : return ERR_PTR(-ENOMEM);
1452 :
1453 0 : nh->is_group = 1;
1454 :
1455 0 : nhg = nexthop_grp_alloc(num_nh);
1456 0 : if (!nhg) {
1457 0 : kfree(nh);
1458 0 : return ERR_PTR(-ENOMEM);
1459 : }
1460 :
1461 : /* spare group used for removals */
1462 0 : nhg->spare = nexthop_grp_alloc(num_nh);
1463 0 : if (!nhg->spare) {
1464 0 : kfree(nhg);
1465 0 : kfree(nh);
1466 0 : return ERR_PTR(-ENOMEM);
1467 : }
1468 0 : nhg->spare->spare = nhg;
1469 :
1470 0 : for (i = 0; i < nhg->num_nh; ++i) {
1471 0 : struct nexthop *nhe;
1472 0 : struct nh_info *nhi;
1473 :
1474 0 : nhe = nexthop_find_by_id(net, entry[i].id);
1475 0 : if (!nexthop_get(nhe))
1476 0 : goto out_no_nh;
1477 :
1478 0 : nhi = rtnl_dereference(nhe->nh_info);
1479 0 : if (nhi->family == AF_INET)
1480 0 : nhg->has_v4 = true;
1481 :
1482 0 : nhg->nh_entries[i].nh = nhe;
1483 0 : nhg->nh_entries[i].weight = entry[i].weight + 1;
1484 0 : list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
1485 0 : nhg->nh_entries[i].nh_parent = nh;
1486 : }
1487 :
1488 0 : if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH)
1489 0 : nhg->mpath = 1;
1490 :
1491 0 : WARN_ON_ONCE(nhg->mpath != 1);
1492 :
1493 0 : if (nhg->mpath)
1494 0 : nh_group_rebalance(nhg);
1495 :
1496 0 : if (cfg->nh_fdb)
1497 0 : nhg->fdb_nh = 1;
1498 :
1499 0 : rcu_assign_pointer(nh->nh_grp, nhg);
1500 :
1501 0 : return nh;
1502 :
1503 0 : out_no_nh:
1504 0 : for (i--; i >= 0; --i) {
1505 0 : list_del(&nhg->nh_entries[i].nh_list);
1506 0 : nexthop_put(nhg->nh_entries[i].nh);
1507 : }
1508 :
1509 0 : kfree(nhg->spare);
1510 0 : kfree(nhg);
1511 0 : kfree(nh);
1512 :
1513 0 : return ERR_PTR(-ENOENT);
1514 : }
1515 :
1516 0 : static int nh_create_ipv4(struct net *net, struct nexthop *nh,
1517 : struct nh_info *nhi, struct nh_config *cfg,
1518 : struct netlink_ext_ack *extack)
1519 : {
1520 0 : struct fib_nh *fib_nh = &nhi->fib_nh;
1521 0 : struct fib_config fib_cfg = {
1522 0 : .fc_oif = cfg->nh_ifindex,
1523 : .fc_gw4 = cfg->gw.ipv4,
1524 0 : .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
1525 0 : .fc_flags = cfg->nh_flags,
1526 0 : .fc_encap = cfg->nh_encap,
1527 0 : .fc_encap_type = cfg->nh_encap_type,
1528 : };
1529 0 : u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
1530 0 : int err;
1531 :
1532 0 : err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
1533 0 : if (err) {
1534 0 : fib_nh_release(net, fib_nh);
1535 0 : goto out;
1536 : }
1537 :
1538 0 : if (nhi->fdb_nh)
1539 0 : goto out;
1540 :
1541 : /* sets nh_dev if successful */
1542 0 : err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
1543 0 : if (!err) {
1544 0 : nh->nh_flags = fib_nh->fib_nh_flags;
1545 0 : fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
1546 0 : fib_nh->fib_nh_scope);
1547 : } else {
1548 0 : fib_nh_release(net, fib_nh);
1549 : }
1550 0 : out:
1551 0 : return err;
1552 : }
1553 :
1554 0 : static int nh_create_ipv6(struct net *net, struct nexthop *nh,
1555 : struct nh_info *nhi, struct nh_config *cfg,
1556 : struct netlink_ext_ack *extack)
1557 : {
1558 0 : struct fib6_nh *fib6_nh = &nhi->fib6_nh;
1559 0 : struct fib6_config fib6_cfg = {
1560 0 : .fc_table = l3mdev_fib_table(cfg->dev),
1561 0 : .fc_ifindex = cfg->nh_ifindex,
1562 : .fc_gateway = cfg->gw.ipv6,
1563 0 : .fc_flags = cfg->nh_flags,
1564 0 : .fc_encap = cfg->nh_encap,
1565 0 : .fc_encap_type = cfg->nh_encap_type,
1566 0 : .fc_is_fdb = cfg->nh_fdb,
1567 : };
1568 0 : int err;
1569 :
1570 0 : if (!ipv6_addr_any(&cfg->gw.ipv6))
1571 0 : fib6_cfg.fc_flags |= RTF_GATEWAY;
1572 :
1573 : /* sets nh_dev if successful */
1574 0 : err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
1575 : extack);
1576 0 : if (err)
1577 0 : ipv6_stub->fib6_nh_release(fib6_nh);
1578 : else
1579 0 : nh->nh_flags = fib6_nh->fib_nh_flags;
1580 :
1581 0 : return err;
1582 : }
1583 :
1584 0 : static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
1585 : struct netlink_ext_ack *extack)
1586 : {
1587 0 : struct nh_info *nhi;
1588 0 : struct nexthop *nh;
1589 0 : int err = 0;
1590 :
1591 0 : nh = nexthop_alloc();
1592 0 : if (!nh)
1593 0 : return ERR_PTR(-ENOMEM);
1594 :
1595 0 : nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
1596 0 : if (!nhi) {
1597 0 : kfree(nh);
1598 0 : return ERR_PTR(-ENOMEM);
1599 : }
1600 :
1601 0 : nh->nh_flags = cfg->nh_flags;
1602 0 : nh->net = net;
1603 :
1604 0 : nhi->nh_parent = nh;
1605 0 : nhi->family = cfg->nh_family;
1606 0 : nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
1607 :
1608 0 : if (cfg->nh_fdb)
1609 0 : nhi->fdb_nh = 1;
1610 :
1611 0 : if (cfg->nh_blackhole) {
1612 0 : nhi->reject_nh = 1;
1613 0 : cfg->nh_ifindex = net->loopback_dev->ifindex;
1614 : }
1615 :
1616 0 : switch (cfg->nh_family) {
1617 0 : case AF_INET:
1618 0 : err = nh_create_ipv4(net, nh, nhi, cfg, extack);
1619 0 : break;
1620 0 : case AF_INET6:
1621 0 : err = nh_create_ipv6(net, nh, nhi, cfg, extack);
1622 0 : break;
1623 : }
1624 :
1625 0 : if (err) {
1626 0 : kfree(nhi);
1627 0 : kfree(nh);
1628 0 : return ERR_PTR(err);
1629 : }
1630 :
1631 : /* add the entry to the device based hash */
1632 0 : if (!nhi->fdb_nh)
1633 0 : nexthop_devhash_add(net, nhi);
1634 :
1635 0 : rcu_assign_pointer(nh->nh_info, nhi);
1636 :
1637 0 : return nh;
1638 : }
1639 :
1640 : /* called with rtnl lock held */
1641 0 : static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
1642 : struct netlink_ext_ack *extack)
1643 : {
1644 0 : struct nexthop *nh;
1645 0 : int err;
1646 :
1647 0 : if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
1648 0 : NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
1649 0 : return ERR_PTR(-EINVAL);
1650 : }
1651 :
1652 0 : if (!cfg->nh_id) {
1653 0 : cfg->nh_id = nh_find_unused_id(net);
1654 0 : if (!cfg->nh_id) {
1655 0 : NL_SET_ERR_MSG(extack, "No unused id");
1656 0 : return ERR_PTR(-EINVAL);
1657 : }
1658 : }
1659 :
1660 0 : if (cfg->nh_grp)
1661 0 : nh = nexthop_create_group(net, cfg);
1662 : else
1663 0 : nh = nexthop_create(net, cfg, extack);
1664 :
1665 0 : if (IS_ERR(nh))
1666 : return nh;
1667 :
1668 0 : refcount_set(&nh->refcnt, 1);
1669 0 : nh->id = cfg->nh_id;
1670 0 : nh->protocol = cfg->nh_protocol;
1671 0 : nh->net = net;
1672 :
1673 0 : err = insert_nexthop(net, nh, cfg, extack);
1674 0 : if (err) {
1675 0 : __remove_nexthop(net, nh, NULL);
1676 0 : nexthop_put(nh);
1677 0 : nh = ERR_PTR(err);
1678 : }
1679 :
1680 : return nh;
1681 : }
1682 :
1683 0 : static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
1684 : struct nlmsghdr *nlh, struct nh_config *cfg,
1685 : struct netlink_ext_ack *extack)
1686 : {
1687 0 : struct nhmsg *nhm = nlmsg_data(nlh);
1688 0 : struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_new)];
1689 0 : int err;
1690 :
1691 0 : err = nlmsg_parse(nlh, sizeof(*nhm), tb,
1692 : ARRAY_SIZE(rtm_nh_policy_new) - 1,
1693 : rtm_nh_policy_new, extack);
1694 0 : if (err < 0)
1695 : return err;
1696 :
1697 0 : err = -EINVAL;
1698 0 : if (nhm->resvd || nhm->nh_scope) {
1699 0 : NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
1700 0 : goto out;
1701 : }
1702 0 : if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
1703 0 : NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
1704 0 : goto out;
1705 : }
1706 :
1707 0 : switch (nhm->nh_family) {
1708 : case AF_INET:
1709 : case AF_INET6:
1710 : break;
1711 0 : case AF_UNSPEC:
1712 0 : if (tb[NHA_GROUP])
1713 : break;
1714 0 : fallthrough;
1715 : default:
1716 0 : NL_SET_ERR_MSG(extack, "Invalid address family");
1717 0 : goto out;
1718 : }
1719 :
1720 0 : memset(cfg, 0, sizeof(*cfg));
1721 0 : cfg->nlflags = nlh->nlmsg_flags;
1722 0 : cfg->nlinfo.portid = NETLINK_CB(skb).portid;
1723 0 : cfg->nlinfo.nlh = nlh;
1724 0 : cfg->nlinfo.nl_net = net;
1725 :
1726 0 : cfg->nh_family = nhm->nh_family;
1727 0 : cfg->nh_protocol = nhm->nh_protocol;
1728 0 : cfg->nh_flags = nhm->nh_flags;
1729 :
1730 0 : if (tb[NHA_ID])
1731 0 : cfg->nh_id = nla_get_u32(tb[NHA_ID]);
1732 :
1733 0 : if (tb[NHA_FDB]) {
1734 0 : if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
1735 0 : tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
1736 0 : NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
1737 0 : goto out;
1738 : }
1739 0 : if (nhm->nh_flags) {
1740 0 : NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
1741 0 : goto out;
1742 : }
1743 0 : cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
1744 : }
1745 :
1746 0 : if (tb[NHA_GROUP]) {
1747 0 : if (nhm->nh_family != AF_UNSPEC) {
1748 0 : NL_SET_ERR_MSG(extack, "Invalid family for group");
1749 0 : goto out;
1750 : }
1751 0 : cfg->nh_grp = tb[NHA_GROUP];
1752 :
1753 0 : cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
1754 0 : if (tb[NHA_GROUP_TYPE])
1755 0 : cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
1756 :
1757 0 : if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
1758 0 : NL_SET_ERR_MSG(extack, "Invalid group type");
1759 0 : goto out;
1760 : }
1761 0 : err = nh_check_attr_group(net, tb, ARRAY_SIZE(tb), extack);
1762 :
1763 : /* no other attributes should be set */
1764 0 : goto out;
1765 : }
1766 :
1767 0 : if (tb[NHA_BLACKHOLE]) {
1768 0 : if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
1769 0 : tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
1770 0 : NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
1771 0 : goto out;
1772 : }
1773 :
1774 0 : cfg->nh_blackhole = 1;
1775 0 : err = 0;
1776 0 : goto out;
1777 : }
1778 :
1779 0 : if (!cfg->nh_fdb && !tb[NHA_OIF]) {
1780 0 : NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
1781 0 : goto out;
1782 : }
1783 :
1784 0 : if (!cfg->nh_fdb && tb[NHA_OIF]) {
1785 0 : cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1786 0 : if (cfg->nh_ifindex)
1787 0 : cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
1788 :
1789 0 : if (!cfg->dev) {
1790 0 : NL_SET_ERR_MSG(extack, "Invalid device index");
1791 0 : goto out;
1792 0 : } else if (!(cfg->dev->flags & IFF_UP)) {
1793 0 : NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1794 0 : err = -ENETDOWN;
1795 0 : goto out;
1796 0 : } else if (!netif_carrier_ok(cfg->dev)) {
1797 0 : NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1798 0 : err = -ENETDOWN;
1799 0 : goto out;
1800 : }
1801 : }
1802 :
1803 0 : err = -EINVAL;
1804 0 : if (tb[NHA_GATEWAY]) {
1805 0 : struct nlattr *gwa = tb[NHA_GATEWAY];
1806 :
1807 0 : switch (cfg->nh_family) {
1808 : case AF_INET:
1809 0 : if (nla_len(gwa) != sizeof(u32)) {
1810 0 : NL_SET_ERR_MSG(extack, "Invalid gateway");
1811 0 : goto out;
1812 : }
1813 0 : cfg->gw.ipv4 = nla_get_be32(gwa);
1814 0 : break;
1815 : case AF_INET6:
1816 0 : if (nla_len(gwa) != sizeof(struct in6_addr)) {
1817 0 : NL_SET_ERR_MSG(extack, "Invalid gateway");
1818 0 : goto out;
1819 : }
1820 0 : cfg->gw.ipv6 = nla_get_in6_addr(gwa);
1821 0 : break;
1822 0 : default:
1823 0 : NL_SET_ERR_MSG(extack,
1824 : "Unknown address family for gateway");
1825 0 : goto out;
1826 : }
1827 : } else {
1828 : /* device only nexthop (no gateway) */
1829 0 : if (cfg->nh_flags & RTNH_F_ONLINK) {
1830 0 : NL_SET_ERR_MSG(extack,
1831 : "ONLINK flag can not be set for nexthop without a gateway");
1832 0 : goto out;
1833 : }
1834 : }
1835 :
1836 0 : if (tb[NHA_ENCAP]) {
1837 0 : cfg->nh_encap = tb[NHA_ENCAP];
1838 :
1839 0 : if (!tb[NHA_ENCAP_TYPE]) {
1840 0 : NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
1841 0 : goto out;
1842 : }
1843 :
1844 0 : cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
1845 0 : err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
1846 0 : if (err < 0)
1847 0 : goto out;
1848 :
1849 0 : } else if (tb[NHA_ENCAP_TYPE]) {
1850 0 : NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
1851 0 : goto out;
1852 : }
1853 :
1854 :
1855 : err = 0;
1856 : out:
1857 : return err;
1858 : }
1859 :
1860 : /* rtnl */
1861 0 : static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1862 : struct netlink_ext_ack *extack)
1863 : {
1864 0 : struct net *net = sock_net(skb->sk);
1865 0 : struct nh_config cfg;
1866 0 : struct nexthop *nh;
1867 0 : int err;
1868 :
1869 0 : err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
1870 0 : if (!err) {
1871 0 : nh = nexthop_add(net, &cfg, extack);
1872 0 : if (IS_ERR(nh))
1873 0 : err = PTR_ERR(nh);
1874 : }
1875 :
1876 0 : return err;
1877 : }
1878 :
1879 0 : static int __nh_valid_get_del_req(const struct nlmsghdr *nlh,
1880 : struct nlattr **tb, u32 *id,
1881 : struct netlink_ext_ack *extack)
1882 : {
1883 0 : struct nhmsg *nhm = nlmsg_data(nlh);
1884 :
1885 0 : if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
1886 0 : NL_SET_ERR_MSG(extack, "Invalid values in header");
1887 0 : return -EINVAL;
1888 : }
1889 :
1890 0 : if (!tb[NHA_ID]) {
1891 0 : NL_SET_ERR_MSG(extack, "Nexthop id is missing");
1892 0 : return -EINVAL;
1893 : }
1894 :
1895 0 : *id = nla_get_u32(tb[NHA_ID]);
1896 0 : if (!(*id)) {
1897 0 : NL_SET_ERR_MSG(extack, "Invalid nexthop id");
1898 0 : return -EINVAL;
1899 : }
1900 :
1901 : return 0;
1902 : }
1903 :
1904 0 : static int nh_valid_get_del_req(const struct nlmsghdr *nlh, u32 *id,
1905 : struct netlink_ext_ack *extack)
1906 : {
1907 0 : struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_get)];
1908 0 : int err;
1909 :
1910 0 : err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
1911 : ARRAY_SIZE(rtm_nh_policy_get) - 1,
1912 : rtm_nh_policy_get, extack);
1913 0 : if (err < 0)
1914 : return err;
1915 :
1916 0 : return __nh_valid_get_del_req(nlh, tb, id, extack);
1917 : }
1918 :
1919 : /* rtnl */
1920 0 : static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1921 : struct netlink_ext_ack *extack)
1922 : {
1923 0 : struct net *net = sock_net(skb->sk);
1924 0 : struct nl_info nlinfo = {
1925 : .nlh = nlh,
1926 : .nl_net = net,
1927 0 : .portid = NETLINK_CB(skb).portid,
1928 : };
1929 0 : struct nexthop *nh;
1930 0 : int err;
1931 0 : u32 id;
1932 :
1933 0 : err = nh_valid_get_del_req(nlh, &id, extack);
1934 0 : if (err)
1935 : return err;
1936 :
1937 0 : nh = nexthop_find_by_id(net, id);
1938 0 : if (!nh)
1939 : return -ENOENT;
1940 :
1941 0 : remove_nexthop(net, nh, &nlinfo);
1942 :
1943 0 : return 0;
1944 : }
1945 :
1946 : /* rtnl */
1947 0 : static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
1948 : struct netlink_ext_ack *extack)
1949 : {
1950 0 : struct net *net = sock_net(in_skb->sk);
1951 0 : struct sk_buff *skb = NULL;
1952 0 : struct nexthop *nh;
1953 0 : int err;
1954 0 : u32 id;
1955 :
1956 0 : err = nh_valid_get_del_req(nlh, &id, extack);
1957 0 : if (err)
1958 : return err;
1959 :
1960 0 : err = -ENOBUFS;
1961 0 : skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1962 0 : if (!skb)
1963 0 : goto out;
1964 :
1965 0 : err = -ENOENT;
1966 0 : nh = nexthop_find_by_id(net, id);
1967 0 : if (!nh)
1968 0 : goto errout_free;
1969 :
1970 0 : err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
1971 : nlh->nlmsg_seq, 0);
1972 0 : if (err < 0) {
1973 0 : WARN_ON(err == -EMSGSIZE);
1974 0 : goto errout_free;
1975 : }
1976 :
1977 0 : err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1978 : out:
1979 : return err;
1980 0 : errout_free:
1981 0 : kfree_skb(skb);
1982 0 : goto out;
1983 : }
1984 :
/* Filters requested by a nexthop dump; filled in by __nh_valid_dump_req(). */
struct nh_dump_filter {
	int dev_idx;		/* from NHA_OIF; 0 means no device filter */
	int master_idx;		/* from NHA_MASTER; 0 means no master filter */
	bool group_filter;	/* NHA_GROUPS flag was present */
	bool fdb_filter;	/* NHA_FDB flag was present */
};
1991 :
1992 0 : static bool nh_dump_filtered(struct nexthop *nh,
1993 : struct nh_dump_filter *filter, u8 family)
1994 : {
1995 0 : const struct net_device *dev;
1996 0 : const struct nh_info *nhi;
1997 :
1998 0 : if (filter->group_filter && !nh->is_group)
1999 : return true;
2000 :
2001 0 : if (!filter->dev_idx && !filter->master_idx && !family)
2002 : return false;
2003 :
2004 0 : if (nh->is_group)
2005 : return true;
2006 :
2007 0 : nhi = rtnl_dereference(nh->nh_info);
2008 0 : if (family && nhi->family != family)
2009 : return true;
2010 :
2011 0 : dev = nhi->fib_nhc.nhc_dev;
2012 0 : if (filter->dev_idx && (!dev || dev->ifindex != filter->dev_idx))
2013 : return true;
2014 :
2015 0 : if (filter->master_idx) {
2016 0 : struct net_device *master;
2017 :
2018 0 : if (!dev)
2019 : return true;
2020 :
2021 0 : master = netdev_master_upper_dev_get((struct net_device *)dev);
2022 0 : if (!master || master->ifindex != filter->master_idx)
2023 0 : return true;
2024 : }
2025 :
2026 : return false;
2027 : }
2028 :
2029 0 : static int __nh_valid_dump_req(const struct nlmsghdr *nlh, struct nlattr **tb,
2030 : struct nh_dump_filter *filter,
2031 : struct netlink_ext_ack *extack)
2032 : {
2033 0 : struct nhmsg *nhm;
2034 0 : u32 idx;
2035 :
2036 0 : if (tb[NHA_OIF]) {
2037 0 : idx = nla_get_u32(tb[NHA_OIF]);
2038 0 : if (idx > INT_MAX) {
2039 0 : NL_SET_ERR_MSG(extack, "Invalid device index");
2040 0 : return -EINVAL;
2041 : }
2042 0 : filter->dev_idx = idx;
2043 : }
2044 0 : if (tb[NHA_MASTER]) {
2045 0 : idx = nla_get_u32(tb[NHA_MASTER]);
2046 0 : if (idx > INT_MAX) {
2047 0 : NL_SET_ERR_MSG(extack, "Invalid master device index");
2048 0 : return -EINVAL;
2049 : }
2050 0 : filter->master_idx = idx;
2051 : }
2052 0 : filter->group_filter = nla_get_flag(tb[NHA_GROUPS]);
2053 0 : filter->fdb_filter = nla_get_flag(tb[NHA_FDB]);
2054 :
2055 0 : nhm = nlmsg_data(nlh);
2056 0 : if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
2057 0 : NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
2058 0 : return -EINVAL;
2059 : }
2060 :
2061 : return 0;
2062 : }
2063 :
2064 0 : static int nh_valid_dump_req(const struct nlmsghdr *nlh,
2065 : struct nh_dump_filter *filter,
2066 : struct netlink_callback *cb)
2067 : {
2068 0 : struct nlattr *tb[ARRAY_SIZE(rtm_nh_policy_dump)];
2069 0 : int err;
2070 :
2071 0 : err = nlmsg_parse(nlh, sizeof(struct nhmsg), tb,
2072 : ARRAY_SIZE(rtm_nh_policy_dump) - 1,
2073 : rtm_nh_policy_dump, cb->extack);
2074 0 : if (err < 0)
2075 : return err;
2076 :
2077 0 : return __nh_valid_dump_req(nlh, tb, filter, cb->extack);
2078 : }
2079 :
/* Dump state kept in the netlink callback's ctx area between calls. */
struct rtm_dump_nh_ctx {
	u32 idx;	/* position in the tree walk at which to resume */
};
2083 :
2084 : static struct rtm_dump_nh_ctx *
2085 0 : rtm_dump_nh_ctx(struct netlink_callback *cb)
2086 : {
2087 0 : struct rtm_dump_nh_ctx *ctx = (void *)cb->ctx;
2088 :
2089 0 : BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
2090 0 : return ctx;
2091 : }
2092 :
2093 0 : static int rtm_dump_walk_nexthops(struct sk_buff *skb,
2094 : struct netlink_callback *cb,
2095 : struct rb_root *root,
2096 : struct rtm_dump_nh_ctx *ctx,
2097 : int (*nh_cb)(struct sk_buff *skb,
2098 : struct netlink_callback *cb,
2099 : struct nexthop *nh, void *data),
2100 : void *data)
2101 : {
2102 0 : struct rb_node *node;
2103 0 : int idx = 0, s_idx;
2104 0 : int err;
2105 :
2106 0 : s_idx = ctx->idx;
2107 0 : for (node = rb_first(root); node; node = rb_next(node)) {
2108 0 : struct nexthop *nh;
2109 :
2110 0 : if (idx < s_idx)
2111 0 : goto cont;
2112 :
2113 0 : nh = rb_entry(node, struct nexthop, rb_node);
2114 0 : ctx->idx = idx;
2115 0 : err = nh_cb(skb, cb, nh, data);
2116 0 : if (err)
2117 0 : return err;
2118 0 : cont:
2119 0 : idx++;
2120 : }
2121 :
2122 0 : ctx->idx = idx;
2123 0 : return 0;
2124 : }
2125 :
2126 0 : static int rtm_dump_nexthop_cb(struct sk_buff *skb, struct netlink_callback *cb,
2127 : struct nexthop *nh, void *data)
2128 : {
2129 0 : struct nhmsg *nhm = nlmsg_data(cb->nlh);
2130 0 : struct nh_dump_filter *filter = data;
2131 :
2132 0 : if (nh_dump_filtered(nh, filter, nhm->nh_family))
2133 : return 0;
2134 :
2135 0 : return nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
2136 0 : NETLINK_CB(cb->skb).portid,
2137 0 : cb->nlh->nlmsg_seq, NLM_F_MULTI);
2138 : }
2139 :
2140 : /* rtnl */
2141 0 : static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
2142 : {
2143 0 : struct rtm_dump_nh_ctx *ctx = rtm_dump_nh_ctx(cb);
2144 0 : struct net *net = sock_net(skb->sk);
2145 0 : struct rb_root *root = &net->nexthop.rb_root;
2146 0 : struct nh_dump_filter filter = {};
2147 0 : int err;
2148 :
2149 0 : err = nh_valid_dump_req(cb->nlh, &filter, cb);
2150 0 : if (err < 0)
2151 : return err;
2152 :
2153 0 : err = rtm_dump_walk_nexthops(skb, cb, root, ctx,
2154 : &rtm_dump_nexthop_cb, &filter);
2155 0 : if (err < 0) {
2156 0 : if (likely(skb->len))
2157 0 : goto out;
2158 0 : goto out_err;
2159 : }
2160 :
2161 0 : out:
2162 0 : err = skb->len;
2163 0 : out_err:
2164 0 : cb->seq = net->nexthop.seq;
2165 0 : nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2166 0 : return err;
2167 : }
2168 :
2169 0 : static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
2170 : {
2171 0 : unsigned int hash = nh_dev_hashfn(dev->ifindex);
2172 0 : struct net *net = dev_net(dev);
2173 0 : struct hlist_head *head = &net->nexthop.devhash[hash];
2174 0 : struct hlist_node *n;
2175 0 : struct nh_info *nhi;
2176 :
2177 0 : hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2178 0 : if (nhi->fib_nhc.nhc_dev == dev) {
2179 0 : if (nhi->family == AF_INET)
2180 0 : fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
2181 : orig_mtu);
2182 : }
2183 : }
2184 0 : }
2185 :
2186 : /* rtnl */
2187 7 : static int nh_netdev_event(struct notifier_block *this,
2188 : unsigned long event, void *ptr)
2189 : {
2190 7 : struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2191 7 : struct netdev_notifier_info_ext *info_ext;
2192 :
2193 7 : switch (event) {
2194 0 : case NETDEV_DOWN:
2195 : case NETDEV_UNREGISTER:
2196 0 : nexthop_flush_dev(dev, event);
2197 0 : break;
2198 0 : case NETDEV_CHANGE:
2199 0 : if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
2200 0 : nexthop_flush_dev(dev, event);
2201 : break;
2202 0 : case NETDEV_CHANGEMTU:
2203 0 : info_ext = ptr;
2204 0 : nexthop_sync_mtu(dev, info_ext->ext.mtu);
2205 0 : rt_cache_flush(dev_net(dev));
2206 0 : break;
2207 : }
2208 7 : return NOTIFY_DONE;
2209 : }
2210 :
2211 : static struct notifier_block nh_netdev_notifier = {
2212 : .notifier_call = nh_netdev_event,
2213 : };
2214 :
2215 0 : static int nexthops_dump(struct net *net, struct notifier_block *nb,
2216 : struct netlink_ext_ack *extack)
2217 : {
2218 0 : struct rb_root *root = &net->nexthop.rb_root;
2219 0 : struct rb_node *node;
2220 0 : int err = 0;
2221 :
2222 0 : for (node = rb_first(root); node; node = rb_next(node)) {
2223 0 : struct nexthop *nh;
2224 :
2225 0 : nh = rb_entry(node, struct nexthop, rb_node);
2226 0 : err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
2227 : extack);
2228 0 : if (err)
2229 : break;
2230 : }
2231 :
2232 0 : return err;
2233 : }
2234 :
2235 0 : int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
2236 : struct netlink_ext_ack *extack)
2237 : {
2238 0 : int err;
2239 :
2240 0 : rtnl_lock();
2241 0 : err = nexthops_dump(net, nb, extack);
2242 0 : if (err)
2243 0 : goto unlock;
2244 0 : err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
2245 : nb);
2246 0 : unlock:
2247 0 : rtnl_unlock();
2248 0 : return err;
2249 : }
2250 : EXPORT_SYMBOL(register_nexthop_notifier);
2251 :
2252 0 : int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
2253 : {
2254 0 : return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
2255 : nb);
2256 : }
2257 : EXPORT_SYMBOL(unregister_nexthop_notifier);
2258 :
2259 0 : void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
2260 : {
2261 0 : struct nexthop *nexthop;
2262 :
2263 0 : rcu_read_lock();
2264 :
2265 0 : nexthop = nexthop_find_by_id(net, id);
2266 0 : if (!nexthop)
2267 0 : goto out;
2268 :
2269 0 : nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
2270 0 : if (offload)
2271 0 : nexthop->nh_flags |= RTNH_F_OFFLOAD;
2272 0 : if (trap)
2273 0 : nexthop->nh_flags |= RTNH_F_TRAP;
2274 :
2275 0 : out:
2276 0 : rcu_read_unlock();
2277 0 : }
2278 : EXPORT_SYMBOL(nexthop_set_hw_flags);
2279 :
2280 0 : static void __net_exit nexthop_net_exit(struct net *net)
2281 : {
2282 0 : rtnl_lock();
2283 0 : flush_all_nexthops(net);
2284 0 : rtnl_unlock();
2285 0 : kfree(net->nexthop.devhash);
2286 0 : }
2287 :
2288 1 : static int __net_init nexthop_net_init(struct net *net)
2289 : {
2290 1 : size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
2291 :
2292 1 : net->nexthop.rb_root = RB_ROOT;
2293 1 : net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
2294 1 : if (!net->nexthop.devhash)
2295 : return -ENOMEM;
2296 1 : BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
2297 :
2298 1 : return 0;
2299 : }
2300 :
2301 : static struct pernet_operations nexthop_net_ops = {
2302 : .init = nexthop_net_init,
2303 : .exit = nexthop_net_exit,
2304 : };
2305 :
2306 1 : static int __init nexthop_init(void)
2307 : {
2308 1 : register_pernet_subsys(&nexthop_net_ops);
2309 :
2310 1 : register_netdevice_notifier(&nh_netdev_notifier);
2311 :
2312 1 : rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2313 1 : rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
2314 1 : rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
2315 : rtm_dump_nexthop, 0);
2316 :
2317 1 : rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2318 1 : rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2319 :
2320 1 : rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2321 1 : rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2322 :
2323 1 : return 0;
2324 : }
2325 : subsys_initcall(nexthop_init);
|