Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0 */
2 : #ifndef __NET_IP_TUNNELS_H
3 : #define __NET_IP_TUNNELS_H 1
4 :
5 : #include <linux/if_tunnel.h>
6 : #include <linux/netdevice.h>
7 : #include <linux/skbuff.h>
8 : #include <linux/socket.h>
9 : #include <linux/types.h>
10 : #include <linux/u64_stats_sync.h>
11 : #include <linux/bitops.h>
12 :
13 : #include <net/dsfield.h>
14 : #include <net/gro_cells.h>
15 : #include <net/inet_ecn.h>
16 : #include <net/netns/generic.h>
17 : #include <net/rtnetlink.h>
18 : #include <net/lwtunnel.h>
19 : #include <net/dst_cache.h>
20 :
21 : #if IS_ENABLED(CONFIG_IPV6)
22 : #include <net/ipv6.h>
23 : #include <net/ip6_fib.h>
24 : #include <net/ip6_route.h>
25 : #endif
26 :
/* Tunnel error state is retained for 30 seconds. */
#define IPTUNNEL_ERR_TIMEO	(30 * HZ)

/*
 * Offset of the first byte after tp_dst in struct ip_tunnel_key.
 * Anything between this offset and sizeof(struct ip_tunnel_key) is
 * padding that ip_tunnel_key_init() clears.
 */
#define IP_TUNNEL_KEY_SIZE	offsetofend(struct ip_tunnel_key, tp_dst)

/*
 * Start offset and length of the part of the address union that is
 * not covered by the IPv4 src/dst pair; ip_tunnel_key_init() zeroes it.
 */
#define IP_TUNNEL_KEY_IPV4_PAD	offsetofend(struct ip_tunnel_key, u.ipv4.dst)
#define IP_TUNNEL_KEY_IPV4_PAD_LEN				\
	(sizeof_field(struct ip_tunnel_key, u) -		\
	 sizeof_field(struct ip_tunnel_key, u.ipv4))
38 :
39 : struct ip_tunnel_key {
40 : __be64 tun_id;
41 : union {
42 : struct {
43 : __be32 src;
44 : __be32 dst;
45 : } ipv4;
46 : struct {
47 : struct in6_addr src;
48 : struct in6_addr dst;
49 : } ipv6;
50 : } u;
51 : __be16 tun_flags;
52 : u8 tos; /* TOS for IPv4, TC for IPv6 */
53 : u8 ttl; /* TTL for IPv4, HL for IPv6 */
54 : __be32 label; /* Flow Label for IPv6 */
55 : __be16 tp_src;
56 : __be16 tp_dst;
57 : };
58 :
/* Bits of ip_tunnel_info.mode. */
#define IP_TUNNEL_INFO_TX	0x01	/* describes tx tunnel parameters */
#define IP_TUNNEL_INFO_IPV6	0x02	/* key holds IPv6 addresses */
#define IP_TUNNEL_INFO_BRIDGE	0x04	/* describes a bridged tunnel id */

/*
 * Largest tunnel options length: every bit of the
 * ip_tunnel_info.options_len field set.
 */
#define IP_TUNNEL_OPTS_MAX					\
	GENMASK((sizeof_field(struct ip_tunnel_info,		\
			      options_len) * BITS_PER_BYTE) - 1, 0)
68 :
69 : struct ip_tunnel_info {
70 : struct ip_tunnel_key key;
71 : #ifdef CONFIG_DST_CACHE
72 : struct dst_cache dst_cache;
73 : #endif
74 : u8 options_len;
75 : u8 mode;
76 : };
77 :
/* 6rd prefix and relay prefix, each with its length. */
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel_6rd_parm {
	struct in6_addr		prefix;
	__be32			relay_prefix;
	u16			prefixlen;
	u16			relay_prefixlen;
};
#endif
87 :
88 : struct ip_tunnel_encap {
89 : u16 type;
90 : u16 flags;
91 : __be16 sport;
92 : __be16 dport;
93 : };
94 :
95 : struct ip_tunnel_prl_entry {
96 : struct ip_tunnel_prl_entry __rcu *next;
97 : __be32 addr;
98 : u16 flags;
99 : struct rcu_head rcu_head;
100 : };
101 :
102 : struct metadata_dst;
103 :
104 : struct ip_tunnel {
105 : struct ip_tunnel __rcu *next;
106 : struct hlist_node hash_node;
107 : struct net_device *dev;
108 : struct net *net; /* netns for packet i/o */
109 :
110 : unsigned long err_time; /* Time when the last ICMP error
111 : * arrived */
112 : int err_count; /* Number of arrived ICMP errors */
113 :
114 : /* These four fields used only by GRE */
115 : u32 i_seqno; /* The last seen seqno */
116 : u32 o_seqno; /* The last output seqno */
117 : int tun_hlen; /* Precalculated header length */
118 :
119 : /* These four fields used only by ERSPAN */
120 : u32 index; /* ERSPAN type II index */
121 : u8 erspan_ver; /* ERSPAN version */
122 : u8 dir; /* ERSPAN direction */
123 : u16 hwid; /* ERSPAN hardware ID */
124 :
125 : struct dst_cache dst_cache;
126 :
127 : struct ip_tunnel_parm parms;
128 :
129 : int mlink;
130 : int encap_hlen; /* Encap header length (FOU,GUE) */
131 : int hlen; /* tun_hlen + encap_hlen */
132 : struct ip_tunnel_encap encap;
133 :
134 : /* for SIT */
135 : #ifdef CONFIG_IPV6_SIT_6RD
136 : struct ip_tunnel_6rd_parm ip6rd;
137 : #endif
138 : struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
139 : unsigned int prl_count; /* # of entries in PRL */
140 : unsigned int ip_tnl_net_id;
141 : struct gro_cells gro_cells;
142 : __u32 fwmark;
143 : bool collect_md;
144 : bool ignore_df;
145 : };
146 :
147 : struct tnl_ptk_info {
148 : __be16 flags;
149 : __be16 proto;
150 : __be32 key;
151 : __be32 seq;
152 : int hdr_len;
153 : };
154 :
/* Packet disposition codes. */
#define PACKET_RCVD		0
#define PACKET_REJECT		1
#define PACKET_NEXT		2

/* The per-netns tunnel hash table has 2^IP_TNL_HASH_BITS buckets. */
#define IP_TNL_HASH_BITS	7
#define IP_TNL_HASH_SIZE	(1 << IP_TNL_HASH_BITS)
161 :
162 : struct ip_tunnel_net {
163 : struct net_device *fb_tunnel_dev;
164 : struct rtnl_link_ops *rtnl_link_ops;
165 : struct hlist_head tunnels[IP_TNL_HASH_SIZE];
166 : struct ip_tunnel __rcu *collect_md_tun;
167 : int type;
168 : };
169 :
170 : static inline void ip_tunnel_key_init(struct ip_tunnel_key *key,
171 : __be32 saddr, __be32 daddr,
172 : u8 tos, u8 ttl, __be32 label,
173 : __be16 tp_src, __be16 tp_dst,
174 : __be64 tun_id, __be16 tun_flags)
175 : {
176 : key->tun_id = tun_id;
177 : key->u.ipv4.src = saddr;
178 : key->u.ipv4.dst = daddr;
179 : memset((unsigned char *)key + IP_TUNNEL_KEY_IPV4_PAD,
180 : 0, IP_TUNNEL_KEY_IPV4_PAD_LEN);
181 : key->tos = tos;
182 : key->ttl = ttl;
183 : key->label = label;
184 : key->tun_flags = tun_flags;
185 :
186 : /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
187 : * the upper tunnel are used.
188 : * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
189 : */
190 : key->tp_src = tp_src;
191 : key->tp_dst = tp_dst;
192 :
193 : /* Clear struct padding. */
194 : if (sizeof(*key) != IP_TUNNEL_KEY_SIZE)
195 : memset((unsigned char *)key + IP_TUNNEL_KEY_SIZE,
196 : 0, sizeof(*key) - IP_TUNNEL_KEY_SIZE);
197 : }
198 :
199 : static inline bool
200 : ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
201 : const struct ip_tunnel_info *info)
202 : {
203 : if (skb->mark)
204 : return false;
205 : if (!info)
206 : return true;
207 : if (info->key.tun_flags & TUNNEL_NOCACHE)
208 : return false;
209 :
210 : return true;
211 : }
212 :
213 0 : static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info
214 : *tun_info)
215 : {
216 0 : return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
217 : }
218 :
219 : static inline __be64 key32_to_tunnel_id(__be32 key)
220 : {
221 : #ifdef __BIG_ENDIAN
222 : return (__force __be64)key;
223 : #else
224 : return (__force __be64)((__force u64)key << 32);
225 : #endif
226 : }
227 :
228 : /* Returns the least-significant 32 bits of a __be64. */
229 0 : static inline __be32 tunnel_id_to_key32(__be64 tun_id)
230 : {
231 : #ifdef __BIG_ENDIAN
232 : return (__force __be32)tun_id;
233 : #else
234 0 : return (__force __be32)((__force u64)tun_id >> 32);
235 : #endif
236 : }
237 :
238 : #ifdef CONFIG_INET
239 :
240 : static inline void ip_tunnel_init_flow(struct flowi4 *fl4,
241 : int proto,
242 : __be32 daddr, __be32 saddr,
243 : __be32 key, __u8 tos, int oif,
244 : __u32 mark, __u32 tun_inner_hash)
245 : {
246 : memset(fl4, 0, sizeof(*fl4));
247 : fl4->flowi4_oif = oif;
248 : fl4->daddr = daddr;
249 : fl4->saddr = saddr;
250 : fl4->flowi4_tos = tos;
251 : fl4->flowi4_proto = proto;
252 : fl4->fl4_gre_key = key;
253 : fl4->flowi4_mark = mark;
254 : fl4->flowi4_multipath_hash = tun_inner_hash;
255 : }
256 :
257 : int ip_tunnel_init(struct net_device *dev);
258 : void ip_tunnel_uninit(struct net_device *dev);
259 : void ip_tunnel_dellink(struct net_device *dev, struct list_head *head);
260 : struct net *ip_tunnel_get_link_net(const struct net_device *dev);
261 : int ip_tunnel_get_iflink(const struct net_device *dev);
262 : int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
263 : struct rtnl_link_ops *ops, char *devname);
264 :
265 : void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
266 : struct rtnl_link_ops *ops);
267 :
268 : void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
269 : const struct iphdr *tnl_params, const u8 protocol);
270 : void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
271 : const u8 proto, int tunnel_hlen);
272 : int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
273 : int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
274 : int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
275 : int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
276 :
277 : struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
278 : int link, __be16 flags,
279 : __be32 remote, __be32 local,
280 : __be32 key);
281 :
282 : int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
283 : const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
284 : bool log_ecn_error);
285 : int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
286 : struct ip_tunnel_parm *p, __u32 fwmark);
287 : int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
288 : struct ip_tunnel_parm *p, __u32 fwmark);
289 : void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
290 :
291 : extern const struct header_ops ip_tunnel_header_ops;
292 : __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb);
293 :
294 : struct ip_tunnel_encap_ops {
295 : size_t (*encap_hlen)(struct ip_tunnel_encap *e);
296 : int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
297 : u8 *protocol, struct flowi4 *fl4);
298 : int (*err_handler)(struct sk_buff *skb, u32 info);
299 : };
300 :
301 : #define MAX_IPTUN_ENCAP_OPS 8
302 :
303 : extern const struct ip_tunnel_encap_ops __rcu *
304 : iptun_encaps[MAX_IPTUN_ENCAP_OPS];
305 :
306 : int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *op,
307 : unsigned int num);
308 : int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op,
309 : unsigned int num);
310 :
311 : int ip_tunnel_encap_setup(struct ip_tunnel *t,
312 : struct ip_tunnel_encap *ipencap);
313 :
314 : static inline bool pskb_inet_may_pull(struct sk_buff *skb)
315 : {
316 : int nhlen;
317 :
318 : switch (skb->protocol) {
319 : #if IS_ENABLED(CONFIG_IPV6)
320 : case htons(ETH_P_IPV6):
321 : nhlen = sizeof(struct ipv6hdr);
322 : break;
323 : #endif
324 : case htons(ETH_P_IP):
325 : nhlen = sizeof(struct iphdr);
326 : break;
327 : default:
328 : nhlen = 0;
329 : }
330 :
331 : return pskb_network_may_pull(skb, nhlen);
332 : }
333 :
334 : static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
335 : {
336 : const struct ip_tunnel_encap_ops *ops;
337 : int hlen = -EINVAL;
338 :
339 : if (e->type == TUNNEL_ENCAP_NONE)
340 : return 0;
341 :
342 : if (e->type >= MAX_IPTUN_ENCAP_OPS)
343 : return -EINVAL;
344 :
345 : rcu_read_lock();
346 : ops = rcu_dereference(iptun_encaps[e->type]);
347 : if (likely(ops && ops->encap_hlen))
348 : hlen = ops->encap_hlen(e);
349 : rcu_read_unlock();
350 :
351 : return hlen;
352 : }
353 :
354 : static inline int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
355 : u8 *protocol, struct flowi4 *fl4)
356 : {
357 : const struct ip_tunnel_encap_ops *ops;
358 : int ret = -EINVAL;
359 :
360 : if (t->encap.type == TUNNEL_ENCAP_NONE)
361 : return 0;
362 :
363 : if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
364 : return -EINVAL;
365 :
366 : rcu_read_lock();
367 : ops = rcu_dereference(iptun_encaps[t->encap.type]);
368 : if (likely(ops && ops->build_header))
369 : ret = ops->build_header(skb, &t->encap, protocol, fl4);
370 : rcu_read_unlock();
371 :
372 : return ret;
373 : }
374 :
375 : /* Extract dsfield from inner protocol */
376 : static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph,
377 : const struct sk_buff *skb)
378 : {
379 : if (skb->protocol == htons(ETH_P_IP))
380 : return iph->tos;
381 : else if (skb->protocol == htons(ETH_P_IPV6))
382 : return ipv6_get_dsfield((const struct ipv6hdr *)iph);
383 : else
384 : return 0;
385 : }
386 :
387 : static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph,
388 : const struct sk_buff *skb)
389 : {
390 : if (skb->protocol == htons(ETH_P_IP))
391 : return iph->ttl;
392 : else if (skb->protocol == htons(ETH_P_IPV6))
393 : return ((const struct ipv6hdr *)iph)->hop_limit;
394 : else
395 : return 0;
396 : }
397 :
398 : /* Propogate ECN bits out */
399 : static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
400 : const struct sk_buff *skb)
401 : {
402 : u8 inner = ip_tunnel_get_dsfield(iph, skb);
403 :
404 : return INET_ECN_encapsulate(tos, inner);
405 : }
406 :
407 : int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
408 : __be16 inner_proto, bool raw_proto, bool xnet);
409 :
410 : static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
411 : __be16 inner_proto, bool xnet)
412 : {
413 : return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
414 : }
415 :
416 : void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
417 : __be32 src, __be32 dst, u8 proto,
418 : u8 tos, u8 ttl, __be16 df, bool xnet);
419 : struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
420 : gfp_t flags);
421 : int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
422 : int headroom, bool reply);
423 :
424 : int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
425 :
426 0 : static inline int iptunnel_pull_offloads(struct sk_buff *skb)
427 : {
428 0 : if (skb_is_gso(skb)) {
429 0 : int err;
430 :
431 0 : err = skb_unclone(skb, GFP_ATOMIC);
432 0 : if (unlikely(err))
433 : return err;
434 0 : skb_shinfo(skb)->gso_type &= ~(NETIF_F_GSO_ENCAP_ALL >>
435 : NETIF_F_GSO_SHIFT);
436 : }
437 :
438 0 : skb->encapsulation = 0;
439 0 : return 0;
440 : }
441 :
442 0 : static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
443 : {
444 0 : if (pkt_len > 0) {
445 0 : struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
446 :
447 0 : u64_stats_update_begin(&tstats->syncp);
448 0 : tstats->tx_bytes += pkt_len;
449 0 : tstats->tx_packets++;
450 0 : u64_stats_update_end(&tstats->syncp);
451 0 : put_cpu_ptr(tstats);
452 : } else {
453 0 : struct net_device_stats *err_stats = &dev->stats;
454 :
455 0 : if (pkt_len < 0) {
456 0 : err_stats->tx_errors++;
457 0 : err_stats->tx_aborted_errors++;
458 : } else {
459 0 : err_stats->tx_dropped++;
460 : }
461 : }
462 0 : }
463 :
464 0 : static inline void *ip_tunnel_info_opts(struct ip_tunnel_info *info)
465 : {
466 0 : return info + 1;
467 : }
468 :
469 0 : static inline void ip_tunnel_info_opts_get(void *to,
470 : const struct ip_tunnel_info *info)
471 : {
472 0 : memcpy(to, info + 1, info->options_len);
473 0 : }
474 :
475 0 : static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
476 : const void *from, int len,
477 : __be16 flags)
478 : {
479 0 : info->options_len = len;
480 0 : if (len > 0) {
481 0 : memcpy(ip_tunnel_info_opts(info), from, len);
482 0 : info->key.tun_flags |= flags;
483 : }
484 0 : }
485 :
486 0 : static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstate)
487 : {
488 0 : return (struct ip_tunnel_info *)lwtstate->data;
489 : }
490 :
491 : DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt);
492 :
493 : /* Returns > 0 if metadata should be collected */
494 : static inline int ip_tunnel_collect_metadata(void)
495 : {
496 : return static_branch_unlikely(&ip_tunnel_metadata_cnt);
497 : }
498 :
499 : void __init ip_tunnel_core_init(void);
500 :
501 : void ip_tunnel_need_metadata(void);
502 : void ip_tunnel_unneed_metadata(void);
503 :
504 : #else /* CONFIG_INET */
505 :
/* !CONFIG_INET stub: there is no tunnel info, always NULL. */
static inline struct ip_tunnel_info *
lwt_tun_info(struct lwtunnel_state *lwtstate)
{
	return NULL;
}
510 :
/* !CONFIG_INET stub: nothing to do. */
static inline void ip_tunnel_need_metadata(void)
{
}
514 :
/* !CONFIG_INET stub: nothing to do. */
static inline void ip_tunnel_unneed_metadata(void)
{
}
518 :
/* !CONFIG_INET stub: copies nothing, @to is left untouched. */
static inline void ip_tunnel_info_opts_get(void *to,
					   const struct ip_tunnel_info *info)
{
}
523 :
524 : static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
525 : const void *from, int len,
526 : __be16 flags)
527 : {
528 : info->options_len = 0;
529 : }
530 :
531 : #endif /* CONFIG_INET */
532 :
533 : #endif /* __NET_IP_TUNNELS_H */
|