Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : * net/core/dst.c Protocol independent destination cache.
4 : *
5 : * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 : *
7 : */
8 :
9 : #include <linux/bitops.h>
10 : #include <linux/errno.h>
11 : #include <linux/init.h>
12 : #include <linux/kernel.h>
13 : #include <linux/workqueue.h>
14 : #include <linux/mm.h>
15 : #include <linux/module.h>
16 : #include <linux/slab.h>
17 : #include <linux/netdevice.h>
18 : #include <linux/skbuff.h>
19 : #include <linux/string.h>
20 : #include <linux/types.h>
21 : #include <net/net_namespace.h>
22 : #include <linux/sched.h>
23 : #include <linux/prefetch.h>
24 : #include <net/lwtunnel.h>
25 : #include <net/xfrm.h>
26 :
27 : #include <net/dst.h>
28 : #include <net/dst_metadata.h>
29 :
/* Fallback dst->output handler: silently drop every packet.
 * Installed by dst_init() and dst_dev_put() so a dead or
 * not-yet-configured route never transmits anything.
 */
int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(dst_discard_out);
36 :
/* Shared read-only metrics block referenced by every dst that has not
 * COW'ed a private copy (see dst_cow_metrics_generic()).
 */
const struct dst_metrics dst_default_metrics = {
	/* This initializer is needed to force linker to place this variable
	 * into const section. Otherwise it might end up in the bss section.
	 * We really want to avoid false sharing on this variable, and catch
	 * any writes on it.
	 */
	.refcnt = REFCOUNT_INIT(1),
};
EXPORT_SYMBOL(dst_default_metrics);
46 :
/* Initialize an already-allocated dst_entry.
 * @dst: entry to set up
 * @ops: per-family dst operations
 * @dev: egress device; a reference is taken when non-NULL
 * @initial_ref: starting value for dst->__refcnt
 * @initial_obsolete: starting value for dst->obsolete
 * @flags: DST_* flags; DST_NOCOUNT skips per-ops entry accounting
 *
 * input/output start out as discard handlers so an unfinished route
 * cannot pass traffic; callers override them afterwards.
 */
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
	      struct net_device *dev, int initial_ref, int initial_obsolete,
	      unsigned short flags)
{
	dst->dev = dev;
	if (dev)
		dev_hold(dev);
	dst->ops = ops;
	/* Point at the shared read-only defaults until someone COWs. */
	dst_init_metrics(dst, dst_default_metrics.metrics, true);
	dst->expires = 0UL;
#ifdef CONFIG_XFRM
	dst->xfrm = NULL;
#endif
	dst->input = dst_discard;
	dst->output = dst_discard_out;
	dst->error = 0;
	dst->obsolete = initial_obsolete;
	dst->header_len = 0;
	dst->trailer_len = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
	dst->tclassid = 0;
#endif
	dst->lwtstate = NULL;
	atomic_set(&dst->__refcnt, initial_ref);
	dst->__use = 0;
	dst->lastuse = jiffies;
	dst->flags = flags;
	if (!(flags & DST_NOCOUNT))
		dst_entries_add(ops, 1);
}
EXPORT_SYMBOL(dst_init);
78 :
79 16 : void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
80 : int initial_ref, int initial_obsolete, unsigned short flags)
81 : {
82 16 : struct dst_entry *dst;
83 :
84 16 : if (ops->gc &&
85 0 : !(flags & DST_NOCOUNT) &&
86 0 : dst_entries_get_fast(ops) > ops->gc_thresh) {
87 0 : if (ops->gc(ops)) {
88 0 : pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
89 0 : return NULL;
90 : }
91 : }
92 :
93 16 : dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
94 16 : if (!dst)
95 : return NULL;
96 :
97 16 : dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
98 :
99 16 : return dst;
100 : }
101 : EXPORT_SYMBOL(dst_alloc);
102 :
103 8 : struct dst_entry *dst_destroy(struct dst_entry * dst)
104 : {
105 8 : struct dst_entry *child = NULL;
106 :
107 8 : smp_rmb();
108 :
109 : #ifdef CONFIG_XFRM
110 : if (dst->xfrm) {
111 : struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
112 :
113 : child = xdst->child;
114 : }
115 : #endif
116 8 : if (!(dst->flags & DST_NOCOUNT))
117 8 : dst_entries_add(dst->ops, -1);
118 :
119 8 : if (dst->ops->destroy)
120 8 : dst->ops->destroy(dst);
121 8 : if (dst->dev)
122 8 : dev_put(dst->dev);
123 :
124 8 : lwtstate_put(dst->lwtstate);
125 :
126 8 : if (dst->flags & DST_METADATA)
127 8 : metadata_dst_free((struct metadata_dst *)dst);
128 : else
129 8 : kmem_cache_free(dst->ops->kmem_cachep, dst);
130 :
131 8 : dst = child;
132 8 : if (dst)
133 : dst_release_immediate(dst);
134 8 : return NULL;
135 : }
136 : EXPORT_SYMBOL(dst_destroy);
137 :
138 8 : static void dst_destroy_rcu(struct rcu_head *head)
139 : {
140 8 : struct dst_entry *dst = container_of(head, struct dst_entry, rcu_head);
141 :
142 8 : dst = dst_destroy(dst);
143 8 : }
144 :
/* Operations to mark dst as DEAD and clean up the net device referenced
 * by dst:
 * 1. put the dst under blackhole interface and discard all tx/rx packets
 * on this route.
 * 2. release the net_device
 * This function should be called when removing routes from the fib tree
 * in preparation for a NETDEV_DOWN/NETDEV_UNREGISTER event and also to
 * make the next dst_ops->check() fail.
 */
void dst_dev_put(struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;

	dst->obsolete = DST_OBSOLETE_DEAD;
	if (dst->ops->ifdown)
		dst->ops->ifdown(dst, dev, true);
	dst->input = dst_discard;
	dst->output = dst_discard_out;
	/* Swap in blackhole_netdev: take its reference before dropping
	 * the old device's, so dst->dev never points at an unreferenced
	 * device.
	 */
	dst->dev = blackhole_netdev;
	dev_hold(dst->dev);
	dev_put(dev);
}
EXPORT_SYMBOL(dst_dev_put);
168 :
169 216 : void dst_release(struct dst_entry *dst)
170 : {
171 216 : if (dst) {
172 57 : int newrefcnt;
173 :
174 57 : newrefcnt = atomic_dec_return(&dst->__refcnt);
175 57 : if (WARN_ONCE(newrefcnt < 0, "dst_release underflow"))
176 0 : net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
177 : __func__, dst, newrefcnt);
178 57 : if (!newrefcnt)
179 8 : call_rcu(&dst->rcu_head, dst_destroy_rcu);
180 : }
181 216 : }
182 : EXPORT_SYMBOL(dst_release);
183 :
184 0 : void dst_release_immediate(struct dst_entry *dst)
185 : {
186 0 : if (dst) {
187 0 : int newrefcnt;
188 :
189 0 : newrefcnt = atomic_dec_return(&dst->__refcnt);
190 0 : if (WARN_ONCE(newrefcnt < 0, "dst_release_immediate underflow"))
191 0 : net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
192 : __func__, dst, newrefcnt);
193 0 : if (!newrefcnt)
194 0 : dst_destroy(dst);
195 : }
196 0 : }
197 : EXPORT_SYMBOL(dst_release_immediate);
198 :
/* Copy-on-write the metrics of @dst.
 * @old: the raw dst->_metrics word the caller observed (pointer bits
 *       plus DST_METRICS_* flag bits).
 *
 * Allocates a private refcounted copy of the current metrics and tries
 * to install it with cmpxchg().  Returns writable metrics, or NULL if
 * the allocation failed or a racing CPU installed a read-only block.
 */
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
	struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);

	if (p) {
		struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
		unsigned long prev, new;

		refcount_set(&p->refcnt, 1);
		memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));

		new = (unsigned long) p;
		prev = cmpxchg(&dst->_metrics, old, new);

		if (prev != old) {
			/* Lost the race: discard our copy and use the
			 * winner's block, unless it is read-only (then the
			 * caller must not write, so report failure).
			 */
			kfree(p);
			p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
			if (prev & DST_METRICS_READ_ONLY)
				p = NULL;
		} else if (prev & DST_METRICS_REFCOUNTED) {
			/* We displaced a refcounted block: drop its ref. */
			if (refcount_dec_and_test(&old_p->refcnt))
				kfree(old_p);
		}
	}
	/* Returning p as u32 * relies on metrics[] being at offset 0. */
	BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
	return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
227 :
228 : /* Caller asserts that dst_metrics_read_only(dst) is false. */
229 0 : void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
230 : {
231 0 : unsigned long prev, new;
232 :
233 0 : new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
234 0 : prev = cmpxchg(&dst->_metrics, old, new);
235 0 : if (prev == old)
236 0 : kfree(__DST_METRICS_PTR(old));
237 0 : }
238 : EXPORT_SYMBOL(__dst_destroy_metrics_generic);
239 :
/* Minimal dst_ops shared by all metadata dsts: they carry tunnel
 * metadata, not per-family routing state, so only .family is set.
 */
static struct dst_ops md_dst_ops = {
	.family = AF_UNSPEC,
};
243 :
/* Output handler for metadata dsts: such a dst must never be used to
 * transmit, so warn once and drop the packet.
 */
static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	WARN_ONCE(1, "Attempting to call output on metadata dst\n");
	kfree_skb(skb);
	return 0;
}
250 :
/* Input handler for metadata dsts: such a dst must never be used to
 * receive, so warn once and drop the packet.
 */
static int dst_md_discard(struct sk_buff *skb)
{
	WARN_ONCE(1, "Attempting to call input on metadata dst\n");
	kfree_skb(skb);
	return 0;
}
257 :
/* Common setup for a metadata dst: initialize the embedded dst_entry
 * with no device and no entry accounting, install the warn-and-discard
 * handlers, and zero everything that follows the dst_entry (the rest of
 * struct metadata_dst plus @optslen bytes of trailing option space).
 */
static void __metadata_dst_init(struct metadata_dst *md_dst,
				enum metadata_type type, u8 optslen)

{
	struct dst_entry *dst;

	dst = &md_dst->dst;
	dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
		 DST_METADATA | DST_NOCOUNT);

	dst->input = dst_md_discard;
	dst->output = dst_md_discard_out;

	/* dst + 1 is the first byte past the embedded dst_entry. */
	memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
	md_dst->type = type;
}
274 :
275 0 : struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
276 : gfp_t flags)
277 : {
278 0 : struct metadata_dst *md_dst;
279 :
280 0 : md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
281 0 : if (!md_dst)
282 : return NULL;
283 :
284 0 : __metadata_dst_init(md_dst, type, optslen);
285 :
286 0 : return md_dst;
287 : }
288 : EXPORT_SYMBOL_GPL(metadata_dst_alloc);
289 :
/* Free a metadata dst from metadata_dst_alloc().  IP-tunnel metadata
 * owns a dst_cache that must be destroyed before the memory goes away.
 */
void metadata_dst_free(struct metadata_dst *md_dst)
{
#ifdef CONFIG_DST_CACHE
	if (md_dst->type == METADATA_IP_TUNNEL)
		dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
#endif
	kfree(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free);
299 :
/* Per-cpu variant of metadata_dst_alloc(): allocates and initializes
 * one metadata_dst (plus @optslen option bytes) per possible CPU.
 * Returns NULL on allocation failure.
 */
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
{
	int cpu;
	struct metadata_dst __percpu *md_dst;

	md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
				    __alignof__(struct metadata_dst), flags);
	if (!md_dst)
		return NULL;

	for_each_possible_cpu(cpu)
		__metadata_dst_init(per_cpu_ptr(md_dst, cpu), type, optslen);

	return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
317 :
/* Free a per-cpu metadata dst, first destroying each CPU's tunnel
 * dst_cache when dst caching is built in.
 */
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
{
#ifdef CONFIG_DST_CACHE
	int cpu;

	for_each_possible_cpu(cpu) {
		struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);

		if (one_md_dst->type == METADATA_IP_TUNNEL)
			dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
	}
#endif
	free_percpu(md_dst);
}
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
|