Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-only
2 : /*
3 : *
4 : * Copyright (C) 2011 Novell Inc.
5 : */
6 :
7 : #include <uapi/linux/magic.h>
8 : #include <linux/fs.h>
9 : #include <linux/namei.h>
10 : #include <linux/xattr.h>
11 : #include <linux/mount.h>
12 : #include <linux/parser.h>
13 : #include <linux/module.h>
14 : #include <linux/statfs.h>
15 : #include <linux/seq_file.h>
16 : #include <linux/posix_acl_xattr.h>
17 : #include <linux/exportfs.h>
18 : #include "overlayfs.h"
19 :
20 : MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
21 : MODULE_DESCRIPTION("Overlay filesystem");
22 : MODULE_LICENSE("GPL");
23 :
24 :
25 : struct ovl_dir_cache;
26 :
27 : #define OVL_MAX_STACK 500
28 :
29 : static bool ovl_redirect_dir_def = IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_DIR);
30 : module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
31 : MODULE_PARM_DESC(redirect_dir,
32 : "Default to on or off for the redirect_dir feature");
33 :
34 : static bool ovl_redirect_always_follow =
35 : IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
36 : module_param_named(redirect_always_follow, ovl_redirect_always_follow,
37 : bool, 0644);
38 : MODULE_PARM_DESC(redirect_always_follow,
39 : "Follow redirects even if redirect_dir feature is turned off");
40 :
41 : static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
42 : module_param_named(index, ovl_index_def, bool, 0644);
43 : MODULE_PARM_DESC(index,
44 : "Default to on or off for the inodes index feature");
45 :
46 : static bool ovl_nfs_export_def = IS_ENABLED(CONFIG_OVERLAY_FS_NFS_EXPORT);
47 : module_param_named(nfs_export, ovl_nfs_export_def, bool, 0644);
48 : MODULE_PARM_DESC(nfs_export,
49 : "Default to on or off for the NFS export feature");
50 :
51 : static bool ovl_xino_auto_def = IS_ENABLED(CONFIG_OVERLAY_FS_XINO_AUTO);
52 : module_param_named(xino_auto, ovl_xino_auto_def, bool, 0644);
53 : MODULE_PARM_DESC(xino_auto,
54 : "Auto enable xino feature");
55 :
56 24 : static void ovl_entry_stack_free(struct ovl_entry *oe)
57 : {
58 24 : unsigned int i;
59 :
60 36 : for (i = 0; i < oe->numlower; i++)
61 12 : dput(oe->lowerstack[i].dentry);
62 24 : }
63 :
64 : static bool ovl_metacopy_def = IS_ENABLED(CONFIG_OVERLAY_FS_METACOPY);
65 : module_param_named(metacopy, ovl_metacopy_def, bool, 0644);
66 : MODULE_PARM_DESC(metacopy,
67 : "Default to on or off for the metadata only copy up feature");
68 :
69 24 : static void ovl_dentry_release(struct dentry *dentry)
70 : {
71 24 : struct ovl_entry *oe = dentry->d_fsdata;
72 :
73 24 : if (oe) {
74 24 : ovl_entry_stack_free(oe);
75 24 : kfree_rcu(oe, rcu);
76 : }
77 24 : }
78 :
79 39 : static struct dentry *ovl_d_real(struct dentry *dentry,
80 : const struct inode *inode)
81 : {
82 39 : struct dentry *real = NULL, *lower;
83 :
84 : /* It's an overlay file */
85 39 : if (inode && d_inode(dentry) == inode)
86 : return dentry;
87 :
88 0 : if (!d_is_reg(dentry)) {
89 0 : if (!inode || inode == d_inode(dentry))
90 : return dentry;
91 0 : goto bug;
92 : }
93 :
94 0 : real = ovl_dentry_upper(dentry);
95 0 : if (real && (inode == d_inode(real)))
96 : return real;
97 :
98 0 : if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
99 : return real;
100 :
101 0 : lower = ovl_dentry_lowerdata(dentry);
102 0 : if (!lower)
103 0 : goto bug;
104 0 : real = lower;
105 :
106 : /* Handle recursion */
107 0 : real = d_real(real, inode);
108 :
109 0 : if (!inode || inode == d_inode(real))
110 : return real;
111 0 : bug:
112 0 : WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
113 : __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
114 : inode ? inode->i_ino : 0, real,
115 : real && d_inode(real) ? d_inode(real)->i_ino : 0);
116 0 : return dentry;
117 : }
118 :
119 0 : static int ovl_revalidate_real(struct dentry *d, unsigned int flags, bool weak)
120 : {
121 0 : int ret = 1;
122 :
123 0 : if (weak) {
124 0 : if (d->d_flags & DCACHE_OP_WEAK_REVALIDATE)
125 0 : ret = d->d_op->d_weak_revalidate(d, flags);
126 0 : } else if (d->d_flags & DCACHE_OP_REVALIDATE) {
127 0 : ret = d->d_op->d_revalidate(d, flags);
128 0 : if (!ret) {
129 0 : if (!(flags & LOOKUP_RCU))
130 0 : d_invalidate(d);
131 : ret = -ESTALE;
132 : }
133 : }
134 0 : return ret;
135 : }
136 :
137 0 : static int ovl_dentry_revalidate_common(struct dentry *dentry,
138 : unsigned int flags, bool weak)
139 : {
140 0 : struct ovl_entry *oe = dentry->d_fsdata;
141 0 : struct dentry *upper;
142 0 : unsigned int i;
143 0 : int ret = 1;
144 :
145 0 : upper = ovl_dentry_upper(dentry);
146 0 : if (upper)
147 0 : ret = ovl_revalidate_real(upper, flags, weak);
148 :
149 0 : for (i = 0; ret > 0 && i < oe->numlower; i++) {
150 0 : ret = ovl_revalidate_real(oe->lowerstack[i].dentry, flags,
151 : weak);
152 : }
153 0 : return ret;
154 : }
155 :
156 0 : static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
157 : {
158 0 : return ovl_dentry_revalidate_common(dentry, flags, false);
159 : }
160 :
161 0 : static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
162 : {
163 0 : return ovl_dentry_revalidate_common(dentry, flags, true);
164 : }
165 :
166 : static const struct dentry_operations ovl_dentry_operations = {
167 : .d_release = ovl_dentry_release,
168 : .d_real = ovl_d_real,
169 : .d_revalidate = ovl_dentry_revalidate,
170 : .d_weak_revalidate = ovl_dentry_weak_revalidate,
171 : };
172 :
173 : static struct kmem_cache *ovl_inode_cachep;
174 :
175 32 : static struct inode *ovl_alloc_inode(struct super_block *sb)
176 : {
177 32 : struct ovl_inode *oi = kmem_cache_alloc(ovl_inode_cachep, GFP_KERNEL);
178 :
179 32 : if (!oi)
180 : return NULL;
181 :
182 32 : oi->cache = NULL;
183 32 : oi->redirect = NULL;
184 32 : oi->version = 0;
185 32 : oi->flags = 0;
186 32 : oi->__upperdentry = NULL;
187 32 : oi->lower = NULL;
188 32 : oi->lowerdata = NULL;
189 32 : mutex_init(&oi->lock);
190 :
191 32 : return &oi->vfs_inode;
192 : }
193 :
194 32 : static void ovl_free_inode(struct inode *inode)
195 : {
196 32 : struct ovl_inode *oi = OVL_I(inode);
197 :
198 32 : kfree(oi->redirect);
199 32 : mutex_destroy(&oi->lock);
200 32 : kmem_cache_free(ovl_inode_cachep, oi);
201 32 : }
202 :
203 32 : static void ovl_destroy_inode(struct inode *inode)
204 : {
205 32 : struct ovl_inode *oi = OVL_I(inode);
206 :
207 32 : dput(oi->__upperdentry);
208 32 : iput(oi->lower);
209 32 : if (S_ISDIR(inode->i_mode))
210 16 : ovl_dir_cache_free(inode);
211 : else
212 16 : iput(oi->lowerdata);
213 32 : }
214 :
215 2 : static void ovl_free_fs(struct ovl_fs *ofs)
216 : {
217 2 : struct vfsmount **mounts;
218 2 : unsigned i;
219 :
220 2 : iput(ofs->workbasedir_trap);
221 2 : iput(ofs->indexdir_trap);
222 2 : iput(ofs->workdir_trap);
223 2 : dput(ofs->whiteout);
224 2 : dput(ofs->indexdir);
225 2 : dput(ofs->workdir);
226 2 : if (ofs->workdir_locked)
227 2 : ovl_inuse_unlock(ofs->workbasedir);
228 2 : dput(ofs->workbasedir);
229 2 : if (ofs->upperdir_locked)
230 2 : ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
231 :
232 : /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
233 2 : mounts = (struct vfsmount **) ofs->layers;
234 6 : for (i = 0; i < ofs->numlayer; i++) {
235 4 : iput(ofs->layers[i].trap);
236 4 : mounts[i] = ofs->layers[i].mnt;
237 : }
238 2 : kern_unmount_array(mounts, ofs->numlayer);
239 2 : kfree(ofs->layers);
240 8 : for (i = 0; i < ofs->numfs; i++)
241 4 : free_anon_bdev(ofs->fs[i].pseudo_dev);
242 2 : kfree(ofs->fs);
243 :
244 2 : kfree(ofs->config.lowerdir);
245 2 : kfree(ofs->config.upperdir);
246 2 : kfree(ofs->config.workdir);
247 2 : kfree(ofs->config.redirect_mode);
248 2 : if (ofs->creator_cred)
249 2 : put_cred(ofs->creator_cred);
250 2 : kfree(ofs);
251 2 : }
252 :
253 2 : static void ovl_put_super(struct super_block *sb)
254 : {
255 2 : struct ovl_fs *ofs = sb->s_fs_info;
256 :
257 2 : ovl_free_fs(ofs);
258 2 : }
259 :
260 : /* Sync real dirty inodes in upper filesystem (if it exists) */
261 4 : static int ovl_sync_fs(struct super_block *sb, int wait)
262 : {
263 4 : struct ovl_fs *ofs = sb->s_fs_info;
264 4 : struct super_block *upper_sb;
265 4 : int ret;
266 :
267 4 : ret = ovl_sync_status(ofs);
268 : /*
269 : * We have to always set the err, because the return value isn't
270 : * checked in syncfs, and instead indirectly return an error via
271 : * the sb's writeback errseq, which VFS inspects after this call.
272 : */
273 4 : if (ret < 0) {
274 0 : errseq_set(&sb->s_wb_err, -EIO);
275 0 : return -EIO;
276 : }
277 :
278 4 : if (!ret)
279 : return ret;
280 :
281 : /*
282 : * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC).
283 : * All the super blocks will be iterated, including upper_sb.
284 : *
285 : * If this is a syncfs(2) call, then we do need to call
286 : * sync_filesystem() on upper_sb, but enough if we do it when being
287 : * called with wait == 1.
288 : */
289 4 : if (!wait)
290 : return 0;
291 :
292 2 : upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
293 :
294 2 : down_read(&upper_sb->s_umount);
295 2 : ret = sync_filesystem(upper_sb);
296 2 : up_read(&upper_sb->s_umount);
297 :
298 2 : return ret;
299 : }
300 :
301 : /**
302 : * ovl_statfs
303 : * @sb: The overlayfs super block
304 : * @buf: The struct kstatfs to fill in with stats
305 : *
306 : * Get the filesystem statistics. As writes always target the upper layer
307 : * filesystem pass the statfs to the upper filesystem (if it exists)
308 : */
309 0 : static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf)
310 : {
311 0 : struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
312 0 : struct dentry *root_dentry = dentry->d_sb->s_root;
313 0 : struct path path;
314 0 : int err;
315 :
316 0 : ovl_path_real(root_dentry, &path);
317 :
318 0 : err = vfs_statfs(&path, buf);
319 0 : if (!err) {
320 0 : buf->f_namelen = ofs->namelen;
321 0 : buf->f_type = OVERLAYFS_SUPER_MAGIC;
322 : }
323 :
324 0 : return err;
325 : }
326 :
327 : /* Will this overlay be forced to mount/remount ro? */
328 2 : static bool ovl_force_readonly(struct ovl_fs *ofs)
329 : {
330 2 : return (!ovl_upper_mnt(ofs) || !ofs->workdir);
331 : }
332 :
333 2 : static const char *ovl_redirect_mode_def(void)
334 : {
335 2 : return ovl_redirect_dir_def ? "on" : "off";
336 : }
337 :
/*
 * xino option names, indexed by config.xino when the mount options are
 * shown.  NOTE(review): the order presumably mirrors the OVL_XINO_*
 * enumerators - confirm against their definition.
 */
static const char * const ovl_xino_str[] = {
	"off",
	"auto",
	"on",
};
343 :
344 2 : static inline int ovl_xino_def(void)
345 : {
346 2 : return ovl_xino_auto_def ? OVL_XINO_AUTO : OVL_XINO_OFF;
347 : }
348 :
349 : /**
350 : * ovl_show_options
351 : *
352 : * Prints the mount options for a given superblock.
353 : * Returns zero; does not fail.
354 : */
355 0 : static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
356 : {
357 0 : struct super_block *sb = dentry->d_sb;
358 0 : struct ovl_fs *ofs = sb->s_fs_info;
359 :
360 0 : seq_show_option(m, "lowerdir", ofs->config.lowerdir);
361 0 : if (ofs->config.upperdir) {
362 0 : seq_show_option(m, "upperdir", ofs->config.upperdir);
363 0 : seq_show_option(m, "workdir", ofs->config.workdir);
364 : }
365 0 : if (ofs->config.default_permissions)
366 0 : seq_puts(m, ",default_permissions");
367 0 : if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
368 0 : seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
369 0 : if (ofs->config.index != ovl_index_def)
370 0 : seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
371 0 : if (!ofs->config.uuid)
372 0 : seq_puts(m, ",uuid=off");
373 0 : if (ofs->config.nfs_export != ovl_nfs_export_def)
374 0 : seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
375 : "on" : "off");
376 0 : if (ofs->config.xino != ovl_xino_def() && !ovl_same_fs(sb))
377 0 : seq_printf(m, ",xino=%s", ovl_xino_str[ofs->config.xino]);
378 0 : if (ofs->config.metacopy != ovl_metacopy_def)
379 0 : seq_printf(m, ",metacopy=%s",
380 : ofs->config.metacopy ? "on" : "off");
381 0 : if (ofs->config.ovl_volatile)
382 0 : seq_puts(m, ",volatile");
383 0 : return 0;
384 : }
385 :
386 0 : static int ovl_remount(struct super_block *sb, int *flags, char *data)
387 : {
388 0 : struct ovl_fs *ofs = sb->s_fs_info;
389 0 : struct super_block *upper_sb;
390 0 : int ret = 0;
391 :
392 0 : if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs))
393 : return -EROFS;
394 :
395 0 : if (*flags & SB_RDONLY && !sb_rdonly(sb)) {
396 0 : upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
397 0 : if (ovl_should_sync(ofs)) {
398 0 : down_read(&upper_sb->s_umount);
399 0 : ret = sync_filesystem(upper_sb);
400 0 : up_read(&upper_sb->s_umount);
401 : }
402 : }
403 :
404 : return ret;
405 : }
406 :
407 : static const struct super_operations ovl_super_operations = {
408 : .alloc_inode = ovl_alloc_inode,
409 : .free_inode = ovl_free_inode,
410 : .destroy_inode = ovl_destroy_inode,
411 : .drop_inode = generic_delete_inode,
412 : .put_super = ovl_put_super,
413 : .sync_fs = ovl_sync_fs,
414 : .statfs = ovl_statfs,
415 : .show_options = ovl_show_options,
416 : .remount_fs = ovl_remount,
417 : };
418 :
/*
 * Mount option tokens produced by match_token() on ovl_tokens.
 * OPT_ERR is the value paired with the terminating NULL pattern.
 */
enum {
	/* layer directories */
	OPT_LOWERDIR,
	OPT_UPPERDIR,
	OPT_WORKDIR,
	/* feature switches */
	OPT_DEFAULT_PERMISSIONS,
	OPT_REDIRECT_DIR,
	OPT_INDEX_ON,
	OPT_INDEX_OFF,
	OPT_UUID_ON,
	OPT_UUID_OFF,
	OPT_NFS_EXPORT_ON,
	OPT_USERXATTR,
	OPT_NFS_EXPORT_OFF,
	OPT_XINO_ON,
	OPT_XINO_OFF,
	OPT_XINO_AUTO,
	OPT_METACOPY_ON,
	OPT_METACOPY_OFF,
	OPT_VOLATILE,
	/* no match */
	OPT_ERR,
};
440 :
441 : static const match_table_t ovl_tokens = {
442 : {OPT_LOWERDIR, "lowerdir=%s"},
443 : {OPT_UPPERDIR, "upperdir=%s"},
444 : {OPT_WORKDIR, "workdir=%s"},
445 : {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
446 : {OPT_REDIRECT_DIR, "redirect_dir=%s"},
447 : {OPT_INDEX_ON, "index=on"},
448 : {OPT_INDEX_OFF, "index=off"},
449 : {OPT_USERXATTR, "userxattr"},
450 : {OPT_UUID_ON, "uuid=on"},
451 : {OPT_UUID_OFF, "uuid=off"},
452 : {OPT_NFS_EXPORT_ON, "nfs_export=on"},
453 : {OPT_NFS_EXPORT_OFF, "nfs_export=off"},
454 : {OPT_XINO_ON, "xino=on"},
455 : {OPT_XINO_OFF, "xino=off"},
456 : {OPT_XINO_AUTO, "xino=auto"},
457 : {OPT_METACOPY_ON, "metacopy=on"},
458 : {OPT_METACOPY_OFF, "metacopy=off"},
459 : {OPT_VOLATILE, "volatile"},
460 : {OPT_ERR, NULL}
461 : };
462 :
/*
 * Split the next option off the comma-separated string at *@s.
 *
 * The first unescaped ',' is overwritten with a NUL and *@s is advanced
 * past it.  A backslash protects the character that follows it from
 * being treated as a separator; the backslash itself is left in place.
 * When no separator remains, *@s is set to NULL.
 *
 * Returns the start of the option, or NULL if *@s was already NULL.
 */
static char *ovl_next_opt(char **s)
{
	char *start = *s;
	char *cur;

	if (!start)
		return NULL;

	cur = start;
	while (*cur) {
		if (*cur == ',') {
			*cur = '\0';
			*s = cur + 1;
			return start;
		}
		/* Skip the escaped character; stop on a trailing backslash */
		if (*cur == '\\' && !*++cur)
			break;
		cur++;
	}

	*s = NULL;
	return start;
}
485 :
486 2 : static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
487 : {
488 2 : if (strcmp(mode, "on") == 0) {
489 0 : config->redirect_dir = true;
490 : /*
491 : * Does not make sense to have redirect creation without
492 : * redirect following.
493 : */
494 0 : config->redirect_follow = true;
495 2 : } else if (strcmp(mode, "follow") == 0) {
496 0 : config->redirect_follow = true;
497 2 : } else if (strcmp(mode, "off") == 0) {
498 2 : if (ovl_redirect_always_follow)
499 0 : config->redirect_follow = true;
500 0 : } else if (strcmp(mode, "nofollow") != 0) {
501 0 : pr_err("bad mount option \"redirect_dir=%s\"\n",
502 : mode);
503 0 : return -EINVAL;
504 : }
505 :
506 : return 0;
507 : }
508 :
509 2 : static int ovl_parse_opt(char *opt, struct ovl_config *config)
510 : {
511 2 : char *p;
512 2 : int err;
513 2 : bool metacopy_opt = false, redirect_opt = false;
514 2 : bool nfs_export_opt = false, index_opt = false;
515 :
516 4 : config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
517 2 : if (!config->redirect_mode)
518 : return -ENOMEM;
519 :
520 8 : while ((p = ovl_next_opt(&opt)) != NULL) {
521 6 : int token;
522 6 : substring_t args[MAX_OPT_ARGS];
523 :
524 6 : if (!*p)
525 0 : continue;
526 :
527 6 : token = match_token(p, ovl_tokens, args);
528 6 : switch (token) {
529 2 : case OPT_UPPERDIR:
530 2 : kfree(config->upperdir);
531 2 : config->upperdir = match_strdup(&args[0]);
532 2 : if (!config->upperdir)
533 0 : return -ENOMEM;
534 : break;
535 :
536 2 : case OPT_LOWERDIR:
537 2 : kfree(config->lowerdir);
538 2 : config->lowerdir = match_strdup(&args[0]);
539 2 : if (!config->lowerdir)
540 : return -ENOMEM;
541 : break;
542 :
543 2 : case OPT_WORKDIR:
544 2 : kfree(config->workdir);
545 2 : config->workdir = match_strdup(&args[0]);
546 2 : if (!config->workdir)
547 : return -ENOMEM;
548 : break;
549 :
550 0 : case OPT_DEFAULT_PERMISSIONS:
551 0 : config->default_permissions = true;
552 0 : break;
553 :
554 0 : case OPT_REDIRECT_DIR:
555 0 : kfree(config->redirect_mode);
556 0 : config->redirect_mode = match_strdup(&args[0]);
557 0 : if (!config->redirect_mode)
558 : return -ENOMEM;
559 : redirect_opt = true;
560 : break;
561 :
562 0 : case OPT_INDEX_ON:
563 0 : config->index = true;
564 0 : index_opt = true;
565 0 : break;
566 :
567 0 : case OPT_INDEX_OFF:
568 0 : config->index = false;
569 0 : index_opt = true;
570 0 : break;
571 :
572 0 : case OPT_UUID_ON:
573 0 : config->uuid = true;
574 0 : break;
575 :
576 0 : case OPT_UUID_OFF:
577 0 : config->uuid = false;
578 0 : break;
579 :
580 0 : case OPT_NFS_EXPORT_ON:
581 0 : config->nfs_export = true;
582 0 : nfs_export_opt = true;
583 0 : break;
584 :
585 0 : case OPT_NFS_EXPORT_OFF:
586 0 : config->nfs_export = false;
587 0 : nfs_export_opt = true;
588 0 : break;
589 :
590 0 : case OPT_XINO_ON:
591 0 : config->xino = OVL_XINO_ON;
592 0 : break;
593 :
594 0 : case OPT_XINO_OFF:
595 0 : config->xino = OVL_XINO_OFF;
596 0 : break;
597 :
598 0 : case OPT_XINO_AUTO:
599 0 : config->xino = OVL_XINO_AUTO;
600 0 : break;
601 :
602 0 : case OPT_METACOPY_ON:
603 0 : config->metacopy = true;
604 0 : metacopy_opt = true;
605 0 : break;
606 :
607 0 : case OPT_METACOPY_OFF:
608 0 : config->metacopy = false;
609 0 : metacopy_opt = true;
610 0 : break;
611 :
612 0 : case OPT_VOLATILE:
613 0 : config->ovl_volatile = true;
614 0 : break;
615 :
616 0 : case OPT_USERXATTR:
617 0 : config->userxattr = true;
618 0 : break;
619 :
620 0 : default:
621 0 : pr_err("unrecognized mount option \"%s\" or missing value\n",
622 : p);
623 0 : return -EINVAL;
624 : }
625 : }
626 :
627 : /* Workdir/index are useless in non-upper mount */
628 2 : if (!config->upperdir) {
629 0 : if (config->workdir) {
630 0 : pr_info("option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
631 : config->workdir);
632 0 : kfree(config->workdir);
633 0 : config->workdir = NULL;
634 : }
635 0 : if (config->index && index_opt) {
636 0 : pr_info("option \"index=on\" is useless in a non-upper mount, ignore\n");
637 0 : index_opt = false;
638 : }
639 0 : config->index = false;
640 : }
641 :
642 2 : if (!config->upperdir && config->ovl_volatile) {
643 0 : pr_info("option \"volatile\" is meaningless in a non-upper mount, ignoring it.\n");
644 0 : config->ovl_volatile = false;
645 : }
646 :
647 2 : err = ovl_parse_redirect_mode(config, config->redirect_mode);
648 2 : if (err)
649 : return err;
650 :
651 : /*
652 : * This is to make the logic below simpler. It doesn't make any other
653 : * difference, since config->redirect_dir is only used for upper.
654 : */
655 2 : if (!config->upperdir && config->redirect_follow)
656 0 : config->redirect_dir = true;
657 :
658 : /* Resolve metacopy -> redirect_dir dependency */
659 2 : if (config->metacopy && !config->redirect_dir) {
660 0 : if (metacopy_opt && redirect_opt) {
661 0 : pr_err("conflicting options: metacopy=on,redirect_dir=%s\n",
662 : config->redirect_mode);
663 0 : return -EINVAL;
664 : }
665 0 : if (redirect_opt) {
666 : /*
667 : * There was an explicit redirect_dir=... that resulted
668 : * in this conflict.
669 : */
670 0 : pr_info("disabling metacopy due to redirect_dir=%s\n",
671 : config->redirect_mode);
672 0 : config->metacopy = false;
673 : } else {
674 : /* Automatically enable redirect otherwise. */
675 0 : config->redirect_follow = config->redirect_dir = true;
676 : }
677 : }
678 :
679 : /* Resolve nfs_export -> index dependency */
680 2 : if (config->nfs_export && !config->index) {
681 0 : if (!config->upperdir && config->redirect_follow) {
682 0 : pr_info("NFS export requires \"redirect_dir=nofollow\" on non-upper mount, falling back to nfs_export=off.\n");
683 0 : config->nfs_export = false;
684 0 : } else if (nfs_export_opt && index_opt) {
685 0 : pr_err("conflicting options: nfs_export=on,index=off\n");
686 0 : return -EINVAL;
687 0 : } else if (index_opt) {
688 : /*
689 : * There was an explicit index=off that resulted
690 : * in this conflict.
691 : */
692 0 : pr_info("disabling nfs_export due to index=off\n");
693 0 : config->nfs_export = false;
694 : } else {
695 : /* Automatically enable index otherwise. */
696 0 : config->index = true;
697 : }
698 : }
699 :
700 : /* Resolve nfs_export -> !metacopy dependency */
701 2 : if (config->nfs_export && config->metacopy) {
702 0 : if (nfs_export_opt && metacopy_opt) {
703 0 : pr_err("conflicting options: nfs_export=on,metacopy=on\n");
704 0 : return -EINVAL;
705 : }
706 0 : if (metacopy_opt) {
707 : /*
708 : * There was an explicit metacopy=on that resulted
709 : * in this conflict.
710 : */
711 0 : pr_info("disabling nfs_export due to metacopy=on\n");
712 0 : config->nfs_export = false;
713 : } else {
714 : /*
715 : * There was an explicit nfs_export=on that resulted
716 : * in this conflict.
717 : */
718 0 : pr_info("disabling metacopy due to nfs_export=on\n");
719 0 : config->metacopy = false;
720 : }
721 : }
722 :
723 :
724 : /* Resolve userxattr -> !redirect && !metacopy dependency */
725 2 : if (config->userxattr) {
726 0 : if (config->redirect_follow && redirect_opt) {
727 0 : pr_err("conflicting options: userxattr,redirect_dir=%s\n",
728 : config->redirect_mode);
729 0 : return -EINVAL;
730 : }
731 0 : if (config->metacopy && metacopy_opt) {
732 0 : pr_err("conflicting options: userxattr,metacopy=on\n");
733 0 : return -EINVAL;
734 : }
735 : /*
736 : * Silently disable default setting of redirect and metacopy.
737 : * This shall be the default in the future as well: these
738 : * options must be explicitly enabled if used together with
739 : * userxattr.
740 : */
741 0 : config->redirect_dir = config->redirect_follow = false;
742 0 : config->metacopy = false;
743 : }
744 :
745 : return 0;
746 : }
747 :
748 : #define OVL_WORKDIR_NAME "work"
749 : #define OVL_INDEXDIR_NAME "index"
750 :
751 2 : static struct dentry *ovl_workdir_create(struct ovl_fs *ofs,
752 : const char *name, bool persist)
753 : {
754 2 : struct inode *dir = ofs->workbasedir->d_inode;
755 2 : struct vfsmount *mnt = ovl_upper_mnt(ofs);
756 2 : struct dentry *work;
757 2 : int err;
758 2 : bool retried = false;
759 :
760 2 : inode_lock_nested(dir, I_MUTEX_PARENT);
761 2 : retry:
762 2 : work = lookup_one_len(name, ofs->workbasedir, strlen(name));
763 :
764 2 : if (!IS_ERR(work)) {
765 2 : struct iattr attr = {
766 : .ia_valid = ATTR_MODE,
767 : .ia_mode = S_IFDIR | 0,
768 : };
769 :
770 2 : if (work->d_inode) {
771 0 : err = -EEXIST;
772 0 : if (retried)
773 0 : goto out_dput;
774 :
775 0 : if (persist)
776 0 : goto out_unlock;
777 :
778 0 : retried = true;
779 0 : err = ovl_workdir_cleanup(dir, mnt, work, 0);
780 0 : dput(work);
781 0 : if (err == -EINVAL) {
782 0 : work = ERR_PTR(err);
783 0 : goto out_unlock;
784 : }
785 0 : goto retry;
786 : }
787 :
788 2 : work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode));
789 2 : err = PTR_ERR(work);
790 2 : if (IS_ERR(work))
791 0 : goto out_err;
792 :
793 : /*
794 : * Try to remove POSIX ACL xattrs from workdir. We are good if:
795 : *
796 : * a) success (there was a POSIX ACL xattr and was removed)
797 : * b) -ENODATA (there was no POSIX ACL xattr)
798 : * c) -EOPNOTSUPP (POSIX ACL xattrs are not supported)
799 : *
800 : * There are various other error values that could effectively
801 : * mean that the xattr doesn't exist (e.g. -ERANGE is returned
802 : * if the xattr name is too long), but the set of filesystems
803 : * allowed as upper are limited to "normal" ones, where checking
804 : * for the above two errors is sufficient.
805 : */
806 2 : err = vfs_removexattr(&init_user_ns, work,
807 : XATTR_NAME_POSIX_ACL_DEFAULT);
808 2 : if (err && err != -ENODATA && err != -EOPNOTSUPP)
809 0 : goto out_dput;
810 :
811 2 : err = vfs_removexattr(&init_user_ns, work,
812 : XATTR_NAME_POSIX_ACL_ACCESS);
813 2 : if (err && err != -ENODATA && err != -EOPNOTSUPP)
814 0 : goto out_dput;
815 :
816 : /* Clear any inherited mode bits */
817 2 : inode_lock(work->d_inode);
818 2 : err = notify_change(&init_user_ns, work, &attr, NULL);
819 2 : inode_unlock(work->d_inode);
820 2 : if (err)
821 0 : goto out_dput;
822 : } else {
823 0 : err = PTR_ERR(work);
824 0 : goto out_err;
825 : }
826 2 : out_unlock:
827 2 : inode_unlock(dir);
828 2 : return work;
829 :
830 0 : out_dput:
831 0 : dput(work);
832 0 : out_err:
833 0 : pr_warn("failed to create directory %s/%s (errno: %i); mounting read-only\n",
834 : ofs->config.workdir, name, -err);
835 0 : work = NULL;
836 0 : goto out_unlock;
837 : }
838 :
/*
 * Remove backslash escapes from @s in place: every backslash is dropped
 * and the character after it is kept literally.  A trailing backslash
 * simply ends the string.
 */
static void ovl_unescape(char *s)
{
	char *out = s;

	do {
		if (*s == '\\')
			s++;
		*out++ = *s;
	} while (*s++);
}
851 :
852 6 : static int ovl_mount_dir_noesc(const char *name, struct path *path)
853 : {
854 6 : int err = -EINVAL;
855 :
856 6 : if (!*name) {
857 0 : pr_err("empty lowerdir\n");
858 0 : goto out;
859 : }
860 6 : err = kern_path(name, LOOKUP_FOLLOW, path);
861 6 : if (err) {
862 0 : pr_err("failed to resolve '%s': %i\n", name, err);
863 0 : goto out;
864 : }
865 6 : err = -EINVAL;
866 6 : if (ovl_dentry_weird(path->dentry)) {
867 0 : pr_err("filesystem on '%s' not supported\n", name);
868 0 : goto out_put;
869 : }
870 6 : if (mnt_user_ns(path->mnt) != &init_user_ns) {
871 0 : pr_err("idmapped layers are currently not supported\n");
872 0 : goto out_put;
873 : }
874 6 : if (!d_is_dir(path->dentry)) {
875 0 : pr_err("'%s' not a directory\n", name);
876 0 : goto out_put;
877 : }
878 : return 0;
879 :
880 0 : out_put:
881 0 : path_put_init(path);
882 : out:
883 : return err;
884 : }
885 :
886 4 : static int ovl_mount_dir(const char *name, struct path *path)
887 : {
888 4 : int err = -ENOMEM;
889 4 : char *tmp = kstrdup(name, GFP_KERNEL);
890 :
891 4 : if (tmp) {
892 4 : ovl_unescape(tmp);
893 4 : err = ovl_mount_dir_noesc(tmp, path);
894 :
895 4 : if (!err && path->dentry->d_flags & DCACHE_OP_REAL) {
896 0 : pr_err("filesystem on '%s' not supported as upperdir\n",
897 : tmp);
898 0 : path_put_init(path);
899 0 : err = -EINVAL;
900 : }
901 4 : kfree(tmp);
902 : }
903 4 : return err;
904 : }
905 :
906 4 : static int ovl_check_namelen(struct path *path, struct ovl_fs *ofs,
907 : const char *name)
908 : {
909 4 : struct kstatfs statfs;
910 4 : int err = vfs_statfs(path, &statfs);
911 :
912 4 : if (err)
913 0 : pr_err("statfs failed on '%s'\n", name);
914 : else
915 4 : ofs->namelen = max(ofs->namelen, statfs.f_namelen);
916 :
917 4 : return err;
918 : }
919 :
920 2 : static int ovl_lower_dir(const char *name, struct path *path,
921 : struct ovl_fs *ofs, int *stack_depth)
922 : {
923 2 : int fh_type;
924 2 : int err;
925 :
926 2 : err = ovl_mount_dir_noesc(name, path);
927 2 : if (err)
928 : return err;
929 :
930 2 : err = ovl_check_namelen(path, ofs, name);
931 2 : if (err)
932 : return err;
933 :
934 2 : *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth);
935 :
936 : /*
937 : * The inodes index feature and NFS export need to encode and decode
938 : * file handles, so they require that all layers support them.
939 : */
940 2 : fh_type = ovl_can_decode_fh(path->dentry->d_sb);
941 2 : if ((ofs->config.nfs_export ||
942 2 : (ofs->config.index && ofs->config.upperdir)) && !fh_type) {
943 0 : ofs->config.index = false;
944 0 : ofs->config.nfs_export = false;
945 0 : pr_warn("fs on '%s' does not support file handles, falling back to index=off,nfs_export=off.\n",
946 : name);
947 : }
948 :
949 : /* Check if lower fs has 32bit inode numbers */
950 2 : if (fh_type != FILEID_INO32_GEN)
951 2 : ofs->xino_mode = -1;
952 :
953 : return 0;
954 : }
955 :
956 : /* Workdir should not be subdir of upperdir and vice versa */
957 2 : static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir)
958 : {
959 2 : bool ok = false;
960 :
961 2 : if (workdir != upperdir) {
962 2 : ok = (lock_rename(workdir, upperdir) == NULL);
963 2 : unlock_rename(workdir, upperdir);
964 : }
965 2 : return ok;
966 : }
967 :
/*
 * Split the ':'-separated list @str in place into consecutive
 * NUL-terminated strings and return how many there are (at least 1).
 * Backslash escapes are removed while splitting, so "\:" yields a
 * literal ':' inside an entry.
 */
static unsigned int ovl_split_lowerdirs(char *str)
{
	unsigned int count = 1;
	char *src = str;
	char *dst = str;

	for (;;) {
		if (*src == ':') {
			/* Unescaped separator: end the current entry */
			*dst++ = '\0';
			src++;
			count++;
			continue;
		}
		if (*src == '\\')
			src++;
		*dst = *src;
		if (*src == '\0')
			break;
		src++;
		dst++;
	}
	return count;
}
987 :
988 : static int __maybe_unused
989 : ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
990 : struct dentry *dentry, struct inode *inode,
991 : const char *name, void *buffer, size_t size)
992 : {
993 : return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
994 : }
995 :
996 : static int __maybe_unused
997 : ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
998 : struct user_namespace *mnt_userns,
999 : struct dentry *dentry, struct inode *inode,
1000 : const char *name, const void *value,
1001 : size_t size, int flags)
1002 : {
1003 : struct dentry *workdir = ovl_workdir(dentry);
1004 : struct inode *realinode = ovl_inode_real(inode);
1005 : struct posix_acl *acl = NULL;
1006 : int err;
1007 :
1008 : /* Check that everything is OK before copy-up */
1009 : if (value) {
1010 : acl = posix_acl_from_xattr(&init_user_ns, value, size);
1011 : if (IS_ERR(acl))
1012 : return PTR_ERR(acl);
1013 : }
1014 : err = -EOPNOTSUPP;
1015 : if (!IS_POSIXACL(d_inode(workdir)))
1016 : goto out_acl_release;
1017 : if (!realinode->i_op->set_acl)
1018 : goto out_acl_release;
1019 : if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) {
1020 : err = acl ? -EACCES : 0;
1021 : goto out_acl_release;
1022 : }
1023 : err = -EPERM;
1024 : if (!inode_owner_or_capable(&init_user_ns, inode))
1025 : goto out_acl_release;
1026 :
1027 : posix_acl_release(acl);
1028 :
1029 : /*
1030 : * Check if sgid bit needs to be cleared (actual setacl operation will
1031 : * be done with mounter's capabilities and so that won't do it for us).
1032 : */
1033 : if (unlikely(inode->i_mode & S_ISGID) &&
1034 : handler->flags == ACL_TYPE_ACCESS &&
1035 : !in_group_p(inode->i_gid) &&
1036 : !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
1037 : struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
1038 :
1039 : err = ovl_setattr(&init_user_ns, dentry, &iattr);
1040 : if (err)
1041 : return err;
1042 : }
1043 :
1044 : err = ovl_xattr_set(dentry, inode, handler->name, value, size, flags);
1045 : if (!err)
1046 : ovl_copyattr(ovl_inode_real(inode), inode);
1047 :
1048 : return err;
1049 :
1050 : out_acl_release:
1051 : posix_acl_release(acl);
1052 : return err;
1053 : }
1054 :
1055 0 : static int ovl_own_xattr_get(const struct xattr_handler *handler,
1056 : struct dentry *dentry, struct inode *inode,
1057 : const char *name, void *buffer, size_t size)
1058 : {
1059 0 : return -EOPNOTSUPP;
1060 : }
1061 :
1062 0 : static int ovl_own_xattr_set(const struct xattr_handler *handler,
1063 : struct user_namespace *mnt_userns,
1064 : struct dentry *dentry, struct inode *inode,
1065 : const char *name, const void *value,
1066 : size_t size, int flags)
1067 : {
1068 0 : return -EOPNOTSUPP;
1069 : }
1070 :
1071 0 : static int ovl_other_xattr_get(const struct xattr_handler *handler,
1072 : struct dentry *dentry, struct inode *inode,
1073 : const char *name, void *buffer, size_t size)
1074 : {
1075 0 : return ovl_xattr_get(dentry, inode, name, buffer, size);
1076 : }
1077 :
1078 0 : static int ovl_other_xattr_set(const struct xattr_handler *handler,
1079 : struct user_namespace *mnt_userns,
1080 : struct dentry *dentry, struct inode *inode,
1081 : const char *name, const void *value,
1082 : size_t size, int flags)
1083 : {
1084 0 : return ovl_xattr_set(dentry, inode, name, value, size, flags);
1085 : }
1086 :
1087 : static const struct xattr_handler __maybe_unused
1088 : ovl_posix_acl_access_xattr_handler = {
1089 : .name = XATTR_NAME_POSIX_ACL_ACCESS,
1090 : .flags = ACL_TYPE_ACCESS,
1091 : .get = ovl_posix_acl_xattr_get,
1092 : .set = ovl_posix_acl_xattr_set,
1093 : };
1094 :
1095 : static const struct xattr_handler __maybe_unused
1096 : ovl_posix_acl_default_xattr_handler = {
1097 : .name = XATTR_NAME_POSIX_ACL_DEFAULT,
1098 : .flags = ACL_TYPE_DEFAULT,
1099 : .get = ovl_posix_acl_xattr_get,
1100 : .set = ovl_posix_acl_xattr_set,
1101 : };
1102 :
1103 : static const struct xattr_handler ovl_own_trusted_xattr_handler = {
1104 : .prefix = OVL_XATTR_TRUSTED_PREFIX,
1105 : .get = ovl_own_xattr_get,
1106 : .set = ovl_own_xattr_set,
1107 : };
1108 :
1109 : static const struct xattr_handler ovl_own_user_xattr_handler = {
1110 : .prefix = OVL_XATTR_USER_PREFIX,
1111 : .get = ovl_own_xattr_get,
1112 : .set = ovl_own_xattr_set,
1113 : };
1114 :
1115 : static const struct xattr_handler ovl_other_xattr_handler = {
1116 : .prefix = "", /* catch all */
1117 : .get = ovl_other_xattr_get,
1118 : .set = ovl_other_xattr_set,
1119 : };
1120 :
1121 : static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
1122 : #ifdef CONFIG_FS_POSIX_ACL
1123 : &ovl_posix_acl_access_xattr_handler,
1124 : &ovl_posix_acl_default_xattr_handler,
1125 : #endif
1126 : &ovl_own_trusted_xattr_handler,
1127 : &ovl_other_xattr_handler,
1128 : NULL
1129 : };
1130 :
1131 : static const struct xattr_handler *ovl_user_xattr_handlers[] = {
1132 : #ifdef CONFIG_FS_POSIX_ACL
1133 : &ovl_posix_acl_access_xattr_handler,
1134 : &ovl_posix_acl_default_xattr_handler,
1135 : #endif
1136 : &ovl_own_user_xattr_handler,
1137 : &ovl_other_xattr_handler,
1138 : NULL
1139 : };
1140 :
1141 8 : static int ovl_setup_trap(struct super_block *sb, struct dentry *dir,
1142 : struct inode **ptrap, const char *name)
1143 : {
1144 8 : struct inode *trap;
1145 8 : int err;
1146 :
1147 8 : trap = ovl_get_trap_inode(sb, dir);
1148 8 : err = PTR_ERR_OR_ZERO(trap);
1149 0 : if (err) {
1150 0 : if (err == -ELOOP)
1151 0 : pr_err("conflicting %s path\n", name);
1152 0 : return err;
1153 : }
1154 :
1155 8 : *ptrap = trap;
1156 8 : return 0;
1157 : }
1158 :
1159 : /*
1160 : * Determine how we treat concurrent use of upperdir/workdir based on the
1161 : * index feature. This is papering over mount leaks of container runtimes,
1162 : * for example, an old overlay mount is leaked and now its upperdir is
1163 : * attempted to be used as a lower layer in a new overlay mount.
1164 : */
1165 0 : static int ovl_report_in_use(struct ovl_fs *ofs, const char *name)
1166 : {
1167 0 : if (ofs->config.index) {
1168 0 : pr_err("%s is in-use as upperdir/workdir of another mount, mount with '-o index=off' to override exclusive upperdir protection.\n",
1169 : name);
1170 0 : return -EBUSY;
1171 : } else {
1172 0 : pr_warn("%s is in-use as upperdir/workdir of another mount, accessing files from both mounts will result in undefined behavior.\n",
1173 : name);
1174 0 : return 0;
1175 : }
1176 : }
1177 :
1178 2 : static int ovl_get_upper(struct super_block *sb, struct ovl_fs *ofs,
1179 : struct ovl_layer *upper_layer, struct path *upperpath)
1180 : {
1181 2 : struct vfsmount *upper_mnt;
1182 2 : int err;
1183 :
1184 2 : err = ovl_mount_dir(ofs->config.upperdir, upperpath);
1185 2 : if (err)
1186 0 : goto out;
1187 :
1188 : /* Upper fs should not be r/o */
1189 2 : if (sb_rdonly(upperpath->mnt->mnt_sb)) {
1190 0 : pr_err("upper fs is r/o, try multi-lower layers mount\n");
1191 0 : err = -EINVAL;
1192 0 : goto out;
1193 : }
1194 :
1195 2 : err = ovl_check_namelen(upperpath, ofs, ofs->config.upperdir);
1196 2 : if (err)
1197 0 : goto out;
1198 :
1199 2 : err = ovl_setup_trap(sb, upperpath->dentry, &upper_layer->trap,
1200 : "upperdir");
1201 2 : if (err)
1202 0 : goto out;
1203 :
1204 2 : upper_mnt = clone_private_mount(upperpath);
1205 2 : err = PTR_ERR(upper_mnt);
1206 2 : if (IS_ERR(upper_mnt)) {
1207 0 : pr_err("failed to clone upperpath\n");
1208 0 : goto out;
1209 : }
1210 :
1211 : /* Don't inherit atime flags */
1212 2 : upper_mnt->mnt_flags &= ~(MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME);
1213 2 : upper_layer->mnt = upper_mnt;
1214 2 : upper_layer->idx = 0;
1215 2 : upper_layer->fsid = 0;
1216 :
1217 : /*
1218 : * Inherit SB_NOSEC flag from upperdir.
1219 : *
1220 : * This optimization changes behavior when a security related attribute
1221 : * (suid/sgid/security.*) is changed on an underlying layer. This is
1222 : * okay because we don't yet have guarantees in that case, but it will
1223 : * need careful treatment once we want to honour changes to underlying
1224 : * filesystems.
1225 : */
1226 2 : if (upper_mnt->mnt_sb->s_flags & SB_NOSEC)
1227 2 : sb->s_flags |= SB_NOSEC;
1228 :
1229 2 : if (ovl_inuse_trylock(ovl_upper_mnt(ofs)->mnt_root)) {
1230 2 : ofs->upperdir_locked = true;
1231 : } else {
1232 0 : err = ovl_report_in_use(ofs, "upperdir");
1233 0 : if (err)
1234 0 : goto out;
1235 : }
1236 :
1237 : err = 0;
1238 2 : out:
1239 2 : return err;
1240 : }
1241 :
1242 : /*
1243 : * Returns 1 if RENAME_WHITEOUT is supported, 0 if not supported and
1244 : * negative values if error is encountered.
1245 : */
1246 2 : static int ovl_check_rename_whiteout(struct dentry *workdir)
1247 : {
1248 2 : struct inode *dir = d_inode(workdir);
1249 2 : struct dentry *temp;
1250 2 : struct dentry *dest;
1251 2 : struct dentry *whiteout;
1252 2 : struct name_snapshot name;
1253 2 : int err;
1254 :
1255 2 : inode_lock_nested(dir, I_MUTEX_PARENT);
1256 :
1257 2 : temp = ovl_create_temp(workdir, OVL_CATTR(S_IFREG | 0));
1258 2 : err = PTR_ERR(temp);
1259 2 : if (IS_ERR(temp))
1260 0 : goto out_unlock;
1261 :
1262 2 : dest = ovl_lookup_temp(workdir);
1263 2 : err = PTR_ERR(dest);
1264 2 : if (IS_ERR(dest)) {
1265 0 : dput(temp);
1266 0 : goto out_unlock;
1267 : }
1268 :
1269 : /* Name is inline and stable - using snapshot as a copy helper */
1270 2 : take_dentry_name_snapshot(&name, temp);
1271 2 : err = ovl_do_rename(dir, temp, dir, dest, RENAME_WHITEOUT);
1272 2 : if (err) {
1273 0 : if (err == -EINVAL)
1274 0 : err = 0;
1275 0 : goto cleanup_temp;
1276 : }
1277 :
1278 2 : whiteout = lookup_one_len(name.name.name, workdir, name.name.len);
1279 2 : err = PTR_ERR(whiteout);
1280 2 : if (IS_ERR(whiteout))
1281 0 : goto cleanup_temp;
1282 :
1283 2 : err = ovl_is_whiteout(whiteout);
1284 :
1285 : /* Best effort cleanup of whiteout and temp file */
1286 2 : if (err)
1287 2 : ovl_cleanup(dir, whiteout);
1288 2 : dput(whiteout);
1289 :
1290 2 : cleanup_temp:
1291 2 : ovl_cleanup(dir, temp);
1292 2 : release_dentry_name_snapshot(&name);
1293 2 : dput(temp);
1294 2 : dput(dest);
1295 :
1296 2 : out_unlock:
1297 2 : inode_unlock(dir);
1298 :
1299 2 : return err;
1300 : }
1301 :
1302 0 : static struct dentry *ovl_lookup_or_create(struct dentry *parent,
1303 : const char *name, umode_t mode)
1304 : {
1305 0 : size_t len = strlen(name);
1306 0 : struct dentry *child;
1307 :
1308 0 : inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
1309 0 : child = lookup_one_len(name, parent, len);
1310 0 : if (!IS_ERR(child) && !child->d_inode)
1311 0 : child = ovl_create_real(parent->d_inode, child,
1312 0 : OVL_CATTR(mode));
1313 0 : inode_unlock(parent->d_inode);
1314 0 : dput(parent);
1315 :
1316 0 : return child;
1317 : }
1318 :
1319 : /*
1320 : * Creates $workdir/work/incompat/volatile/dirty file if it is not already
1321 : * present.
1322 : */
1323 0 : static int ovl_create_volatile_dirty(struct ovl_fs *ofs)
1324 : {
1325 0 : unsigned int ctr;
1326 0 : struct dentry *d = dget(ofs->workbasedir);
1327 : static const char *const volatile_path[] = {
1328 : OVL_WORKDIR_NAME, "incompat", "volatile", "dirty"
1329 : };
1330 : const char *const *name = volatile_path;
1331 :
1332 0 : for (ctr = ARRAY_SIZE(volatile_path); ctr; ctr--, name++) {
1333 0 : d = ovl_lookup_or_create(d, *name, ctr > 1 ? S_IFDIR : S_IFREG);
1334 0 : if (IS_ERR(d))
1335 0 : return PTR_ERR(d);
1336 : }
1337 0 : dput(d);
1338 0 : return 0;
1339 : }
1340 :
1341 2 : static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
1342 : struct path *workpath)
1343 : {
1344 2 : struct vfsmount *mnt = ovl_upper_mnt(ofs);
1345 2 : struct dentry *temp, *workdir;
1346 2 : bool rename_whiteout;
1347 2 : bool d_type;
1348 2 : int fh_type;
1349 2 : int err;
1350 :
1351 2 : err = mnt_want_write(mnt);
1352 2 : if (err)
1353 : return err;
1354 :
1355 2 : workdir = ovl_workdir_create(ofs, OVL_WORKDIR_NAME, false);
1356 2 : err = PTR_ERR(workdir);
1357 4 : if (IS_ERR_OR_NULL(workdir))
1358 0 : goto out;
1359 :
1360 2 : ofs->workdir = workdir;
1361 :
1362 2 : err = ovl_setup_trap(sb, ofs->workdir, &ofs->workdir_trap, "workdir");
1363 2 : if (err)
1364 0 : goto out;
1365 :
1366 : /*
1367 : * Upper should support d_type, else whiteouts are visible. Given
1368 : * workdir and upper are on same fs, we can do iterate_dir() on
1369 : * workdir. This check requires successful creation of workdir in
1370 : * previous step.
1371 : */
1372 2 : err = ovl_check_d_type_supported(workpath);
1373 2 : if (err < 0)
1374 0 : goto out;
1375 :
1376 2 : d_type = err;
1377 2 : if (!d_type)
1378 0 : pr_warn("upper fs needs to support d_type.\n");
1379 :
1380 : /* Check if upper/work fs supports O_TMPFILE */
1381 2 : temp = ovl_do_tmpfile(ofs->workdir, S_IFREG | 0);
1382 2 : ofs->tmpfile = !IS_ERR(temp);
1383 2 : if (ofs->tmpfile)
1384 2 : dput(temp);
1385 : else
1386 0 : pr_warn("upper fs does not support tmpfile.\n");
1387 :
1388 :
1389 : /* Check if upper/work fs supports RENAME_WHITEOUT */
1390 2 : err = ovl_check_rename_whiteout(ofs->workdir);
1391 2 : if (err < 0)
1392 0 : goto out;
1393 :
1394 2 : rename_whiteout = err;
1395 2 : if (!rename_whiteout)
1396 0 : pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
1397 :
1398 : /*
1399 : * Check if upper/work fs supports (trusted|user).overlay.* xattr
1400 : */
1401 2 : err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
1402 2 : if (err) {
1403 0 : ofs->noxattr = true;
1404 0 : ofs->config.index = false;
1405 0 : ofs->config.metacopy = false;
1406 0 : pr_warn("upper fs does not support xattr, falling back to index=off and metacopy=off.\n");
1407 0 : err = 0;
1408 : } else {
1409 2 : ovl_do_removexattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE);
1410 : }
1411 :
1412 : /*
1413 : * We allowed sub-optimal upper fs configuration and don't want to break
1414 : * users over kernel upgrade, but we never allowed remote upper fs, so
1415 : * we can enforce strict requirements for remote upper fs.
1416 : */
1417 2 : if (ovl_dentry_remote(ofs->workdir) &&
1418 0 : (!d_type || !rename_whiteout || ofs->noxattr)) {
1419 0 : pr_err("upper fs missing required features.\n");
1420 0 : err = -EINVAL;
1421 0 : goto out;
1422 : }
1423 :
1424 : /*
1425 : * For volatile mount, create a incompat/volatile/dirty file to keep
1426 : * track of it.
1427 : */
1428 2 : if (ofs->config.ovl_volatile) {
1429 0 : err = ovl_create_volatile_dirty(ofs);
1430 0 : if (err < 0) {
1431 0 : pr_err("Failed to create volatile/dirty file.\n");
1432 0 : goto out;
1433 : }
1434 : }
1435 :
1436 : /* Check if upper/work fs supports file handles */
1437 2 : fh_type = ovl_can_decode_fh(ofs->workdir->d_sb);
1438 2 : if (ofs->config.index && !fh_type) {
1439 0 : ofs->config.index = false;
1440 0 : pr_warn("upper fs does not support file handles, falling back to index=off.\n");
1441 : }
1442 :
1443 : /* Check if upper fs has 32bit inode numbers */
1444 2 : if (fh_type != FILEID_INO32_GEN)
1445 2 : ofs->xino_mode = -1;
1446 :
1447 : /* NFS export of r/w mount depends on index */
1448 2 : if (ofs->config.nfs_export && !ofs->config.index) {
1449 0 : pr_warn("NFS export requires \"index=on\", falling back to nfs_export=off.\n");
1450 0 : ofs->config.nfs_export = false;
1451 : }
1452 2 : out:
1453 2 : mnt_drop_write(mnt);
1454 2 : return err;
1455 : }
1456 :
1457 2 : static int ovl_get_workdir(struct super_block *sb, struct ovl_fs *ofs,
1458 : struct path *upperpath)
1459 : {
1460 2 : int err;
1461 2 : struct path workpath = { };
1462 :
1463 2 : err = ovl_mount_dir(ofs->config.workdir, &workpath);
1464 2 : if (err)
1465 0 : goto out;
1466 :
1467 2 : err = -EINVAL;
1468 2 : if (upperpath->mnt != workpath.mnt) {
1469 0 : pr_err("workdir and upperdir must reside under the same mount\n");
1470 0 : goto out;
1471 : }
1472 2 : if (!ovl_workdir_ok(workpath.dentry, upperpath->dentry)) {
1473 0 : pr_err("workdir and upperdir must be separate subtrees\n");
1474 0 : goto out;
1475 : }
1476 :
1477 2 : ofs->workbasedir = dget(workpath.dentry);
1478 :
1479 2 : if (ovl_inuse_trylock(ofs->workbasedir)) {
1480 2 : ofs->workdir_locked = true;
1481 : } else {
1482 0 : err = ovl_report_in_use(ofs, "workdir");
1483 0 : if (err)
1484 0 : goto out;
1485 : }
1486 :
1487 2 : err = ovl_setup_trap(sb, ofs->workbasedir, &ofs->workbasedir_trap,
1488 : "workdir");
1489 2 : if (err)
1490 0 : goto out;
1491 :
1492 2 : err = ovl_make_workdir(sb, ofs, &workpath);
1493 :
1494 2 : out:
1495 2 : path_put(&workpath);
1496 :
1497 2 : return err;
1498 : }
1499 :
1500 0 : static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
1501 : struct ovl_entry *oe, struct path *upperpath)
1502 : {
1503 0 : struct vfsmount *mnt = ovl_upper_mnt(ofs);
1504 0 : struct dentry *indexdir;
1505 0 : int err;
1506 :
1507 0 : err = mnt_want_write(mnt);
1508 0 : if (err)
1509 : return err;
1510 :
1511 : /* Verify lower root is upper root origin */
1512 0 : err = ovl_verify_origin(ofs, upperpath->dentry,
1513 : oe->lowerstack[0].dentry, true);
1514 0 : if (err) {
1515 0 : pr_err("failed to verify upper root origin\n");
1516 0 : goto out;
1517 : }
1518 :
1519 : /* index dir will act also as workdir */
1520 0 : iput(ofs->workdir_trap);
1521 0 : ofs->workdir_trap = NULL;
1522 0 : dput(ofs->workdir);
1523 0 : ofs->workdir = NULL;
1524 0 : indexdir = ovl_workdir_create(ofs, OVL_INDEXDIR_NAME, true);
1525 0 : if (IS_ERR(indexdir)) {
1526 0 : err = PTR_ERR(indexdir);
1527 0 : } else if (indexdir) {
1528 0 : ofs->indexdir = indexdir;
1529 0 : ofs->workdir = dget(indexdir);
1530 :
1531 0 : err = ovl_setup_trap(sb, ofs->indexdir, &ofs->indexdir_trap,
1532 : "indexdir");
1533 0 : if (err)
1534 0 : goto out;
1535 :
1536 : /*
1537 : * Verify upper root is exclusively associated with index dir.
1538 : * Older kernels stored upper fh in ".overlay.origin"
1539 : * xattr. If that xattr exists, verify that it is a match to
1540 : * upper dir file handle. In any case, verify or set xattr
1541 : * ".overlay.upper" to indicate that index may have
1542 : * directory entries.
1543 : */
1544 0 : if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
1545 0 : err = ovl_verify_set_fh(ofs, ofs->indexdir,
1546 : OVL_XATTR_ORIGIN,
1547 : upperpath->dentry, true, false);
1548 0 : if (err)
1549 0 : pr_err("failed to verify index dir 'origin' xattr\n");
1550 : }
1551 0 : err = ovl_verify_upper(ofs, ofs->indexdir, upperpath->dentry,
1552 : true);
1553 0 : if (err)
1554 0 : pr_err("failed to verify index dir 'upper' xattr\n");
1555 :
1556 : /* Cleanup bad/stale/orphan index entries */
1557 0 : if (!err)
1558 0 : err = ovl_indexdir_cleanup(ofs);
1559 : }
1560 0 : if (err || !ofs->indexdir)
1561 0 : pr_warn("try deleting index dir or mounting with '-o index=off' to disable inodes index.\n");
1562 :
1563 0 : out:
1564 0 : mnt_drop_write(mnt);
1565 0 : return err;
1566 : }
1567 :
1568 2 : static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
1569 : {
1570 2 : unsigned int i;
1571 :
1572 2 : if (!ofs->config.nfs_export && !ovl_upper_mnt(ofs))
1573 : return true;
1574 :
1575 : /*
1576 : * We allow using single lower with null uuid for index and nfs_export
1577 : * for example to support those features with single lower squashfs.
1578 : * To avoid regressions in setups of overlay with re-formatted lower
1579 : * squashfs, do not allow decoding origin with lower null uuid unless
1580 : * user opted-in to one of the new features that require following the
1581 : * lower inode of non-dir upper.
1582 : */
1583 2 : if (!ofs->config.index && !ofs->config.metacopy && !ofs->config.xino &&
1584 2 : uuid_is_null(uuid))
1585 : return false;
1586 :
1587 4 : for (i = 0; i < ofs->numfs; i++) {
1588 : /*
1589 : * We use uuid to associate an overlay lower file handle with a
1590 : * lower layer, so we can accept lower fs with null uuid as long
1591 : * as all lower layers with null uuid are on the same fs.
1592 : * if we detect multiple lower fs with the same uuid, we
1593 : * disable lower file handle decoding on all of them.
1594 : */
1595 2 : if (ofs->fs[i].is_lower &&
1596 0 : uuid_equal(&ofs->fs[i].sb->s_uuid, uuid)) {
1597 0 : ofs->fs[i].bad_uuid = true;
1598 0 : return false;
1599 : }
1600 : }
1601 : return true;
1602 : }
1603 :
1604 : /* Get a unique fsid for the layer */
1605 2 : static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
1606 : {
1607 2 : struct super_block *sb = path->mnt->mnt_sb;
1608 2 : unsigned int i;
1609 2 : dev_t dev;
1610 2 : int err;
1611 2 : bool bad_uuid = false;
1612 :
1613 4 : for (i = 0; i < ofs->numfs; i++) {
1614 2 : if (ofs->fs[i].sb == sb)
1615 0 : return i;
1616 : }
1617 :
1618 2 : if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
1619 0 : bad_uuid = true;
1620 0 : if (ofs->config.index || ofs->config.nfs_export) {
1621 0 : ofs->config.index = false;
1622 0 : ofs->config.nfs_export = false;
1623 0 : pr_warn("%s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
1624 : uuid_is_null(&sb->s_uuid) ? "null" :
1625 : "conflicting",
1626 : path->dentry);
1627 : }
1628 : }
1629 :
1630 2 : err = get_anon_bdev(&dev);
1631 2 : if (err) {
1632 0 : pr_err("failed to get anonymous bdev for lowerpath\n");
1633 0 : return err;
1634 : }
1635 :
1636 2 : ofs->fs[ofs->numfs].sb = sb;
1637 2 : ofs->fs[ofs->numfs].pseudo_dev = dev;
1638 2 : ofs->fs[ofs->numfs].bad_uuid = bad_uuid;
1639 :
1640 2 : return ofs->numfs++;
1641 : }
1642 :
1643 2 : static int ovl_get_layers(struct super_block *sb, struct ovl_fs *ofs,
1644 : struct path *stack, unsigned int numlower,
1645 : struct ovl_layer *layers)
1646 : {
1647 2 : int err;
1648 2 : unsigned int i;
1649 :
1650 2 : err = -ENOMEM;
1651 2 : ofs->fs = kcalloc(numlower + 1, sizeof(struct ovl_sb), GFP_KERNEL);
1652 2 : if (ofs->fs == NULL)
1653 0 : goto out;
1654 :
1655 : /* idx/fsid 0 are reserved for upper fs even with lower only overlay */
1656 2 : ofs->numfs++;
1657 :
1658 : /*
1659 : * All lower layers that share the same fs as upper layer, use the same
1660 : * pseudo_dev as upper layer. Allocate fs[0].pseudo_dev even for lower
1661 : * only overlay to simplify ovl_fs_free().
1662 : * is_lower will be set if upper fs is shared with a lower layer.
1663 : */
1664 2 : err = get_anon_bdev(&ofs->fs[0].pseudo_dev);
1665 2 : if (err) {
1666 0 : pr_err("failed to get anonymous bdev for upper fs\n");
1667 0 : goto out;
1668 : }
1669 :
1670 2 : if (ovl_upper_mnt(ofs)) {
1671 2 : ofs->fs[0].sb = ovl_upper_mnt(ofs)->mnt_sb;
1672 2 : ofs->fs[0].is_lower = false;
1673 : }
1674 :
1675 4 : for (i = 0; i < numlower; i++) {
1676 2 : struct vfsmount *mnt;
1677 2 : struct inode *trap;
1678 2 : int fsid;
1679 :
1680 2 : err = fsid = ovl_get_fsid(ofs, &stack[i]);
1681 2 : if (err < 0)
1682 0 : goto out;
1683 :
1684 : /*
1685 : * Check if lower root conflicts with this overlay layers before
1686 : * checking if it is in-use as upperdir/workdir of "another"
1687 : * mount, because we do not bother to check in ovl_is_inuse() if
1688 : * the upperdir/workdir is in fact in-use by our
1689 : * upperdir/workdir.
1690 : */
1691 2 : err = ovl_setup_trap(sb, stack[i].dentry, &trap, "lowerdir");
1692 2 : if (err)
1693 0 : goto out;
1694 :
1695 2 : if (ovl_is_inuse(stack[i].dentry)) {
1696 0 : err = ovl_report_in_use(ofs, "lowerdir");
1697 0 : if (err) {
1698 0 : iput(trap);
1699 0 : goto out;
1700 : }
1701 : }
1702 :
1703 2 : mnt = clone_private_mount(&stack[i]);
1704 2 : err = PTR_ERR(mnt);
1705 2 : if (IS_ERR(mnt)) {
1706 0 : pr_err("failed to clone lowerpath\n");
1707 0 : iput(trap);
1708 0 : goto out;
1709 : }
1710 :
1711 : /*
1712 : * Make lower layers R/O. That way fchmod/fchown on lower file
1713 : * will fail instead of modifying lower fs.
1714 : */
1715 2 : mnt->mnt_flags |= MNT_READONLY | MNT_NOATIME;
1716 :
1717 2 : layers[ofs->numlayer].trap = trap;
1718 2 : layers[ofs->numlayer].mnt = mnt;
1719 2 : layers[ofs->numlayer].idx = ofs->numlayer;
1720 2 : layers[ofs->numlayer].fsid = fsid;
1721 2 : layers[ofs->numlayer].fs = &ofs->fs[fsid];
1722 2 : ofs->numlayer++;
1723 2 : ofs->fs[fsid].is_lower = true;
1724 : }
1725 :
1726 : /*
1727 : * When all layers on same fs, overlay can use real inode numbers.
1728 : * With mount option "xino=<on|auto>", mounter declares that there are
1729 : * enough free high bits in underlying fs to hold the unique fsid.
1730 : * If overlayfs does encounter underlying inodes using the high xino
1731 : * bits reserved for fsid, it emits a warning and uses the original
1732 : * inode number or a non persistent inode number allocated from a
1733 : * dedicated range.
1734 : */
1735 2 : if (ofs->numfs - !ovl_upper_mnt(ofs) == 1) {
1736 0 : if (ofs->config.xino == OVL_XINO_ON)
1737 0 : pr_info("\"xino=on\" is useless with all layers on same fs, ignore.\n");
1738 0 : ofs->xino_mode = 0;
1739 2 : } else if (ofs->config.xino == OVL_XINO_OFF) {
1740 2 : ofs->xino_mode = -1;
1741 0 : } else if (ofs->xino_mode < 0) {
1742 : /*
1743 : * This is a roundup of number of bits needed for encoding
1744 : * fsid, where fsid 0 is reserved for upper fs (even with
1745 : * lower only overlay) +1 extra bit is reserved for the non
1746 : * persistent inode number range that is used for resolving
1747 : * xino lower bits overflow.
1748 : */
1749 0 : BUILD_BUG_ON(ilog2(OVL_MAX_STACK) > 30);
1750 0 : ofs->xino_mode = ilog2(ofs->numfs - 1) + 2;
1751 : }
1752 :
1753 2 : if (ofs->xino_mode > 0) {
1754 0 : pr_info("\"xino\" feature enabled using %d upper inode bits.\n",
1755 : ofs->xino_mode);
1756 : }
1757 :
1758 : err = 0;
1759 2 : out:
1760 2 : return err;
1761 : }
1762 :
1763 2 : static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb,
1764 : const char *lower, unsigned int numlower,
1765 : struct ovl_fs *ofs, struct ovl_layer *layers)
1766 : {
1767 2 : int err;
1768 2 : struct path *stack = NULL;
1769 2 : unsigned int i;
1770 2 : struct ovl_entry *oe;
1771 :
1772 2 : if (!ofs->config.upperdir && numlower == 1) {
1773 0 : pr_err("at least 2 lowerdir are needed while upperdir nonexistent\n");
1774 0 : return ERR_PTR(-EINVAL);
1775 : }
1776 :
1777 2 : stack = kcalloc(numlower, sizeof(struct path), GFP_KERNEL);
1778 2 : if (!stack)
1779 2 : return ERR_PTR(-ENOMEM);
1780 :
1781 4 : err = -EINVAL;
1782 4 : for (i = 0; i < numlower; i++) {
1783 2 : err = ovl_lower_dir(lower, &stack[i], ofs, &sb->s_stack_depth);
1784 2 : if (err)
1785 0 : goto out_err;
1786 :
1787 2 : lower = strchr(lower, '\0') + 1;
1788 : }
1789 :
1790 2 : err = -EINVAL;
1791 2 : sb->s_stack_depth++;
1792 2 : if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
1793 0 : pr_err("maximum fs stacking depth exceeded\n");
1794 0 : goto out_err;
1795 : }
1796 :
1797 2 : err = ovl_get_layers(sb, ofs, stack, numlower, layers);
1798 2 : if (err)
1799 0 : goto out_err;
1800 :
1801 2 : err = -ENOMEM;
1802 2 : oe = ovl_alloc_entry(numlower);
1803 2 : if (!oe)
1804 0 : goto out_err;
1805 :
1806 4 : for (i = 0; i < numlower; i++) {
1807 2 : oe->lowerstack[i].dentry = dget(stack[i].dentry);
1808 2 : oe->lowerstack[i].layer = &ofs->layers[i+1];
1809 : }
1810 :
1811 2 : out:
1812 4 : for (i = 0; i < numlower; i++)
1813 2 : path_put(&stack[i]);
1814 2 : kfree(stack);
1815 :
1816 2 : return oe;
1817 :
1818 0 : out_err:
1819 0 : oe = ERR_PTR(err);
1820 0 : goto out;
1821 : }
1822 :
1823 : /*
1824 : * Check if this layer root is a descendant of:
1825 : * - another layer of this overlayfs instance
1826 : * - upper/work dir of any overlayfs instance
1827 : */
1828 6 : static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs,
1829 : struct dentry *dentry, const char *name)
1830 : {
1831 6 : struct dentry *next = dentry, *parent;
1832 6 : int err = 0;
1833 :
1834 6 : if (!dentry)
1835 : return 0;
1836 :
1837 6 : parent = dget_parent(next);
1838 :
1839 : /* Walk back ancestors to root (inclusive) looking for traps */
1840 12 : while (!err && parent != next) {
1841 6 : if (ovl_lookup_trap_inode(sb, parent)) {
1842 0 : err = -ELOOP;
1843 0 : pr_err("overlapping %s path\n", name);
1844 6 : } else if (ovl_is_inuse(parent)) {
1845 0 : err = ovl_report_in_use(ofs, name);
1846 : }
1847 6 : next = parent;
1848 6 : parent = dget_parent(next);
1849 6 : dput(next);
1850 : }
1851 :
1852 6 : dput(parent);
1853 :
1854 6 : return err;
1855 : }
1856 :
1857 : /*
1858 : * Check if any of the layers or work dirs overlap.
1859 : */
1860 2 : static int ovl_check_overlapping_layers(struct super_block *sb,
1861 : struct ovl_fs *ofs)
1862 : {
1863 2 : int i, err;
1864 :
1865 2 : if (ovl_upper_mnt(ofs)) {
1866 2 : err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root,
1867 : "upperdir");
1868 2 : if (err)
1869 : return err;
1870 :
1871 : /*
1872 : * Checking workbasedir avoids hitting ovl_is_inuse(parent) of
1873 : * this instance and covers overlapping work and index dirs,
1874 : * unless work or index dir have been moved since created inside
1875 : * workbasedir. In that case, we already have their traps in
1876 : * inode cache and we will catch that case on lookup.
1877 : */
1878 2 : err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir");
1879 2 : if (err)
1880 : return err;
1881 : }
1882 :
1883 4 : for (i = 1; i < ofs->numlayer; i++) {
1884 4 : err = ovl_check_layer(sb, ofs,
1885 2 : ofs->layers[i].mnt->mnt_root,
1886 : "lowerdir");
1887 2 : if (err)
1888 0 : return err;
1889 : }
1890 :
1891 : return 0;
1892 : }
1893 :
1894 2 : static struct dentry *ovl_get_root(struct super_block *sb,
1895 : struct dentry *upperdentry,
1896 : struct ovl_entry *oe)
1897 : {
1898 2 : struct dentry *root;
1899 2 : struct ovl_path *lowerpath = &oe->lowerstack[0];
1900 2 : unsigned long ino = d_inode(lowerpath->dentry)->i_ino;
1901 2 : int fsid = lowerpath->layer->fsid;
1902 2 : struct ovl_inode_params oip = {
1903 : .upperdentry = upperdentry,
1904 : .lowerpath = lowerpath,
1905 : };
1906 :
1907 2 : root = d_make_root(ovl_new_inode(sb, S_IFDIR, 0));
1908 2 : if (!root)
1909 : return NULL;
1910 :
1911 2 : root->d_fsdata = oe;
1912 :
1913 2 : if (upperdentry) {
1914 : /* Root inode uses upper st_ino/i_ino */
1915 2 : ino = d_inode(upperdentry)->i_ino;
1916 2 : fsid = 0;
1917 2 : ovl_dentry_set_upper_alias(root);
1918 2 : if (ovl_is_impuredir(sb, upperdentry))
1919 0 : ovl_set_flag(OVL_IMPURE, d_inode(root));
1920 : }
1921 :
1922 : /* Root is always merge -> can have whiteouts */
1923 2 : ovl_set_flag(OVL_WHITEOUTS, d_inode(root));
1924 2 : ovl_dentry_set_flag(OVL_E_CONNECTED, root);
1925 2 : ovl_set_upperdata(d_inode(root));
1926 2 : ovl_inode_init(d_inode(root), &oip, ino, fsid);
1927 2 : ovl_dentry_update_reval(root, upperdentry, DCACHE_OP_WEAK_REVALIDATE);
1928 :
1929 2 : return root;
1930 : }
1931 :
1932 2 : static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1933 : {
1934 2 : struct path upperpath = { };
1935 2 : struct dentry *root_dentry;
1936 2 : struct ovl_entry *oe;
1937 2 : struct ovl_fs *ofs;
1938 2 : struct ovl_layer *layers;
1939 2 : struct cred *cred;
1940 2 : char *splitlower = NULL;
1941 2 : unsigned int numlower;
1942 2 : int err;
1943 :
1944 2 : err = -EIO;
1945 2 : if (WARN_ON(sb->s_user_ns != current_user_ns()))
1946 0 : goto out;
1947 :
1948 2 : sb->s_d_op = &ovl_dentry_operations;
1949 :
1950 2 : err = -ENOMEM;
1951 2 : ofs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL);
1952 2 : if (!ofs)
1953 0 : goto out;
1954 :
1955 2 : ofs->creator_cred = cred = prepare_creds();
1956 2 : if (!cred)
1957 0 : goto out_err;
1958 :
1959 : /* Is there a reason anyone would want not to share whiteouts? */
1960 2 : ofs->share_whiteout = true;
1961 :
1962 2 : ofs->config.index = ovl_index_def;
1963 2 : ofs->config.uuid = true;
1964 2 : ofs->config.nfs_export = ovl_nfs_export_def;
1965 2 : ofs->config.xino = ovl_xino_def();
1966 2 : ofs->config.metacopy = ovl_metacopy_def;
1967 2 : err = ovl_parse_opt((char *) data, &ofs->config);
1968 2 : if (err)
1969 0 : goto out_err;
1970 :
1971 2 : err = -EINVAL;
1972 2 : if (!ofs->config.lowerdir) {
1973 0 : if (!silent)
1974 0 : pr_err("missing 'lowerdir'\n");
1975 0 : goto out_err;
1976 : }
1977 :
1978 2 : err = -ENOMEM;
1979 2 : splitlower = kstrdup(ofs->config.lowerdir, GFP_KERNEL);
1980 2 : if (!splitlower)
1981 0 : goto out_err;
1982 :
1983 2 : numlower = ovl_split_lowerdirs(splitlower);
1984 2 : if (numlower > OVL_MAX_STACK) {
1985 0 : pr_err("too many lower directories, limit is %d\n",
1986 : OVL_MAX_STACK);
1987 0 : goto out_err;
1988 : }
1989 :
1990 2 : layers = kcalloc(numlower + 1, sizeof(struct ovl_layer), GFP_KERNEL);
1991 2 : if (!layers)
1992 0 : goto out_err;
1993 :
1994 2 : ofs->layers = layers;
1995 : /* Layer 0 is reserved for upper even if there's no upper */
1996 2 : ofs->numlayer = 1;
1997 :
1998 2 : sb->s_stack_depth = 0;
1999 2 : sb->s_maxbytes = MAX_LFS_FILESIZE;
2000 2 : atomic_long_set(&ofs->last_ino, 1);
2001 : /* Assume underlaying fs uses 32bit inodes unless proven otherwise */
2002 2 : if (ofs->config.xino != OVL_XINO_OFF) {
2003 0 : ofs->xino_mode = BITS_PER_LONG - 32;
2004 0 : if (!ofs->xino_mode) {
2005 : pr_warn("xino not supported on 32bit kernel, falling back to xino=off.\n");
2006 : ofs->config.xino = OVL_XINO_OFF;
2007 : }
2008 : }
2009 :
2010 : /* alloc/destroy_inode needed for setting up traps in inode cache */
2011 2 : sb->s_op = &ovl_super_operations;
2012 :
2013 2 : if (ofs->config.upperdir) {
2014 2 : struct super_block *upper_sb;
2015 :
2016 2 : if (!ofs->config.workdir) {
2017 0 : pr_err("missing 'workdir'\n");
2018 0 : goto out_err;
2019 : }
2020 :
2021 2 : err = ovl_get_upper(sb, ofs, &layers[0], &upperpath);
2022 2 : if (err)
2023 0 : goto out_err;
2024 :
2025 2 : upper_sb = ovl_upper_mnt(ofs)->mnt_sb;
2026 2 : if (!ovl_should_sync(ofs)) {
2027 0 : ofs->errseq = errseq_sample(&upper_sb->s_wb_err);
2028 0 : if (errseq_check(&upper_sb->s_wb_err, ofs->errseq)) {
2029 0 : err = -EIO;
2030 0 : pr_err("Cannot mount volatile when upperdir has an unseen error. Sync upperdir fs to clear state.\n");
2031 0 : goto out_err;
2032 : }
2033 : }
2034 :
2035 2 : err = ovl_get_workdir(sb, ofs, &upperpath);
2036 2 : if (err)
2037 0 : goto out_err;
2038 :
2039 2 : if (!ofs->workdir)
2040 0 : sb->s_flags |= SB_RDONLY;
2041 :
2042 2 : sb->s_stack_depth = upper_sb->s_stack_depth;
2043 2 : sb->s_time_gran = upper_sb->s_time_gran;
2044 : }
2045 2 : oe = ovl_get_lowerstack(sb, splitlower, numlower, ofs, layers);
2046 2 : err = PTR_ERR(oe);
2047 2 : if (IS_ERR(oe))
2048 0 : goto out_err;
2049 :
2050 : /* If the upper fs is nonexistent, we mark overlayfs r/o too */
2051 2 : if (!ovl_upper_mnt(ofs))
2052 0 : sb->s_flags |= SB_RDONLY;
2053 :
2054 2 : if (!ofs->config.uuid && ofs->numfs > 1) {
2055 0 : pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
2056 0 : ofs->config.uuid = true;
2057 : }
2058 :
2059 4 : if (!ovl_force_readonly(ofs) && ofs->config.index) {
2060 0 : err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
2061 0 : if (err)
2062 0 : goto out_free_oe;
2063 :
2064 : /* Force r/o mount with no index dir */
2065 0 : if (!ofs->indexdir)
2066 0 : sb->s_flags |= SB_RDONLY;
2067 : }
2068 :
2069 2 : err = ovl_check_overlapping_layers(sb, ofs);
2070 2 : if (err)
2071 0 : goto out_free_oe;
2072 :
2073 : /* Show index=off in /proc/mounts for forced r/o mount */
2074 2 : if (!ofs->indexdir) {
2075 2 : ofs->config.index = false;
2076 2 : if (ovl_upper_mnt(ofs) && ofs->config.nfs_export) {
2077 0 : pr_warn("NFS export requires an index dir, falling back to nfs_export=off.\n");
2078 0 : ofs->config.nfs_export = false;
2079 : }
2080 : }
2081 :
2082 2 : if (ofs->config.metacopy && ofs->config.nfs_export) {
2083 0 : pr_warn("NFS export is not supported with metadata only copy up, falling back to nfs_export=off.\n");
2084 0 : ofs->config.nfs_export = false;
2085 : }
2086 :
2087 2 : if (ofs->config.nfs_export)
2088 0 : sb->s_export_op = &ovl_export_operations;
2089 :
2090 : /* Never override disk quota limits or use reserved space */
2091 2 : cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
2092 :
2093 2 : sb->s_magic = OVERLAYFS_SUPER_MAGIC;
2094 2 : sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
2095 : ovl_trusted_xattr_handlers;
2096 2 : sb->s_fs_info = ofs;
2097 2 : sb->s_flags |= SB_POSIXACL;
2098 2 : sb->s_iflags |= SB_I_SKIP_SYNC;
2099 :
2100 2 : err = -ENOMEM;
2101 2 : root_dentry = ovl_get_root(sb, upperpath.dentry, oe);
2102 2 : if (!root_dentry)
2103 0 : goto out_free_oe;
2104 :
2105 2 : mntput(upperpath.mnt);
2106 2 : kfree(splitlower);
2107 :
2108 2 : sb->s_root = root_dentry;
2109 :
2110 2 : return 0;
2111 :
2112 0 : out_free_oe:
2113 0 : ovl_entry_stack_free(oe);
2114 0 : kfree(oe);
2115 0 : out_err:
2116 0 : kfree(splitlower);
2117 0 : path_put(&upperpath);
2118 0 : ovl_free_fs(ofs);
2119 : out:
2120 : return err;
2121 : }
2122 :
/*
 * Mount callback for the "overlay" filesystem type (installed as
 * ovl_fs_type.mount).
 *
 * Forwards fs_type, flags and raw_data to mount_nodev(), with
 * ovl_fill_super as the superblock fill callback, and returns whatever
 * mount_nodev() returns.  dev_name is accepted to match the callback
 * signature but is not used here.
 */
2123 2 : static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
2124 : const char *dev_name, void *raw_data)
2125 : {
2126 2 : return mount_nodev(fs_type, flags, raw_data, ovl_fill_super);
2127 : }
2128 :
/*
 * Filesystem type descriptor for overlayfs.  It is registered by
 * ovl_init() and unregistered by ovl_exit().
 *
 * The type is named "overlay", mounts through ovl_mount() and tears
 * superblocks down with kill_anon_super().
 * NOTE(review): FS_USERNS_MOUNT presumably permits mounting from a
 * non-initial user namespace -- confirm against include/linux/fs.h.
 */
2129 : static struct file_system_type ovl_fs_type = {
2130 : .owner = THIS_MODULE,
2131 : .name = "overlay",
2132 : .fs_flags = FS_USERNS_MOUNT,
2133 : .mount = ovl_mount,
2134 : .kill_sb = kill_anon_super,
2135 : };
/* Module alias for the "overlay" filesystem name used in .name above. */
2136 : MODULE_ALIAS_FS("overlay");
2137 :
/*
 * Object constructor handed to kmem_cache_create() for the "ovl_inode"
 * cache in ovl_init().
 *
 * foo points at a struct ovl_inode; the only work done is running
 * inode_init_once() on its embedded vfs_inode.
 */
2138 75 : static void ovl_inode_init_once(void *foo)
2139 : {
2140 75 : struct ovl_inode *oi = foo;
2141 :
2142 75 : inode_init_once(&oi->vfs_inode);
2143 75 : }
2144 :
/*
 * Module initialisation.
 *
 * Performs three steps in order: create the "ovl_inode" slab cache,
 * initialise the aio request cache, and register ovl_fs_type.  If a
 * later step fails, the earlier ones are undone before returning.
 *
 * Returns 0 on success, -ENOMEM if the inode cache cannot be created,
 * otherwise the error returned by ovl_aio_request_cache_init() or
 * register_filesystem().
 */
2145 1 : static int __init ovl_init(void)
2146 : {
2147 1 : int err;
2148 :
2149 1 : ovl_inode_cachep = kmem_cache_create("ovl_inode",
2150 : sizeof(struct ovl_inode), 0,
2151 : (SLAB_RECLAIM_ACCOUNT|
2152 : SLAB_MEM_SPREAD|SLAB_ACCOUNT),
2153 : ovl_inode_init_once);
2154 1 : if (ovl_inode_cachep == NULL)
2155 : return -ENOMEM;
2156 :
2157 1 : err = ovl_aio_request_cache_init();
2158 1 : if (!err) {
2159 1 : err = register_filesystem(&ovl_fs_type);
2160 1 : if (!err)
2161 : return 0;
2162 :
/* Registration failed: undo the aio request cache set up just above. */
2163 0 : ovl_aio_request_cache_destroy();
2164 : }
/* Reached on either failure above; the inode cache always exists here. */
2165 0 : kmem_cache_destroy(ovl_inode_cachep);
2166 :
2167 0 : return err;
2168 : }
2169 :
/*
 * Module teardown.
 *
 * Unregisters ovl_fs_type first, then waits with rcu_barrier() before
 * destroying the inode slab cache, and finally destroys the aio request
 * cache.
 */
2170 0 : static void __exit ovl_exit(void)
2171 : {
2172 0 : unregister_filesystem(&ovl_fs_type);
2173 :
2174 : /*
2175 : * Make sure all delayed rcu free inodes are flushed before we
2176 : * destroy cache.
2177 : */
2178 0 : rcu_barrier();
2179 0 : kmem_cache_destroy(ovl_inode_cachep);
2180 0 : ovl_aio_request_cache_destroy();
2181 0 : }
2182 :
/* Hook ovl_init() and ovl_exit() up as the module's init and exit routines. */
2183 : module_init(ovl_init);
2184 : module_exit(ovl_exit);
|