Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * mm/fadvise.c
4 : *
5 : * Copyright (C) 2002, Linus Torvalds
6 : *
7 : * 11Jan2003 Andrew Morton
8 : * Initial version.
9 : */
10 :
11 : #include <linux/kernel.h>
12 : #include <linux/file.h>
13 : #include <linux/fs.h>
14 : #include <linux/mm.h>
15 : #include <linux/pagemap.h>
16 : #include <linux/backing-dev.h>
17 : #include <linux/pagevec.h>
18 : #include <linux/fadvise.h>
19 : #include <linux/writeback.h>
20 : #include <linux/syscalls.h>
21 : #include <linux/swap.h>
22 :
23 : #include <asm/unistd.h>
24 :
25 : #include "internal.h"
26 :
27 : /*
28 : * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
29 : * deactivate the pages and clear PG_Referenced.
30 : */
31 :
32 236 : int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
33 : {
34 236 : struct inode *inode;
35 236 : struct address_space *mapping;
36 236 : struct backing_dev_info *bdi;
37 236 : loff_t endbyte; /* inclusive */
38 236 : pgoff_t start_index;
39 236 : pgoff_t end_index;
40 236 : unsigned long nrpages;
41 :
42 236 : inode = file_inode(file);
43 236 : if (S_ISFIFO(inode->i_mode))
44 : return -ESPIPE;
45 :
46 191 : mapping = file->f_mapping;
47 191 : if (!mapping || len < 0)
48 : return -EINVAL;
49 :
50 191 : bdi = inode_to_bdi(mapping->host);
51 :
52 191 : if (IS_DAX(inode) || (bdi == &noop_backing_dev_info)) {
53 172 : switch (advice) {
54 : case POSIX_FADV_NORMAL:
55 : case POSIX_FADV_RANDOM:
56 : case POSIX_FADV_SEQUENTIAL:
57 : case POSIX_FADV_WILLNEED:
58 : case POSIX_FADV_NOREUSE:
59 : case POSIX_FADV_DONTNEED:
60 : /* no bad return value, but ignore advice */
61 172 : break;
62 : default:
63 : return -EINVAL;
64 : }
65 172 : return 0;
66 : }
67 :
68 : /*
69 : * Careful about overflows. Len == 0 means "as much as possible". Use
70 : * unsigned math because signed overflows are undefined and UBSan
71 : * complains.
72 : */
73 19 : endbyte = (u64)offset + (u64)len;
74 19 : if (!len || endbyte < len)
75 : endbyte = -1;
76 : else
77 0 : endbyte--; /* inclusive */
78 :
79 19 : switch (advice) {
80 0 : case POSIX_FADV_NORMAL:
81 0 : file->f_ra.ra_pages = bdi->ra_pages;
82 0 : spin_lock(&file->f_lock);
83 0 : file->f_mode &= ~FMODE_RANDOM;
84 0 : spin_unlock(&file->f_lock);
85 : break;
86 16 : case POSIX_FADV_RANDOM:
87 16 : spin_lock(&file->f_lock);
88 16 : file->f_mode |= FMODE_RANDOM;
89 16 : spin_unlock(&file->f_lock);
90 : break;
91 3 : case POSIX_FADV_SEQUENTIAL:
92 3 : file->f_ra.ra_pages = bdi->ra_pages * 2;
93 3 : spin_lock(&file->f_lock);
94 3 : file->f_mode &= ~FMODE_RANDOM;
95 3 : spin_unlock(&file->f_lock);
96 : break;
97 0 : case POSIX_FADV_WILLNEED:
98 : /* First and last PARTIAL page! */
99 0 : start_index = offset >> PAGE_SHIFT;
100 0 : end_index = endbyte >> PAGE_SHIFT;
101 :
102 : /* Careful about overflow on the "+1" */
103 0 : nrpages = end_index - start_index + 1;
104 0 : if (!nrpages)
105 0 : nrpages = ~0UL;
106 :
107 0 : force_page_cache_readahead(mapping, file, start_index, nrpages);
108 : break;
109 : case POSIX_FADV_NOREUSE:
110 : break;
111 0 : case POSIX_FADV_DONTNEED:
112 0 : if (!inode_write_congested(mapping->host))
113 0 : __filemap_fdatawrite_range(mapping, offset, endbyte,
114 : WB_SYNC_NONE);
115 :
116 : /*
117 : * First and last FULL page! Partial pages are deliberately
118 : * preserved on the expectation that it is better to preserve
119 : * needed memory than to discard unneeded memory.
120 : */
121 0 : start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
122 0 : end_index = (endbyte >> PAGE_SHIFT);
123 : /*
124 : * The page at end_index will be inclusively discarded according
125 : * by invalidate_mapping_pages(), so subtracting 1 from
126 : * end_index means we will skip the last page. But if endbyte
127 : * is page aligned or is at the end of file, we should not skip
128 : * that page - discarding the last page is safe enough.
129 : */
130 0 : if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK &&
131 0 : endbyte != inode->i_size - 1) {
132 : /* First page is tricky as 0 - 1 = -1, but pgoff_t
133 : * is unsigned, so the end_index >= start_index
134 : * check below would be true and we'll discard the whole
135 : * file cache which is not what was asked.
136 : */
137 0 : if (end_index == 0)
138 : break;
139 :
140 0 : end_index--;
141 : }
142 :
143 0 : if (end_index >= start_index) {
144 0 : unsigned long nr_pagevec = 0;
145 :
146 : /*
147 : * It's common to FADV_DONTNEED right after
148 : * the read or write that instantiates the
149 : * pages, in which case there will be some
150 : * sitting on the local LRU cache. Try to
151 : * avoid the expensive remote drain and the
152 : * second cache tree walk below by flushing
153 : * them out right away.
154 : */
155 0 : lru_add_drain();
156 :
157 0 : invalidate_mapping_pagevec(mapping,
158 : start_index, end_index,
159 : &nr_pagevec);
160 :
161 : /*
162 : * If fewer pages were invalidated than expected then
163 : * it is possible that some of the pages were on
164 : * a per-cpu pagevec for a remote CPU. Drain all
165 : * pagevecs and try again.
166 : */
167 0 : if (nr_pagevec) {
168 0 : lru_add_drain_all();
169 0 : invalidate_mapping_pages(mapping, start_index,
170 : end_index);
171 : }
172 : }
173 : break;
174 : default:
175 : return -EINVAL;
176 : }
177 0 : return 0;
178 : }
179 : EXPORT_SYMBOL(generic_fadvise);
180 :
181 236 : int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
182 : {
183 236 : if (file->f_op->fadvise)
184 0 : return file->f_op->fadvise(file, offset, len, advice);
185 :
186 236 : return generic_fadvise(file, offset, len, advice);
187 : }
188 : EXPORT_SYMBOL(vfs_fadvise);
189 :
190 : #ifdef CONFIG_ADVISE_SYSCALLS
191 :
192 236 : int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
193 : {
194 236 : struct fd f = fdget(fd);
195 236 : int ret;
196 :
197 236 : if (!f.file)
198 : return -EBADF;
199 :
200 236 : ret = vfs_fadvise(f.file, offset, len, advice);
201 :
202 236 : fdput(f);
203 236 : return ret;
204 : }
205 :
206 0 : SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
207 : {
208 0 : return ksys_fadvise64_64(fd, offset, len, advice);
209 : }
210 :
211 : #ifdef __ARCH_WANT_SYS_FADVISE64
212 :
213 472 : SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
214 : {
215 236 : return ksys_fadvise64_64(fd, offset, len, advice);
216 : }
217 :
218 : #endif
219 : #endif
|