Line data Source code
1 : /* SPDX-License-Identifier: GPL-2.0 */
2 : #ifndef _LINUX_PAGEMAP_H
3 : #define _LINUX_PAGEMAP_H
4 :
5 : /*
6 : * Copyright 1995 Linus Torvalds
7 : */
8 : #include <linux/mm.h>
9 : #include <linux/fs.h>
10 : #include <linux/list.h>
11 : #include <linux/highmem.h>
12 : #include <linux/compiler.h>
13 : #include <linux/uaccess.h>
14 : #include <linux/gfp.h>
15 : #include <linux/bitops.h>
16 : #include <linux/hardirq.h> /* for in_interrupt() */
17 : #include <linux/hugetlb_inline.h>
18 :
19 : struct pagevec;
20 :
21 : /*
22 : * Bits in mapping->flags.
23 : */
24 : enum mapping_flags {
25 : AS_EIO = 0, /* IO error on async write */
26 : AS_ENOSPC = 1, /* ENOSPC on async write */
27 : AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */
28 : AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */
29 : AS_EXITING = 4, /* final truncate in progress */
30 : /* writeback related tags are not used */
31 : AS_NO_WRITEBACK_TAGS = 5,
32 : AS_THP_SUPPORT = 6, /* THPs supported */
33 : };
34 :
35 : /**
36 : * mapping_set_error - record a writeback error in the address_space
37 : * @mapping: the mapping in which an error should be set
38 : * @error: the error to set in the mapping
39 : *
40 : * When writeback fails in some way, we must record that error so that
41 : * userspace can be informed when fsync and the like are called. We endeavor
42 : * to report errors on any file that was open at the time of the error. Some
43 : * internal callers also need to know when writeback errors have occurred.
44 : *
45 : * When a writeback error occurs, most filesystems will want to call
46 : * mapping_set_error to record the error in the mapping so that it can be
47 : * reported when the application calls fsync(2).
48 : */
49 874 : static inline void mapping_set_error(struct address_space *mapping, int error)
50 : {
51 874 : if (likely(!error))
52 : return;
53 :
54 : /* Record in wb_err for checkers using errseq_t based tracking */
55 0 : __filemap_set_wb_err(mapping, error);
56 :
57 : /* Record it in superblock */
58 0 : if (mapping->host)
59 0 : errseq_set(&mapping->host->i_sb->s_wb_err, error);
60 :
61 : /* Record it in flags for now, for legacy callers */
62 0 : if (error == -ENOSPC)
63 0 : set_bit(AS_ENOSPC, &mapping->flags);
64 : else
65 0 : set_bit(AS_EIO, &mapping->flags);
66 : }
67 :
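/*
 * Illustrative sketch (not part of this header): a filesystem's writeback
 * completion path would typically record a failure roughly like this; the
 * helper name example_record_write_error() is hypothetical.
 */
static inline void example_record_write_error(struct page *page, int error)
{
        struct address_space *mapping = page_mapping(page);

        /* Remember the failure so a later fsync(2)/msync(2) reports it. */
        if (mapping)
                mapping_set_error(mapping, error);
}
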
68 6 : static inline void mapping_set_unevictable(struct address_space *mapping)
69 : {
70 6 : set_bit(AS_UNEVICTABLE, &mapping->flags);
71 0 : }
72 :
73 0 : static inline void mapping_clear_unevictable(struct address_space *mapping)
74 : {
75 0 : clear_bit(AS_UNEVICTABLE, &mapping->flags);
76 0 : }
77 :
78 96343 : static inline bool mapping_unevictable(struct address_space *mapping)
79 : {
80 96343 : return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
81 : }
82 :
83 3947 : static inline void mapping_set_exiting(struct address_space *mapping)
84 : {
85 3947 : set_bit(AS_EXITING, &mapping->flags);
86 : }
87 :
88 0 : static inline int mapping_exiting(struct address_space *mapping)
89 : {
90 0 : return test_bit(AS_EXITING, &mapping->flags);
91 : }
92 :
93 : static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
94 : {
95 : set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
96 : }
97 :
98 2568 : static inline int mapping_use_writeback_tags(struct address_space *mapping)
99 : {
100 2568 : return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
101 : }
102 :
103 108241 : static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
104 : {
105 108241 : return mapping->gfp_mask;
106 : }
107 :
108 : /* Restricts the given gfp_mask to what the mapping allows. */
109 4131 : static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
110 : gfp_t gfp_mask)
111 : {
112 4131 : return mapping_gfp_mask(mapping) & gfp_mask;
113 : }
114 :
115 : /*
116 : * This is non-atomic. Only to be used before the mapping is activated.
117 : * Probably needs a barrier...
118 : */
119 19473 : static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
120 : {
121 18645 : m->gfp_mask = mask;
122 : }
123 :
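/*
 * Illustrative sketch (hypothetical helper, not part of this header):
 * narrowing a mapping's allocation mask at inode-setup time so that
 * pagecache allocations for this inode never recurse into the filesystem.
 */
static inline void example_disallow_fs_recursion(struct address_space *mapping)
{
        /* Must run before the mapping is in use; the store is non-atomic. */
        mapping_set_gfp_mask(mapping, mapping_gfp_mask(mapping) & ~__GFP_FS);
}
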
124 : static inline bool mapping_thp_support(struct address_space *mapping)
125 : {
126 : return test_bit(AS_THP_SUPPORT, &mapping->flags);
127 : }
128 :
129 : static inline int filemap_nr_thps(struct address_space *mapping)
130 : {
131 : #ifdef CONFIG_READ_ONLY_THP_FOR_FS
132 : return atomic_read(&mapping->nr_thps);
133 : #else
134 : return 0;
135 : #endif
136 : }
137 :
138 0 : static inline void filemap_nr_thps_inc(struct address_space *mapping)
139 : {
140 : #ifdef CONFIG_READ_ONLY_THP_FOR_FS
141 : if (!mapping_thp_support(mapping))
142 : atomic_inc(&mapping->nr_thps);
143 : #else
144 0 : WARN_ON_ONCE(1);
145 : #endif
146 0 : }
147 :
148 0 : static inline void filemap_nr_thps_dec(struct address_space *mapping)
149 : {
150 : #ifdef CONFIG_READ_ONLY_THP_FOR_FS
151 : if (!mapping_thp_support(mapping))
152 : atomic_dec(&mapping->nr_thps);
153 : #else
154 0 : WARN_ON_ONCE(1);
155 : #endif
156 0 : }
157 :
158 : void release_pages(struct page **pages, int nr);
159 :
160 : /*
161 : * speculatively take a reference to a page.
162 : * If the page is free (_refcount == 0), then _refcount is untouched, and 0
163 : * is returned. Otherwise, _refcount is incremented by 1 and 1 is returned.
164 : *
165 : * This function must be called inside the same rcu_read_lock() section as has
166 : * been used to lookup the page in the pagecache radix-tree (or page table):
167 : * this allows allocators to use a synchronize_rcu() to stabilize _refcount.
168 : *
169 : * Unless an RCU grace period has passed, the count of all pages coming out
170 : * of the allocator must be considered unstable. page_count may return higher
171 : * than expected, and put_page must be able to do the right thing when the
172 : * page has been finished with, no matter what it is subsequently allocated
173 : * for (because put_page is what is used here to drop an invalid speculative
174 : * reference).
175 : *
176 : * This is the interesting part of the lockless pagecache (and lockless
177 : * get_user_pages) locking protocol, where the lookup-side (eg. find_get_page)
178 : * has the following pattern:
179 : * 1. find page in radix tree
180 : * 2. conditionally increment refcount
181 : * 3. check the page is still in pagecache (if no, goto 1)
182 : *
183 : * Remove-side that cares about stability of _refcount (eg. reclaim) has the
184 : * following (with the i_pages lock held):
185 : * A. atomically check refcount is correct and set it to 0 (atomic_cmpxchg)
186 : * B. remove page from pagecache
187 : * C. free the page
188 : *
189 : * There are 2 critical interleavings that matter:
190 : * - 2 runs before A: in this case, A sees elevated refcount and bails out
191 : * - A runs before 2: in this case, 2 sees zero refcount and retries;
192 : * subsequently, B will complete and 1 will find no page, causing the
193 : * lookup to return NULL.
194 : *
195 : * It is possible that between 1 and 2, the page is removed then the exact same
196 : * page is inserted into the same position in pagecache. That's OK: the
197 : * old find_get_page using a lock could equally have run before or after
198 : * such a re-insertion, depending on order that locks are granted.
199 : *
200 : * Lookups racing against pagecache insertion isn't a big problem: either 1
201 : * will find the page or it will not. Likewise, the old find_get_page could run
202 : * either before the insertion or afterwards, depending on timing.
203 : */
204 2 : static inline int __page_cache_add_speculative(struct page *page, int count)
205 : {
206 : #ifdef CONFIG_TINY_RCU
207 : # ifdef CONFIG_PREEMPT_COUNT
208 : VM_BUG_ON(!in_atomic() && !irqs_disabled());
209 : # endif
210 : /*
211 : * Preempt must be disabled here - we rely on rcu_read_lock doing
212 : * this for us.
213 : *
214 : * Pagecache won't be truncated from interrupt context, so if we have
215 : * found a page in the radix tree here, we have pinned its refcount by
216 : * disabling preempt, and hence no need for the "speculative get" that
217 : * SMP requires.
218 : */
219 : VM_BUG_ON_PAGE(page_count(page) == 0, page);
220 : page_ref_add(page, count);
221 :
222 : #else
223 2 : if (unlikely(!page_ref_add_unless(page, count, 0))) {
224 : /*
225 : * Either the page has been freed, or will be freed.
226 : * In either case, retry here and the caller should
227 : * do the right thing (see comments above).
228 : */
229 : return 0;
230 : }
231 : #endif
232 2 : VM_BUG_ON_PAGE(PageTail(page), page);
233 :
234 : return 1;
235 : }
236 :
237 0 : static inline int page_cache_get_speculative(struct page *page)
238 : {
239 0 : return __page_cache_add_speculative(page, 1);
240 : }
241 :
242 2 : static inline int page_cache_add_speculative(struct page *page, int count)
243 : {
244 2 : return __page_cache_add_speculative(page, count);
245 : }
246 :
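/*
 * Illustrative sketch of the lookup side of the protocol described above
 * (a simplified version of what mm/filemap.c does; the function name is
 * hypothetical and this is not part of this header):
 */
static inline struct page *example_lockless_lookup(struct address_space *mapping,
                                                   pgoff_t index)
{
        struct page *page;

        rcu_read_lock();
repeat:
        page = xa_load(&mapping->i_pages, index);       /* 1. find page */
        if (!page || xa_is_value(page)) {               /* empty slot or shadow entry */
                page = NULL;
                goto out;
        }
        if (!page_cache_get_speculative(page))          /* 2. conditionally get a ref */
                goto repeat;
        /* 3. check the page is still in the pagecache at @index */
        if (unlikely(page != xa_load(&mapping->i_pages, index))) {
                put_page(page);
                goto repeat;
        }
out:
        rcu_read_unlock();
        return page;
}
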
247 : /**
248 : * attach_page_private - Attach private data to a page.
249 : * @page: Page to attach data to.
250 : * @data: Data to attach to page.
251 : *
252 : * Attaching private data to a page increments the page's reference count.
253 : * The data must be detached before the page can be freed.
254 : */
255 5842 : static inline void attach_page_private(struct page *page, void *data)
256 : {
257 5842 : get_page(page);
258 5842 : set_page_private(page, (unsigned long)data);
259 5842 : SetPagePrivate(page);
260 5842 : }
261 :
262 : /**
263 : * detach_page_private - Detach private data from a page.
264 : * @page: Page to detach data from.
265 : *
266 : * Removes the data that was previously attached to the page and decrements
267 : * the refcount on the page.
268 : *
269 : * Return: Data that was attached to the page.
270 : */
271 498 : static inline void *detach_page_private(struct page *page)
272 : {
273 498 : void *data = (void *)page_private(page);
274 :
275 498 : if (!PagePrivate(page))
276 : return NULL;
277 498 : ClearPagePrivate(page);
278 498 : set_page_private(page, 0);
279 498 : put_page(page);
280 :
281 498 : return data;
282 : }
283 :
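/*
 * Illustrative sketch (hypothetical filesystem code, not part of this
 * header): attach_page_private() and detach_page_private() are used as a
 * pair; struct example_page_state and both helpers are made-up names.
 */
struct example_page_state;

static inline void example_track_page(struct page *page,
                                      struct example_page_state *state)
{
        /* Takes an extra page reference; the page stays pinned while attached. */
        attach_page_private(page, state);
}

static inline struct example_page_state *example_untrack_page(struct page *page)
{
        /* Returns the attached state and drops the extra reference. */
        return detach_page_private(page);
}
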
284 : #ifdef CONFIG_NUMA
285 : extern struct page *__page_cache_alloc(gfp_t gfp);
286 : #else
287 : static inline struct page *__page_cache_alloc(gfp_t gfp)
288 : {
289 : return alloc_pages(gfp, 0);
290 : }
291 : #endif
292 :
293 0 : static inline struct page *page_cache_alloc(struct address_space *x)
294 : {
295 0 : return __page_cache_alloc(mapping_gfp_mask(x));
296 : }
297 :
298 2173 : static inline gfp_t readahead_gfp_mask(struct address_space *x)
299 : {
300 2173 : return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
301 : }
302 :
303 : typedef int filler_t(void *, struct page *);
304 :
305 : pgoff_t page_cache_next_miss(struct address_space *mapping,
306 : pgoff_t index, unsigned long max_scan);
307 : pgoff_t page_cache_prev_miss(struct address_space *mapping,
308 : pgoff_t index, unsigned long max_scan);
309 :
310 : #define FGP_ACCESSED 0x00000001
311 : #define FGP_LOCK 0x00000002
312 : #define FGP_CREAT 0x00000004
313 : #define FGP_WRITE 0x00000008
314 : #define FGP_NOFS 0x00000010
315 : #define FGP_NOWAIT 0x00000020
316 : #define FGP_FOR_MMAP 0x00000040
317 : #define FGP_HEAD 0x00000080
318 : #define FGP_ENTRY 0x00000100
319 :
320 : struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
321 : int fgp_flags, gfp_t cache_gfp_mask);
322 :
323 : /**
324 : * find_get_page - find and get a page reference
325 : * @mapping: the address_space to search
326 : * @offset: the page index
327 : *
328 : * Looks up the page cache slot at @mapping & @offset. If there is a
329 : * page cache page, it is returned with an increased refcount.
330 : *
331 : * Otherwise, %NULL is returned.
332 : */
333 0 : static inline struct page *find_get_page(struct address_space *mapping,
334 : pgoff_t offset)
335 : {
336 0 : return pagecache_get_page(mapping, offset, 0, 0);
337 : }
338 :
339 21209 : static inline struct page *find_get_page_flags(struct address_space *mapping,
340 : pgoff_t offset, int fgp_flags)
341 : {
342 21209 : return pagecache_get_page(mapping, offset, fgp_flags, 0);
343 : }
344 :
345 : /**
346 : * find_lock_page - locate, pin and lock a pagecache page
347 : * @mapping: the address_space to search
348 : * @index: the page index
349 : *
350 : * Looks up the page cache entry at @mapping & @index. If there is a
351 : * page cache page, it is returned locked and with an increased
352 : * refcount.
353 : *
354 : * Context: May sleep.
355 : * Return: A struct page or %NULL if there is no page in the cache for this
356 : * index.
357 : */
358 2 : static inline struct page *find_lock_page(struct address_space *mapping,
359 : pgoff_t index)
360 : {
361 2 : return pagecache_get_page(mapping, index, FGP_LOCK, 0);
362 : }
363 :
364 : /**
365 : * find_lock_head - Locate, pin and lock a pagecache page.
366 : * @mapping: The address_space to search.
367 : * @index: The page index.
368 : *
369 : * Looks up the page cache entry at @mapping & @index. If there is a
370 : * page cache page, its head page is returned locked and with an increased
371 : * refcount.
372 : *
373 : * Context: May sleep.
374 : * Return: A struct page which is !PageTail, or %NULL if there is no page
375 : * in the cache for this index.
376 : */
377 : static inline struct page *find_lock_head(struct address_space *mapping,
378 : pgoff_t index)
379 : {
380 : return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0);
381 : }
382 :
383 : /**
384 : * find_or_create_page - locate or add a pagecache page
385 : * @mapping: the page's address_space
386 : * @index: the page's index into the mapping
387 : * @gfp_mask: page allocation mode
388 : *
389 : * Looks up the page cache slot at @mapping & @index. If there is a
390 : * page cache page, it is returned locked and with an increased
391 : * refcount.
392 : *
393 : * If the page is not present, a new page is allocated using @gfp_mask
394 : * and added to the page cache and the VM's LRU list. The page is
395 : * returned locked and with an increased refcount.
396 : *
397 : * On memory exhaustion, %NULL is returned.
398 : *
399 : * find_or_create_page() may sleep, even if @gfp_mask specifies an
400 : * atomic allocation!
401 : */
402 4163 : static inline struct page *find_or_create_page(struct address_space *mapping,
403 : pgoff_t index, gfp_t gfp_mask)
404 : {
405 4163 : return pagecache_get_page(mapping, index,
406 : FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
407 : gfp_mask);
408 : }
409 :
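/*
 * Illustrative sketch (hypothetical caller, not part of this header): the
 * usual pattern around find_or_create_page() - work on the locked page,
 * then unlock it and drop the reference that the lookup took.  Note that
 * unlock_page() is declared further down in this header and put_page()
 * comes from <linux/mm.h>.
 */
static inline int example_touch_index(struct address_space *mapping,
                                      pgoff_t index)
{
        struct page *page;

        page = find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
        if (!page)
                return -ENOMEM;

        /* ... read or modify the locked, referenced page here ... */

        unlock_page(page);
        put_page(page);
        return 0;
}
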
410 : /**
411 : * grab_cache_page_nowait - returns locked page at given index in given cache
412 : * @mapping: target address_space
413 : * @index: the page index
414 : *
415 : * Same as grab_cache_page(), but do not wait if the page is unavailable.
416 : * This is intended for speculative data generators, where the data can
417 : * be regenerated if the page couldn't be grabbed. This routine should
418 : * be safe to call while holding the lock for another page.
419 : *
420 : * Clear __GFP_FS when allocating the page to avoid recursion into the fs
421 : * and deadlock against the caller's locked page.
422 : */
423 : static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
424 : pgoff_t index)
425 : {
426 : return pagecache_get_page(mapping, index,
427 : FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
428 : mapping_gfp_mask(mapping));
429 : }
430 :
431 : /* Does this page contain this index? */
432 0 : static inline bool thp_contains(struct page *head, pgoff_t index)
433 : {
434 : /* HugeTLBfs indexes the page cache in units of hpage_size */
435 0 : if (PageHuge(head))
436 : return head->index == index;
437 0 : return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL));
438 : }
439 :
440 : /*
441 : * Given the page we found in the page cache, return the page corresponding
442 : * to this index in the file
443 : */
444 0 : static inline struct page *find_subpage(struct page *head, pgoff_t index)
445 : {
446 : /* HugeTLBfs wants the head page regardless */
447 0 : if (PageHuge(head))
448 : return head;
449 :
450 0 : return head + (index & (thp_nr_pages(head) - 1));
451 : }
452 :
453 : unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
454 : pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
455 : unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
456 : pgoff_t end, unsigned int nr_pages,
457 : struct page **pages);
458 : static inline unsigned find_get_pages(struct address_space *mapping,
459 : pgoff_t *start, unsigned int nr_pages,
460 : struct page **pages)
461 : {
462 : return find_get_pages_range(mapping, start, (pgoff_t)-1, nr_pages,
463 : pages);
464 : }
465 : unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
466 : unsigned int nr_pages, struct page **pages);
467 : unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
468 : pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
469 : struct page **pages);
470 : static inline unsigned find_get_pages_tag(struct address_space *mapping,
471 : pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
472 : struct page **pages)
473 : {
474 : return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
475 : nr_pages, pages);
476 : }
477 :
478 : struct page *grab_cache_page_write_begin(struct address_space *mapping,
479 : pgoff_t index, unsigned flags);
480 :
481 : /*
482 : * Returns locked page at given index in given cache, creating it if needed.
483 : */
484 0 : static inline struct page *grab_cache_page(struct address_space *mapping,
485 : pgoff_t index)
486 : {
487 0 : return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
488 : }
489 :
490 : extern struct page * read_cache_page(struct address_space *mapping,
491 : pgoff_t index, filler_t *filler, void *data);
492 : extern struct page * read_cache_page_gfp(struct address_space *mapping,
493 : pgoff_t index, gfp_t gfp_mask);
494 : extern int read_cache_pages(struct address_space *mapping,
495 : struct list_head *pages, filler_t *filler, void *data);
496 :
497 1 : static inline struct page *read_mapping_page(struct address_space *mapping,
498 : pgoff_t index, void *data)
499 : {
500 1 : return read_cache_page(mapping, index, NULL, data);
501 : }
502 :
503 : /*
504 : * Get index of the page with in radix-tree
505 : * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE)
506 : */
507 136 : static inline pgoff_t page_to_index(struct page *page)
508 : {
509 136 : pgoff_t pgoff;
510 :
511 136 : if (likely(!PageTransTail(page)))
512 136 : return page->index;
513 :
514 : /*
515 : * We don't initialize ->index for tail pages: calculate based on
516 : * head page
517 : */
518 0 : pgoff = compound_head(page)->index;
519 0 : pgoff += page - compound_head(page);
520 0 : return pgoff;
521 : }
522 :
523 : /*
524 : * Get the offset in PAGE_SIZE.
525 : * (TODO: hugepage should have ->index in PAGE_SIZE)
526 : */
527 136 : static inline pgoff_t page_to_pgoff(struct page *page)
528 : {
529 136 : if (unlikely(PageHeadHuge(page)))
530 : return page->index << compound_order(page);
531 :
532 136 : return page_to_index(page);
533 : }
534 :
535 : /*
536 : * Return byte-offset into filesystem object for page.
537 : */
538 304 : static inline loff_t page_offset(struct page *page)
539 : {
540 304 : return ((loff_t)page->index) << PAGE_SHIFT;
541 : }
542 :
543 : static inline loff_t page_file_offset(struct page *page)
544 : {
545 : return ((loff_t)page_index(page)) << PAGE_SHIFT;
546 : }
547 :
548 : extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,
549 : unsigned long address);
550 :
551 241070 : static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
552 : unsigned long address)
553 : {
554 241070 : pgoff_t pgoff;
555 241070 : if (unlikely(is_vm_hugetlb_page(vma)))
556 : return linear_hugepage_index(vma, address);
557 241070 : pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
558 241070 : pgoff += vma->vm_pgoff;
559 241070 : return pgoff;
560 : }
561 :
562 : /* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
563 : struct wait_page_key {
564 : struct page *page;
565 : int bit_nr;
566 : int page_match;
567 : };
568 :
569 : struct wait_page_queue {
570 : struct page *page;
571 : int bit_nr;
572 : wait_queue_entry_t wait;
573 : };
574 :
575 0 : static inline bool wake_page_match(struct wait_page_queue *wait_page,
576 : struct wait_page_key *key)
577 : {
578 0 : if (wait_page->page != key->page)
579 : return false;
580 0 : key->page_match = 1;
581 :
582 0 : if (wait_page->bit_nr != key->bit_nr)
583 : return false;
584 :
585 : return true;
586 : }
587 :
588 : extern void __lock_page(struct page *page);
589 : extern int __lock_page_killable(struct page *page);
590 : extern int __lock_page_async(struct page *page, struct wait_page_queue *wait);
591 : extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
592 : unsigned int flags);
593 : extern void unlock_page(struct page *page);
594 :
595 : /*
596 : * Return true if the page was successfully locked
597 : */
598 22858 : static inline int trylock_page(struct page *page)
599 : {
600 22858 : page = compound_head(page);
601 22858 : return (likely(!test_and_set_bit_lock(PG_locked, &page->flags)));
602 : }
603 :
604 : /*
605 : * lock_page may only be called if we have the page's inode pinned.
606 : */
607 5504 : static inline void lock_page(struct page *page)
608 : {
609 5504 : might_sleep();
610 5504 : if (!trylock_page(page))
611 1 : __lock_page(page);
612 5504 : }
613 :
614 : /*
615 : * lock_page_killable is like lock_page but can be interrupted by fatal
616 : * signals. It returns 0 if it locked the page and -EINTR if it was
617 : * killed while waiting.
618 : */
619 : static inline int lock_page_killable(struct page *page)
620 : {
621 : might_sleep();
622 : if (!trylock_page(page))
623 : return __lock_page_killable(page);
624 : return 0;
625 : }
626 :
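/*
 * Illustrative sketch (hypothetical caller, not part of this header): after
 * sleeping for the page lock, callers commonly re-check that the page was
 * not truncated from @mapping while they waited.
 */
static inline int example_lock_and_check(struct page *page,
                                         struct address_space *mapping)
{
        int err = lock_page_killable(page);

        if (err)
                return err;                     /* -EINTR: fatal signal */
        if (page->mapping != mapping) {         /* truncated while sleeping */
                unlock_page(page);
                return -EAGAIN;
        }
        return 0;
}
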
627 : /*
628 : * lock_page_async - Lock the page, unless this would block. If the page
629 : * is already locked, then queue a callback when the page becomes unlocked.
630 : * This callback can then retry the operation.
631 : *
632 : * Returns 0 if the page is locked successfully, or -EIOCBQUEUED if the page
633 : * was already locked and the callback defined in 'wait' was queued.
634 : */
635 : static inline int lock_page_async(struct page *page,
636 : struct wait_page_queue *wait)
637 : {
638 : if (!trylock_page(page))
639 : return __lock_page_async(page, wait);
640 : return 0;
641 : }
642 :
643 : /*
644 : * lock_page_or_retry - Lock the page, unless this would block and the
645 : * caller indicated that it can handle a retry.
646 : *
647 : * Return value and mmap_lock implications depend on flags; see
648 : * __lock_page_or_retry().
649 : */
650 : static inline int lock_page_or_retry(struct page *page, struct mm_struct *mm,
651 : unsigned int flags)
652 : {
653 : might_sleep();
654 : return trylock_page(page) || __lock_page_or_retry(page, mm, flags);
655 : }
656 :
657 : /*
658 : * This is exported only for wait_on_page_locked/wait_on_page_writeback, etc.,
659 : * and should not be used directly.
660 : */
661 : extern void wait_on_page_bit(struct page *page, int bit_nr);
662 : extern int wait_on_page_bit_killable(struct page *page, int bit_nr);
663 :
664 : /*
665 : * Wait for a page to be unlocked.
666 : *
667 : * This must be called with the caller "holding" the page,
668 : * i.e., with an increased "page->count" so that the page won't
669 : * go away during the wait.
670 : */
671 0 : static inline void wait_on_page_locked(struct page *page)
672 : {
673 0 : if (PageLocked(page))
674 0 : wait_on_page_bit(compound_head(page), PG_locked);
675 0 : }
676 :
677 0 : static inline int wait_on_page_locked_killable(struct page *page)
678 : {
679 0 : if (!PageLocked(page))
680 : return 0;
681 0 : return wait_on_page_bit_killable(compound_head(page), PG_locked);
682 : }
683 :
684 : int put_and_wait_on_page_locked(struct page *page, int state);
685 : void wait_on_page_writeback(struct page *page);
686 : extern void end_page_writeback(struct page *page);
687 : void wait_for_stable_page(struct page *page);
688 :
689 : void page_endio(struct page *page, bool is_write, int err);
690 :
691 : /*
692 : * Add an arbitrary waiter to a page's wait queue
693 : */
694 : extern void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter);
695 :
696 : /*
697 : * Fault in everything in the given userspace address range.
698 : */
699 338 : static inline int fault_in_pages_writeable(char __user *uaddr, int size)
700 : {
701 338 : char __user *end = uaddr + size - 1;
702 :
703 338 : if (unlikely(size == 0))
704 : return 0;
705 :
706 338 : if (unlikely(uaddr > end))
707 : return -EFAULT;
708 : /*
709 : * Writing zeroes into userspace here is OK, because we know that if
710 : * the zero gets there, we'll be overwriting it.
711 : */
712 338 : do {
713 338 : if (unlikely(__put_user(0, uaddr) != 0))
714 : return -EFAULT;
715 338 : uaddr += PAGE_SIZE;
716 338 : } while (uaddr <= end);
717 :
718 : /* Check whether the range spilled into the next page. */
719 338 : if (((unsigned long)uaddr & PAGE_MASK) ==
720 338 : ((unsigned long)end & PAGE_MASK))
721 280 : return __put_user(0, end);
722 :
723 : return 0;
724 : }
725 :
726 2936 : static inline int fault_in_pages_readable(const char __user *uaddr, int size)
727 : {
728 2936 : volatile char c;
729 2936 : const char __user *end = uaddr + size - 1;
730 :
731 2936 : if (unlikely(size == 0))
732 : return 0;
733 :
734 2936 : if (unlikely(uaddr > end))
735 : return -EFAULT;
736 :
737 2936 : do {
738 2936 : if (unlikely(__get_user(c, uaddr) != 0))
739 : return -EFAULT;
740 2936 : uaddr += PAGE_SIZE;
741 2936 : } while (uaddr <= end);
742 :
743 : /* Check whether the range spilled into the next page. */
744 2936 : if (((unsigned long)uaddr & PAGE_MASK) ==
745 2936 : ((unsigned long)end & PAGE_MASK)) {
746 1027 : return __get_user(c, end);
747 : }
748 :
749 1909 : (void)c;
750 1909 : return 0;
751 : }
752 :
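/*
 * Illustrative sketch (hypothetical write path, not part of this header):
 * pre-fault the user buffer before taking page locks so that the later
 * copy, done with page faults disabled, is unlikely to fail.
 */
static inline int example_prefault_user_buffer(const char __user *buf, int count)
{
        if (fault_in_pages_readable(buf, count))
                return -EFAULT;
        /* ... lock pagecache pages and copy with pagefaults disabled ... */
        return 0;
}
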
753 : int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
754 : pgoff_t index, gfp_t gfp_mask);
755 : int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
756 : pgoff_t index, gfp_t gfp_mask);
757 : extern void delete_from_page_cache(struct page *page);
758 : extern void __delete_from_page_cache(struct page *page, void *shadow);
759 : void replace_page_cache_page(struct page *old, struct page *new);
760 : void delete_from_page_cache_batch(struct address_space *mapping,
761 : struct pagevec *pvec);
762 : loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
763 : int whence);
764 :
765 : /*
766 : * Like add_to_page_cache_locked, but used to add newly allocated pages:
767 : * the page is new, so we can just run __SetPageLocked() against it.
768 : */
769 : static inline int add_to_page_cache(struct page *page,
770 : struct address_space *mapping, pgoff_t offset, gfp_t gfp_mask)
771 : {
772 : int error;
773 :
774 : __SetPageLocked(page);
775 : error = add_to_page_cache_locked(page, mapping, offset, gfp_mask);
776 : if (unlikely(error))
777 : __ClearPageLocked(page);
778 : return error;
779 : }
780 :
781 : /**
782 : * struct readahead_control - Describes a readahead request.
783 : *
784 : * A readahead request is for consecutive pages. Filesystems which
785 : * implement the ->readahead method should call readahead_page() or
786 : * readahead_page_batch() in a loop and attempt to start I/O against
787 : * each page in the request.
788 : *
789 : * Most of the fields in this struct are private and should be accessed
790 : * by the functions below.
791 : *
792 : * @file: The file, used primarily by network filesystems for authentication.
793 : * May be NULL if invoked internally by the filesystem.
794 : * @mapping: Readahead this filesystem object.
795 : */
796 : struct readahead_control {
797 : struct file *file;
798 : struct address_space *mapping;
799 : /* private: use the readahead_* accessors instead */
800 : pgoff_t _index;
801 : unsigned int _nr_pages;
802 : unsigned int _batch_count;
803 : };
804 :
805 : #define DEFINE_READAHEAD(rac, f, m, i) \
806 : struct readahead_control rac = { \
807 : .file = f, \
808 : .mapping = m, \
809 : ._index = i, \
810 : }
811 :
812 : #define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
813 :
814 : void page_cache_ra_unbounded(struct readahead_control *,
815 : unsigned long nr_to_read, unsigned long lookahead_count);
816 : void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
817 : unsigned long req_count);
818 : void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
819 : struct page *, unsigned long req_count);
820 :
821 : /**
822 : * page_cache_sync_readahead - generic file readahead
823 : * @mapping: address_space which holds the pagecache and I/O vectors
824 : * @ra: file_ra_state which holds the readahead state
825 : * @file: Used by the filesystem for authentication.
826 : * @index: Index of first page to be read.
827 : * @req_count: Total number of pages being read by the caller.
828 : *
829 : * page_cache_sync_readahead() should be called when a cache miss happened:
830 : * it will submit the read. The readahead logic may decide to piggyback more
831 : * pages onto the read request if access patterns suggest it will improve
832 : * performance.
833 : */
834 : static inline
835 0 : void page_cache_sync_readahead(struct address_space *mapping,
836 : struct file_ra_state *ra, struct file *file, pgoff_t index,
837 : unsigned long req_count)
838 : {
839 0 : DEFINE_READAHEAD(ractl, file, mapping, index);
840 0 : page_cache_sync_ra(&ractl, ra, req_count);
841 0 : }
842 :
843 : /**
844 : * page_cache_async_readahead - file readahead for marked pages
845 : * @mapping: address_space which holds the pagecache and I/O vectors
846 : * @ra: file_ra_state which holds the readahead state
847 : * @file: Used by the filesystem for authentication.
848 : * @page: The page at @index which triggered the readahead call.
849 : * @index: Index of first page to be read.
850 : * @req_count: Total number of pages being read by the caller.
851 : *
852 : * page_cache_async_readahead() should be called when a page is used which
853 : * is marked as PageReadahead; this is a marker to suggest that the application
854 : * has used up enough of the readahead window that we should start pulling in
855 : * more pages.
856 : */
857 : static inline
858 0 : void page_cache_async_readahead(struct address_space *mapping,
859 : struct file_ra_state *ra, struct file *file,
860 : struct page *page, pgoff_t index, unsigned long req_count)
861 : {
862 0 : DEFINE_READAHEAD(ractl, file, mapping, index);
863 0 : page_cache_async_ra(&ractl, ra, page, req_count);
864 0 : }
865 :
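/*
 * Illustrative sketch (hypothetical read path, not part of this header):
 * how a buffered read path typically drives the two helpers above - start
 * readahead on a cache miss, extend it when the PageReadahead marker is hit.
 */
static inline void example_readahead_hint(struct address_space *mapping,
                                          struct file_ra_state *ra,
                                          struct file *file, struct page *page,
                                          pgoff_t index, unsigned long req_count)
{
        if (!page)
                page_cache_sync_readahead(mapping, ra, file, index, req_count);
        else if (PageReadahead(page))
                page_cache_async_readahead(mapping, ra, file, page, index,
                                           req_count);
}
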
866 : /**
867 : * readahead_page - Get the next page to read.
868 : * @rac: The current readahead request.
869 : *
870 : * Context: The page is locked and has an elevated refcount. The caller
871 : * should decrease the refcount once the page has been submitted for I/O
872 : * and unlock the page once all I/O to that page has completed.
873 : * Return: A pointer to the next page, or %NULL if we are done.
874 : */
875 23227 : static inline struct page *readahead_page(struct readahead_control *rac)
876 : {
877 23227 : struct page *page;
878 :
879 23227 : BUG_ON(rac->_batch_count > rac->_nr_pages);
880 23227 : rac->_nr_pages -= rac->_batch_count;
881 23227 : rac->_index += rac->_batch_count;
882 :
883 23227 : if (!rac->_nr_pages) {
884 1672 : rac->_batch_count = 0;
885 1672 : return NULL;
886 : }
887 :
888 21555 : page = xa_load(&rac->mapping->i_pages, rac->_index);
889 43110 : VM_BUG_ON_PAGE(!PageLocked(page), page);
890 21555 : rac->_batch_count = thp_nr_pages(page);
891 :
892 21555 : return page;
893 : }
894 :
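/*
 * Illustrative sketch (hypothetical ->readahead implementation, not part of
 * this header): the loop described in the struct readahead_control comment.
 * No real I/O is started here, so each page is unlocked and released
 * immediately - the path a real implementation would take on error.
 */
static inline void example_readahead(struct readahead_control *rac)
{
        struct page *page;

        while ((page = readahead_page(rac)) != NULL) {
                /* A real implementation would start read I/O on @page here;
                 * the I/O completion would then unlock the page. */
                unlock_page(page);
                put_page(page);
        }
}
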
895 : static inline unsigned int __readahead_batch(struct readahead_control *rac,
896 : struct page **array, unsigned int array_sz)
897 : {
898 : unsigned int i = 0;
899 : XA_STATE(xas, &rac->mapping->i_pages, 0);
900 : struct page *page;
901 :
902 : BUG_ON(rac->_batch_count > rac->_nr_pages);
903 : rac->_nr_pages -= rac->_batch_count;
904 : rac->_index += rac->_batch_count;
905 : rac->_batch_count = 0;
906 :
907 : xas_set(&xas, rac->_index);
908 : rcu_read_lock();
909 : xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
910 : if (xas_retry(&xas, page))
911 : continue;
912 : VM_BUG_ON_PAGE(!PageLocked(page), page);
913 : VM_BUG_ON_PAGE(PageTail(page), page);
914 : array[i++] = page;
915 : rac->_batch_count += thp_nr_pages(page);
916 :
917 : /*
918 : * The page cache isn't using multi-index entries yet,
919 : * so the xas cursor needs to be manually moved to the
920 : * next index. This can be removed once the page cache
921 : * is converted.
922 : */
923 : if (PageHead(page))
924 : xas_set(&xas, rac->_index + rac->_batch_count);
925 :
926 : if (i == array_sz)
927 : break;
928 : }
929 : rcu_read_unlock();
930 :
931 : return i;
932 : }
933 :
934 : /**
935 : * readahead_page_batch - Get a batch of pages to read.
936 : * @rac: The current readahead request.
937 : * @array: An array of pointers to struct page.
938 : *
939 : * Context: The pages are locked and have an elevated refcount. The caller
940 : * should decrease the refcount on each page once it has been submitted for
941 : * I/O and unlock each page once all I/O to that page has completed.
942 : * Return: The number of pages placed in the array. 0 indicates the request
943 : * is complete.
944 : */
945 : #define readahead_page_batch(rac, array) \
946 : __readahead_batch(rac, array, ARRAY_SIZE(array))
947 :
948 : /**
949 : * readahead_pos - The byte offset into the file of this readahead request.
950 : * @rac: The readahead request.
951 : */
952 0 : static inline loff_t readahead_pos(struct readahead_control *rac)
953 : {
954 0 : return (loff_t)rac->_index * PAGE_SIZE;
955 : }
956 :
957 : /**
958 : * readahead_length - The number of bytes in this readahead request.
959 : * @rac: The readahead request.
960 : */
961 0 : static inline loff_t readahead_length(struct readahead_control *rac)
962 : {
963 0 : return (loff_t)rac->_nr_pages * PAGE_SIZE;
964 : }
965 :
966 : /**
967 : * readahead_index - The index of the first page in this readahead request.
968 : * @rac: The readahead request.
969 : */
970 4940 : static inline pgoff_t readahead_index(struct readahead_control *rac)
971 : {
972 4940 : return rac->_index;
973 : }
974 :
975 : /**
976 : * readahead_count - The number of pages in this readahead request.
977 : * @rac: The readahead request.
978 : */
979 10587 : static inline unsigned int readahead_count(struct readahead_control *rac)
980 : {
981 10587 : return rac->_nr_pages;
982 : }
983 :
984 : static inline unsigned long dir_pages(struct inode *inode)
985 : {
986 : return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
987 : PAGE_SHIFT;
988 : }
989 :
990 : /**
991 : * page_mkwrite_check_truncate - check if page was truncated
992 : * @page: the page to check
993 : * @inode: the inode to check the page against
994 : *
995 : * Returns the number of bytes in the page up to EOF,
996 : * or -EFAULT if the page was truncated.
997 : */
998 0 : static inline int page_mkwrite_check_truncate(struct page *page,
999 : struct inode *inode)
1000 : {
1001 0 : loff_t size = i_size_read(inode);
1002 0 : pgoff_t index = size >> PAGE_SHIFT;
1003 0 : int offset = offset_in_page(size);
1004 :
1005 0 : if (page->mapping != inode->i_mapping)
1006 : return -EFAULT;
1007 :
1008 : /* page is wholly inside EOF */
1009 0 : if (page->index < index)
1010 : return PAGE_SIZE;
1011 : /* page is wholly past EOF */
1012 0 : if (page->index > index || !offset)
1013 0 : return -EFAULT;
1014 : /* page is partially inside EOF */
1015 : return offset;
1016 : }
1017 :
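/*
 * Illustrative sketch (hypothetical ->page_mkwrite handler, not part of this
 * header): page_mkwrite_check_truncate() is used after locking the page.
 */
static inline vm_fault_t example_page_mkwrite(struct page *page,
                                              struct inode *inode)
{
        int len;

        lock_page(page);
        len = page_mkwrite_check_truncate(page, inode);
        if (len < 0) {
                unlock_page(page);
                return VM_FAULT_NOPAGE;  /* the page was truncated under us */
        }
        /* ... dirty the first @len bytes / wait for writeback here ... */
        return VM_FAULT_LOCKED;
}
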
1018 : /**
1019 : * i_blocks_per_page - How many blocks fit in this page.
1020 : * @inode: The inode which contains the blocks.
1021 : * @page: The page (head page if the page is a THP).
1022 : *
1023 : * If the block size is larger than the size of this page, return zero.
1024 : *
1025 : * Context: The caller should hold a refcount on the page to prevent it
1026 : * from being split.
1027 : * Return: The number of filesystem blocks covered by this page.
1028 : */
1029 : static inline
1030 0 : unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
1031 : {
1032 0 : return thp_size(page) >> inode->i_blkbits;
1033 : }
1034 : #endif /* _LINUX_PAGEMAP_H */