LCOV - code coverage report
Current view: top level - mm - sparse.c (source / functions)
Test:         landlock.info
Date:         2021-04-22 12:43:58

                    Hit    Total    Coverage
Lines:              151      186      81.2 %
Functions:           18       20      90.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * sparse memory mappings.
       4             :  */
       5             : #include <linux/mm.h>
       6             : #include <linux/slab.h>
       7             : #include <linux/mmzone.h>
       8             : #include <linux/memblock.h>
       9             : #include <linux/compiler.h>
      10             : #include <linux/highmem.h>
      11             : #include <linux/export.h>
      12             : #include <linux/spinlock.h>
      13             : #include <linux/vmalloc.h>
      14             : #include <linux/swap.h>
      15             : #include <linux/swapops.h>
      16             : 
      17             : #include "internal.h"
      18             : #include <asm/dma.h>
      19             : 
      20             : /*
      21             :  * Permanent SPARSEMEM data:
      22             :  *
      23             :  * 1) mem_section       - memory sections, mem_map's for valid memory
      24             :  */
      25             : #ifdef CONFIG_SPARSEMEM_EXTREME
      26             : struct mem_section **mem_section;
      27             : #else
      28             : struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
      29             :         ____cacheline_internodealigned_in_smp;
      30             : #endif
      31             : EXPORT_SYMBOL(mem_section);
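
Both layouts above are indexed the same way: a section number selects a root and
an entry within that root. A minimal sketch of that lookup, assuming the
SECTION_NR_TO_ROOT()/SECTION_ROOT_MASK helpers from include/linux/mmzone.h (the
in-tree helper this mirrors is __nr_to_section(), which also guards against
mem_section itself not being allocated yet under SPARSEMEM_EXTREME):

	/* Illustrative sketch only; not part of the covered source. */
	static inline struct mem_section *nr_to_section_sketch(unsigned long nr)
	{
		unsigned long root = SECTION_NR_TO_ROOT(nr);	/* nr / SECTIONS_PER_ROOT */

		if (!mem_section[root])
			return NULL;
		/* entry within the root: nr % SECTIONS_PER_ROOT */
		return &mem_section[root][nr & SECTION_ROOT_MASK];
	}

With the static (!SPARSEMEM_EXTREME) layout the roots are part of one flat array
and are always present, so the NULL check only matters for the dynamically
allocated SPARSEMEM_EXTREME case.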
      32             : 
      33             : #ifdef NODE_NOT_IN_PAGE_FLAGS
      34             : /*
      35             :  * If we did not store the node number in the page then we have to
      36             :  * do a lookup in the section_to_node_table in order to find which
      37             :  * node the page belongs to.
      38             :  */
      39             : #if MAX_NUMNODES <= 256
      40             : static u8 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
      41             : #else
      42             : static u16 section_to_node_table[NR_MEM_SECTIONS] __cacheline_aligned;
      43             : #endif
      44             : 
      45             : int page_to_nid(const struct page *page)
      46             : {
      47             :         return section_to_node_table[page_to_section(page)];
      48             : }
      49             : EXPORT_SYMBOL(page_to_nid);
      50             : 
      51             : static void set_section_nid(unsigned long section_nr, int nid)
      52             : {
      53             :         section_to_node_table[section_nr] = nid;
      54             : }
      55             : #else /* !NODE_NOT_IN_PAGE_FLAGS */
      56           9 : static inline void set_section_nid(unsigned long section_nr, int nid)
      57             : {
      58           9 : }
      59             : #endif
      60             : 
      61             : #ifdef CONFIG_SPARSEMEM_EXTREME
      62           1 : static noinline struct mem_section __ref *sparse_index_alloc(int nid)
      63             : {
      64           1 :         struct mem_section *section = NULL;
      65           1 :         unsigned long array_size = SECTIONS_PER_ROOT *
      66             :                                    sizeof(struct mem_section);
      67             : 
      68           1 :         if (slab_is_available()) {
      69           0 :                 section = kzalloc_node(array_size, GFP_KERNEL, nid);
      70             :         } else {
      71           1 :                 section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
      72             :                                               nid);
      73           1 :                 if (!section)
      74           0 :                         panic("%s: Failed to allocate %lu bytes nid=%d\n",
      75             :                               __func__, array_size, nid);
      76             :         }
      77             : 
      78           1 :         return section;
      79             : }
      80             : 
      81           9 : static int __meminit sparse_index_init(unsigned long section_nr, int nid)
      82             : {
      83           9 :         unsigned long root = SECTION_NR_TO_ROOT(section_nr);
      84           9 :         struct mem_section *section;
      85             : 
      86             :         /*
      87             :          * An existing section is possible in the sub-section hotplug
       88             :          * case: the first hot-add instantiates it, and follow-on
       89             :          * hot-adds reuse the existing section.
      90             :          *
      91             :          * The mem_hotplug_lock resolves the apparent race below.
      92             :          */
      93           9 :         if (mem_section[root])
      94             :                 return 0;
      95             : 
      96           1 :         section = sparse_index_alloc(nid);
      97           1 :         if (!section)
      98             :                 return -ENOMEM;
      99             : 
     100           1 :         mem_section[root] = section;
     101             : 
     102           1 :         return 0;
     103             : }
     104             : #else /* !SPARSEMEM_EXTREME */
     105             : static inline int sparse_index_init(unsigned long section_nr, int nid)
     106             : {
     107             :         return 0;
     108             : }
     109             : #endif
     110             : 
     111             : #ifdef CONFIG_SPARSEMEM_EXTREME
     112           8 : unsigned long __section_nr(struct mem_section *ms)
     113             : {
     114           8 :         unsigned long root_nr;
     115           8 :         struct mem_section *root = NULL;
     116             : 
     117           8 :         for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
     118           8 :                 root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
     119           8 :                 if (!root)
     120           0 :                         continue;
     121             : 
     122           8 :                 if ((ms >= root) && (ms < (root + SECTIONS_PER_ROOT)))
     123             :                      break;
     124             :         }
     125             : 
     126           8 :         VM_BUG_ON(!root);
     127             : 
     128           8 :         return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
     129             : }
     130             : #else
     131             : unsigned long __section_nr(struct mem_section *ms)
     132             : {
     133             :         return (unsigned long)(ms - mem_section[0]);
     134             : }
     135             : #endif
     136             : 
     137             : /*
     138             :  * During early boot, before section_mem_map is used for an actual
     139             :  * mem_map, we use section_mem_map to store the section's NUMA
     140             :  * node.  This keeps us from having to use another data structure.  The
     141             :  * node information is cleared just before we store the real mem_map.
     142             :  */
     143           8 : static inline unsigned long sparse_encode_early_nid(int nid)
     144             : {
     145           8 :         return (nid << SECTION_NID_SHIFT);
     146             : }
     147             : 
     148           8 : static inline int sparse_early_nid(struct mem_section *section)
     149             : {
     150           8 :         return (section->section_mem_map >> SECTION_NID_SHIFT);
     151             : }
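
A minimal sketch of the round trip these two helpers implement, as used by
memory_present() below (SECTION_IS_ONLINE and SECTION_NID_SHIFT as defined in
include/linux/mmzone.h; illustrative only, not part of the covered source):

	/* Stash a node id the way memory_present() does, then read it back. */
	int nid = 1;	/* any valid node id */
	struct mem_section tmp = {
		.section_mem_map = sparse_encode_early_nid(nid) | SECTION_IS_ONLINE,
	};

	/* SECTION_IS_ONLINE lives below SECTION_NID_SHIFT, so the shift drops it. */
	WARN_ON(sparse_early_nid(&tmp) != nid);

sparse_init() below reads the nid back this way before sparse_init_one_section()
overwrites the field with the real encoded mem_map.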
     152             : 
     153             : /* Validate the physical addressing limitations of the model */
     154           2 : void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
     155             :                                                 unsigned long *end_pfn)
     156             : {
     157           2 :         unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
     158             : 
     159             :         /*
     160             :          * Sanity checks - do not allow an architecture to pass
     161             :          * in larger pfns than the maximum scope of sparsemem:
     162             :          */
     163           2 :         if (*start_pfn > max_sparsemem_pfn) {
     164           0 :                 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
     165             :                         "Start of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
     166             :                         *start_pfn, *end_pfn, max_sparsemem_pfn);
     167           0 :                 WARN_ON_ONCE(1);
     168           0 :                 *start_pfn = max_sparsemem_pfn;
     169           0 :                 *end_pfn = max_sparsemem_pfn;
     170           2 :         } else if (*end_pfn > max_sparsemem_pfn) {
     171           0 :                 mminit_dprintk(MMINIT_WARNING, "pfnvalidation",
     172             :                         "End of range %lu -> %lu exceeds SPARSEMEM max %lu\n",
     173             :                         *start_pfn, *end_pfn, max_sparsemem_pfn);
     174           0 :                 WARN_ON_ONCE(1);
     175           0 :                 *end_pfn = max_sparsemem_pfn;
     176             :         }
     177           2 : }
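
As a worked example of the clamp above, assuming x86-64 with 4-level paging
(MAX_PHYSMEM_BITS == 46, PAGE_SHIFT == 12):

	max_sparsemem_pfn = 1UL << (46 - 12) = 1UL << 34

so any range reaching past pfn 2^34 (i.e. beyond 64 TiB of physical address
space) is clamped to that limit and a warning is printed.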
     178             : 
     179             : /*
     180             :  * There are a number of times that we loop over NR_MEM_SECTIONS,
     181             :  * looking for section_present() on each.  But, when we have very
     182             :  * large physical address spaces, NR_MEM_SECTIONS can also be
      183             :  * very large, which makes the loops quite long.
      184             :  *
      185             :  * Keeping track of the highest present section number gives us an
      186             :  * easy way to break out of those loops early.
     187             :  */
     188             : unsigned long __highest_present_section_nr;
     189           8 : static void section_mark_present(struct mem_section *ms)
     190             : {
     191           8 :         unsigned long section_nr = __section_nr(ms);
     192             : 
     193           8 :         if (section_nr > __highest_present_section_nr)
     194           7 :                 __highest_present_section_nr = section_nr;
     195             : 
     196           8 :         ms->section_mem_map |= SECTION_MARKED_PRESENT;
     197           8 : }
     198             : 
     199             : #define for_each_present_section_nr(start, section_nr)          \
     200             :         for (section_nr = next_present_section_nr(start-1);     \
     201             :              ((section_nr != -1) &&                             \
     202             :               (section_nr <= __highest_present_section_nr)); \
     203             :              section_nr = next_present_section_nr(section_nr))
     204             : 
     205           1 : static inline unsigned long first_present_section_nr(void)
     206             : {
     207           1 :         return next_present_section_nr(-1);
     208             : }
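
A minimal usage sketch of the iterator above (next_present_section_nr() comes
from include/linux/mmzone.h; illustrative only):

	/* Count the sections recorded by section_mark_present(). */
	unsigned long pnum, nr_present = 0;

	for_each_present_section_nr(0, pnum)
		nr_present++;

The loop never scans past __highest_present_section_nr, which is the whole
point of tracking it.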
     209             : 
     210             : #ifdef CONFIG_SPARSEMEM_VMEMMAP
     211             : static void subsection_mask_set(unsigned long *map, unsigned long pfn,
     212             :                 unsigned long nr_pages)
     213             : {
     214             :         int idx = subsection_map_index(pfn);
     215             :         int end = subsection_map_index(pfn + nr_pages - 1);
     216             : 
     217             :         bitmap_set(map, idx, end - idx + 1);
     218             : }
     219             : 
     220             : void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
     221             : {
     222             :         int end_sec = pfn_to_section_nr(pfn + nr_pages - 1);
     223             :         unsigned long nr, start_sec = pfn_to_section_nr(pfn);
     224             : 
     225             :         if (!nr_pages)
     226             :                 return;
     227             : 
     228             :         for (nr = start_sec; nr <= end_sec; nr++) {
     229             :                 struct mem_section *ms;
     230             :                 unsigned long pfns;
     231             : 
     232             :                 pfns = min(nr_pages, PAGES_PER_SECTION
     233             :                                 - (pfn & ~PAGE_SECTION_MASK));
     234             :                 ms = __nr_to_section(nr);
     235             :                 subsection_mask_set(ms->usage->subsection_map, pfn, pfns);
     236             : 
     237             :                 pr_debug("%s: sec: %lu pfns: %lu set(%d, %d)\n", __func__, nr,
     238             :                                 pfns, subsection_map_index(pfn),
     239             :                                 subsection_map_index(pfn + pfns - 1));
     240             : 
     241             :                 pfn += pfns;
     242             :                 nr_pages -= pfns;
     243             :         }
     244             : }
     245             : #else
     246           2 : void __init subsection_map_init(unsigned long pfn, unsigned long nr_pages)
     247             : {
     248           2 : }
     249             : #endif
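
A worked example of what subsection_map_init() records, assuming the common
x86-64 VMEMMAP constants (PAGES_PER_SECTION == 32768, PAGES_PER_SUBSECTION == 512,
SUBSECTIONS_PER_SECTION == 64); illustrative only:

	/*
	 * A range starting 1024 pfns into a section and spanning 1536 pages
	 * covers subsections 1024/512 = 2 through (1024+1536-1)/512 = 4, so
	 * subsection_mask_set() does bitmap_set(map, 2, 3): bits 2, 3 and 4.
	 */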
     250             : 
     251             : /* Record a memory area against a node. */
     252           2 : static void __init memory_present(int nid, unsigned long start, unsigned long end)
     253             : {
     254           2 :         unsigned long pfn;
     255             : 
     256             : #ifdef CONFIG_SPARSEMEM_EXTREME
     257           2 :         if (unlikely(!mem_section)) {
     258           1 :                 unsigned long size, align;
     259             : 
     260           1 :                 size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
     261           1 :                 align = 1 << (INTERNODE_CACHE_SHIFT);
     262           1 :                 mem_section = memblock_alloc(size, align);
     263           1 :                 if (!mem_section)
     264           0 :                         panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
     265             :                               __func__, size, align);
     266             :         }
     267             : #endif
     268             : 
     269           2 :         start &= PAGE_SECTION_MASK;
     270           2 :         mminit_validate_memmodel_limits(&start, &end);
     271          11 :         for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
     272           9 :                 unsigned long section = pfn_to_section_nr(pfn);
     273           9 :                 struct mem_section *ms;
     274             : 
     275           9 :                 sparse_index_init(section, nid);
     276           9 :                 set_section_nid(section, nid);
     277             : 
     278           9 :                 ms = __nr_to_section(section);
     279           9 :                 if (!ms->section_mem_map) {
     280           8 :                         ms->section_mem_map = sparse_encode_early_nid(nid) |
     281             :                                                         SECTION_IS_ONLINE;
     282           8 :                         section_mark_present(ms);
     283             :                 }
     284             :         }
     285           2 : }
     286             : 
     287             : /*
     288             :  * Mark all memblocks as present using memory_present().
      289             :  * This is a convenience function that is useful to mark all of the
      290             :  * system's memory as present during initialization.
     291             :  */
     292           1 : static void __init memblocks_present(void)
     293             : {
     294           1 :         unsigned long start, end;
     295           1 :         int i, nid;
     296             : 
     297           3 :         for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid)
     298           2 :                 memory_present(nid, start, end);
     299           1 : }
     300             : 
     301             : /*
      302             :  * Subtle: the stored value is mem_map minus the section's first pfn,
      303             :  * so that "page - section_mem_map" yields the actual physical page
      304             :  * frame number and "section_mem_map + pfn" yields the page.
     305             :  */
     306           8 : static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
     307             : {
     308           8 :         unsigned long coded_mem_map =
     309           8 :                 (unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
     310           8 :         BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
     311           8 :         BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
     312           8 :         return coded_mem_map;
     313             : }
     314             : 
     315             : #ifdef CONFIG_MEMORY_HOTPLUG
     316             : /*
     317             :  * Decode mem_map from the coded memmap
     318             :  */
     319             : struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
     320             : {
     321             :         /* mask off the extra low bits of information */
     322             :         coded_mem_map &= SECTION_MAP_MASK;
     323             :         return ((struct page *)coded_mem_map) + section_nr_to_pfn(pnum);
     324             : }
     325             : #endif /* CONFIG_MEMORY_HOTPLUG */
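
The encode/decode pair above are inverses; a minimal sketch of the invariant
(CONFIG_MEMORY_HOTPLUG assumed for the decode side; illustrative only):

	/* Round trip: decoding what was encoded gives back the memmap base. */
	unsigned long coded = sparse_encode_mem_map(mem_map, pnum);

	WARN_ON(sparse_decode_mem_map(coded, pnum) != mem_map);
	/*
	 * And for any pfn inside section pnum:
	 *   (struct page *)coded + pfn == &mem_map[pfn - section_nr_to_pfn(pnum)]
	 * which is exactly the identity __pfn_to_page() relies on.
	 */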
     326             : 
     327           8 : static void __meminit sparse_init_one_section(struct mem_section *ms,
     328             :                 unsigned long pnum, struct page *mem_map,
     329             :                 struct mem_section_usage *usage, unsigned long flags)
     330             : {
     331           8 :         ms->section_mem_map &= ~SECTION_MAP_MASK;
     332           8 :         ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum)
     333           8 :                 | SECTION_HAS_MEM_MAP | flags;
     334           8 :         ms->usage = usage;
     335           8 : }
     336             : 
     337           9 : static unsigned long usemap_size(void)
     338             : {
     339           9 :         return BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS) * sizeof(unsigned long);
     340             : }
     341             : 
     342           9 : size_t mem_section_usage_size(void)
     343             : {
     344           9 :         return sizeof(struct mem_section_usage) + usemap_size();
     345             : }
     346             : 
     347             : #ifdef CONFIG_MEMORY_HOTREMOVE
     348             : static struct mem_section_usage * __init
     349             : sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
     350             :                                          unsigned long size)
     351             : {
     352             :         struct mem_section_usage *usage;
     353             :         unsigned long goal, limit;
     354             :         int nid;
     355             :         /*
      356             :          * A page may contain usemaps for other sections, preventing the
      357             :          * page from being freed and making a section unremovable while
      358             :          * other sections referencing the usemap remain active. Similarly,
     359             :          * a pgdat can prevent a section being removed. If section A
     360             :          * contains a pgdat and section B contains the usemap, both
     361             :          * sections become inter-dependent. This allocates usemaps
     362             :          * from the same section as the pgdat where possible to avoid
     363             :          * this problem.
     364             :          */
     365             :         goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
     366             :         limit = goal + (1UL << PA_SECTION_SHIFT);
     367             :         nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
     368             : again:
     369             :         usage = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, goal, limit, nid);
     370             :         if (!usage && limit) {
     371             :                 limit = 0;
     372             :                 goto again;
     373             :         }
     374             :         return usage;
     375             : }
     376             : 
     377             : static void __init check_usemap_section_nr(int nid,
     378             :                 struct mem_section_usage *usage)
     379             : {
     380             :         unsigned long usemap_snr, pgdat_snr;
     381             :         static unsigned long old_usemap_snr;
     382             :         static unsigned long old_pgdat_snr;
     383             :         struct pglist_data *pgdat = NODE_DATA(nid);
     384             :         int usemap_nid;
     385             : 
     386             :         /* First call */
     387             :         if (!old_usemap_snr) {
     388             :                 old_usemap_snr = NR_MEM_SECTIONS;
     389             :                 old_pgdat_snr = NR_MEM_SECTIONS;
     390             :         }
     391             : 
     392             :         usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT);
     393             :         pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
     394             :         if (usemap_snr == pgdat_snr)
     395             :                 return;
     396             : 
     397             :         if (old_usemap_snr == usemap_snr && old_pgdat_snr == pgdat_snr)
     398             :                 /* skip redundant message */
     399             :                 return;
     400             : 
     401             :         old_usemap_snr = usemap_snr;
     402             :         old_pgdat_snr = pgdat_snr;
     403             : 
     404             :         usemap_nid = sparse_early_nid(__nr_to_section(usemap_snr));
     405             :         if (usemap_nid != nid) {
     406             :                 pr_info("node %d must be removed before remove section %ld\n",
     407             :                         nid, usemap_snr);
     408             :                 return;
     409             :         }
     410             :         /*
     411             :          * There is a circular dependency.
      412             :          * Some platforms allow un-removable sections because they will just
      413             :          * gather other removable sections for dynamic partitioning.
      414             :          * Just report the un-removable section's number here.
     415             :          */
     416             :         pr_info("Section %ld and %ld (node %d) have a circular dependency on usemap and pgdat allocations\n",
     417             :                 usemap_snr, pgdat_snr, nid);
     418             : }
     419             : #else
     420             : static struct mem_section_usage * __init
     421           1 : sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
     422             :                                          unsigned long size)
     423             : {
     424           1 :         return memblock_alloc_node(size, SMP_CACHE_BYTES, pgdat->node_id);
     425             : }
     426             : 
     427           8 : static void __init check_usemap_section_nr(int nid,
     428             :                 struct mem_section_usage *usage)
     429             : {
     430           8 : }
     431             : #endif /* CONFIG_MEMORY_HOTREMOVE */
     432             : 
     433             : #ifdef CONFIG_SPARSEMEM_VMEMMAP
     434             : static unsigned long __init section_map_size(void)
     435             : {
     436             :         return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
     437             : }
     438             : 
     439             : #else
     440          10 : static unsigned long __init section_map_size(void)
     441             : {
     442          10 :         return PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
     443             : }
     444             : 
     445           8 : struct page __init *__populate_section_memmap(unsigned long pfn,
     446             :                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
     447             : {
     448           8 :         unsigned long size = section_map_size();
     449           8 :         struct page *map = sparse_buffer_alloc(size);
     450           8 :         phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
     451             : 
     452           8 :         if (map)
     453             :                 return map;
     454             : 
     455           0 :         map = memblock_alloc_try_nid_raw(size, size, addr,
     456             :                                           MEMBLOCK_ALLOC_ACCESSIBLE, nid);
     457           0 :         if (!map)
     458           0 :                 panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa\n",
     459             :                       __func__, size, PAGE_SIZE, nid, &addr);
     460             : 
     461             :         return map;
     462             : }
     463             : #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
     464             : 
     465             : static void *sparsemap_buf __meminitdata;
     466             : static void *sparsemap_buf_end __meminitdata;
     467             : 
     468           0 : static inline void __meminit sparse_buffer_free(unsigned long size)
     469             : {
     470           0 :         WARN_ON(!sparsemap_buf || size == 0);
     471           0 :         memblock_free_early(__pa(sparsemap_buf), size);
     472           0 : }
     473             : 
     474           1 : static void __init sparse_buffer_init(unsigned long size, int nid)
     475             : {
     476           1 :         phys_addr_t addr = __pa(MAX_DMA_ADDRESS);
     477           1 :         WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
     478             :         /*
      479             :          * The pre-allocated buffer is mainly used by __populate_section_memmap
      480             :          * and we want it to be properly aligned to the section size - this is
      481             :          * especially the case for VMEMMAP, which maps the memmap to PMDs.
     482             :          */
     483           1 :         sparsemap_buf = memblock_alloc_exact_nid_raw(size, section_map_size(),
     484             :                                         addr, MEMBLOCK_ALLOC_ACCESSIBLE, nid);
     485           1 :         sparsemap_buf_end = sparsemap_buf + size;
     486           1 : }
     487             : 
     488           1 : static void __init sparse_buffer_fini(void)
     489             : {
     490           1 :         unsigned long size = sparsemap_buf_end - sparsemap_buf;
     491             : 
     492           1 :         if (sparsemap_buf && size > 0)
     493           0 :                 sparse_buffer_free(size);
     494           1 :         sparsemap_buf = NULL;
     495           1 : }
     496             : 
     497           8 : void * __meminit sparse_buffer_alloc(unsigned long size)
     498             : {
     499           8 :         void *ptr = NULL;
     500             : 
     501           8 :         if (sparsemap_buf) {
     502           8 :                 ptr = (void *) roundup((unsigned long)sparsemap_buf, size);
     503           8 :                 if (ptr + size > sparsemap_buf_end)
     504             :                         ptr = NULL;
     505             :                 else {
     506             :                         /* Free redundant aligned space */
     507           8 :                         if ((unsigned long)(ptr - sparsemap_buf) > 0)
     508           0 :                                 sparse_buffer_free((unsigned long)(ptr - sparsemap_buf));
     509           8 :                         sparsemap_buf = ptr + size;
     510             :                 }
     511             :         }
     512           8 :         return ptr;
     513             : }
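
Taken together, sparse_buffer_init()/sparse_buffer_alloc()/sparse_buffer_fini()
form a simple bump allocator for the early memmap. A sketch of the lifecycle as
driven by sparse_init_nid() further down (illustrative only, declarations and
error handling trimmed):

	/* One big per-node chunk, carved up one section at a time. */
	sparse_buffer_init(map_count * section_map_size(), nid);

	for (i = 0; i < map_count; i++) {
		/*
		 * In sparse_init_nid() this happens inside
		 * __populate_section_memmap(); a NULL return means the buffer
		 * ran out and a fresh memblock allocation is used instead.
		 */
		map = sparse_buffer_alloc(section_map_size());
	}

	sparse_buffer_fini();	/* give back whatever tail was not handed out */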
     514             : 
     515           1 : void __weak __meminit vmemmap_populate_print_last(void)
     516             : {
     517           1 : }
     518             : 
     519             : /*
      520             :  * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end),
      521             :  * and the number of present sections in this node is map_count.
     522             :  */
     523           1 : static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
     524             :                                    unsigned long pnum_end,
     525             :                                    unsigned long map_count)
     526             : {
     527           1 :         struct mem_section_usage *usage;
     528           1 :         unsigned long pnum;
     529           1 :         struct page *map;
     530             : 
     531           1 :         usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
     532             :                         mem_section_usage_size() * map_count);
     533           1 :         if (!usage) {
     534           0 :                 pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
     535           0 :                 goto failed;
     536             :         }
     537           1 :         sparse_buffer_init(map_count * section_map_size(), nid);
     538           9 :         for_each_present_section_nr(pnum_begin, pnum) {
     539           8 :                 unsigned long pfn = section_nr_to_pfn(pnum);
     540             : 
     541           8 :                 if (pnum >= pnum_end)
     542             :                         break;
     543             : 
     544           8 :                 map = __populate_section_memmap(pfn, PAGES_PER_SECTION,
     545             :                                 nid, NULL);
     546           8 :                 if (!map) {
     547           0 :                         pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
     548             :                                __func__, nid);
     549           0 :                         pnum_begin = pnum;
     550           0 :                         goto failed;
     551             :                 }
     552           8 :                 check_usemap_section_nr(nid, usage);
     553           8 :                 sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
     554             :                                 SECTION_IS_EARLY);
     555           8 :                 usage = (void *) usage + mem_section_usage_size();
     556             :         }
     557           1 :         sparse_buffer_fini();
     558           1 :         return;
     559           0 : failed:
      560             :         /* We failed to allocate; mark all the following pnums as not present */
     561           0 :         for_each_present_section_nr(pnum_begin, pnum) {
     562           0 :                 struct mem_section *ms;
     563             : 
     564           0 :                 if (pnum >= pnum_end)
     565             :                         break;
     566           0 :                 ms = __nr_to_section(pnum);
     567           0 :                 ms->section_mem_map = 0;
     568             :         }
     569             : }
     570             : 
     571             : /*
     572             :  * Allocate the accumulated non-linear sections, allocate a mem_map
     573             :  * for each and record the physical to section mapping.
     574             :  */
     575           1 : void __init sparse_init(void)
     576             : {
     577           1 :         unsigned long pnum_end, pnum_begin, map_count = 1;
     578           1 :         int nid_begin;
     579             : 
     580           1 :         memblocks_present();
     581             : 
     582           1 :         pnum_begin = first_present_section_nr();
     583           1 :         nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
     584             : 
     585             :         /* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
     586           1 :         set_pageblock_order();
     587             : 
     588           8 :         for_each_present_section_nr(pnum_begin + 1, pnum_end) {
     589           7 :                 int nid = sparse_early_nid(__nr_to_section(pnum_end));
     590             : 
     591           7 :                 if (nid == nid_begin) {
     592           7 :                         map_count++;
     593           7 :                         continue;
     594             :                 }
     595             :                 /* Init node with sections in range [pnum_begin, pnum_end) */
     596           0 :                 sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
     597           0 :                 nid_begin = nid;
     598           0 :                 pnum_begin = pnum_end;
     599           0 :                 map_count = 1;
     600             :         }
     601             :         /* cover the last node */
     602           1 :         sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
     603           1 :         vmemmap_populate_print_last();
     604           1 : }
     605             : 
     606             : #ifdef CONFIG_MEMORY_HOTPLUG
     607             : 
     608             : /* Mark all memory sections within the pfn range as online */
     609             : void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
     610             : {
     611             :         unsigned long pfn;
     612             : 
     613             :         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
     614             :                 unsigned long section_nr = pfn_to_section_nr(pfn);
     615             :                 struct mem_section *ms;
     616             : 
     617             :                 /* onlining code should never touch invalid ranges */
     618             :                 if (WARN_ON(!valid_section_nr(section_nr)))
     619             :                         continue;
     620             : 
     621             :                 ms = __nr_to_section(section_nr);
     622             :                 ms->section_mem_map |= SECTION_IS_ONLINE;
     623             :         }
     624             : }
     625             : 
     626             : #ifdef CONFIG_MEMORY_HOTREMOVE
     627             : /* Mark all memory sections within the pfn range as offline */
     628             : void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
     629             : {
     630             :         unsigned long pfn;
     631             : 
     632             :         for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
     633             :                 unsigned long section_nr = pfn_to_section_nr(pfn);
     634             :                 struct mem_section *ms;
     635             : 
     636             :                 /*
     637             :                  * TODO this needs some double checking. Offlining code makes
     638             :                  * sure to check pfn_valid but those checks might be just bogus
     639             :                  */
     640             :                 if (WARN_ON(!valid_section_nr(section_nr)))
     641             :                         continue;
     642             : 
     643             :                 ms = __nr_to_section(section_nr);
     644             :                 ms->section_mem_map &= ~SECTION_IS_ONLINE;
     645             :         }
     646             : }
     647             : #endif
     648             : 
     649             : #ifdef CONFIG_SPARSEMEM_VMEMMAP
     650             : static struct page * __meminit populate_section_memmap(unsigned long pfn,
     651             :                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
     652             : {
     653             :         return __populate_section_memmap(pfn, nr_pages, nid, altmap);
     654             : }
     655             : 
     656             : static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
     657             :                 struct vmem_altmap *altmap)
     658             : {
     659             :         unsigned long start = (unsigned long) pfn_to_page(pfn);
     660             :         unsigned long end = start + nr_pages * sizeof(struct page);
     661             : 
     662             :         vmemmap_free(start, end, altmap);
     663             : }
     664             : static void free_map_bootmem(struct page *memmap)
     665             : {
     666             :         unsigned long start = (unsigned long)memmap;
     667             :         unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
     668             : 
     669             :         vmemmap_free(start, end, NULL);
     670             : }
     671             : 
     672             : static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
     673             : {
     674             :         DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
     675             :         DECLARE_BITMAP(tmp, SUBSECTIONS_PER_SECTION) = { 0 };
     676             :         struct mem_section *ms = __pfn_to_section(pfn);
     677             :         unsigned long *subsection_map = ms->usage
     678             :                 ? &ms->usage->subsection_map[0] : NULL;
     679             : 
     680             :         subsection_mask_set(map, pfn, nr_pages);
     681             :         if (subsection_map)
     682             :                 bitmap_and(tmp, map, subsection_map, SUBSECTIONS_PER_SECTION);
     683             : 
     684             :         if (WARN(!subsection_map || !bitmap_equal(tmp, map, SUBSECTIONS_PER_SECTION),
     685             :                                 "section already deactivated (%#lx + %ld)\n",
     686             :                                 pfn, nr_pages))
     687             :                 return -EINVAL;
     688             : 
     689             :         bitmap_xor(subsection_map, map, subsection_map, SUBSECTIONS_PER_SECTION);
     690             :         return 0;
     691             : }
     692             : 
     693             : static bool is_subsection_map_empty(struct mem_section *ms)
     694             : {
     695             :         return bitmap_empty(&ms->usage->subsection_map[0],
     696             :                             SUBSECTIONS_PER_SECTION);
     697             : }
     698             : 
     699             : static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
     700             : {
     701             :         struct mem_section *ms = __pfn_to_section(pfn);
     702             :         DECLARE_BITMAP(map, SUBSECTIONS_PER_SECTION) = { 0 };
     703             :         unsigned long *subsection_map;
     704             :         int rc = 0;
     705             : 
     706             :         subsection_mask_set(map, pfn, nr_pages);
     707             : 
     708             :         subsection_map = &ms->usage->subsection_map[0];
     709             : 
     710             :         if (bitmap_empty(map, SUBSECTIONS_PER_SECTION))
     711             :                 rc = -EINVAL;
     712             :         else if (bitmap_intersects(map, subsection_map, SUBSECTIONS_PER_SECTION))
     713             :                 rc = -EEXIST;
     714             :         else
     715             :                 bitmap_or(subsection_map, map, subsection_map,
     716             :                                 SUBSECTIONS_PER_SECTION);
     717             : 
     718             :         return rc;
     719             : }
     720             : #else
     721             : struct page * __meminit populate_section_memmap(unsigned long pfn,
     722             :                 unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
     723             : {
     724             :         return kvmalloc_node(array_size(sizeof(struct page),
     725             :                                         PAGES_PER_SECTION), GFP_KERNEL, nid);
     726             : }
     727             : 
     728             : static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages,
     729             :                 struct vmem_altmap *altmap)
     730             : {
     731             :         kvfree(pfn_to_page(pfn));
     732             : }
     733             : 
     734             : static void free_map_bootmem(struct page *memmap)
     735             : {
     736             :         unsigned long maps_section_nr, removing_section_nr, i;
     737             :         unsigned long magic, nr_pages;
     738             :         struct page *page = virt_to_page(memmap);
     739             : 
     740             :         nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
     741             :                 >> PAGE_SHIFT;
     742             : 
     743             :         for (i = 0; i < nr_pages; i++, page++) {
     744             :                 magic = (unsigned long) page->freelist;
     745             : 
     746             :                 BUG_ON(magic == NODE_INFO);
     747             : 
     748             :                 maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
     749             :                 removing_section_nr = page_private(page);
     750             : 
     751             :                 /*
      752             :                  * When this function is called, the section being removed is
      753             :                  * in the logically offlined state: all of its pages are
      754             :                  * isolated from the page allocator. If the memmap of the
      755             :                  * section being removed is placed on that same section, it
      756             :                  * must not be freed; if it were, the page allocator could
      757             :                  * hand it out even though it is about to be removed physically.
     758             :                  */
     759             :                 if (maps_section_nr != removing_section_nr)
     760             :                         put_page_bootmem(page);
     761             :         }
     762             : }
     763             : 
     764             : static int clear_subsection_map(unsigned long pfn, unsigned long nr_pages)
     765             : {
     766             :         return 0;
     767             : }
     768             : 
     769             : static bool is_subsection_map_empty(struct mem_section *ms)
     770             : {
     771             :         return true;
     772             : }
     773             : 
     774             : static int fill_subsection_map(unsigned long pfn, unsigned long nr_pages)
     775             : {
     776             :         return 0;
     777             : }
     778             : #endif /* CONFIG_SPARSEMEM_VMEMMAP */
     779             : 
     780             : /*
     781             :  * To deactivate a memory region, there are 3 cases to handle across
     782             :  * two configurations (SPARSEMEM_VMEMMAP={y,n}):
     783             :  *
     784             :  * 1. deactivation of a partial hot-added section (only possible in
     785             :  *    the SPARSEMEM_VMEMMAP=y case).
     786             :  *      a) section was present at memory init.
     787             :  *      b) section was hot-added post memory init.
     788             :  * 2. deactivation of a complete hot-added section.
     789             :  * 3. deactivation of a complete section from memory init.
     790             :  *
      791             :  * For 1, when the subsection_map is not empty we will not free the
      792             :  * usage map, but we still need to free the vmemmap range.
      793             :  *
      794             :  * For 2 and 3, the SPARSEMEM_VMEMMAP={y,n} cases are unified.
     795             :  */
     796             : static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
     797             :                 struct vmem_altmap *altmap)
     798             : {
     799             :         struct mem_section *ms = __pfn_to_section(pfn);
     800             :         bool section_is_early = early_section(ms);
     801             :         struct page *memmap = NULL;
     802             :         bool empty;
     803             : 
     804             :         if (clear_subsection_map(pfn, nr_pages))
     805             :                 return;
     806             : 
     807             :         empty = is_subsection_map_empty(ms);
     808             :         if (empty) {
     809             :                 unsigned long section_nr = pfn_to_section_nr(pfn);
     810             : 
     811             :                 /*
     812             :                  * When removing an early section, the usage map is kept (as the
     813             :                  * usage maps of other sections fall into the same page). It
     814             :                  * will be re-used when re-adding the section - which is then no
     815             :                  * longer an early section. If the usage map is PageReserved, it
     816             :                  * was allocated during boot.
     817             :                  */
     818             :                 if (!PageReserved(virt_to_page(ms->usage))) {
     819             :                         kfree(ms->usage);
     820             :                         ms->usage = NULL;
     821             :                 }
     822             :                 memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
     823             :                 /*
      824             :                  * Mark the section invalid so that valid_section()
      825             :                  * returns false. This prevents code from dereferencing
      826             :                  * the ms->usage array.
     827             :                  */
     828             :                 ms->section_mem_map &= ~SECTION_HAS_MEM_MAP;
     829             :         }
     830             : 
     831             :         /*
     832             :          * The memmap of early sections is always fully populated. See
     833             :          * section_activate() and pfn_valid() .
     834             :          */
     835             :         if (!section_is_early)
     836             :                 depopulate_section_memmap(pfn, nr_pages, altmap);
     837             :         else if (memmap)
     838             :                 free_map_bootmem(memmap);
     839             : 
     840             :         if (empty)
     841             :                 ms->section_mem_map = (unsigned long)NULL;
     842             : }
     843             : 
     844             : static struct page * __meminit section_activate(int nid, unsigned long pfn,
     845             :                 unsigned long nr_pages, struct vmem_altmap *altmap)
     846             : {
     847             :         struct mem_section *ms = __pfn_to_section(pfn);
     848             :         struct mem_section_usage *usage = NULL;
     849             :         struct page *memmap;
     850             :         int rc = 0;
     851             : 
     852             :         if (!ms->usage) {
     853             :                 usage = kzalloc(mem_section_usage_size(), GFP_KERNEL);
     854             :                 if (!usage)
     855             :                         return ERR_PTR(-ENOMEM);
     856             :                 ms->usage = usage;
     857             :         }
     858             : 
     859             :         rc = fill_subsection_map(pfn, nr_pages);
     860             :         if (rc) {
     861             :                 if (usage)
     862             :                         ms->usage = NULL;
     863             :                 kfree(usage);
     864             :                 return ERR_PTR(rc);
     865             :         }
     866             : 
     867             :         /*
      868             :          * The early init code does not consider partially populated
      869             :          * initial sections; it simply assumes that memory will never be
     870             :          * referenced.  If we hot-add memory into such a section then we
     871             :          * do not need to populate the memmap and can simply reuse what
     872             :          * is already there.
     873             :          */
     874             :         if (nr_pages < PAGES_PER_SECTION && early_section(ms))
     875             :                 return pfn_to_page(pfn);
     876             : 
     877             :         memmap = populate_section_memmap(pfn, nr_pages, nid, altmap);
     878             :         if (!memmap) {
     879             :                 section_deactivate(pfn, nr_pages, altmap);
     880             :                 return ERR_PTR(-ENOMEM);
     881             :         }
     882             : 
     883             :         return memmap;
     884             : }
     885             : 
     886             : /**
     887             :  * sparse_add_section - add a memory section, or populate an existing one
     888             :  * @nid: The node to add section on
     889             :  * @start_pfn: start pfn of the memory range
     890             :  * @nr_pages: number of pfns to add in the section
     891             :  * @altmap: device page map
     892             :  *
     893             :  * This is only intended for hotplug.
     894             :  *
      895             :  * Note that only VMEMMAP supports sub-section aligned hotplug;
      896             :  * the proper alignment and size are gated by check_pfn_span().
     897             :  *
     898             :  *
     899             :  * Return:
     900             :  * * 0          - On success.
      901             :  * * -EEXIST    - Section is already present.
     902             :  * * -ENOMEM    - Out of memory.
     903             :  */
     904             : int __meminit sparse_add_section(int nid, unsigned long start_pfn,
     905             :                 unsigned long nr_pages, struct vmem_altmap *altmap)
     906             : {
     907             :         unsigned long section_nr = pfn_to_section_nr(start_pfn);
     908             :         struct mem_section *ms;
     909             :         struct page *memmap;
     910             :         int ret;
     911             : 
     912             :         ret = sparse_index_init(section_nr, nid);
     913             :         if (ret < 0)
     914             :                 return ret;
     915             : 
     916             :         memmap = section_activate(nid, start_pfn, nr_pages, altmap);
     917             :         if (IS_ERR(memmap))
     918             :                 return PTR_ERR(memmap);
     919             : 
     920             :         /*
     921             :          * Poison uninitialized struct pages in order to catch invalid flags
     922             :          * combinations.
     923             :          */
     924             :         page_init_poison(memmap, sizeof(struct page) * nr_pages);
     925             : 
     926             :         ms = __nr_to_section(section_nr);
     927             :         set_section_nid(section_nr, nid);
     928             :         section_mark_present(ms);
     929             : 
     930             :         /* Align memmap to section boundary in the subsection case */
     931             :         if (section_nr_to_pfn(section_nr) != start_pfn)
     932             :                 memmap = pfn_to_page(section_nr_to_pfn(section_nr));
     933             :         sparse_init_one_section(ms, section_nr, memmap, ms->usage, 0);
     934             : 
     935             :         return 0;
     936             : }
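
A sketch of how a hotplug caller drives this, one section-bounded span at a time
(modelled on __add_pages() in mm/memory_hotplug.c; illustrative only):

	unsigned long pfn = start_pfn, cur_nr_pages;
	int err = 0;

	for (; pfn < end_pfn; pfn += cur_nr_pages) {
		/* Select all remaining pages up to the next section boundary. */
		cur_nr_pages = min(end_pfn - pfn,
				   SECTION_ALIGN_UP(pfn + 1) - pfn);
		err = sparse_add_section(nid, pfn, cur_nr_pages, altmap);
		if (err)
			break;
	}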
     937             : 
     938             : #ifdef CONFIG_MEMORY_FAILURE
     939             : static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
     940             : {
     941             :         int i;
     942             : 
     943             :         /*
     944             :          * A further optimization is to have per section refcounted
     945             :          * num_poisoned_pages.  But that would need more space per memmap, so
     946             :          * for now just do a quick global check to speed up this routine in the
     947             :          * absence of bad pages.
     948             :          */
     949             :         if (atomic_long_read(&num_poisoned_pages) == 0)
     950             :                 return;
     951             : 
     952             :         for (i = 0; i < nr_pages; i++) {
     953             :                 if (PageHWPoison(&memmap[i])) {
     954             :                         num_poisoned_pages_dec();
     955             :                         ClearPageHWPoison(&memmap[i]);
     956             :                 }
     957             :         }
     958             : }
     959             : #else
     960             : static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
     961             : {
     962             : }
     963             : #endif
     964             : 
     965             : void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
     966             :                 unsigned long nr_pages, unsigned long map_offset,
     967             :                 struct vmem_altmap *altmap)
     968             : {
     969             :         clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
     970             :                         nr_pages - map_offset);
     971             :         section_deactivate(pfn, nr_pages, altmap);
     972             : }
     973             : #endif /* CONFIG_MEMORY_HOTPLUG */

Generated by: LCOV version 1.14