LCOV - code coverage report
Current view: top level - drivers/base - node.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
                  Hit    Total    Coverage
Lines:             45      217      20.7 %
Functions:          4       20      20.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Basic Node interface support
       4             :  */
       5             : 
       6             : #include <linux/module.h>
       7             : #include <linux/init.h>
       8             : #include <linux/mm.h>
       9             : #include <linux/memory.h>
      10             : #include <linux/vmstat.h>
      11             : #include <linux/notifier.h>
      12             : #include <linux/node.h>
      13             : #include <linux/hugetlb.h>
      14             : #include <linux/compaction.h>
      15             : #include <linux/cpumask.h>
      16             : #include <linux/topology.h>
      17             : #include <linux/nodemask.h>
      18             : #include <linux/cpu.h>
      19             : #include <linux/device.h>
      20             : #include <linux/pm_runtime.h>
      21             : #include <linux/swap.h>
      22             : #include <linux/slab.h>
      23             : 
      24             : static struct bus_type node_subsys = {
      25             :         .name = "node",
      26             :         .dev_name = "node",
      27             : };
      28             : 
      29             : 
      30           0 : static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
      31             : {
      32           0 :         ssize_t n;
      33           0 :         cpumask_var_t mask;
      34           0 :         struct node *node_dev = to_node(dev);
      35             : 
      36             :         /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
      37           0 :         BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
      38             : 
      39           0 :         if (!alloc_cpumask_var(&mask, GFP_KERNEL))
      40             :                 return 0;
      41             : 
      42           0 :         cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
      43           0 :         n = cpumap_print_to_pagebuf(list, buf, mask);
      44           0 :         free_cpumask_var(mask);
      45             : 
      46           0 :         return n;
      47             : }
      48             : 
      49           0 : static inline ssize_t cpumap_show(struct device *dev,
      50             :                                   struct device_attribute *attr,
      51             :                                   char *buf)
      52             : {
      53           0 :         return node_read_cpumap(dev, false, buf);
      54             : }
      55             : 
      56             : static DEVICE_ATTR_RO(cpumap);
      57             : 
      58           0 : static inline ssize_t cpulist_show(struct device *dev,
      59             :                                    struct device_attribute *attr,
      60             :                                    char *buf)
      61             : {
      62           0 :         return node_read_cpumap(dev, true, buf);
      63             : }
      64             : 
      65             : static DEVICE_ATTR_RO(cpulist);
      66             : 
      67             : /**
      68             :  * struct node_access_nodes - Access class device to hold user visible
      69             :  *                            relationships to other nodes.
      70             :  * @dev:        Device for this memory access class
      71             :  * @list_node:  List element in the node's access list
      72             :  * @access:     The access class rank
      73             :  * @hmem_attrs: Heterogeneous memory performance attributes
      74             :  */
      75             : struct node_access_nodes {
      76             :         struct device           dev;
      77             :         struct list_head        list_node;
      78             :         unsigned                access;
      79             : #ifdef CONFIG_HMEM_REPORTING
      80             :         struct node_hmem_attrs  hmem_attrs;
      81             : #endif
      82             : };
      83             : #define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
      84             : 
      85             : static struct attribute *node_init_access_node_attrs[] = {
      86             :         NULL,
      87             : };
      88             : 
      89             : static struct attribute *node_targ_access_node_attrs[] = {
      90             :         NULL,
      91             : };
      92             : 
      93             : static const struct attribute_group initiators = {
      94             :         .name   = "initiators",
      95             :         .attrs  = node_init_access_node_attrs,
      96             : };
      97             : 
      98             : static const struct attribute_group targets = {
      99             :         .name   = "targets",
     100             :         .attrs  = node_targ_access_node_attrs,
     101             : };
     102             : 
     103             : static const struct attribute_group *node_access_node_groups[] = {
     104             :         &initiators,
     105             :         &targets,
     106             :         NULL,
     107             : };
     108             : 
     109           0 : static void node_remove_accesses(struct node *node)
     110             : {
     111           0 :         struct node_access_nodes *c, *cnext;
     112             : 
     113           0 :         list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
     114           0 :                 list_del(&c->list_node);
     115           0 :                 device_unregister(&c->dev);
     116             :         }
     117           0 : }
     118             : 
     119           0 : static void node_access_release(struct device *dev)
     120             : {
     121           0 :         kfree(to_access_nodes(dev));
     122           0 : }
     123             : 
     124           0 : static struct node_access_nodes *node_init_node_access(struct node *node,
     125             :                                                        unsigned access)
     126             : {
     127           0 :         struct node_access_nodes *access_node;
     128           0 :         struct device *dev;
     129             : 
     130           0 :         list_for_each_entry(access_node, &node->access_list, list_node)
     131           0 :                 if (access_node->access == access)
     132           0 :                         return access_node;
     133             : 
     134           0 :         access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
     135           0 :         if (!access_node)
     136             :                 return NULL;
     137             : 
     138           0 :         access_node->access = access;
     139           0 :         dev = &access_node->dev;
     140           0 :         dev->parent = &node->dev;
     141           0 :         dev->release = node_access_release;
     142           0 :         dev->groups = node_access_node_groups;
     143           0 :         if (dev_set_name(dev, "access%u", access))
     144           0 :                 goto free;
     145             : 
     146           0 :         if (device_register(dev))
     147           0 :                 goto free_name;
     148             : 
     149           0 :         pm_runtime_no_callbacks(dev);
     150           0 :         list_add_tail(&access_node->list_node, &node->access_list);
     151           0 :         return access_node;
     152           0 : free_name:
     153           0 :         kfree_const(dev->kobj.name);
     154           0 : free:
     155           0 :         kfree(access_node);
     156           0 :         return NULL;
     157             : }
     158             : 
     159             : #ifdef CONFIG_HMEM_REPORTING
     160             : #define ACCESS_ATTR(name)                                               \
     161             : static ssize_t name##_show(struct device *dev,                          \
     162             :                            struct device_attribute *attr,               \
     163             :                            char *buf)                                   \
     164             : {                                                                       \
     165             :         return sysfs_emit(buf, "%u\n",                                        \
     166             :                           to_access_nodes(dev)->hmem_attrs.name);    \
     167             : }                                                                       \
     168             : static DEVICE_ATTR_RO(name)
     169             : 
     170             : ACCESS_ATTR(read_bandwidth);
     171             : ACCESS_ATTR(read_latency);
     172             : ACCESS_ATTR(write_bandwidth);
     173             : ACCESS_ATTR(write_latency);
     174             : 
     175             : static struct attribute *access_attrs[] = {
     176             :         &dev_attr_read_bandwidth.attr,
     177             :         &dev_attr_read_latency.attr,
     178             :         &dev_attr_write_bandwidth.attr,
     179             :         &dev_attr_write_latency.attr,
     180             :         NULL,
     181             : };
     182             : 
     183             : /**
     184             :  * node_set_perf_attrs - Set the performance values for given access class
     185             :  * @nid: Node identifier to be set
     186             :  * @hmem_attrs: Heterogeneous memory performance attributes
      187             :  * @access:     The access class for the given attributes
     188             :  */
     189             : void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
     190             :                          unsigned access)
     191             : {
     192             :         struct node_access_nodes *c;
     193             :         struct node *node;
     194             :         int i;
     195             : 
     196             :         if (WARN_ON_ONCE(!node_online(nid)))
     197             :                 return;
     198             : 
     199             :         node = node_devices[nid];
     200             :         c = node_init_node_access(node, access);
     201             :         if (!c)
     202             :                 return;
     203             : 
     204             :         c->hmem_attrs = *hmem_attrs;
     205             :         for (i = 0; access_attrs[i] != NULL; i++) {
     206             :                 if (sysfs_add_file_to_group(&c->dev.kobj, access_attrs[i],
     207             :                                             "initiators")) {
     208             :                         pr_info("failed to add performance attribute to node %d\n",
     209             :                                 nid);
     210             :                         break;
     211             :                 }
     212             :         }
     213             : }
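
For context, node_set_perf_attrs() is the entry point that consumers such as the
ACPI HMAT parser use to publish initiator/target performance data. A minimal
sketch, assuming a made-up node id, access class and numbers (real callers derive
them from firmware tables):

        #include <linux/node.h>

        /* Hypothetical example: publish access class 0 attributes for node 1.
         * The bandwidth (MB/s) and latency (ns) values are illustrative only.
         */
        static void example_publish_perf(void)
        {
                struct node_hmem_attrs attrs = {
                        .read_bandwidth  = 10240,
                        .write_bandwidth = 10240,
                        .read_latency    = 100,
                        .write_latency   = 100,
                };

                node_set_perf_attrs(1, &attrs, 0);
        }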
     214             : 
     215             : /**
     216             :  * struct node_cache_info - Internal tracking for memory node caches
      217             :  * @dev:        Device representing the cache level
     218             :  * @node:       List element for tracking in the node
      219             :  * @cache_attrs: Attributes for this cache level
     220             :  */
     221             : struct node_cache_info {
     222             :         struct device dev;
     223             :         struct list_head node;
     224             :         struct node_cache_attrs cache_attrs;
     225             : };
     226             : #define to_cache_info(device) container_of(device, struct node_cache_info, dev)
     227             : 
     228             : #define CACHE_ATTR(name, fmt)                                           \
     229             : static ssize_t name##_show(struct device *dev,                          \
     230             :                            struct device_attribute *attr,               \
     231             :                            char *buf)                                   \
     232             : {                                                                       \
     233             :         return sysfs_emit(buf, fmt "\n",                              \
     234             :                           to_cache_info(dev)->cache_attrs.name);     \
     235             : }                                                                       \
     236             : DEVICE_ATTR_RO(name);
     237             : 
     238             : CACHE_ATTR(size, "%llu")
     239             : CACHE_ATTR(line_size, "%u")
     240             : CACHE_ATTR(indexing, "%u")
     241             : CACHE_ATTR(write_policy, "%u")
     242             : 
     243             : static struct attribute *cache_attrs[] = {
     244             :         &dev_attr_indexing.attr,
     245             :         &dev_attr_size.attr,
     246             :         &dev_attr_line_size.attr,
     247             :         &dev_attr_write_policy.attr,
     248             :         NULL,
     249             : };
     250             : ATTRIBUTE_GROUPS(cache);
     251             : 
     252             : static void node_cache_release(struct device *dev)
     253             : {
     254             :         kfree(dev);
     255             : }
     256             : 
     257             : static void node_cacheinfo_release(struct device *dev)
     258             : {
     259             :         struct node_cache_info *info = to_cache_info(dev);
     260             :         kfree(info);
     261             : }
     262             : 
     263             : static void node_init_cache_dev(struct node *node)
     264             : {
     265             :         struct device *dev;
     266             : 
     267             :         dev = kzalloc(sizeof(*dev), GFP_KERNEL);
     268             :         if (!dev)
     269             :                 return;
     270             : 
     271             :         dev->parent = &node->dev;
     272             :         dev->release = node_cache_release;
     273             :         if (dev_set_name(dev, "memory_side_cache"))
     274             :                 goto free_dev;
     275             : 
     276             :         if (device_register(dev))
     277             :                 goto free_name;
     278             : 
     279             :         pm_runtime_no_callbacks(dev);
     280             :         node->cache_dev = dev;
     281             :         return;
     282             : free_name:
     283             :         kfree_const(dev->kobj.name);
     284             : free_dev:
     285             :         kfree(dev);
     286             : }
     287             : 
     288             : /**
     289             :  * node_add_cache() - add cache attribute to a memory node
     290             :  * @nid: Node identifier that has new cache attributes
     291             :  * @cache_attrs: Attributes for the cache being added
     292             :  */
     293             : void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs)
     294             : {
     295             :         struct node_cache_info *info;
     296             :         struct device *dev;
     297             :         struct node *node;
     298             : 
     299             :         if (!node_online(nid) || !node_devices[nid])
     300             :                 return;
     301             : 
     302             :         node = node_devices[nid];
     303             :         list_for_each_entry(info, &node->cache_attrs, node) {
     304             :                 if (info->cache_attrs.level == cache_attrs->level) {
     305             :                         dev_warn(&node->dev,
     306             :                                 "attempt to add duplicate cache level:%d\n",
     307             :                                 cache_attrs->level);
     308             :                         return;
     309             :                 }
     310             :         }
     311             : 
     312             :         if (!node->cache_dev)
     313             :                 node_init_cache_dev(node);
     314             :         if (!node->cache_dev)
     315             :                 return;
     316             : 
     317             :         info = kzalloc(sizeof(*info), GFP_KERNEL);
     318             :         if (!info)
     319             :                 return;
     320             : 
     321             :         dev = &info->dev;
     322             :         dev->parent = node->cache_dev;
     323             :         dev->release = node_cacheinfo_release;
     324             :         dev->groups = cache_groups;
     325             :         if (dev_set_name(dev, "index%d", cache_attrs->level))
     326             :                 goto free_cache;
     327             : 
     328             :         info->cache_attrs = *cache_attrs;
     329             :         if (device_register(dev)) {
     330             :                 dev_warn(&node->dev, "failed to add cache level:%d\n",
     331             :                          cache_attrs->level);
     332             :                 goto free_name;
     333             :         }
     334             :         pm_runtime_no_callbacks(dev);
     335             :         list_add_tail(&info->node, &node->cache_attrs);
     336             :         return;
     337             : free_name:
     338             :         kfree_const(dev->kobj.name);
     339             : free_cache:
     340             :         kfree(info);
     341             : }
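
As a usage sketch, a platform driver that discovers a memory-side cache would
fill in a struct node_cache_attrs and register it; the node id and cache
geometry below are assumptions for illustration, not values from this file:

        #include <linux/node.h>

        /* Hypothetical example: report one direct-mapped, write-back
         * memory-side cache level for node 0.
         */
        static void example_add_cache(void)
        {
                struct node_cache_attrs cache = {
                        .indexing     = NODE_CACHE_DIRECT_MAP,
                        .write_policy = NODE_CACHE_WRITE_BACK,
                        .size         = 1ULL << 30,     /* 1 GiB */
                        .line_size    = 64,
                        .level        = 1,
                };

                node_add_cache(0, &cache);
        }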
     342             : 
     343             : static void node_remove_caches(struct node *node)
     344             : {
     345             :         struct node_cache_info *info, *next;
     346             : 
     347             :         if (!node->cache_dev)
     348             :                 return;
     349             : 
     350             :         list_for_each_entry_safe(info, next, &node->cache_attrs, node) {
     351             :                 list_del(&info->node);
     352             :                 device_unregister(&info->dev);
     353             :         }
     354             :         device_unregister(node->cache_dev);
     355             : }
     356             : 
     357             : static void node_init_caches(unsigned int nid)
     358             : {
     359             :         INIT_LIST_HEAD(&node_devices[nid]->cache_attrs);
     360             : }
     361             : #else
     362           1 : static void node_init_caches(unsigned int nid) { }
     363           0 : static void node_remove_caches(struct node *node) { }
     364             : #endif
     365             : 
     366             : #define K(x) ((x) << (PAGE_SHIFT - 10))
     367           0 : static ssize_t node_read_meminfo(struct device *dev,
     368             :                         struct device_attribute *attr, char *buf)
     369             : {
     370           0 :         int len = 0;
     371           0 :         int nid = dev->id;
     372           0 :         struct pglist_data *pgdat = NODE_DATA(nid);
     373           0 :         struct sysinfo i;
     374           0 :         unsigned long sreclaimable, sunreclaimable;
     375           0 :         unsigned long swapcached = 0;
     376             : 
     377           0 :         si_meminfo_node(&i, nid);
     378           0 :         sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
     379           0 :         sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
     380             : #ifdef CONFIG_SWAP
     381             :         swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE);
     382             : #endif
     383           0 :         len = sysfs_emit_at(buf, len,
     384             :                             "Node %d MemTotal:       %8lu kB\n"
     385             :                             "Node %d MemFree:        %8lu kB\n"
     386             :                             "Node %d MemUsed:        %8lu kB\n"
     387             :                             "Node %d SwapCached:     %8lu kB\n"
     388             :                             "Node %d Active:         %8lu kB\n"
     389             :                             "Node %d Inactive:       %8lu kB\n"
     390             :                             "Node %d Active(anon):   %8lu kB\n"
     391             :                             "Node %d Inactive(anon): %8lu kB\n"
     392             :                             "Node %d Active(file):   %8lu kB\n"
     393             :                             "Node %d Inactive(file): %8lu kB\n"
     394             :                             "Node %d Unevictable:    %8lu kB\n"
     395             :                             "Node %d Mlocked:        %8lu kB\n",
     396             :                             nid, K(i.totalram),
     397             :                             nid, K(i.freeram),
     398           0 :                             nid, K(i.totalram - i.freeram),
     399             :                             nid, K(swapcached),
     400           0 :                             nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
     401             :                                    node_page_state(pgdat, NR_ACTIVE_FILE)),
     402           0 :                             nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
     403             :                                    node_page_state(pgdat, NR_INACTIVE_FILE)),
     404           0 :                             nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
     405           0 :                             nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
     406           0 :                             nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
     407           0 :                             nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
     408           0 :                             nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
     409           0 :                             nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
     410             : 
     411             : #ifdef CONFIG_HIGHMEM
     412             :         len += sysfs_emit_at(buf, len,
     413             :                              "Node %d HighTotal:      %8lu kB\n"
     414             :                              "Node %d HighFree:       %8lu kB\n"
     415             :                              "Node %d LowTotal:       %8lu kB\n"
     416             :                              "Node %d LowFree:        %8lu kB\n",
     417             :                              nid, K(i.totalhigh),
     418             :                              nid, K(i.freehigh),
     419             :                              nid, K(i.totalram - i.totalhigh),
     420             :                              nid, K(i.freeram - i.freehigh));
     421             : #endif
     422           0 :         len += sysfs_emit_at(buf, len,
     423             :                              "Node %d Dirty:          %8lu kB\n"
     424             :                              "Node %d Writeback:      %8lu kB\n"
     425             :                              "Node %d FilePages:      %8lu kB\n"
     426             :                              "Node %d Mapped:         %8lu kB\n"
     427             :                              "Node %d AnonPages:      %8lu kB\n"
     428             :                              "Node %d Shmem:          %8lu kB\n"
     429             :                              "Node %d KernelStack:    %8lu kB\n"
     430             : #ifdef CONFIG_SHADOW_CALL_STACK
     431             :                              "Node %d ShadowCallStack:%8lu kB\n"
     432             : #endif
     433             :                              "Node %d PageTables:     %8lu kB\n"
     434             :                              "Node %d NFS_Unstable:   %8lu kB\n"
     435             :                              "Node %d Bounce:         %8lu kB\n"
     436             :                              "Node %d WritebackTmp:   %8lu kB\n"
     437             :                              "Node %d KReclaimable:   %8lu kB\n"
     438             :                              "Node %d Slab:           %8lu kB\n"
     439             :                              "Node %d SReclaimable:   %8lu kB\n"
     440             :                              "Node %d SUnreclaim:     %8lu kB\n"
     441             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     442             :                              "Node %d AnonHugePages:  %8lu kB\n"
     443             :                              "Node %d ShmemHugePages: %8lu kB\n"
     444             :                              "Node %d ShmemPmdMapped: %8lu kB\n"
     445             :                              "Node %d FileHugePages: %8lu kB\n"
     446             :                              "Node %d FilePmdMapped: %8lu kB\n"
     447             : #endif
     448             :                              ,
     449           0 :                              nid, K(node_page_state(pgdat, NR_FILE_DIRTY)),
     450           0 :                              nid, K(node_page_state(pgdat, NR_WRITEBACK)),
     451           0 :                              nid, K(node_page_state(pgdat, NR_FILE_PAGES)),
     452           0 :                              nid, K(node_page_state(pgdat, NR_FILE_MAPPED)),
     453           0 :                              nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
     454           0 :                              nid, K(i.sharedram),
     455             :                              nid, node_page_state(pgdat, NR_KERNEL_STACK_KB),
     456             : #ifdef CONFIG_SHADOW_CALL_STACK
     457             :                              nid, node_page_state(pgdat, NR_KERNEL_SCS_KB),
     458             : #endif
     459           0 :                              nid, K(node_page_state(pgdat, NR_PAGETABLE)),
     460             :                              nid, 0UL,
     461           0 :                              nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
     462           0 :                              nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
     463           0 :                              nid, K(sreclaimable +
     464             :                                     node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)),
     465           0 :                              nid, K(sreclaimable + sunreclaimable),
     466             :                              nid, K(sreclaimable),
     467             :                              nid, K(sunreclaimable)
     468             : #ifdef CONFIG_TRANSPARENT_HUGEPAGE
     469             :                              ,
     470           0 :                              nid, K(node_page_state(pgdat, NR_ANON_THPS)),
     471           0 :                              nid, K(node_page_state(pgdat, NR_SHMEM_THPS)),
     472           0 :                              nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
     473           0 :                              nid, K(node_page_state(pgdat, NR_FILE_THPS)),
     474           0 :                              nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED))
     475             : #endif
     476             :                             );
     477           0 :         len += hugetlb_report_node_meminfo(buf, len, nid);
     478           0 :         return len;
     479             : }
     480             : 
     481             : #undef K
     482             : static DEVICE_ATTR(meminfo, 0444, node_read_meminfo, NULL);
     483             : 
     484           0 : static ssize_t node_read_numastat(struct device *dev,
     485             :                                   struct device_attribute *attr, char *buf)
     486             : {
     487           0 :         return sysfs_emit(buf,
     488             :                           "numa_hit %lu\n"
     489             :                           "numa_miss %lu\n"
     490             :                           "numa_foreign %lu\n"
     491             :                           "interleave_hit %lu\n"
     492             :                           "local_node %lu\n"
     493             :                           "other_node %lu\n",
     494           0 :                           sum_zone_numa_state(dev->id, NUMA_HIT),
     495           0 :                           sum_zone_numa_state(dev->id, NUMA_MISS),
     496           0 :                           sum_zone_numa_state(dev->id, NUMA_FOREIGN),
     497           0 :                           sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
     498           0 :                           sum_zone_numa_state(dev->id, NUMA_LOCAL),
     499           0 :                           sum_zone_numa_state(dev->id, NUMA_OTHER));
     500             : }
     501             : static DEVICE_ATTR(numastat, 0444, node_read_numastat, NULL);
     502             : 
     503           0 : static ssize_t node_read_vmstat(struct device *dev,
     504             :                                 struct device_attribute *attr, char *buf)
     505             : {
     506           0 :         int nid = dev->id;
     507           0 :         struct pglist_data *pgdat = NODE_DATA(nid);
     508           0 :         int i;
     509           0 :         int len = 0;
     510             : 
     511           0 :         for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
     512           0 :                 len += sysfs_emit_at(buf, len, "%s %lu\n",
     513             :                                      zone_stat_name(i),
     514             :                                      sum_zone_node_page_state(nid, i));
     515             : 
     516             : #ifdef CONFIG_NUMA
     517           0 :         for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++)
     518           0 :                 len += sysfs_emit_at(buf, len, "%s %lu\n",
     519             :                                      numa_stat_name(i),
     520             :                                      sum_zone_numa_state(nid, i));
     521             : 
     522             : #endif
     523           0 :         for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
     524           0 :                 unsigned long pages = node_page_state_pages(pgdat, i);
     525             : 
     526           0 :                 if (vmstat_item_print_in_thp(i))
     527           0 :                         pages /= HPAGE_PMD_NR;
     528           0 :                 len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i),
     529             :                                      pages);
     530             :         }
     531             : 
     532           0 :         return len;
     533             : }
     534             : static DEVICE_ATTR(vmstat, 0444, node_read_vmstat, NULL);
     535             : 
     536           0 : static ssize_t node_read_distance(struct device *dev,
     537             :                                   struct device_attribute *attr, char *buf)
     538             : {
     539           0 :         int nid = dev->id;
     540           0 :         int len = 0;
     541           0 :         int i;
     542             : 
     543             :         /*
     544             :          * buf is currently PAGE_SIZE in length and each node needs 4 chars
     545             :          * at the most (distance + space or newline).
     546             :          */
     547           0 :         BUILD_BUG_ON(MAX_NUMNODES * 4 > PAGE_SIZE);
     548             : 
     549           0 :         for_each_online_node(i) {
     550           0 :                 len += sysfs_emit_at(buf, len, "%s%d",
     551             :                                      i ? " " : "", node_distance(nid, i));
     552             :         }
     553             : 
     554           0 :         len += sysfs_emit_at(buf, len, "\n");
     555           0 :         return len;
     556             : }
     557             : static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
     558             : 
     559             : static struct attribute *node_dev_attrs[] = {
     560             :         &dev_attr_cpumap.attr,
     561             :         &dev_attr_cpulist.attr,
     562             :         &dev_attr_meminfo.attr,
     563             :         &dev_attr_numastat.attr,
     564             :         &dev_attr_distance.attr,
     565             :         &dev_attr_vmstat.attr,
     566             :         NULL
     567             : };
     568             : ATTRIBUTE_GROUPS(node_dev);
     569             : 
     570             : #ifdef CONFIG_HUGETLBFS
     571             : /*
     572             :  * hugetlbfs per node attributes registration interface:
      573             :  * When/if the hugetlb[fs] subsystem initializes [sometime after this module],
      574             :  * it will register its per-node attributes for all online nodes with
     575             :  * memory.  It will also call register_hugetlbfs_with_node(), below, to
     576             :  * register its attribute registration functions with this node driver.
     577             :  * Once these hooks have been initialized, the node driver will call into
     578             :  * the hugetlb module to [un]register attributes for hot-plugged nodes.
     579             :  */
     580             : static node_registration_func_t __hugetlb_register_node;
     581             : static node_registration_func_t __hugetlb_unregister_node;
     582             : 
     583             : static inline bool hugetlb_register_node(struct node *node)
     584             : {
     585             :         if (__hugetlb_register_node &&
     586             :                         node_state(node->dev.id, N_MEMORY)) {
     587             :                 __hugetlb_register_node(node);
     588             :                 return true;
     589             :         }
     590             :         return false;
     591             : }
     592             : 
     593             : static inline void hugetlb_unregister_node(struct node *node)
     594             : {
     595             :         if (__hugetlb_unregister_node)
     596             :                 __hugetlb_unregister_node(node);
     597             : }
     598             : 
     599             : void register_hugetlbfs_with_node(node_registration_func_t doregister,
     600             :                                   node_registration_func_t unregister)
     601             : {
     602             :         __hugetlb_register_node   = doregister;
     603             :         __hugetlb_unregister_node = unregister;
     604             : }
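
For illustration only, the hugetlb side of this handshake would pass its own
per-node [un]registration helpers to register_hugetlbfs_with_node() during its
init; the helper names below are placeholders, not functions from this file:

        #include <linux/node.h>

        /* Sketch of the hugetlb subsystem's side of the handshake. */
        static void my_hugetlb_register_node(struct node *node)
        {
                /* create per-node hstate attributes ... */
        }

        static void my_hugetlb_unregister_node(struct node *node)
        {
                /* remove per-node hstate attributes ... */
        }

        static void example_hugetlb_wire_up(void)
        {
                register_hugetlbfs_with_node(my_hugetlb_register_node,
                                             my_hugetlb_unregister_node);
        }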
     605             : #else
     606           1 : static inline void hugetlb_register_node(struct node *node) {}
     607             : 
     608           0 : static inline void hugetlb_unregister_node(struct node *node) {}
     609             : #endif
     610             : 
     611           0 : static void node_device_release(struct device *dev)
     612             : {
     613           0 :         struct node *node = to_node(dev);
     614             : 
     615             : #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
     616             :         /*
     617             :          * We schedule the work only when a memory section is
     618             :          * onlined/offlined on this node. When we come here,
     619             :          * all the memory on this node has been offlined,
      620             :  * so no new work will be enqueued for this node.
      621             :  *
      622             :  * The work item uses node->node_work, so flush it
      623             :  * before freeing the memory.
     624             :          */
     625             :         flush_work(&node->node_work);
     626             : #endif
     627           0 :         kfree(node);
     628           0 : }
     629             : 
     630             : /*
     631             :  * register_node - Setup a sysfs device for a node.
     632             :  * @num - Node number to use when creating the device.
     633             :  *
     634             :  * Initialize and register the node device.
     635             :  */
     636           1 : static int register_node(struct node *node, int num)
     637             : {
     638           1 :         int error;
     639             : 
     640           1 :         node->dev.id = num;
     641           1 :         node->dev.bus = &node_subsys;
     642           1 :         node->dev.release = node_device_release;
     643           1 :         node->dev.groups = node_dev_groups;
     644           1 :         error = device_register(&node->dev);
     645             : 
     646           1 :         if (error)
     647           0 :                 put_device(&node->dev);
     648             :         else {
     649           1 :                 hugetlb_register_node(node);
     650             : 
     651           1 :                 compaction_register_node(node);
     652             :         }
     653           1 :         return error;
     654             : }
     655             : 
     656             : /**
     657             :  * unregister_node - unregister a node device
     658             :  * @node: node going away
     659             :  *
     660             :  * Unregisters a node device @node.  All the devices on the node must be
     661             :  * unregistered before calling this function.
     662             :  */
     663           0 : void unregister_node(struct node *node)
     664             : {
     665           0 :         hugetlb_unregister_node(node);          /* no-op, if memoryless node */
     666           0 :         node_remove_accesses(node);
     667           0 :         node_remove_caches(node);
     668           0 :         device_unregister(&node->dev);
     669           0 : }
     670             : 
     671             : struct node *node_devices[MAX_NUMNODES];
     672             : 
     673             : /*
     674             :  * register cpu under node
     675             :  */
     676           8 : int register_cpu_under_node(unsigned int cpu, unsigned int nid)
     677             : {
     678           8 :         int ret;
     679           8 :         struct device *obj;
     680             : 
     681           8 :         if (!node_online(nid))
     682             :                 return 0;
     683             : 
     684           8 :         obj = get_cpu_device(cpu);
     685           8 :         if (!obj)
     686             :                 return 0;
     687             : 
     688           8 :         ret = sysfs_create_link(&node_devices[nid]->dev.kobj,
     689             :                                 &obj->kobj,
     690           4 :                                 kobject_name(&obj->kobj));
     691           4 :         if (ret)
     692             :                 return ret;
     693             : 
     694           4 :         return sysfs_create_link(&obj->kobj,
     695             :                                  &node_devices[nid]->dev.kobj,
     696           4 :                                  kobject_name(&node_devices[nid]->dev.kobj));
     697             : }
     698             : 
     699             : /**
     700             :  * register_memory_node_under_compute_node - link memory node to its compute
     701             :  *                                           node for a given access class.
     702             :  * @mem_nid:    Memory node number
      703             :  * @cpu_nid:    CPU node number
     704             :  * @access:     Access class to register
     705             :  *
     706             :  * Description:
     707             :  *      For use with platforms that may have separate memory and compute nodes.
     708             :  *      This function will export node relationships linking which memory
     709             :  *      initiator nodes can access memory targets at a given ranked access
     710             :  *      class.
     711             :  */
     712           0 : int register_memory_node_under_compute_node(unsigned int mem_nid,
     713             :                                             unsigned int cpu_nid,
     714             :                                             unsigned access)
     715             : {
     716           0 :         struct node *init_node, *targ_node;
     717           0 :         struct node_access_nodes *initiator, *target;
     718           0 :         int ret;
     719             : 
     720           0 :         if (!node_online(cpu_nid) || !node_online(mem_nid))
     721           0 :                 return -ENODEV;
     722             : 
     723           0 :         init_node = node_devices[cpu_nid];
     724           0 :         targ_node = node_devices[mem_nid];
     725           0 :         initiator = node_init_node_access(init_node, access);
     726           0 :         target = node_init_node_access(targ_node, access);
     727           0 :         if (!initiator || !target)
     728             :                 return -ENOMEM;
     729             : 
     730           0 :         ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
     731             :                                       &targ_node->dev.kobj,
     732           0 :                                       dev_name(&targ_node->dev));
     733           0 :         if (ret)
     734             :                 return ret;
     735             : 
     736           0 :         ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
     737             :                                       &init_node->dev.kobj,
     738           0 :                                       dev_name(&init_node->dev));
     739           0 :         if (ret)
     740           0 :                 goto err;
     741             : 
     742             :         return 0;
     743           0 :  err:
     744           0 :         sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
     745           0 :                                      dev_name(&targ_node->dev));
     746           0 :         return ret;
     747             : }
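
A hedged usage sketch: once performance data exists for both nodes, a platform
driver can link a memory-only target node to its best-performing initiator for
an access class. The node ids and class below are assumptions for illustration:

        #include <linux/node.h>

        /* Hypothetical example: node 1 is a memory-only target whose best
         * initiator for access class 0 is node 0.  On success this creates
         * the access0/targets and access0/initiators symlinks in sysfs.
         */
        static int example_link_nodes(void)
        {
                return register_memory_node_under_compute_node(1, 0, 0);
        }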
     748             : 
     749           0 : int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
     750             : {
     751           0 :         struct device *obj;
     752             : 
     753           0 :         if (!node_online(nid))
     754             :                 return 0;
     755             : 
     756           0 :         obj = get_cpu_device(cpu);
     757           0 :         if (!obj)
     758             :                 return 0;
     759             : 
     760           0 :         sysfs_remove_link(&node_devices[nid]->dev.kobj,
     761           0 :                           kobject_name(&obj->kobj));
     762           0 :         sysfs_remove_link(&obj->kobj,
     763           0 :                           kobject_name(&node_devices[nid]->dev.kobj));
     764             : 
     765           0 :         return 0;
     766             : }
     767             : 
     768             : #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
     769             : static int __ref get_nid_for_pfn(unsigned long pfn)
     770             : {
     771             :         if (!pfn_valid_within(pfn))
     772             :                 return -1;
     773             : #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
     774             :         if (system_state < SYSTEM_RUNNING)
     775             :                 return early_pfn_to_nid(pfn);
     776             : #endif
     777             :         return pfn_to_nid(pfn);
     778             : }
     779             : 
     780             : static void do_register_memory_block_under_node(int nid,
     781             :                                                 struct memory_block *mem_blk)
     782             : {
     783             :         int ret;
     784             : 
     785             :         /*
     786             :          * If this memory block spans multiple nodes, we only indicate
     787             :          * the last processed node.
     788             :          */
     789             :         mem_blk->nid = nid;
     790             : 
     791             :         ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
     792             :                                        &mem_blk->dev.kobj,
     793             :                                        kobject_name(&mem_blk->dev.kobj));
     794             :         if (ret && ret != -EEXIST)
     795             :                 dev_err_ratelimited(&node_devices[nid]->dev,
     796             :                                     "can't create link to %s in sysfs (%d)\n",
     797             :                                     kobject_name(&mem_blk->dev.kobj), ret);
     798             : 
     799             :         ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
     800             :                                 &node_devices[nid]->dev.kobj,
     801             :                                 kobject_name(&node_devices[nid]->dev.kobj));
     802             :         if (ret && ret != -EEXIST)
     803             :                 dev_err_ratelimited(&mem_blk->dev,
     804             :                                     "can't create link to %s in sysfs (%d)\n",
     805             :                                     kobject_name(&node_devices[nid]->dev.kobj),
     806             :                                     ret);
     807             : }
     808             : 
     809             : /* register memory section under specified node if it spans that node */
     810             : static int register_mem_block_under_node_early(struct memory_block *mem_blk,
     811             :                                                void *arg)
     812             : {
     813             :         unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE;
     814             :         unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr);
     815             :         unsigned long end_pfn = start_pfn + memory_block_pfns - 1;
     816             :         int nid = *(int *)arg;
     817             :         unsigned long pfn;
     818             : 
     819             :         for (pfn = start_pfn; pfn <= end_pfn; pfn++) {
     820             :                 int page_nid;
     821             : 
     822             :                 /*
      823             :                  * The memory block could have several absent sections from the start.
      824             :                  * Skip the pfn range of any absent section.
     825             :                  */
     826             :                 if (!pfn_in_present_section(pfn)) {
     827             :                         pfn = round_down(pfn + PAGES_PER_SECTION,
     828             :                                          PAGES_PER_SECTION) - 1;
     829             :                         continue;
     830             :                 }
     831             : 
     832             :                 /*
      833             :                  * We need to check if the page belongs to nid only in the
      834             :                  * boot case because node ranges can be interleaved.
     835             :                  */
     836             :                 page_nid = get_nid_for_pfn(pfn);
     837             :                 if (page_nid < 0)
     838             :                         continue;
     839             :                 if (page_nid != nid)
     840             :                         continue;
     841             : 
     842             :                 do_register_memory_block_under_node(nid, mem_blk);
     843             :                 return 0;
     844             :         }
     845             :         /* mem section does not span the specified node */
     846             :         return 0;
     847             : }
     848             : 
     849             : /*
     850             :  * During hotplug we know that all pages in the memory block belong to the same
     851             :  * node.
     852             :  */
     853             : static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
     854             :                                                  void *arg)
     855             : {
     856             :         int nid = *(int *)arg;
     857             : 
     858             :         do_register_memory_block_under_node(nid, mem_blk);
     859             :         return 0;
     860             : }
     861             : 
     862             : /*
     863             :  * Unregister a memory block device under the node it spans. Memory blocks
      864             :  * with multiple nodes cannot be offlined and can therefore never be removed.
     865             :  */
     866             : void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
     867             : {
     868             :         if (mem_blk->nid == NUMA_NO_NODE)
     869             :                 return;
     870             : 
     871             :         sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj,
     872             :                           kobject_name(&mem_blk->dev.kobj));
     873             :         sysfs_remove_link(&mem_blk->dev.kobj,
     874             :                           kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
     875             : }
     876             : 
     877             : void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
     878             :                        enum meminit_context context)
     879             : {
     880             :         walk_memory_blocks_func_t func;
     881             : 
     882             :         if (context == MEMINIT_HOTPLUG)
     883             :                 func = register_mem_block_under_node_hotplug;
     884             :         else
     885             :                 func = register_mem_block_under_node_early;
     886             : 
     887             :         walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
     888             :                            (void *)&nid, func);
     889             :         return;
     890             : }
     891             : 
     892             : #ifdef CONFIG_HUGETLBFS
     893             : /*
      894             :  * Handle per-node hstate attribute [un]registration on transitions
     895             :  * to/from memoryless state.
     896             :  */
     897             : static void node_hugetlb_work(struct work_struct *work)
     898             : {
     899             :         struct node *node = container_of(work, struct node, node_work);
     900             : 
     901             :         /*
     902             :          * We only get here when a node transitions to/from memoryless state.
     903             :          * We can detect which transition occurred by examining whether the
      904             :  * node has memory now.  hugetlb_register_node() already checks this
     905             :          * so we try to register the attributes.  If that fails, then the
      906             :  * node has transitioned to memoryless, so we try to unregister the
     907             :          * attributes.
     908             :          */
     909             :         if (!hugetlb_register_node(node))
     910             :                 hugetlb_unregister_node(node);
     911             : }
     912             : 
     913             : static void init_node_hugetlb_work(int nid)
     914             : {
     915             :         INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work);
     916             : }
     917             : 
     918             : static int node_memory_callback(struct notifier_block *self,
     919             :                                 unsigned long action, void *arg)
     920             : {
     921             :         struct memory_notify *mnb = arg;
     922             :         int nid = mnb->status_change_nid;
     923             : 
     924             :         switch (action) {
     925             :         case MEM_ONLINE:
     926             :         case MEM_OFFLINE:
     927             :                 /*
     928             :                  * offload per node hstate [un]registration to a work thread
     929             :                  * when transitioning to/from memoryless state.
     930             :                  */
     931             :                 if (nid != NUMA_NO_NODE)
     932             :                         schedule_work(&node_devices[nid]->node_work);
     933             :                 break;
     934             : 
     935             :         case MEM_GOING_ONLINE:
     936             :         case MEM_GOING_OFFLINE:
     937             :         case MEM_CANCEL_ONLINE:
     938             :         case MEM_CANCEL_OFFLINE:
     939             :         default:
     940             :                 break;
     941             :         }
     942             : 
     943             :         return NOTIFY_OK;
     944             : }
     945             : #endif  /* CONFIG_HUGETLBFS */
     946             : #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
     947             : 
     948             : #if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \
     949             :     !defined(CONFIG_HUGETLBFS)
     950             : static inline int node_memory_callback(struct notifier_block *self,
     951             :                                 unsigned long action, void *arg)
     952             : {
     953             :         return NOTIFY_OK;
     954             : }
     955             : 
     956           1 : static void init_node_hugetlb_work(int nid) { }
     957             : 
     958             : #endif
     959             : 
     960           1 : int __register_one_node(int nid)
     961             : {
     962           1 :         int error;
     963           1 :         int cpu;
     964             : 
     965           1 :         node_devices[nid] = kzalloc(sizeof(struct node), GFP_KERNEL);
     966           1 :         if (!node_devices[nid])
     967             :                 return -ENOMEM;
     968             : 
     969           1 :         error = register_node(node_devices[nid], nid);
     970             : 
     971             :         /* link cpu under this node */
     972           6 :         for_each_present_cpu(cpu) {
     973           4 :                 if (cpu_to_node(cpu) == nid)
     974           4 :                         register_cpu_under_node(cpu, nid);
     975             :         }
     976             : 
     977           1 :         INIT_LIST_HEAD(&node_devices[nid]->access_list);
     978             :         /* initialize work queue for memory hot plug */
     979           1 :         init_node_hugetlb_work(nid);
     980           1 :         node_init_caches(nid);
     981             : 
     982           1 :         return error;
     983             : }
     984             : 
     985           0 : void unregister_one_node(int nid)
     986             : {
     987           0 :         if (!node_devices[nid])
     988             :                 return;
     989             : 
     990           0 :         unregister_node(node_devices[nid]);
     991           0 :         node_devices[nid] = NULL;
     992             : }
     993             : 
     994             : /*
     995             :  * node states attributes
     996             :  */
     997             : 
     998             : struct node_attr {
     999             :         struct device_attribute attr;
    1000             :         enum node_states state;
    1001             : };
    1002             : 
    1003           0 : static ssize_t show_node_state(struct device *dev,
    1004             :                                struct device_attribute *attr, char *buf)
    1005             : {
    1006           0 :         struct node_attr *na = container_of(attr, struct node_attr, attr);
    1007             : 
    1008           0 :         return sysfs_emit(buf, "%*pbl\n",
    1009           0 :                           nodemask_pr_args(&node_states[na->state]));
    1010             : }
    1011             : 
    1012             : #define _NODE_ATTR(name, state) \
    1013             :         { __ATTR(name, 0444, show_node_state, NULL), state }
    1014             : 
    1015             : static struct node_attr node_state_attr[] = {
    1016             :         [N_POSSIBLE] = _NODE_ATTR(possible, N_POSSIBLE),
    1017             :         [N_ONLINE] = _NODE_ATTR(online, N_ONLINE),
    1018             :         [N_NORMAL_MEMORY] = _NODE_ATTR(has_normal_memory, N_NORMAL_MEMORY),
    1019             : #ifdef CONFIG_HIGHMEM
    1020             :         [N_HIGH_MEMORY] = _NODE_ATTR(has_high_memory, N_HIGH_MEMORY),
    1021             : #endif
    1022             :         [N_MEMORY] = _NODE_ATTR(has_memory, N_MEMORY),
    1023             :         [N_CPU] = _NODE_ATTR(has_cpu, N_CPU),
    1024             :         [N_GENERIC_INITIATOR] = _NODE_ATTR(has_generic_initiator,
    1025             :                                            N_GENERIC_INITIATOR),
    1026             : };
    1027             : 
    1028             : static struct attribute *node_state_attrs[] = {
    1029             :         &node_state_attr[N_POSSIBLE].attr.attr,
    1030             :         &node_state_attr[N_ONLINE].attr.attr,
    1031             :         &node_state_attr[N_NORMAL_MEMORY].attr.attr,
    1032             : #ifdef CONFIG_HIGHMEM
    1033             :         &node_state_attr[N_HIGH_MEMORY].attr.attr,
    1034             : #endif
    1035             :         &node_state_attr[N_MEMORY].attr.attr,
    1036             :         &node_state_attr[N_CPU].attr.attr,
    1037             :         &node_state_attr[N_GENERIC_INITIATOR].attr.attr,
    1038             :         NULL
    1039             : };
    1040             : 
    1041             : static struct attribute_group memory_root_attr_group = {
    1042             :         .attrs = node_state_attrs,
    1043             : };
    1044             : 
    1045             : static const struct attribute_group *cpu_root_attr_groups[] = {
    1046             :         &memory_root_attr_group,
    1047             :         NULL,
    1048             : };
    1049             : 
    1050             : #define NODE_CALLBACK_PRI       2       /* lower than SLAB */
    1051           1 : static int __init register_node_type(void)
    1052             : {
    1053           1 :         int ret;
    1054             : 
    1055           1 :         BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES);
    1056           1 :         BUILD_BUG_ON(ARRAY_SIZE(node_state_attrs)-1 != NR_NODE_STATES);
    1057             : 
    1058           1 :         ret = subsys_system_register(&node_subsys, cpu_root_attr_groups);
    1059           1 :         if (!ret) {
    1060             :                 static struct notifier_block node_memory_callback_nb = {
    1061             :                         .notifier_call = node_memory_callback,
    1062             :                         .priority = NODE_CALLBACK_PRI,
    1063             :                 };
    1064             :                 register_hotmemory_notifier(&node_memory_callback_nb);
    1065             :         }
    1066             : 
    1067             :         /*
    1068             :          * Note:  we're not going to unregister the node class if we fail
    1069             :          * to register the node state class attribute files.
    1070             :          */
    1071           1 :         return ret;
    1072             : }
    1073             : postcore_initcall(register_node_type);

Generated by: LCOV version 1.14