LCOV - code coverage report
Current view: top level - kernel/irq - affinity.c (source / functions)
Test: landlock.info          Lines:     0 / 189 (0.0 %)
Date: 2021-04-22 12:43:58    Functions: 0 / 12  (0.0 %)

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Thomas Gleixner.
 * Copyright (C) 2016-2017 Christoph Hellwig.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/sort.h>

static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
                                unsigned int cpus_per_vec)
{
        const struct cpumask *siblmsk;
        int cpu, sibl;

        for ( ; cpus_per_vec > 0; ) {
                cpu = cpumask_first(nmsk);

                /* Should not happen, but I'm too lazy to think about it */
                if (cpu >= nr_cpu_ids)
                        return;

                cpumask_clear_cpu(cpu, nmsk);
                cpumask_set_cpu(cpu, irqmsk);
                cpus_per_vec--;

                /* If the cpu has siblings, use them first */
                siblmsk = topology_sibling_cpumask(cpu);
                for (sibl = -1; cpus_per_vec > 0; ) {
                        sibl = cpumask_next(sibl, siblmsk);
                        if (sibl >= nr_cpu_ids)
                                break;
                        if (!cpumask_test_and_clear_cpu(sibl, nmsk))
                                continue;
                        cpumask_set_cpu(sibl, irqmsk);
                        cpus_per_vec--;
                }
        }
}
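
/*
 * Illustrative example (hypothetical CPU topology, not from this file):
 * with nmsk = {0,1,2,3}, where CPUs 0/1 and 2/3 are hyperthread
 * siblings, and cpus_per_vec = 2, the loop above first grabs CPU 0,
 * then prefers its sibling CPU 1 via topology_sibling_cpumask(), so the
 * vector ends up with {0,1} and {2,3} stays in nmsk for the next vector.
 */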

static cpumask_var_t *alloc_node_to_cpumask(void)
{
        cpumask_var_t *masks;
        int node;

        masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
        if (!masks)
                return NULL;

        for (node = 0; node < nr_node_ids; node++) {
                if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
                        goto out_unwind;
        }

        return masks;

out_unwind:
        while (--node >= 0)
                free_cpumask_var(masks[node]);
        kfree(masks);
        return NULL;
}

static void free_node_to_cpumask(cpumask_var_t *masks)
{
        int node;

        for (node = 0; node < nr_node_ids; node++)
                free_cpumask_var(masks[node]);
        kfree(masks);
}

static void build_node_to_cpumask(cpumask_var_t *masks)
{
        int cpu;

        for_each_possible_cpu(cpu)
                cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}

static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
                                const struct cpumask *mask, nodemask_t *nodemsk)
{
        int n, nodes = 0;

        /* Calculate the number of nodes in the supplied affinity mask */
        for_each_node(n) {
                if (cpumask_intersects(mask, node_to_cpumask[n])) {
                        node_set(n, *nodemsk);
                        nodes++;
                }
        }
        return nodes;
}

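/*
 * The union below is used in two phases: alloc_nodes_vectors() first
 * fills ->ncpus with the node's active CPU count (UINT_MAX marks a node
 * without active CPUs), then overwrites the same storage with
 * ->nvectors, the number of vectors assigned to that node.
 */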
struct node_vectors {
        unsigned id;

        union {
                unsigned nvectors;
                unsigned ncpus;
        };
};

static int ncpus_cmp_func(const void *l, const void *r)
{
        const struct node_vectors *ln = l;
        const struct node_vectors *rn = r;

        return ln->ncpus - rn->ncpus;
}

/*
 * Allocate a vector count for each node, so that for each node:
 *
 * 1) the allocated number is >= 1
 *
 * 2) the allocated number is <= the number of active CPUs of this node
 *
 * The total number of vectors actually allocated may be less than
 * @numvecs when the total number of active CPUs is less than @numvecs.
 *
 * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]'
 * for each node.
 */
static void alloc_nodes_vectors(unsigned int numvecs,
                                cpumask_var_t *node_to_cpumask,
                                const struct cpumask *cpu_mask,
                                const nodemask_t nodemsk,
                                struct cpumask *nmsk,
                                struct node_vectors *node_vectors)
{
        unsigned n, remaining_ncpus = 0;

        for (n = 0; n < nr_node_ids; n++) {
                node_vectors[n].id = n;
                node_vectors[n].ncpus = UINT_MAX;
        }

        for_each_node_mask(n, nodemsk) {
                unsigned ncpus;

                cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);
                ncpus = cpumask_weight(nmsk);

                if (!ncpus)
                        continue;
                remaining_ncpus += ncpus;
                node_vectors[n].ncpus = ncpus;
        }

        numvecs = min_t(unsigned, remaining_ncpus, numvecs);

        sort(node_vectors, nr_node_ids, sizeof(node_vectors[0]),
             ncpus_cmp_func, NULL);

        /*
         * Allocate vectors for each node according to the ratio of this
         * node's nr_cpus to the remaining un-assigned ncpus. 'numvecs' is
         * bigger than the number of active numa nodes. Always start the
         * allocation from the node with the smallest nr_cpus.
         *
         * This guarantees that each active node gets allocated at least
         * one vector, and the reasoning is simple: over-allocation can
         * only happen when a node is assigned one vector, so the other
         * nodes will still be allocated >= 1 vector each, since 'numvecs'
         * is bigger than the number of numa nodes.
         *
         * One key invariant is that the number of allocated vectors for
         * each node is <= the CPU count of this node:
         *
         * 1) suppose there are two nodes: A and B
         *      ncpu(X) is the CPU count of node X
         *      vecs(X) is the vector count allocated to node X via this
         *      algorithm
         *
         *      ncpu(A) <= ncpu(B)
         *      ncpu(A) + ncpu(B) = N
         *      vecs(A) + vecs(B) = V
         *
         *      vecs(A) = max(1, round_down(V * ncpu(A) / N))
         *      vecs(B) = V - vecs(A)
         *
         *      both N and V are integers, and 2 <= V <= N; suppose
         *      V = N - delta, with 0 <= delta <= N - 2
         *
         * 2) obviously vecs(A) <= ncpu(A) because:
         *
         *      if vecs(A) is 1, then vecs(A) <= ncpu(A) given
         *      ncpu(A) >= 1
         *
         *      otherwise,
         *              vecs(A) <= V * ncpu(A) / N <= ncpu(A), given V <= N
         *
         * 3) prove that vecs(B) <= ncpu(B):
         *
         *      if round_down(V * ncpu(A) / N) == 0, vecs(B) won't be
         *      over-allocated, so vecs(B) <= ncpu(B),
         *
         *      otherwise:
         *
         *      vecs(A) =
         *              round_down(V * ncpu(A) / N) =
         *              round_down((N - delta) * ncpu(A) / N) =
         *              round_down((N * ncpu(A) - delta * ncpu(A)) / N)  >=
         *              round_down((N * ncpu(A) - delta * N) / N)        =
         *              ncpu(A) - delta
         *
         *      then, since V + delta = N:
         *
         *      vecs(A) - V >= ncpu(A) - delta - V
         *      =>
         *      V - vecs(A) <= V + delta - ncpu(A)
         *      =>
         *      vecs(B) <= N - ncpu(A)
         *      =>
         *      vecs(B) <= ncpu(B)
         *
         * For three or more nodes, the smallest node can be treated as A
         * and all remaining nodes together as one big node B, which is
         * exactly how this algorithm proceeds: 'remaining_ncpus' and
         * 'numvecs' are re-calculated after each node, so finally
         * vecs(X) <= ncpu(X) holds for each node X.
         */
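        /*
         * Worked example (hypothetical numbers): two active nodes with
         * ncpu(A) = 2 and ncpu(B) = 6 (so N = 8) and numvecs = V = 3.
         * Starting from the smaller node:
         *      vecs(A) = max(1, round_down(3 * 2 / 8)) = max(1, 0) = 1
         *      remaining_ncpus = 6, numvecs = 2
         *      vecs(B) = max(1, round_down(2 * 6 / 6)) = 2
         * Each active node gets at least one vector and vecs(X) <= ncpu(X).
         */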
        for (n = 0; n < nr_node_ids; n++) {
                unsigned nvectors, ncpus;

                if (node_vectors[n].ncpus == UINT_MAX)
                        continue;

                WARN_ON_ONCE(numvecs == 0);

                ncpus = node_vectors[n].ncpus;
                nvectors = max_t(unsigned, 1,
                                 numvecs * ncpus / remaining_ncpus);
                WARN_ON_ONCE(nvectors > ncpus);

                node_vectors[n].nvectors = nvectors;

                remaining_ncpus -= ncpus;
                numvecs -= nvectors;
        }
}
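
/*
 * Minimal userspace sketch (not kernel code, all names hypothetical) of
 * the proportional assignment implemented above, kept under #if 0 so it
 * is never compiled.  The node array is assumed to be sorted by
 * ascending CPU count, as the sort() call above guarantees.
 */
#if 0
#include <stdio.h>

static void model_alloc_nodes_vectors(unsigned numvecs,
                                      const unsigned *ncpus, unsigned nodes)
{
        unsigned remaining_ncpus = 0, n;

        for (n = 0; n < nodes; n++)
                remaining_ncpus += ncpus[n];    /* total active CPUs */
        if (numvecs > remaining_ncpus)
                numvecs = remaining_ncpus;

        for (n = 0; n < nodes; n++) {
                unsigned nvectors = numvecs * ncpus[n] / remaining_ncpus;

                if (nvectors < 1)
                        nvectors = 1;   /* every active node gets >= 1 */
                printf("node %u: %u CPUs -> %u vectors\n",
                       n, ncpus[n], nvectors);
                remaining_ncpus -= ncpus[n];
                numvecs -= nvectors;
        }
}
#endif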

static int __irq_build_affinity_masks(unsigned int startvec,
                                      unsigned int numvecs,
                                      unsigned int firstvec,
                                      cpumask_var_t *node_to_cpumask,
                                      const struct cpumask *cpu_mask,
                                      struct cpumask *nmsk,
                                      struct irq_affinity_desc *masks)
{
        unsigned int i, n, nodes, cpus_per_vec, extra_vecs, done = 0;
        unsigned int last_affv = firstvec + numvecs;
        unsigned int curvec = startvec;
        nodemask_t nodemsk = NODE_MASK_NONE;
        struct node_vectors *node_vectors;

        if (!cpumask_weight(cpu_mask))
                return 0;

        nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);

        /*
         * If the number of nodes in the mask is greater than or equal to
         * the number of vectors, just spread the vectors across the nodes.
         */
        if (numvecs <= nodes) {
                for_each_node_mask(n, nodemsk) {
                        cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
                                   node_to_cpumask[n]);
                        if (++curvec == last_affv)
                                curvec = firstvec;
                }
                return numvecs;
        }

        node_vectors = kcalloc(nr_node_ids,
                               sizeof(struct node_vectors),
                               GFP_KERNEL);
        if (!node_vectors)
                return -ENOMEM;

        /* allocate the vector count for each node */
        alloc_nodes_vectors(numvecs, node_to_cpumask, cpu_mask,
                            nodemsk, nmsk, node_vectors);

        for (i = 0; i < nr_node_ids; i++) {
                unsigned int ncpus, v;
                struct node_vectors *nv = &node_vectors[i];

                if (nv->nvectors == UINT_MAX)
                        continue;

                /* Get the cpus on this node which are in the mask */
                cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]);
                ncpus = cpumask_weight(nmsk);
                if (!ncpus)
                        continue;

                WARN_ON_ONCE(nv->nvectors > ncpus);

                /* Account for rounding errors */
                extra_vecs = ncpus - nv->nvectors * (ncpus / nv->nvectors);

                /* Spread allocated vectors on CPUs of the current node */
                for (v = 0; v < nv->nvectors; v++, curvec++) {
                        cpus_per_vec = ncpus / nv->nvectors;

                        /* Account for extra vectors to compensate for rounding errors */
                        if (extra_vecs) {
                                cpus_per_vec++;
                                --extra_vecs;
                        }

                        /*
                         * wrapping has to be considered, since 'startvec'
                         * may point anywhere
                         */
                        if (curvec >= last_affv)
                                curvec = firstvec;
                        irq_spread_init_one(&masks[curvec].mask, nmsk,
                                                cpus_per_vec);
                }
                done += nv->nvectors;
        }
        kfree(node_vectors);
        return done;
}
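
/*
 * Worked example of the rounding handling above (hypothetical numbers):
 * for a node with ncpus = 5 and nv->nvectors = 2, extra_vecs = 5 % 2 = 1,
 * so the first vector gets cpus_per_vec = 2 + 1 = 3 CPUs and the second
 * gets 2, absorbing the remainder on the earliest vectors.
 */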

/*
 * build affinity in two stages:
 *      1) spread present CPUs on these vectors
 *      2) spread the remaining possible CPUs on these vectors
 */
static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
                                    unsigned int firstvec,
                                    struct irq_affinity_desc *masks)
{
        unsigned int curvec = startvec, nr_present = 0, nr_others = 0;
        cpumask_var_t *node_to_cpumask;
        cpumask_var_t nmsk, npresmsk;
        int ret = -ENOMEM;

        if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
                return ret;

        if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
                goto fail_nmsk;

        node_to_cpumask = alloc_node_to_cpumask();
        if (!node_to_cpumask)
                goto fail_npresmsk;

        /* Stabilize the cpumasks */
        get_online_cpus();
        build_node_to_cpumask(node_to_cpumask);

        /* Spread on present CPUs starting from affd->pre_vectors */
        ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
                                         node_to_cpumask, cpu_present_mask,
                                         nmsk, masks);
        if (ret < 0)
                goto fail_build_affinity;
        nr_present = ret;

        /*
         * Spread on non-present CPUs starting from the next vector to be
         * handled. If the spreading of present CPUs already exhausted the
         * vector space, assign the non-present CPUs to the already spread
         * out vectors.
         */
        if (nr_present >= numvecs)
                curvec = firstvec;
        else
                curvec = firstvec + nr_present;
        cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
        ret = __irq_build_affinity_masks(curvec, numvecs, firstvec,
                                         node_to_cpumask, npresmsk, nmsk,
                                         masks);
        if (ret >= 0)
                nr_others = ret;

 fail_build_affinity:
        put_online_cpus();

        if (ret >= 0)
                WARN_ON(nr_present + nr_others < numvecs);

        free_node_to_cpumask(node_to_cpumask);

 fail_npresmsk:
        free_cpumask_var(npresmsk);

 fail_nmsk:
        free_cpumask_var(nmsk);
        return ret < 0 ? ret : 0;
}
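
/*
 * Example of the two stages (hypothetical single-node system): with 2
 * present CPUs out of 4 possible and numvecs = 4, the first stage fills
 * vectors firstvec and firstvec + 1 from cpu_present_mask
 * (nr_present = 2); the second stage continues at firstvec + 2 and
 * spreads the two not-yet-present CPUs on the remaining vectors, so
 * hot-plugged CPUs already have vectors waiting for them.
 */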

static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
{
        affd->nr_sets = 1;
        affd->set_size[0] = affvecs;
}

/**
 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 * @nvecs:      The total number of vectors
 * @affd:       Description of the affinity requirements
 *
 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 */
struct irq_affinity_desc *
irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
{
        unsigned int affvecs, curvec, usedvecs, i;
        struct irq_affinity_desc *masks = NULL;

        /*
         * Determine the number of vectors which need interrupt affinities
         * assigned. If the pre/post request exhausts the available vectors
         * then there is nothing to do here except invoke the calc_sets()
         * callback so the device driver can adjust to the situation.
         */
        if (nvecs > affd->pre_vectors + affd->post_vectors)
                affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
        else
                affvecs = 0;

        /*
         * Simple invocations do not provide a calc_sets() callback. Install
         * the generic one.
         */
        if (!affd->calc_sets)
                affd->calc_sets = default_calc_sets;

        /* Recalculate the sets */
        affd->calc_sets(affd, affvecs);

        if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
                return NULL;

        /* Nothing to assign? */
        if (!affvecs)
                return NULL;

        masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
        if (!masks)
                return NULL;

        /* Fill out vectors at the beginning that don't need affinity */
        for (curvec = 0; curvec < affd->pre_vectors; curvec++)
                cpumask_copy(&masks[curvec].mask, irq_default_affinity);

        /*
         * Spread on present CPUs starting from affd->pre_vectors. If we
         * have multiple sets, build each set's affinity mask separately.
         */
        for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
                unsigned int this_vecs = affd->set_size[i];
                int ret;

                ret = irq_build_affinity_masks(curvec, this_vecs,
                                               curvec, masks);
                if (ret) {
                        kfree(masks);
                        return NULL;
                }
                curvec += this_vecs;
                usedvecs += this_vecs;
        }

        /* Fill out vectors at the end that don't need affinity */
        if (usedvecs >= affvecs)
                curvec = affd->pre_vectors + affvecs;
        else
                curvec = affd->pre_vectors + usedvecs;
        for (; curvec < nvecs; curvec++)
                cpumask_copy(&masks[curvec].mask, irq_default_affinity);

        /* Mark the managed interrupts */
        for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
                masks[i].is_managed = 1;

        return masks;
}
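
/*
 * Usage sketch (hypothetical driver, not from this file; in-tree callers
 * typically reach this function through PCI MSI vector allocation rather
 * than calling it directly).  A device reserving one non-managed vector
 * for an admin queue could do:
 *
 *      struct irq_affinity affd = { .pre_vectors = 1 };
 *      struct irq_affinity_desc *masks;
 *
 *      masks = irq_create_affinity_masks(nvecs, &affd);
 *      if (!masks)
 *              return -ENOMEM;
 *      // masks[0] keeps irq_default_affinity and is not managed;
 *      // masks[1..nvecs-1] are spread over CPUs with is_managed set.
 *      kfree(masks);
 */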

/**
 * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 * @minvec:     The minimum number of vectors available
 * @maxvec:     The maximum number of vectors available
 * @affd:       Description of the affinity requirements
 */
unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
                                       const struct irq_affinity *affd)
{
        unsigned int resv = affd->pre_vectors + affd->post_vectors;
        unsigned int set_vecs;

        if (resv > minvec)
                return 0;

        if (affd->calc_sets) {
                set_vecs = maxvec - resv;
        } else {
                get_online_cpus();
                set_vecs = cpumask_weight(cpu_possible_mask);
                put_online_cpus();
        }

        return resv + min(set_vecs, maxvec - resv);
}
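
/*
 * Worked example (hypothetical numbers): with minvec = 2, maxvec = 16,
 * pre_vectors = 1, post_vectors = 0, no calc_sets() callback and 8
 * possible CPUs: resv = 1, set_vecs = 8, and the function returns
 * 1 + min(8, 15) = 9 vectors.
 */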

Generated by: LCOV version 1.14