LCOV - code coverage report
Current view: top level - kernel/events - ring_buffer.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58
Coverage:     Lines: 0 / 367 (0.0 %)     Functions: 0 / 27 (0.0 %)

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : /*
       3             :  * Performance events ring-buffer code:
       4             :  *
       5             :  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
       6             :  *  Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar
       7             :  *  Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra
       8             :  *  Copyright  ©  2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
       9             :  */
      10             : 
      11             : #include <linux/perf_event.h>
      12             : #include <linux/vmalloc.h>
      13             : #include <linux/slab.h>
      14             : #include <linux/circ_buf.h>
      15             : #include <linux/poll.h>
      16             : #include <linux/nospec.h>
      17             : 
      18             : #include "internal.h"
      19             : 
      20           0 : static void perf_output_wakeup(struct perf_output_handle *handle)
      21             : {
      22           0 :         atomic_set(&handle->rb->poll, EPOLLIN);
      23             : 
      24           0 :         handle->event->pending_wakeup = 1;
      25           0 :         irq_work_queue(&handle->event->pending);
      26           0 : }
      27             : 
      28             : /*
      29             :  * We need to ensure a later event_id doesn't publish a head when a former
      30             :  * event isn't done writing. However, since we need to deal with NMIs, we
      31             :  * cannot fully serialize things.
      32             :  *
      33             :  * We only publish the head (and generate a wakeup) when the outer-most
      34             :  * event completes.
      35             :  */
      36           0 : static void perf_output_get_handle(struct perf_output_handle *handle)
      37             : {
      38           0 :         struct perf_buffer *rb = handle->rb;
      39             : 
      40           0 :         preempt_disable();
      41             : 
      42             :         /*
      43             :          * Avoid an explicit LOAD/STORE such that architectures with memops
      44             :          * can use them.
      45             :          */
      46           0 :         (*(volatile unsigned int *)&rb->nest)++;
      47           0 :         handle->wakeup = local_read(&rb->wakeup);
      48           0 : }
      49             : 
      50           0 : static void perf_output_put_handle(struct perf_output_handle *handle)
      51             : {
      52           0 :         struct perf_buffer *rb = handle->rb;
      53           0 :         unsigned long head;
      54           0 :         unsigned int nest;
      55             : 
      56             :         /*
      57             :          * If this isn't the outermost nesting, we don't have to update
      58             :          * @rb->user_page->data_head.
      59             :          */
      60           0 :         nest = READ_ONCE(rb->nest);
      61           0 :         if (nest > 1) {
      62           0 :                 WRITE_ONCE(rb->nest, nest - 1);
      63           0 :                 goto out;
      64             :         }
      65             : 
      66           0 : again:
      67             :         /*
      68             :          * In order to avoid publishing a head value that goes backwards,
      69             :          * we must ensure the load of @rb->head happens after we've
      70             :          * incremented @rb->nest.
      71             :          *
      72             :          * Otherwise we can observe a @rb->head value before one published
      73             :          * by an IRQ/NMI happening between the load and the increment.
      74             :          */
      75           0 :         barrier();
      76           0 :         head = local_read(&rb->head);
      77             : 
      78             :         /*
      79             :          * IRQ/NMI can happen here and advance @rb->head, causing our
      80             :          * load above to be stale.
      81             :          */
      82             : 
      83             :         /*
      84             :          * Since the mmap() consumer (userspace) can run on a different CPU:
      85             :          *
      86             :          *   kernel                             user
      87             :          *
      88             :          *   if (LOAD ->data_tail) {         LOAD ->data_head
      89             :          *                      (A)             smp_rmb()       (C)
      90             :          *      STORE $data                     LOAD $data
      91             :          *      smp_wmb()       (B)             smp_mb()        (D)
      92             :          *      STORE ->data_head            STORE ->data_tail
      93             :          *   }
      94             :          *
      95             :          * Where A pairs with D, and B pairs with C.
      96             :          *
      97             :          * In our case (A) is a control dependency that separates the load of
      98             :  * the ->data_tail and the stores of $data: if ->data_tail indicates
      99             :  * there is no room in the buffer to store $data, we do not store it.
     100             :          *
     101             :          * D needs to be a full barrier since it separates the data READ
     102             :          * from the tail WRITE.
     103             :          *
     104             :          * For B a WMB is sufficient since it separates two WRITEs, and for C
     105             :          * an RMB is sufficient since it separates two READs.
     106             :          *
     107             :          * See perf_output_begin().
     108             :          */
     109           0 :         smp_wmb(); /* B, matches C */
     110           0 :         WRITE_ONCE(rb->user_page->data_head, head);
     111             : 
     112             :         /*
     113             :          * We must publish the head before decrementing the nest count,
     114             :          * otherwise an IRQ/NMI can publish a more recent head value and our
     115             :          * write will (temporarily) publish a stale value.
     116             :          */
     117           0 :         barrier();
     118           0 :         WRITE_ONCE(rb->nest, 0);
     119             : 
     120             :         /*
     121             :          * Ensure we decrement @rb->nest before we validate the @rb->head.
     122             :          * Otherwise we cannot be sure we caught the 'last' nested update.
     123             :          */
     124           0 :         barrier();
     125           0 :         if (unlikely(head != local_read(&rb->head))) {
     126           0 :                 WRITE_ONCE(rb->nest, 1);
     127           0 :                 goto again;
     128             :         }
     129             : 
     130           0 :         if (handle->wakeup != local_read(&rb->wakeup))
     131           0 :                 perf_output_wakeup(handle);
     132             : 
     133           0 : out:
     134           0 :         preempt_enable();
     135           0 : }
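
The user-space half of the A/B/C/D pairing described above is not in this file. A minimal consumer sketch follows, assuming the mmap()ed control page, the data area that follows it, and a power-of-two data_size; the acquire load stands in for the (C) smp_rmb() and the release store for the (D) barrier before publishing the new tail. Records that wrap past the end of the data area are not handled here.

        #include <linux/perf_event.h>   /* struct perf_event_mmap_page, perf_event_header */

        /* Hypothetical per-record callback supplied by the caller. */
        typedef void (*record_fn)(const struct perf_event_header *hdr);

        static void consume_ring(struct perf_event_mmap_page *pg, const char *data,
                                 unsigned long data_size, record_fn handle_record)
        {
                /* C: pairs with the kernel's WRITE_ONCE() of data_head above */
                __u64 head = __atomic_load_n(&pg->data_head, __ATOMIC_ACQUIRE);
                __u64 tail = pg->data_tail;     /* only this consumer writes it */

                while (tail != head) {
                        const struct perf_event_header *hdr = (const void *)
                                (data + (tail & (data_size - 1)));

                        handle_record(hdr);     /* LOAD $data */
                        tail += hdr->size;
                }

                /* D: keep the data reads before the tail store, then publish it */
                __atomic_store_n(&pg->data_tail, tail, __ATOMIC_RELEASE);
        }
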
     136             : 
     137             : static __always_inline bool
     138           0 : ring_buffer_has_space(unsigned long head, unsigned long tail,
     139             :                       unsigned long data_size, unsigned int size,
     140             :                       bool backward)
     141             : {
     142           0 :         if (!backward)
     143           0 :                 return CIRC_SPACE(head, tail, data_size) >= size;
     144             :         else
     145           0 :                 return CIRC_SPACE(tail, head, data_size) >= size;
     146             : }
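
For reference, CIRC_SPACE(head, tail, size) from <linux/circ_buf.h> expands to ((tail) - ((head) + 1)) & ((size) - 1): the bytes a writer may add without catching up to the reader, always keeping one byte free. A worked example, with assumed numbers:

        /*
         * head = 100, tail = 40, data_size = 4096 (assumed for illustration):
         *
         *   used  = head - tail             = 60
         *   space = (40 - (100 + 1)) & 4095 = 4035 = data_size - used - 1
         *
         * so a forward writer has room for a record of up to 4035 bytes; the
         * backward case simply swaps the roles of head and tail.
         */
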
     147             : 
     148             : static __always_inline int
     149           0 : __perf_output_begin(struct perf_output_handle *handle,
     150             :                     struct perf_sample_data *data,
     151             :                     struct perf_event *event, unsigned int size,
     152             :                     bool backward)
     153             : {
     154           0 :         struct perf_buffer *rb;
     155           0 :         unsigned long tail, offset, head;
     156           0 :         int have_lost, page_shift;
     157           0 :         struct {
     158             :                 struct perf_event_header header;
     159             :                 u64                      id;
     160             :                 u64                      lost;
     161             :         } lost_event;
     162             : 
     163           0 :         rcu_read_lock();
     164             :         /*
     165             :          * For inherited events we send all the output towards the parent.
     166             :          */
     167           0 :         if (event->parent)
     168           0 :                 event = event->parent;
     169             : 
     170           0 :         rb = rcu_dereference(event->rb);
     171           0 :         if (unlikely(!rb))
     172           0 :                 goto out;
     173             : 
     174           0 :         if (unlikely(rb->paused)) {
     175           0 :                 if (rb->nr_pages)
     176           0 :                         local_inc(&rb->lost);
     177           0 :                 goto out;
     178             :         }
     179             : 
     180           0 :         handle->rb    = rb;
     181           0 :         handle->event = event;
     182             : 
     183           0 :         have_lost = local_read(&rb->lost);
     184           0 :         if (unlikely(have_lost)) {
     185           0 :                 size += sizeof(lost_event);
     186           0 :                 if (event->attr.sample_id_all)
     187           0 :                         size += event->id_header_size;
     188             :         }
     189             : 
     190           0 :         perf_output_get_handle(handle);
     191             : 
     192           0 :         do {
     193           0 :                 tail = READ_ONCE(rb->user_page->data_tail);
     194           0 :                 offset = head = local_read(&rb->head);
     195           0 :                 if (!rb->overwrite) {
     196           0 :                         if (unlikely(!ring_buffer_has_space(head, tail,
     197             :                                                             perf_data_size(rb),
     198             :                                                             size, backward)))
     199           0 :                                 goto fail;
     200             :                 }
     201             : 
     202             :                 /*
      203             :                  * The above forms a control dependency barrier separating the
      204             :                  * @tail load above from the data stores below, since the @tail
      205             :                  * load is required to compute the branch to the fail label below.
     206             :                  *
     207             :                  * A, matches D; the full memory barrier userspace SHOULD issue
     208             :                  * after reading the data and before storing the new tail
     209             :                  * position.
     210             :                  *
     211             :                  * See perf_output_put_handle().
     212             :                  */
     213             : 
     214           0 :                 if (!backward)
     215           0 :                         head += size;
     216             :                 else
     217           0 :                         head -= size;
     218           0 :         } while (local_cmpxchg(&rb->head, offset, head) != offset);
     219             : 
     220           0 :         if (backward) {
     221           0 :                 offset = head;
     222           0 :                 head = (u64)(-head);
     223             :         }
     224             : 
     225             :         /*
     226             :          * We rely on the implied barrier() by local_cmpxchg() to ensure
     227             :          * none of the data stores below can be lifted up by the compiler.
     228             :          */
     229             : 
     230           0 :         if (unlikely(head - local_read(&rb->wakeup) > rb->watermark))
     231           0 :                 local_add(rb->watermark, &rb->wakeup);
     232             : 
     233           0 :         page_shift = PAGE_SHIFT + page_order(rb);
     234             : 
     235           0 :         handle->page = (offset >> page_shift) & (rb->nr_pages - 1);
     236           0 :         offset &= (1UL << page_shift) - 1;
     237           0 :         handle->addr = rb->data_pages[handle->page] + offset;
     238           0 :         handle->size = (1UL << page_shift) - offset;
     239             : 
     240           0 :         if (unlikely(have_lost)) {
     241           0 :                 lost_event.header.size = sizeof(lost_event);
     242           0 :                 lost_event.header.type = PERF_RECORD_LOST;
     243           0 :                 lost_event.header.misc = 0;
     244           0 :                 lost_event.id          = event->id;
     245           0 :                 lost_event.lost        = local_xchg(&rb->lost, 0);
     246             : 
      247             :                 /* XXX mostly redundant; @data is already fully initialized */
     248           0 :                 perf_event_header__init_id(&lost_event.header, data, event);
     249           0 :                 perf_output_put(handle, lost_event);
     250           0 :                 perf_event__output_id_sample(event, handle, data);
     251             :         }
     252             : 
     253             :         return 0;
     254             : 
     255           0 : fail:
     256           0 :         local_inc(&rb->lost);
     257           0 :         perf_output_put_handle(handle);
     258           0 : out:
     259           0 :         rcu_read_unlock();
     260             : 
     261           0 :         return -ENOSPC;
     262             : }
     263             : 
     264           0 : int perf_output_begin_forward(struct perf_output_handle *handle,
     265             :                               struct perf_sample_data *data,
     266             :                               struct perf_event *event, unsigned int size)
     267             : {
     268           0 :         return __perf_output_begin(handle, data, event, size, false);
     269             : }
     270             : 
     271           0 : int perf_output_begin_backward(struct perf_output_handle *handle,
     272             :                                struct perf_sample_data *data,
     273             :                                struct perf_event *event, unsigned int size)
     274             : {
     275           0 :         return __perf_output_begin(handle, data, event, size, true);
     276             : }
     277             : 
     278           0 : int perf_output_begin(struct perf_output_handle *handle,
     279             :                       struct perf_sample_data *data,
     280             :                       struct perf_event *event, unsigned int size)
     281             : {
     282             : 
     283           0 :         return __perf_output_begin(handle, data, event, size,
     284           0 :                                    unlikely(is_write_backward(event)));
     285             : }
     286             : 
     287           0 : unsigned int perf_output_copy(struct perf_output_handle *handle,
     288             :                       const void *buf, unsigned int len)
     289             : {
     290           0 :         return __output_copy(handle, buf, len);
     291             : }
     292             : 
     293           0 : unsigned int perf_output_skip(struct perf_output_handle *handle,
     294             :                               unsigned int len)
     295             : {
     296           0 :         return __output_skip(handle, NULL, len);
     297             : }
     298             : 
     299           0 : void perf_output_end(struct perf_output_handle *handle)
     300             : {
     301           0 :         perf_output_put_handle(handle);
     302           0 :         rcu_read_unlock();
     303           0 : }
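
Together, perf_output_begin*(), perf_output_copy()/perf_output_put() and perf_output_end() form the producer API. A hedged sketch of the usual calling pattern follows; emit_example_record() and its record layout are illustrative, not part of this file.

        static void emit_example_record(struct perf_event *event,
                                        struct perf_sample_data *data)
        {
                struct perf_output_handle handle;
                struct {
                        struct perf_event_header header;
                        u64                      value;
                } rec = {
                        .header = {
                                .type = PERF_RECORD_SAMPLE,     /* illustrative type */
                                .misc = 0,
                                .size = sizeof(rec),
                        },
                        .value  = 0,
                };

                /* reserves space, takes the RCU read lock and the nest count */
                if (perf_output_begin(&handle, data, event, rec.header.size))
                        return;                 /* no buffer, paused, or no space */

                perf_output_put(&handle, rec);  /* copy the record into the pages */
                perf_output_end(&handle);       /* publish data_head, maybe wake up */
        }
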
     304             : 
     305             : static void
     306           0 : ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
     307             : {
     308           0 :         long max_size = perf_data_size(rb);
     309             : 
     310           0 :         if (watermark)
     311           0 :                 rb->watermark = min(max_size, watermark);
     312             : 
     313           0 :         if (!rb->watermark)
     314           0 :                 rb->watermark = max_size / 2;
     315             : 
     316           0 :         if (flags & RING_BUFFER_WRITABLE)
     317           0 :                 rb->overwrite = 0;
     318             :         else
     319           0 :                 rb->overwrite = 1;
     320             : 
     321           0 :         refcount_set(&rb->refcount, 1);
     322             : 
     323           0 :         INIT_LIST_HEAD(&rb->event_list);
     324           0 :         spin_lock_init(&rb->event_lock);
     325             : 
     326             :         /*
     327             :          * perf_output_begin() only checks rb->paused, therefore
     328             :          * rb->paused must be true if we have no pages for output.
     329             :          */
     330           0 :         if (!rb->nr_pages)
     331           0 :                 rb->paused = 1;
     332           0 : }
     333             : 
     334           0 : void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
     335             : {
     336             :         /*
     337             :          * OVERWRITE is determined by perf_aux_output_end() and can't
     338             :          * be passed in directly.
     339             :          */
     340           0 :         if (WARN_ON_ONCE(flags & PERF_AUX_FLAG_OVERWRITE))
     341             :                 return;
     342             : 
     343           0 :         handle->aux_flags |= flags;
     344             : }
     345             : EXPORT_SYMBOL_GPL(perf_aux_output_flag);
     346             : 
     347             : /*
     348             :  * This is called before hardware starts writing to the AUX area to
     349             :  * obtain an output handle and make sure there's room in the buffer.
     350             :  * When the capture completes, call perf_aux_output_end() to commit
     351             :  * the recorded data to the buffer.
     352             :  *
     353             :  * The ordering is similar to that of perf_output_{begin,end}, with
     354             :  * the exception of (B), which should be taken care of by the pmu
     355             :  * driver, since ordering rules will differ depending on hardware.
     356             :  *
     357             :  * Call this from pmu::start(); see the comment in perf_aux_output_end()
     358             :  * about its use in pmu callbacks. Both can also be called from the PMI
     359             :  * handler if needed.
     360             :  */
     361           0 : void *perf_aux_output_begin(struct perf_output_handle *handle,
     362             :                             struct perf_event *event)
     363             : {
     364           0 :         struct perf_event *output_event = event;
     365           0 :         unsigned long aux_head, aux_tail;
     366           0 :         struct perf_buffer *rb;
     367           0 :         unsigned int nest;
     368             : 
     369           0 :         if (output_event->parent)
     370           0 :                 output_event = output_event->parent;
     371             : 
     372             :         /*
     373             :          * Since this will typically be open across pmu::add/pmu::del, we
     374             :          * grab ring_buffer's refcount instead of holding rcu read lock
     375             :          * to make sure it doesn't disappear under us.
     376             :          */
     377           0 :         rb = ring_buffer_get(output_event);
     378           0 :         if (!rb)
     379             :                 return NULL;
     380             : 
     381           0 :         if (!rb_has_aux(rb))
     382           0 :                 goto err;
     383             : 
     384             :         /*
     385             :          * If aux_mmap_count is zero, the aux buffer is in perf_mmap_close(),
     386             :          * about to get freed, so we leave immediately.
     387             :          *
     388             :          * Checking rb::aux_mmap_count and rb::refcount has to be done in
      389             :          * the same order, see perf_mmap_close(). Otherwise we end up freeing
      390             :          * aux pages in this path, which is a bug because we are in_atomic().
     391             :          */
     392           0 :         if (!atomic_read(&rb->aux_mmap_count))
     393           0 :                 goto err;
     394             : 
     395           0 :         if (!refcount_inc_not_zero(&rb->aux_refcount))
     396           0 :                 goto err;
     397             : 
     398           0 :         nest = READ_ONCE(rb->aux_nest);
     399             :         /*
      400             :          * Nesting is not supported for the AUX area; make sure nested
      401             :          * writers are caught early.
     402             :          */
     403           0 :         if (WARN_ON_ONCE(nest))
     404           0 :                 goto err_put;
     405             : 
     406           0 :         WRITE_ONCE(rb->aux_nest, nest + 1);
     407             : 
     408           0 :         aux_head = rb->aux_head;
     409             : 
     410           0 :         handle->rb = rb;
     411           0 :         handle->event = event;
     412           0 :         handle->head = aux_head;
     413           0 :         handle->size = 0;
     414           0 :         handle->aux_flags = 0;
     415             : 
     416             :         /*
     417             :          * In overwrite mode, AUX data stores do not depend on aux_tail,
     418             :          * therefore (A) control dependency barrier does not exist. The
     419             :          * (B) <-> (C) ordering is still observed by the pmu driver.
     420             :          */
     421           0 :         if (!rb->aux_overwrite) {
     422           0 :                 aux_tail = READ_ONCE(rb->user_page->aux_tail);
     423           0 :                 handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
     424           0 :                 if (aux_head - aux_tail < perf_aux_size(rb))
     425           0 :                         handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
     426             : 
     427             :                 /*
      428             :                  * The handle->size computation depends on the aux_tail load; this
      429             :                  * forms a control dependency barrier separating the aux_tail load
      430             :                  * from the aux data stores that will be enabled on successful return.
     431             :                  */
     432           0 :                 if (!handle->size) { /* A, matches D */
     433           0 :                         event->pending_disable = smp_processor_id();
     434           0 :                         perf_output_wakeup(handle);
     435           0 :                         WRITE_ONCE(rb->aux_nest, 0);
     436           0 :                         goto err_put;
     437             :                 }
     438             :         }
     439             : 
     440           0 :         return handle->rb->aux_priv;
     441             : 
     442           0 : err_put:
     443             :         /* can't be last */
     444           0 :         rb_free_aux(rb);
     445             : 
     446           0 : err:
     447           0 :         ring_buffer_put(rb);
     448           0 :         handle->event = NULL;
     449             : 
     450           0 :         return NULL;
     451             : }
     452             : EXPORT_SYMBOL_GPL(perf_aux_output_begin);
     453             : 
     454           0 : static __always_inline bool rb_need_aux_wakeup(struct perf_buffer *rb)
     455             : {
     456           0 :         if (rb->aux_overwrite)
     457             :                 return false;
     458             : 
     459           0 :         if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) {
     460           0 :                 rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark);
     461           0 :                 return true;
     462             :         }
     463             : 
     464             :         return false;
     465             : }
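
A worked example of the bookkeeping above, with an assumed aux_watermark of 0x8000 bytes:

        /*
         * aux_wakeup = 0x8000, aux_head advances to 0x1a000:
         *
         *   aux_head - aux_wakeup = 0x12000 >= 0x8000  -> report a wakeup
         *   aux_wakeup = rounddown(0x1a000, 0x8000)    =  0x18000
         *
         * so the next wakeup becomes due once aux_head reaches 0x20000.
         */
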
     466             : 
     467             : /*
     468             :  * Commit the data written by hardware into the ring buffer by adjusting
     469             :  * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the
     470             :  * pmu driver's responsibility to observe ordering rules of the hardware,
     471             :  * so that all the data is externally visible before this is called.
     472             :  *
     473             :  * Note: this has to be called from pmu::stop() callback, as the assumption
     474             :  * of the AUX buffer management code is that after pmu::stop(), the AUX
     475             :  * transaction must be stopped and therefore drop the AUX reference count.
     476             :  */
     477           0 : void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
     478             : {
     479           0 :         bool wakeup = !!(handle->aux_flags & PERF_AUX_FLAG_TRUNCATED);
     480           0 :         struct perf_buffer *rb = handle->rb;
     481           0 :         unsigned long aux_head;
     482             : 
     483             :         /* in overwrite mode, driver provides aux_head via handle */
     484           0 :         if (rb->aux_overwrite) {
     485           0 :                 handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE;
     486             : 
     487           0 :                 aux_head = handle->head;
     488           0 :                 rb->aux_head = aux_head;
     489             :         } else {
     490           0 :                 handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE;
     491             : 
     492           0 :                 aux_head = rb->aux_head;
     493           0 :                 rb->aux_head += size;
     494             :         }
     495             : 
     496             :         /*
     497             :          * Only send RECORD_AUX if we have something useful to communicate
     498             :          *
      499             :          * Note: OVERWRITE records by themselves are not considered
      500             :          * useful, as they don't communicate any *new* information
      501             :          * aside from the short-lived offset, which becomes history at
      502             :          * the next event sched-in and therefore isn't worth reporting.
     503             :          * The userspace that needs to copy out AUX data in overwrite
     504             :          * mode should know to use user_page::aux_head for the actual
     505             :          * offset. So, from now on we don't output AUX records that
     506             :          * have *only* OVERWRITE flag set.
     507             :          */
     508           0 :         if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
     509           0 :                 perf_event_aux_event(handle->event, aux_head, size,
     510             :                                      handle->aux_flags);
     511             : 
     512           0 :         WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
     513           0 :         if (rb_need_aux_wakeup(rb))
     514           0 :                 wakeup = true;
     515             : 
     516           0 :         if (wakeup) {
     517           0 :                 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
     518           0 :                         handle->event->pending_disable = smp_processor_id();
     519           0 :                 perf_output_wakeup(handle);
     520             :         }
     521             : 
     522           0 :         handle->event = NULL;
     523             : 
     524           0 :         WRITE_ONCE(rb->aux_nest, 0);
     525             :         /* can't be last */
     526           0 :         rb_free_aux(rb);
     527           0 :         ring_buffer_put(rb);
     528           0 : }
     529             : EXPORT_SYMBOL_GPL(perf_aux_output_end);
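
As the comments above spell out, perf_aux_output_begin() belongs in pmu::start() and perf_aux_output_end() in pmu::stop(). A hedged sketch of that pairing follows; the per-CPU handle, the example_* names and the hw_*() helpers are hypothetical stand-ins for a real PMU driver.

        static DEFINE_PER_CPU(struct perf_output_handle, example_handle);

        static void example_pmu_start(struct perf_event *event, int flags)
        {
                struct perf_output_handle *handle = this_cpu_ptr(&example_handle);
                void *buf;

                buf = perf_aux_output_begin(handle, event);     /* takes the aux refcount */
                if (!buf)
                        return;                                 /* no AUX buffer or no space */

                /* hypothetical: point the hardware at [handle->head, handle->head + handle->size) */
                hw_start(buf, handle->head, handle->size);
        }

        static void example_pmu_stop(struct perf_event *event, int flags)
        {
                struct perf_output_handle *handle = this_cpu_ptr(&example_handle);

                if (!handle->event)             /* begin failed or already ended */
                        return;

                hw_stop();                      /* hypothetical: stop the trace unit */

                /* commits the data, emits PERF_RECORD_AUX, drops the aux refcount */
                perf_aux_output_end(handle, hw_bytes_written());
        }
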
     530             : 
     531             : /*
     532             :  * Skip over a given number of bytes in the AUX buffer, due to, for example,
     533             :  * hardware's alignment constraints.
     534             :  */
     535           0 : int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
     536             : {
     537           0 :         struct perf_buffer *rb = handle->rb;
     538             : 
     539           0 :         if (size > handle->size)
     540             :                 return -ENOSPC;
     541             : 
     542           0 :         rb->aux_head += size;
     543             : 
     544           0 :         WRITE_ONCE(rb->user_page->aux_head, rb->aux_head);
     545           0 :         if (rb_need_aux_wakeup(rb)) {
     546           0 :                 perf_output_wakeup(handle);
     547           0 :                 handle->wakeup = rb->aux_wakeup + rb->aux_watermark;
     548             :         }
     549             : 
     550           0 :         handle->head = rb->aux_head;
     551           0 :         handle->size -= size;
     552             : 
     553           0 :         return 0;
     554             : }
     555             : EXPORT_SYMBOL_GPL(perf_aux_output_skip);
     556             : 
     557           0 : void *perf_get_aux(struct perf_output_handle *handle)
     558             : {
     559             :         /* this is only valid between perf_aux_output_begin and *_end */
     560           0 :         if (!handle->event)
     561             :                 return NULL;
     562             : 
     563           0 :         return handle->rb->aux_priv;
     564             : }
     565             : EXPORT_SYMBOL_GPL(perf_get_aux);
     566             : 
     567             : /*
     568             :  * Copy out AUX data from an AUX handle.
     569             :  */
     570           0 : long perf_output_copy_aux(struct perf_output_handle *aux_handle,
     571             :                           struct perf_output_handle *handle,
     572             :                           unsigned long from, unsigned long to)
     573             : {
     574           0 :         struct perf_buffer *rb = aux_handle->rb;
     575           0 :         unsigned long tocopy, remainder, len = 0;
     576           0 :         void *addr;
     577             : 
     578           0 :         from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
     579           0 :         to &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
     580             : 
     581           0 :         do {
     582           0 :                 tocopy = PAGE_SIZE - offset_in_page(from);
     583           0 :                 if (to > from)
     584           0 :                         tocopy = min(tocopy, to - from);
     585           0 :                 if (!tocopy)
     586             :                         break;
     587             : 
     588           0 :                 addr = rb->aux_pages[from >> PAGE_SHIFT];
     589           0 :                 addr += offset_in_page(from);
     590             : 
     591           0 :                 remainder = perf_output_copy(handle, addr, tocopy);
     592           0 :                 if (remainder)
     593             :                         return -EFAULT;
     594             : 
     595           0 :                 len += tocopy;
     596           0 :                 from += tocopy;
     597           0 :                 from &= (rb->aux_nr_pages << PAGE_SHIFT) - 1;
     598           0 :         } while (to != from);
     599             : 
     600           0 :         return len;
     601             : }
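
A worked pass through the copy loop above, assuming a 4-page (16 KiB, PAGE_SHIFT = 12) AUX area, from = 0x3f00 and to = 0x0300:

        /*
         *   mask = (4 << PAGE_SHIFT) - 1 = 0x3fff
         *
         *   step 1: tocopy = PAGE_SIZE - 0xf00 = 0x100      (to < from: no clamp)
         *           copy from aux_pages[3] + 0xf00, from = 0x4000 & 0x3fff = 0
         *   step 2: tocopy = min(PAGE_SIZE, to - from) = 0x300
         *           copy from aux_pages[0],          from = 0x300 = to -> done
         *
         *   len = 0x400 bytes copied across the wrap.
         */
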
     602             : 
     603             : #define PERF_AUX_GFP    (GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY)
     604             : 
     605           0 : static struct page *rb_alloc_aux_page(int node, int order)
     606             : {
     607           0 :         struct page *page;
     608             : 
     609           0 :         if (order > MAX_ORDER)
     610             :                 order = MAX_ORDER;
     611             : 
     612           0 :         do {
     613           0 :                 page = alloc_pages_node(node, PERF_AUX_GFP, order);
     614           0 :         } while (!page && order--);
     615             : 
     616           0 :         if (page && order) {
     617             :                 /*
     618             :                  * Communicate the allocation size to the driver:
     619             :                  * if we managed to secure a high-order allocation,
     620             :                  * set its first page's private to this order;
     621             :                  * !PagePrivate(page) means it's just a normal page.
     622             :                  */
     623           0 :                 split_page(page, order);
     624           0 :                 SetPagePrivate(page);
     625           0 :                 set_page_private(page, order);
     626             :         }
     627             : 
     628           0 :         return page;
     629             : }
     630             : 
     631           0 : static void rb_free_aux_page(struct perf_buffer *rb, int idx)
     632             : {
     633           0 :         struct page *page = virt_to_page(rb->aux_pages[idx]);
     634             : 
     635           0 :         ClearPagePrivate(page);
     636           0 :         page->mapping = NULL;
     637           0 :         __free_page(page);
     638           0 : }
     639             : 
     640           0 : static void __rb_free_aux(struct perf_buffer *rb)
     641             : {
     642           0 :         int pg;
     643             : 
     644             :         /*
      645             :          * This should never happen; the last reference should be dropped from
     646             :          * perf_mmap_close() path, which first stops aux transactions (which
     647             :          * in turn are the atomic holders of aux_refcount) and then does the
     648             :          * last rb_free_aux().
     649             :          */
     650           0 :         WARN_ON_ONCE(in_atomic());
     651             : 
     652           0 :         if (rb->aux_priv) {
     653           0 :                 rb->free_aux(rb->aux_priv);
     654           0 :                 rb->free_aux = NULL;
     655           0 :                 rb->aux_priv = NULL;
     656             :         }
     657             : 
     658           0 :         if (rb->aux_nr_pages) {
     659           0 :                 for (pg = 0; pg < rb->aux_nr_pages; pg++)
     660           0 :                         rb_free_aux_page(rb, pg);
     661             : 
     662           0 :                 kfree(rb->aux_pages);
     663           0 :                 rb->aux_nr_pages = 0;
     664             :         }
     665           0 : }
     666             : 
     667           0 : int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
     668             :                  pgoff_t pgoff, int nr_pages, long watermark, int flags)
     669             : {
     670           0 :         bool overwrite = !(flags & RING_BUFFER_WRITABLE);
     671           0 :         int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
     672           0 :         int ret = -ENOMEM, max_order;
     673             : 
     674           0 :         if (!has_aux(event))
     675             :                 return -EOPNOTSUPP;
     676             : 
     677             :         /*
     678             :          * We need to start with the max_order that fits in nr_pages,
     679             :          * not the other way around, hence ilog2() and not get_order.
     680             :          */
     681           0 :         max_order = ilog2(nr_pages);
     682             : 
     683             :         /*
      684             :          * The PMU requests more than one contiguous chunk of memory
      685             :          * for SW double buffering.
     686             :          */
     687           0 :         if (!overwrite) {
     688           0 :                 if (!max_order)
     689             :                         return -EINVAL;
     690             : 
     691           0 :                 max_order--;
     692             :         }
     693             : 
     694           0 :         rb->aux_pages = kcalloc_node(nr_pages, sizeof(void *), GFP_KERNEL,
     695             :                                      node);
     696           0 :         if (!rb->aux_pages)
     697             :                 return -ENOMEM;
     698             : 
     699           0 :         rb->free_aux = event->pmu->free_aux;
     700           0 :         for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;) {
     701           0 :                 struct page *page;
     702           0 :                 int last, order;
     703             : 
     704           0 :                 order = min(max_order, ilog2(nr_pages - rb->aux_nr_pages));
     705           0 :                 page = rb_alloc_aux_page(node, order);
     706           0 :                 if (!page)
     707           0 :                         goto out;
     708             : 
     709           0 :                 for (last = rb->aux_nr_pages + (1 << page_private(page));
     710           0 :                      last > rb->aux_nr_pages; rb->aux_nr_pages++)
     711           0 :                         rb->aux_pages[rb->aux_nr_pages] = page_address(page++);
     712             :         }
     713             : 
     714             :         /*
     715             :          * In overwrite mode, PMUs that don't support SG may not handle more
     716             :          * than one contiguous allocation, since they rely on PMI to do double
     717             :          * buffering. In this case, the entire buffer has to be one contiguous
     718             :          * chunk.
     719             :          */
     720           0 :         if ((event->pmu->capabilities & PERF_PMU_CAP_AUX_NO_SG) &&
     721             :             overwrite) {
     722           0 :                 struct page *page = virt_to_page(rb->aux_pages[0]);
     723             : 
     724           0 :                 if (page_private(page) != max_order)
     725           0 :                         goto out;
     726             :         }
     727             : 
     728           0 :         rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages,
     729             :                                              overwrite);
     730           0 :         if (!rb->aux_priv)
     731           0 :                 goto out;
     732             : 
     733           0 :         ret = 0;
     734             : 
     735             :         /*
     736             :          * aux_pages (and pmu driver's private data, aux_priv) will be
     737             :          * referenced in both producer's and consumer's contexts, thus
     738             :          * we keep a refcount here to make sure either of the two can
     739             :          * reference them safely.
     740             :          */
     741           0 :         refcount_set(&rb->aux_refcount, 1);
     742             : 
     743           0 :         rb->aux_overwrite = overwrite;
     744           0 :         rb->aux_watermark = watermark;
     745             : 
     746           0 :         if (!rb->aux_watermark && !rb->aux_overwrite)
     747           0 :                 rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1);
     748             : 
     749           0 : out:
     750           0 :         if (!ret)
     751           0 :                 rb->aux_pgoff = pgoff;
     752             :         else
     753           0 :                 __rb_free_aux(rb);
     754             : 
     755             :         return ret;
     756             : }
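
A worked example of the allocation loop above, assuming nr_pages = 16 and !overwrite:

        /*
         *   max_order = ilog2(16) = 4, lowered to 3 for SW double buffering
         *
         *   iteration 1: order = min(3, ilog2(16)) = 3 -> one 8-page chunk, aux_nr_pages = 8
         *   iteration 2: order = min(3, ilog2(8))  = 3 -> one 8-page chunk, aux_nr_pages = 16
         *
         * If an order-3 allocation fails, rb_alloc_aux_page() falls back to
         * smaller orders and later iterations simply consume more chunks.
         * With no explicit watermark, aux_watermark defaults to
         * 16 << (PAGE_SHIFT - 1), i.e. half the 16-page buffer.
         */
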
     757             : 
     758           0 : void rb_free_aux(struct perf_buffer *rb)
     759             : {
     760           0 :         if (refcount_dec_and_test(&rb->aux_refcount))
     761           0 :                 __rb_free_aux(rb);
     762           0 : }
     763             : 
     764             : #ifndef CONFIG_PERF_USE_VMALLOC
     765             : 
     766             : /*
      767             :  * Back perf_mmap() with regular order-0 GFP_KERNEL pages.
     768             :  */
     769             : 
     770             : static struct page *
     771           0 : __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
     772             : {
     773           0 :         if (pgoff > rb->nr_pages)
     774             :                 return NULL;
     775             : 
     776           0 :         if (pgoff == 0)
     777           0 :                 return virt_to_page(rb->user_page);
     778             : 
     779           0 :         return virt_to_page(rb->data_pages[pgoff - 1]);
     780             : }
     781             : 
     782           0 : static void *perf_mmap_alloc_page(int cpu)
     783             : {
     784           0 :         struct page *page;
     785           0 :         int node;
     786             : 
     787           0 :         node = (cpu == -1) ? cpu : cpu_to_node(cpu);
     788           0 :         page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
     789           0 :         if (!page)
     790             :                 return NULL;
     791             : 
     792           0 :         return page_address(page);
     793             : }
     794             : 
     795           0 : static void perf_mmap_free_page(void *addr)
     796             : {
     797           0 :         struct page *page = virt_to_page(addr);
     798             : 
     799           0 :         page->mapping = NULL;
     800           0 :         __free_page(page);
     801           0 : }
     802             : 
     803           0 : struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
     804             : {
     805           0 :         struct perf_buffer *rb;
     806           0 :         unsigned long size;
     807           0 :         int i;
     808             : 
     809           0 :         size = sizeof(struct perf_buffer);
     810           0 :         size += nr_pages * sizeof(void *);
     811             : 
     812           0 :         if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
     813           0 :                 goto fail;
     814             : 
     815           0 :         rb = kzalloc(size, GFP_KERNEL);
     816           0 :         if (!rb)
     817           0 :                 goto fail;
     818             : 
     819           0 :         rb->user_page = perf_mmap_alloc_page(cpu);
     820           0 :         if (!rb->user_page)
     821           0 :                 goto fail_user_page;
     822             : 
     823           0 :         for (i = 0; i < nr_pages; i++) {
     824           0 :                 rb->data_pages[i] = perf_mmap_alloc_page(cpu);
     825           0 :                 if (!rb->data_pages[i])
     826           0 :                         goto fail_data_pages;
     827             :         }
     828             : 
     829           0 :         rb->nr_pages = nr_pages;
     830             : 
     831           0 :         ring_buffer_init(rb, watermark, flags);
     832             : 
     833           0 :         return rb;
     834             : 
     835           0 : fail_data_pages:
     836           0 :         for (i--; i >= 0; i--)
     837           0 :                 perf_mmap_free_page(rb->data_pages[i]);
     838             : 
     839           0 :         perf_mmap_free_page(rb->user_page);
     840             : 
     841           0 : fail_user_page:
     842           0 :         kfree(rb);
     843             : 
     844             : fail:
     845             :         return NULL;
     846             : }
     847             : 
     848           0 : void rb_free(struct perf_buffer *rb)
     849             : {
     850           0 :         int i;
     851             : 
     852           0 :         perf_mmap_free_page(rb->user_page);
     853           0 :         for (i = 0; i < rb->nr_pages; i++)
     854           0 :                 perf_mmap_free_page(rb->data_pages[i]);
     855           0 :         kfree(rb);
     856           0 : }
     857             : 
     858             : #else
     859             : static int data_page_nr(struct perf_buffer *rb)
     860             : {
     861             :         return rb->nr_pages << page_order(rb);
     862             : }
     863             : 
     864             : static struct page *
     865             : __perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
     866             : {
     867             :         /* The '>' counts in the user page. */
     868             :         if (pgoff > data_page_nr(rb))
     869             :                 return NULL;
     870             : 
     871             :         return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE);
     872             : }
     873             : 
     874             : static void perf_mmap_unmark_page(void *addr)
     875             : {
     876             :         struct page *page = vmalloc_to_page(addr);
     877             : 
     878             :         page->mapping = NULL;
     879             : }
     880             : 
     881             : static void rb_free_work(struct work_struct *work)
     882             : {
     883             :         struct perf_buffer *rb;
     884             :         void *base;
     885             :         int i, nr;
     886             : 
     887             :         rb = container_of(work, struct perf_buffer, work);
     888             :         nr = data_page_nr(rb);
     889             : 
     890             :         base = rb->user_page;
     891             :         /* The '<=' counts in the user page. */
     892             :         for (i = 0; i <= nr; i++)
     893             :                 perf_mmap_unmark_page(base + (i * PAGE_SIZE));
     894             : 
     895             :         vfree(base);
     896             :         kfree(rb);
     897             : }
     898             : 
     899             : void rb_free(struct perf_buffer *rb)
     900             : {
     901             :         schedule_work(&rb->work);
     902             : }
     903             : 
     904             : struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
     905             : {
     906             :         struct perf_buffer *rb;
     907             :         unsigned long size;
     908             :         void *all_buf;
     909             : 
     910             :         size = sizeof(struct perf_buffer);
     911             :         size += sizeof(void *);
     912             : 
     913             :         rb = kzalloc(size, GFP_KERNEL);
     914             :         if (!rb)
     915             :                 goto fail;
     916             : 
     917             :         INIT_WORK(&rb->work, rb_free_work);
     918             : 
     919             :         all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
     920             :         if (!all_buf)
     921             :                 goto fail_all_buf;
     922             : 
     923             :         rb->user_page = all_buf;
     924             :         rb->data_pages[0] = all_buf + PAGE_SIZE;
     925             :         if (nr_pages) {
     926             :                 rb->nr_pages = 1;
     927             :                 rb->page_order = ilog2(nr_pages);
     928             :         }
     929             : 
     930             :         ring_buffer_init(rb, watermark, flags);
     931             : 
     932             :         return rb;
     933             : 
     934             : fail_all_buf:
     935             :         kfree(rb);
     936             : 
     937             : fail:
     938             :         return NULL;
     939             : }
     940             : 
     941             : #endif
     942             : 
     943             : struct page *
     944           0 : perf_mmap_to_page(struct perf_buffer *rb, unsigned long pgoff)
     945             : {
     946           0 :         if (rb->aux_nr_pages) {
     947             :                 /* above AUX space */
     948           0 :                 if (pgoff > rb->aux_pgoff + rb->aux_nr_pages)
     949             :                         return NULL;
     950             : 
     951             :                 /* AUX space */
     952           0 :                 if (pgoff >= rb->aux_pgoff) {
     953           0 :                         int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
     954           0 :                         return virt_to_page(rb->aux_pages[aux_pgoff]);
     955             :                 }
     956             :         }
     957             : 
     958           0 :         return __perf_mmap_to_page(rb, pgoff);
     959             : }
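
For orientation, a sketch of the mmap() page-offset layout that this helper resolves, assuming N data pages and, when mapped, M AUX pages starting at aux_pgoff:

        /*
         *   pgoff 0                              user_page (struct perf_event_mmap_page)
         *   pgoff 1 .. N                         data_pages[0 .. N-1]
         *   pgoff aux_pgoff .. aux_pgoff + M - 1 aux_pages[0 .. M-1]
         */
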

Generated by: LCOV version 1.14