LCOV - code coverage report
Current view: top level - kernel/printk - printk_ringbuffer.c (source / functions)
Test: landlock.info
Date: 2021-04-22 12:43:58

                    Hit     Total    Coverage
Lines:              291     450     64.7 %
Functions:           22      29     75.9 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0
       2             : 
       3             : #include <linux/kernel.h>
       4             : #include <linux/irqflags.h>
       5             : #include <linux/string.h>
       6             : #include <linux/errno.h>
       7             : #include <linux/bug.h>
       8             : #include "printk_ringbuffer.h"
       9             : 
      10             : /**
      11             :  * DOC: printk_ringbuffer overview
      12             :  *
      13             :  * Data Structure
      14             :  * --------------
       15             :  * The printk_ringbuffer is made up of 2 internal ringbuffers:
      16             :  *
      17             :  *   desc_ring
      18             :  *     A ring of descriptors and their meta data (such as sequence number,
      19             :  *     timestamp, loglevel, etc.) as well as internal state information about
      20             :  *     the record and logical positions specifying where in the other
      21             :  *     ringbuffer the text strings are located.
      22             :  *
      23             :  *   text_data_ring
      24             :  *     A ring of data blocks. A data block consists of an unsigned long
      25             :  *     integer (ID) that maps to a desc_ring index followed by the text
      26             :  *     string of the record.
      27             :  *
      28             :  * The internal state information of a descriptor is the key element to allow
      29             :  * readers and writers to locklessly synchronize access to the data.
      30             :  *
      31             :  * Implementation
      32             :  * --------------
      33             :  *
      34             :  * Descriptor Ring
      35             :  * ~~~~~~~~~~~~~~~
      36             :  * The descriptor ring is an array of descriptors. A descriptor contains
      37             :  * essential meta data to track the data of a printk record using
      38             :  * blk_lpos structs pointing to associated text data blocks (see
       39             :  * "Data Ring" below). Each descriptor is assigned an ID that maps
      40             :  * directly to index values of the descriptor array and has a state. The ID
      41             :  * and the state are bitwise combined into a single descriptor field named
      42             :  * @state_var, allowing ID and state to be synchronously and atomically
      43             :  * updated.
      44             :  *
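 * As a rough illustration (a simplified sketch, not the exact bit layout,
 * which is defined by the DESC_SV()/DESC_ID()/DESC_STATE() helpers used
 * later in this file), the combination could look like::
 *
 *      #define STATE_BITS      2
 *      #define STATE_SHIFT     (BITS_PER_LONG - STATE_BITS)
 *      #define ID_MASK         ((1UL << STATE_SHIFT) - 1)
 *
 *      // pack an ID and a state into one value so both can be
 *      // updated with a single atomic operation
 *      static unsigned long make_state_var(unsigned long id,
 *                                          unsigned long state)
 *      {
 *              return (state << STATE_SHIFT) | (id & ID_MASK);
 *      }
 *
 *      // unpacking:
 *      //   id    = state_var & ID_MASK
 *      //   state = state_var >> STATE_SHIFT
 *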
      45             :  * Descriptors have four states:
      46             :  *
      47             :  *   reserved
      48             :  *     A writer is modifying the record.
      49             :  *
      50             :  *   committed
      51             :  *     The record and all its data are written. A writer can reopen the
      52             :  *     descriptor (transitioning it back to reserved), but in the committed
      53             :  *     state the data is consistent.
      54             :  *
      55             :  *   finalized
      56             :  *     The record and all its data are complete and available for reading. A
      57             :  *     writer cannot reopen the descriptor.
      58             :  *
      59             :  *   reusable
      60             :  *     The record exists, but its text and/or meta data may no longer be
      61             :  *     available.
      62             :  *
      63             :  * Querying the @state_var of a record requires providing the ID of the
      64             :  * descriptor to query. This can yield a possible fifth (pseudo) state:
      65             :  *
      66             :  *   miss
      67             :  *     The descriptor being queried has an unexpected ID.
      68             :  *
      69             :  * The descriptor ring has a @tail_id that contains the ID of the oldest
      70             :  * descriptor and @head_id that contains the ID of the newest descriptor.
      71             :  *
      72             :  * When a new descriptor should be created (and the ring is full), the tail
      73             :  * descriptor is invalidated by first transitioning to the reusable state and
       74             :  * then invalidating all tail data blocks up to and including the data block
       75             :  * associated with the tail descriptor. Then
      76             :  * @tail_id is advanced, followed by advancing @head_id. And finally the
      77             :  * @state_var of the new descriptor is initialized to the new ID and reserved
      78             :  * state.
      79             :  *
      80             :  * The @tail_id can only be advanced if the new @tail_id would be in the
       81             :  * committed or reusable queried state. This guarantees that a valid
       82             :  * sequence number for the tail is always available.
      83             :  *
      84             :  * Descriptor Finalization
      85             :  * ~~~~~~~~~~~~~~~~~~~~~~~
      86             :  * When a writer calls the commit function prb_commit(), record data is
      87             :  * fully stored and is consistent within the ringbuffer. However, a writer can
      88             :  * reopen that record, claiming exclusive access (as with prb_reserve()), and
      89             :  * modify that record. When finished, the writer must again commit the record.
      90             :  *
      91             :  * In order for a record to be made available to readers (and also become
      92             :  * recyclable for writers), it must be finalized. A finalized record cannot be
      93             :  * reopened and can never become "unfinalized". Record finalization can occur
      94             :  * in three different scenarios:
      95             :  *
      96             :  *   1) A writer can simultaneously commit and finalize its record by calling
      97             :  *      prb_final_commit() instead of prb_commit().
      98             :  *
      99             :  *   2) When a new record is reserved and the previous record has been
     100             :  *      committed via prb_commit(), that previous record is automatically
     101             :  *      finalized.
     102             :  *
     103             :  *   3) When a record is committed via prb_commit() and a newer record
     104             :  *      already exists, the record being committed is automatically finalized.
     105             :  *
     106             :  * Data Ring
     107             :  * ~~~~~~~~~
     108             :  * The text data ring is a byte array composed of data blocks. Data blocks are
     109             :  * referenced by blk_lpos structs that point to the logical position of the
     110             :  * beginning of a data block and the beginning of the next adjacent data
     111             :  * block. Logical positions are mapped directly to index values of the byte
     112             :  * array ringbuffer.
     113             :  *
     114             :  * Each data block consists of an ID followed by the writer data. The ID is
     115             :  * the identifier of a descriptor that is associated with the data block. A
     116             :  * given data block is considered valid if all of the following conditions
     117             :  * are met:
     118             :  *
     119             :  *   1) The descriptor associated with the data block is in the committed
     120             :  *      or finalized queried state.
     121             :  *
     122             :  *   2) The blk_lpos struct within the descriptor associated with the data
     123             :  *      block references back to the same data block.
     124             :  *
     125             :  *   3) The data block is within the head/tail logical position range.
     126             :  *
     127             :  * If the writer data of a data block would extend beyond the end of the
     128             :  * byte array, only the ID of the data block is stored at the logical
     129             :  * position and the full data block (ID and writer data) is stored at the
     130             :  * beginning of the byte array. The referencing blk_lpos will point to the
     131             :  * ID before the wrap and the next data block will be at the logical
      132             :  * position adjacent to the full data block after the wrap.
     133             :  *
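 * A minimal sketch of that wrap decision, using the DATA_WRAPS() and
 * DATA_THIS_WRAP_START_LPOS() macros defined later in this file
 * (@begin_lpos is where the block would start and @size already covers
 * the ID plus the aligned writer data)::
 *
 *      next_lpos = begin_lpos + size;
 *      if (DATA_WRAPS(data_ring, begin_lpos) !=
 *          DATA_WRAPS(data_ring, next_lpos)) {
 *              // the block would cross the end of the byte array: only
 *              // the ID is stored at begin_lpos and the full block is
 *              // placed at index 0 of the following wrap
 *              next_lpos = DATA_THIS_WRAP_START_LPOS(data_ring,
 *                                                    next_lpos) + size;
 *      }
 *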
     134             :  * Data rings have a @tail_lpos that points to the beginning of the oldest
     135             :  * data block and a @head_lpos that points to the logical position of the
     136             :  * next (not yet existing) data block.
     137             :  *
     138             :  * When a new data block should be created (and the ring is full), tail data
     139             :  * blocks will first be invalidated by putting their associated descriptors
     140             :  * into the reusable state and then pushing the @tail_lpos forward beyond
     141             :  * them. Then the @head_lpos is pushed forward and is associated with a new
     142             :  * descriptor. If a data block is not valid, the @tail_lpos cannot be
     143             :  * advanced beyond it.
     144             :  *
     145             :  * Info Array
     146             :  * ~~~~~~~~~~
      147             :  * The general meta data of printk records is stored in printk_info structs,
      148             :  * held in an array with the same number of elements as the descriptor ring.
     149             :  * Each info corresponds to the descriptor of the same index in the
     150             :  * descriptor ring. Info validity is confirmed by evaluating the corresponding
     151             :  * descriptor before and after loading the info.
     152             :  *
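 * A simplified sketch of that validity check, using desc_read() and
 * to_info() from below (the actual readers also copy the text data in
 * between the two evaluations)::
 *
 *      struct printk_info info_copy;
 *      struct prb_desc desc;
 *
 *      if (desc_read(desc_ring, id, &desc, NULL, NULL) == desc_finalized) {
 *              memcpy(&info_copy, to_info(desc_ring, id), sizeof(info_copy));
 *
 *              // re-check: if the descriptor is no longer finalized, the
 *              // record may have been recycled while the info was being
 *              // copied and info_copy cannot be trusted
 *              if (desc_read(desc_ring, id, &desc, NULL, NULL) != desc_finalized) {
 *                      // discard info_copy
 *              }
 *      }
 *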
     153             :  * Usage
     154             :  * -----
     155             :  * Here are some simple examples demonstrating writers and readers. For the
     156             :  * examples a global ringbuffer (test_rb) is available (which is not the
     157             :  * actual ringbuffer used by printk)::
     158             :  *
     159             :  *      DEFINE_PRINTKRB(test_rb, 15, 5);
     160             :  *
     161             :  * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of
     162             :  * 1 MiB (2 ^ (15 + 5)) for text data.
     163             :  *
     164             :  * Sample writer code::
     165             :  *
     166             :  *      const char *textstr = "message text";
     167             :  *      struct prb_reserved_entry e;
     168             :  *      struct printk_record r;
     169             :  *
     170             :  *      // specify how much to allocate
     171             :  *      prb_rec_init_wr(&r, strlen(textstr) + 1);
     172             :  *
     173             :  *      if (prb_reserve(&e, &test_rb, &r)) {
     174             :  *              snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
     175             :  *
     176             :  *              r.info->text_len = strlen(textstr);
     177             :  *              r.info->ts_nsec = local_clock();
     178             :  *              r.info->caller_id = printk_caller_id();
     179             :  *
     180             :  *              // commit and finalize the record
     181             :  *              prb_final_commit(&e);
     182             :  *      }
     183             :  *
     184             :  * Note that additional writer functions are available to extend a record
     185             :  * after it has been committed but not yet finalized. This can be done as
     186             :  * long as no new records have been reserved and the caller is the same.
     187             :  *
     188             :  * Sample writer code (record extending)::
     189             :  *
     190             :  *              // alternate rest of previous example
     191             :  *
     192             :  *              r.info->text_len = strlen(textstr);
     193             :  *              r.info->ts_nsec = local_clock();
     194             :  *              r.info->caller_id = printk_caller_id();
     195             :  *
     196             :  *              // commit the record (but do not finalize yet)
     197             :  *              prb_commit(&e);
     198             :  *      }
     199             :  *
     200             :  *      ...
     201             :  *
     202             :  *      // specify additional 5 bytes text space to extend
     203             :  *      prb_rec_init_wr(&r, 5);
     204             :  *
     205             :  *      // try to extend, but only if it does not exceed 32 bytes
      206             :  *      if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id(), 32)) {
     207             :  *              snprintf(&r.text_buf[r.info->text_len],
     208             :  *                       r.text_buf_size - r.info->text_len, "hello");
     209             :  *
     210             :  *              r.info->text_len += 5;
     211             :  *
     212             :  *              // commit and finalize the record
     213             :  *              prb_final_commit(&e);
     214             :  *      }
     215             :  *
     216             :  * Sample reader code::
     217             :  *
     218             :  *      struct printk_info info;
     219             :  *      struct printk_record r;
     220             :  *      char text_buf[32];
     221             :  *      u64 seq;
     222             :  *
     223             :  *      prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
     224             :  *
     225             :  *      prb_for_each_record(0, &test_rb, &seq, &r) {
     226             :  *              if (info.seq != seq)
     227             :  *                      pr_warn("lost %llu records\n", info.seq - seq);
     228             :  *
     229             :  *              if (info.text_len > r.text_buf_size) {
     230             :  *                      pr_warn("record %llu text truncated\n", info.seq);
     231             :  *                      text_buf[r.text_buf_size - 1] = 0;
     232             :  *              }
     233             :  *
     234             :  *              pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec,
     235             :  *                      &text_buf[0]);
     236             :  *      }
     237             :  *
      238             :  * Note that additional, less convenient reader functions are available for
      239             :  * more complex record access.
     240             :  *
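 * For example, a single record can be read non-iteratively with
 * prb_read_valid(). A minimal sketch reusing the reader setup from
 * above (it fails if the record with sequence number @seq is not, or
 * is no longer, available)::
 *
 *      prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
 *
 *      if (prb_read_valid(&test_rb, seq, &r))
 *              pr_info("%llu: %s\n", info.seq, &text_buf[0]);
 *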
     241             :  * ABA Issues
     242             :  * ~~~~~~~~~~
     243             :  * To help avoid ABA issues, descriptors are referenced by IDs (array index
     244             :  * values combined with tagged bits counting array wraps) and data blocks are
     245             :  * referenced by logical positions (array index values combined with tagged
     246             :  * bits counting array wraps). However, on 32-bit systems the number of
     247             :  * tagged bits is relatively small such that an ABA incident is (at least
     248             :  * theoretically) possible. For example, if 4 million maximally sized (1KiB)
     249             :  * printk messages were to occur in NMI context on a 32-bit system, the
     250             :  * interrupted context would not be able to recognize that the 32-bit integer
     251             :  * completely wrapped and thus represents a different data block than the one
     252             :  * the interrupted context expects.
     253             :  *
     254             :  * To help combat this possibility, additional state checking is performed
     255             :  * (such as using cmpxchg() even though set() would suffice). These extra
     256             :  * checks are commented as such and will hopefully catch any ABA issue that
     257             :  * a 32-bit system might experience.
     258             :  *
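 * In sketch form, the pattern is to verify the old value rather than
 * blindly overwriting it (illustrative only; the commented call sites
 * in this file name the specific scenario they guard against)::
 *
 *      // a plain atomic_long_set() would suffice if ABA were impossible;
 *      // cmpxchg() additionally verifies that the variable still holds
 *      // the value this context expects
 *      if (atomic_long_cmpxchg_relaxed(&desc->state_var, expected_val,
 *                                      new_val) != expected_val) {
 *              // the state changed underneath us: re-read and retry
 *      }
 *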
     259             :  * Memory Barriers
     260             :  * ~~~~~~~~~~~~~~~
     261             :  * Multiple memory barriers are used. To simplify proving correctness and
     262             :  * generating litmus tests, lines of code related to memory barriers
     263             :  * (loads, stores, and the associated memory barriers) are labeled::
     264             :  *
     265             :  *      LMM(function:letter)
     266             :  *
     267             :  * Comments reference the labels using only the "function:letter" part.
     268             :  *
     269             :  * The memory barrier pairs and their ordering are:
     270             :  *
     271             :  *   desc_reserve:D / desc_reserve:B
     272             :  *     push descriptor tail (id), then push descriptor head (id)
     273             :  *
     274             :  *   desc_reserve:D / data_push_tail:B
     275             :  *     push data tail (lpos), then set new descriptor reserved (state)
     276             :  *
     277             :  *   desc_reserve:D / desc_push_tail:C
     278             :  *     push descriptor tail (id), then set new descriptor reserved (state)
     279             :  *
     280             :  *   desc_reserve:D / prb_first_seq:C
     281             :  *     push descriptor tail (id), then set new descriptor reserved (state)
     282             :  *
     283             :  *   desc_reserve:F / desc_read:D
     284             :  *     set new descriptor id and reserved (state), then allow writer changes
     285             :  *
     286             :  *   data_alloc:A (or data_realloc:A) / desc_read:D
     287             :  *     set old descriptor reusable (state), then modify new data block area
     288             :  *
     289             :  *   data_alloc:A (or data_realloc:A) / data_push_tail:B
     290             :  *     push data tail (lpos), then modify new data block area
     291             :  *
     292             :  *   _prb_commit:B / desc_read:B
     293             :  *     store writer changes, then set new descriptor committed (state)
     294             :  *
     295             :  *   desc_reopen_last:A / _prb_commit:B
     296             :  *     set descriptor reserved (state), then read descriptor data
     297             :  *
     298             :  *   _prb_commit:B / desc_reserve:D
     299             :  *     set new descriptor committed (state), then check descriptor head (id)
     300             :  *
     301             :  *   data_push_tail:D / data_push_tail:A
     302             :  *     set descriptor reusable (state), then push data tail (lpos)
     303             :  *
     304             :  *   desc_push_tail:B / desc_reserve:D
     305             :  *     set descriptor reusable (state), then push descriptor tail (id)
     306             :  */
     307             : 
     308             : #define DATA_SIZE(data_ring)            _DATA_SIZE((data_ring)->size_bits)
     309             : #define DATA_SIZE_MASK(data_ring)       (DATA_SIZE(data_ring) - 1)
     310             : 
     311             : #define DESCS_COUNT(desc_ring)          _DESCS_COUNT((desc_ring)->count_bits)
     312             : #define DESCS_COUNT_MASK(desc_ring)     (DESCS_COUNT(desc_ring) - 1)
     313             : 
     314             : /* Determine the data array index from a logical position. */
     315             : #define DATA_INDEX(data_ring, lpos)     ((lpos) & DATA_SIZE_MASK(data_ring))
     316             : 
     317             : /* Determine the desc array index from an ID or sequence number. */
     318             : #define DESC_INDEX(desc_ring, n)        ((n) & DESCS_COUNT_MASK(desc_ring))
     319             : 
     320             : /* Determine how many times the data array has wrapped. */
     321             : #define DATA_WRAPS(data_ring, lpos)     ((lpos) >> (data_ring)->size_bits)
     322             : 
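/*
 * Worked example (assuming size_bits == 5, i.e. a 32-byte data array):
 * lpos == 70 gives DATA_INDEX() == (70 & 31) == 6 and
 * DATA_WRAPS() == (70 >> 5) == 2, i.e. byte 6 within the third wrap.
 */
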
     323             : /* Determine if a logical position refers to a data-less block. */
     324             : #define LPOS_DATALESS(lpos)             ((lpos) & 1UL)
     325             : #define BLK_DATALESS(blk)               (LPOS_DATALESS((blk)->begin) && \
     326             :                                          LPOS_DATALESS((blk)->next))
     327             : 
     328             : /* Get the logical position at index 0 of the current wrap. */
     329             : #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
     330             : ((lpos) & ~DATA_SIZE_MASK(data_ring))
     331             : 
     332             : /* Get the ID for the same index of the previous wrap as the given ID. */
     333             : #define DESC_ID_PREV_WRAP(desc_ring, id) \
     334             : DESC_ID((id) - DESCS_COUNT(desc_ring))
     335             : 
     336             : /*
     337             :  * A data block: mapped directly to the beginning of the data block area
     338             :  * specified as a logical position within the data ring.
     339             :  *
     340             :  * @id:   the ID of the associated descriptor
     341             :  * @data: the writer data
     342             :  *
     343             :  * Note that the size of a data block is only known by its associated
     344             :  * descriptor.
     345             :  */
     346             : struct prb_data_block {
     347             :         unsigned long   id;
     348             :         char            data[];
     349             : };
     350             : 
     351             : /*
     352             :  * Return the descriptor associated with @n. @n can be either a
     353             :  * descriptor ID or a sequence number.
     354             :  */
     355        7308 : static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n)
     356             : {
     357        7308 :         return &desc_ring->descs[DESC_INDEX(desc_ring, n)];
     358             : }
     359             : 
     360             : /*
     361             :  * Return the printk_info associated with @n. @n can be either a
     362             :  * descriptor ID or a sequence number.
     363             :  */
     364        6464 : static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n)
     365             : {
     366        6464 :         return &desc_ring->infos[DESC_INDEX(desc_ring, n)];
     367             : }
     368             : 
     369        1241 : static struct prb_data_block *to_block(struct prb_data_ring *data_ring,
     370             :                                        unsigned long begin_lpos)
     371             : {
     372        1241 :         return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)];
     373             : }
     374             : 
     375             : /*
     376             :  * Increase the data size to account for data block meta data plus any
     377             :  * padding so that the adjacent data block is aligned on the ID size.
     378             :  */
     379         614 : static unsigned int to_blk_size(unsigned int size)
     380             : {
     381         614 :         struct prb_data_block *db = NULL;
     382             : 
     383         614 :         size += sizeof(*db);
     384         614 :         size = ALIGN(size, sizeof(db->id));
     385         614 :         return size;
     386             : }
     387             : 
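/*
 * For example, assuming an 8-byte unsigned long: a 13-byte reservation
 * becomes 13 + 8 (the block ID) = 21 bytes, which to_blk_size() pads up
 * to 24 so that the following data block's ID stays naturally aligned.
 */
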
     388             : /*
     389             :  * Sanity checker for reserve size. The ringbuffer code assumes that a data
     390             :  * block does not exceed the maximum possible size that could fit within the
     391             :  * ringbuffer. This function provides that basic size check so that the
     392             :  * assumption is safe.
     393             :  */
     394         307 : static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size)
     395             : {
     396         307 :         struct prb_data_block *db = NULL;
     397             : 
     398         307 :         if (size == 0)
     399             :                 return true;
     400             : 
     401             :         /*
     402             :          * Ensure the alignment padded size could possibly fit in the data
     403             :          * array. The largest possible data block must still leave room for
     404             :          * at least the ID of the next block.
     405             :          */
     406         307 :         size = to_blk_size(size);
     407         307 :         if (size > DATA_SIZE(data_ring) - sizeof(db->id))
     408           0 :                 return false;
     409             : 
     410             :         return true;
     411             : }
     412             : 
     413             : /* Query the state of a descriptor. */
     414        7257 : static enum desc_state get_desc_state(unsigned long id,
     415             :                                       unsigned long state_val)
     416             : {
     417        7257 :         if (id != DESC_ID(state_val))
     418             :                 return desc_miss;
     419             : 
     420        6292 :         return DESC_STATE(state_val);
     421             : }
     422             : 
     423             : /*
     424             :  * Get a copy of a specified descriptor and return its queried state. If the
     425             :  * descriptor is in an inconsistent state (miss or reserved), the caller can
     426             :  * only expect the descriptor's @state_var field to be valid.
     427             :  *
     428             :  * The sequence number and caller_id can be optionally retrieved. Like all
     429             :  * non-state_var data, they are only valid if the descriptor is in a
     430             :  * consistent state.
     431             :  */
     432        4111 : static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
     433             :                                  unsigned long id, struct prb_desc *desc_out,
     434             :                                  u64 *seq_out, u32 *caller_id_out)
     435             : {
     436        4111 :         struct printk_info *info = to_info(desc_ring, id);
     437        4111 :         struct prb_desc *desc = to_desc(desc_ring, id);
     438        4111 :         atomic_long_t *state_var = &desc->state_var;
     439        4111 :         enum desc_state d_state;
     440        4111 :         unsigned long state_val;
     441             : 
     442             :         /* Check the descriptor state. */
     443        4111 :         state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */
     444        4111 :         d_state = get_desc_state(id, state_val);
     445        4111 :         if (d_state == desc_miss || d_state == desc_reserved) {
     446             :                 /*
     447             :                  * The descriptor is in an inconsistent state. Set at least
     448             :                  * @state_var so that the caller can see the details of
     449             :                  * the inconsistent state.
     450             :                  */
     451         965 :                 goto out;
     452             :         }
     453             : 
     454             :         /*
     455             :          * Guarantee the state is loaded before copying the descriptor
     456             :          * content. This avoids copying obsolete descriptor content that might
     457             :          * not apply to the descriptor state. This pairs with _prb_commit:B.
     458             :          *
     459             :          * Memory barrier involvement:
     460             :          *
     461             :          * If desc_read:A reads from _prb_commit:B, then desc_read:C reads
     462             :          * from _prb_commit:A.
     463             :          *
     464             :          * Relies on:
     465             :          *
     466             :          * WMB from _prb_commit:A to _prb_commit:B
     467             :          *    matching
     468             :          * RMB from desc_read:A to desc_read:C
     469             :          */
     470        3146 :         smp_rmb(); /* LMM(desc_read:B) */
     471             : 
     472             :         /*
     473             :          * Copy the descriptor data. The data is not valid until the
     474             :          * state has been re-checked. A memcpy() for all of @desc
     475             :          * cannot be used because of the atomic_t @state_var field.
     476             :          */
     477        3146 :         memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos,
     478             :                sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */
     479        3146 :         if (seq_out)
     480        3106 :                 *seq_out = info->seq; /* also part of desc_read:C */
     481        3146 :         if (caller_id_out)
     482          40 :                 *caller_id_out = info->caller_id; /* also part of desc_read:C */
     483             : 
     484             :         /*
     485             :          * 1. Guarantee the descriptor content is loaded before re-checking
     486             :          *    the state. This avoids reading an obsolete descriptor state
     487             :          *    that may not apply to the copied content. This pairs with
     488             :          *    desc_reserve:F.
     489             :          *
     490             :          *    Memory barrier involvement:
     491             :          *
     492             :          *    If desc_read:C reads from desc_reserve:G, then desc_read:E
     493             :          *    reads from desc_reserve:F.
     494             :          *
     495             :          *    Relies on:
     496             :          *
     497             :          *    WMB from desc_reserve:F to desc_reserve:G
     498             :          *       matching
     499             :          *    RMB from desc_read:C to desc_read:E
     500             :          *
     501             :          * 2. Guarantee the record data is loaded before re-checking the
     502             :          *    state. This avoids reading an obsolete descriptor state that may
     503             :          *    not apply to the copied data. This pairs with data_alloc:A and
     504             :          *    data_realloc:A.
     505             :          *
     506             :          *    Memory barrier involvement:
     507             :          *
     508             :          *    If copy_data:A reads from data_alloc:B, then desc_read:E
     509             :          *    reads from desc_make_reusable:A.
     510             :          *
     511             :          *    Relies on:
     512             :          *
     513             :          *    MB from desc_make_reusable:A to data_alloc:B
     514             :          *       matching
     515             :          *    RMB from desc_read:C to desc_read:E
     516             :          *
     517             :          *    Note: desc_make_reusable:A and data_alloc:B can be different
     518             :          *          CPUs. However, the data_alloc:B CPU (which performs the
     519             :          *          full memory barrier) must have previously seen
     520             :          *          desc_make_reusable:A.
     521             :          */
     522        3146 :         smp_rmb(); /* LMM(desc_read:D) */
     523             : 
     524             :         /*
     525             :          * The data has been copied. Return the current descriptor state,
     526             :          * which may have changed since the load above.
     527             :          */
     528        3146 :         state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */
     529        3146 :         d_state = get_desc_state(id, state_val);
     530        4111 : out:
     531        4111 :         atomic_long_set(&desc_out->state_var, state_val);
     532        4111 :         return d_state;
     533             : }
     534             : 
     535             : /*
     536             :  * Take a specified descriptor out of the finalized state by attempting
     537             :  * the transition from finalized to reusable. Either this context or some
     538             :  * other context will have been successful.
     539             :  */
     540           0 : static void desc_make_reusable(struct prb_desc_ring *desc_ring,
     541             :                                unsigned long id)
     542             : {
     543           0 :         unsigned long val_finalized = DESC_SV(id, desc_finalized);
     544           0 :         unsigned long val_reusable = DESC_SV(id, desc_reusable);
     545           0 :         struct prb_desc *desc = to_desc(desc_ring, id);
     546           0 :         atomic_long_t *state_var = &desc->state_var;
     547             : 
     548           0 :         atomic_long_cmpxchg_relaxed(state_var, val_finalized,
     549             :                                     val_reusable); /* LMM(desc_make_reusable:A) */
     550           0 : }
     551             : 
     552             : /*
     553             :  * Given the text data ring, put the associated descriptor of each
     554             :  * data block from @lpos_begin until @lpos_end into the reusable state.
     555             :  *
     556             :  * If there is any problem making the associated descriptor reusable, either
     557             :  * the descriptor has not yet been finalized or another writer context has
     558             :  * already pushed the tail lpos past the problematic data block. Regardless,
     559             :  * on error the caller can re-load the tail lpos to determine the situation.
     560             :  */
     561           0 : static bool data_make_reusable(struct printk_ringbuffer *rb,
     562             :                                unsigned long lpos_begin,
     563             :                                unsigned long lpos_end,
     564             :                                unsigned long *lpos_out)
     565             : {
     566             : 
     567           0 :         struct prb_data_ring *data_ring = &rb->text_data_ring;
     568           0 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
     569           0 :         struct prb_data_block *blk;
     570           0 :         enum desc_state d_state;
     571           0 :         struct prb_desc desc;
     572           0 :         struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos;
     573           0 :         unsigned long id;
     574             : 
     575             :         /* Loop until @lpos_begin has advanced to or beyond @lpos_end. */
     576           0 :         while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) {
     577           0 :                 blk = to_block(data_ring, lpos_begin);
     578             : 
     579             :                 /*
     580             :                  * Load the block ID from the data block. This is a data race
     581             :                  * against a writer that may have newly reserved this data
     582             :                  * area. If the loaded value matches a valid descriptor ID,
     583             :                  * the blk_lpos of that descriptor will be checked to make
     584             :                  * sure it points back to this data block. If the check fails,
     585             :                  * the data area has been recycled by another writer.
     586             :                  */
     587           0 :                 id = blk->id; /* LMM(data_make_reusable:A) */
     588             : 
     589           0 :                 d_state = desc_read(desc_ring, id, &desc,
     590             :                                     NULL, NULL); /* LMM(data_make_reusable:B) */
     591             : 
     592           0 :                 switch (d_state) {
     593             :                 case desc_miss:
     594             :                 case desc_reserved:
     595             :                 case desc_committed:
     596             :                         return false;
     597           0 :                 case desc_finalized:
     598             :                         /*
     599             :                          * This data block is invalid if the descriptor
     600             :                          * does not point back to it.
     601             :                          */
     602           0 :                         if (blk_lpos->begin != lpos_begin)
     603             :                                 return false;
     604           0 :                         desc_make_reusable(desc_ring, id);
     605           0 :                         break;
     606           0 :                 case desc_reusable:
     607             :                         /*
     608             :                          * This data block is invalid if the descriptor
     609             :                          * does not point back to it.
     610             :                          */
     611           0 :                         if (blk_lpos->begin != lpos_begin)
     612             :                                 return false;
     613             :                         break;
     614             :                 }
     615             : 
     616             :                 /* Advance @lpos_begin to the next data block. */
     617           0 :                 lpos_begin = blk_lpos->next;
     618             :         }
     619             : 
     620           0 :         *lpos_out = lpos_begin;
     621           0 :         return true;
     622             : }
     623             : 
     624             : /*
     625             :  * Advance the data ring tail to at least @lpos. This function puts
     626             :  * descriptors into the reusable state if the tail is pushed beyond
     627             :  * their associated data block.
     628             :  */
     629         291 : static bool data_push_tail(struct printk_ringbuffer *rb, unsigned long lpos)
     630             : {
     631         291 :         struct prb_data_ring *data_ring = &rb->text_data_ring;
     632         291 :         unsigned long tail_lpos_new;
     633         291 :         unsigned long tail_lpos;
     634         291 :         unsigned long next_lpos;
     635             : 
     636             :         /* If @lpos is from a data-less block, there is nothing to do. */
     637         291 :         if (LPOS_DATALESS(lpos))
     638             :                 return true;
     639             : 
     640             :         /*
     641             :          * Any descriptor states that have transitioned to reusable due to the
     642             :          * data tail being pushed to this loaded value will be visible to this
     643             :          * CPU. This pairs with data_push_tail:D.
     644             :          *
     645             :          * Memory barrier involvement:
     646             :          *
     647             :          * If data_push_tail:A reads from data_push_tail:D, then this CPU can
     648             :          * see desc_make_reusable:A.
     649             :          *
     650             :          * Relies on:
     651             :          *
     652             :          * MB from desc_make_reusable:A to data_push_tail:D
     653             :          *    matches
     654             :          * READFROM from data_push_tail:D to data_push_tail:A
     655             :          *    thus
     656             :          * READFROM from desc_make_reusable:A to this CPU
     657             :          */
     658         291 :         tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */
     659             : 
     660             :         /*
     661             :          * Loop until the tail lpos is at or beyond @lpos. This condition
     662             :          * may already be satisfied, resulting in no full memory barrier
     663             :          * from data_push_tail:D being performed. However, since this CPU
     664             :          * sees the new tail lpos, any descriptor states that transitioned to
     665             :          * the reusable state must already be visible.
     666             :          */
     667         291 :         while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) {
     668             :                 /*
     669             :                  * Make all descriptors reusable that are associated with
     670             :                  * data blocks before @lpos.
     671             :                  */
     672           0 :                 if (!data_make_reusable(rb, tail_lpos, lpos, &next_lpos)) {
     673             :                         /*
     674             :                          * 1. Guarantee the block ID loaded in
     675             :                          *    data_make_reusable() is performed before
     676             :                          *    reloading the tail lpos. The failed
     677             :                          *    data_make_reusable() may be due to a newly
     678             :                          *    recycled data area causing the tail lpos to
     679             :                          *    have been previously pushed. This pairs with
     680             :                          *    data_alloc:A and data_realloc:A.
     681             :                          *
     682             :                          *    Memory barrier involvement:
     683             :                          *
     684             :                          *    If data_make_reusable:A reads from data_alloc:B,
     685             :                          *    then data_push_tail:C reads from
     686             :                          *    data_push_tail:D.
     687             :                          *
     688             :                          *    Relies on:
     689             :                          *
     690             :                          *    MB from data_push_tail:D to data_alloc:B
     691             :                          *       matching
     692             :                          *    RMB from data_make_reusable:A to
     693             :                          *    data_push_tail:C
     694             :                          *
     695             :                          *    Note: data_push_tail:D and data_alloc:B can be
     696             :                          *          different CPUs. However, the data_alloc:B
     697             :                          *          CPU (which performs the full memory
     698             :                          *          barrier) must have previously seen
     699             :                          *          data_push_tail:D.
     700             :                          *
     701             :                          * 2. Guarantee the descriptor state loaded in
     702             :                          *    data_make_reusable() is performed before
     703             :                          *    reloading the tail lpos. The failed
     704             :                          *    data_make_reusable() may be due to a newly
     705             :                          *    recycled descriptor causing the tail lpos to
     706             :                          *    have been previously pushed. This pairs with
     707             :                          *    desc_reserve:D.
     708             :                          *
     709             :                          *    Memory barrier involvement:
     710             :                          *
     711             :                          *    If data_make_reusable:B reads from
     712             :                          *    desc_reserve:F, then data_push_tail:C reads
     713             :                          *    from data_push_tail:D.
     714             :                          *
     715             :                          *    Relies on:
     716             :                          *
     717             :                          *    MB from data_push_tail:D to desc_reserve:F
     718             :                          *       matching
     719             :                          *    RMB from data_make_reusable:B to
     720             :                          *    data_push_tail:C
     721             :                          *
     722             :                          *    Note: data_push_tail:D and desc_reserve:F can
     723             :                          *          be different CPUs. However, the
     724             :                          *          desc_reserve:F CPU (which performs the
     725             :                          *          full memory barrier) must have previously
     726             :                          *          seen data_push_tail:D.
     727             :                          */
     728           0 :                         smp_rmb(); /* LMM(data_push_tail:B) */
     729             : 
     730           0 :                         tail_lpos_new = atomic_long_read(&data_ring->tail_lpos
     731             :                                                         ); /* LMM(data_push_tail:C) */
     732           0 :                         if (tail_lpos_new == tail_lpos)
     733             :                                 return false;
     734             : 
     735             :                         /* Another CPU pushed the tail. Try again. */
     736           0 :                         tail_lpos = tail_lpos_new;
     737           0 :                         continue;
     738             :                 }
     739             : 
     740             :                 /*
     741             :                  * Guarantee any descriptor states that have transitioned to
     742             :                  * reusable are stored before pushing the tail lpos. A full
     743             :                  * memory barrier is needed since other CPUs may have made
     744             :                  * the descriptor states reusable. This pairs with
     745             :                  * data_push_tail:A.
     746             :                  */
     747           0 :                 if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos,
     748             :                                             next_lpos)) { /* LMM(data_push_tail:D) */
     749             :                         break;
     750             :                 }
     751             :         }
     752             : 
     753             :         return true;
     754             : }
     755             : 
     756             : /*
     757             :  * Advance the desc ring tail. This function advances the tail by one
     758             :  * descriptor, thus invalidating the oldest descriptor. Before advancing
     759             :  * the tail, the tail descriptor is made reusable and all data blocks up to
     760             :  * and including the descriptor's data block are invalidated (i.e. the data
     761             :  * ring tail is pushed past the data block of the descriptor being made
     762             :  * reusable).
     763             :  */
     764           0 : static bool desc_push_tail(struct printk_ringbuffer *rb,
     765             :                            unsigned long tail_id)
     766             : {
     767           0 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
     768           0 :         enum desc_state d_state;
     769           0 :         struct prb_desc desc;
     770             : 
     771           0 :         d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL);
     772             : 
     773           0 :         switch (d_state) {
     774             :         case desc_miss:
     775             :                 /*
     776             :                  * If the ID is exactly 1 wrap behind the expected, it is
     777             :                  * in the process of being reserved by another writer and
     778             :                  * must be considered reserved.
     779             :                  */
     780           0 :                 if (DESC_ID(atomic_long_read(&desc.state_var)) ==
     781           0 :                     DESC_ID_PREV_WRAP(desc_ring, tail_id)) {
     782           0 :                         return false;
     783             :                 }
     784             : 
     785             :                 /*
     786             :                  * The ID has changed. Another writer must have pushed the
     787             :                  * tail and recycled the descriptor already. Success is
     788             :                  * returned because the caller is only interested in the
     789             :                  * specified tail being pushed, which it was.
     790             :                  */
     791             :                 return true;
     792             :         case desc_reserved:
     793             :         case desc_committed:
     794             :                 return false;
     795           0 :         case desc_finalized:
     796           0 :                 desc_make_reusable(desc_ring, tail_id);
     797           0 :                 break;
     798             :         case desc_reusable:
     799             :                 break;
     800             :         }
     801             : 
     802             :         /*
     803             :          * Data blocks must be invalidated before their associated
     804             :          * descriptor can be made available for recycling. Invalidating
     805             :          * them later is not possible because there is no way to trust
     806             :          * data blocks once their associated descriptor is gone.
     807             :          */
     808             : 
     809           0 :         if (!data_push_tail(rb, desc.text_blk_lpos.next))
     810             :                 return false;
     811             : 
     812             :         /*
     813             :          * Check the next descriptor after @tail_id before pushing the tail
     814             :          * to it because the tail must always be in a finalized or reusable
     815             :          * state. The implementation of prb_first_seq() relies on this.
     816             :          *
     817             :          * A successful read implies that the next descriptor is less than or
     818             :          * equal to @head_id so there is no risk of pushing the tail past the
     819             :          * head.
     820             :          */
     821           0 :         d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc,
     822             :                             NULL, NULL); /* LMM(desc_push_tail:A) */
     823             : 
     824           0 :         if (d_state == desc_finalized || d_state == desc_reusable) {
     825             :                 /*
     826             :                  * Guarantee any descriptor states that have transitioned to
     827             :                  * reusable are stored before pushing the tail ID. This allows
     828             :                  * verifying the recycled descriptor state. A full memory
     829             :                  * barrier is needed since other CPUs may have made the
     830             :                  * descriptor states reusable. This pairs with desc_reserve:D.
     831             :                  */
     832           0 :                 atomic_long_cmpxchg(&desc_ring->tail_id, tail_id,
     833           0 :                                     DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */
     834             :         } else {
     835             :                 /*
     836             :                  * Guarantee the last state load from desc_read() is before
     837             :                  * reloading @tail_id in order to see a new tail ID in the
     838             :                  * case that the descriptor has been recycled. This pairs
     839             :                  * with desc_reserve:D.
     840             :                  *
     841             :                  * Memory barrier involvement:
     842             :                  *
     843             :                  * If desc_push_tail:A reads from desc_reserve:F, then
     844             :                  * desc_push_tail:D reads from desc_push_tail:B.
     845             :                  *
     846             :                  * Relies on:
     847             :                  *
     848             :                  * MB from desc_push_tail:B to desc_reserve:F
     849             :                  *    matching
     850             :                  * RMB from desc_push_tail:A to desc_push_tail:D
     851             :                  *
     852             :                  * Note: desc_push_tail:B and desc_reserve:F can be different
     853             :                  *       CPUs. However, the desc_reserve:F CPU (which performs
     854             :                  *       the full memory barrier) must have previously seen
     855             :                  *       desc_push_tail:B.
     856             :                  */
     857           0 :                 smp_rmb(); /* LMM(desc_push_tail:C) */
     858             : 
     859             :                 /*
     860             :                  * Re-check the tail ID. The descriptor following @tail_id is
     861             :                  * not in an allowed tail state. But if the tail has since
     862             :                  * been moved by another CPU, then it does not matter.
     863             :                  */
     864           0 :                 if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */
     865           0 :                         return false;
     866             :         }
     867             : 
     868             :         return true;
     869             : }
     870             : 
     871             : /* Reserve a new descriptor, invalidating the oldest if necessary. */
     872         269 : static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out)
     873             : {
     874         269 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
     875         269 :         unsigned long prev_state_val;
     876         269 :         unsigned long id_prev_wrap;
     877         269 :         struct prb_desc *desc;
     878         269 :         unsigned long head_id;
     879         269 :         unsigned long id;
     880             : 
     881         269 :         head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */
     882             : 
     883         269 :         do {
     884         269 :                 id = DESC_ID(head_id + 1);
     885         269 :                 id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id);
     886             : 
     887             :                 /*
     888             :                  * Guarantee the head ID is read before reading the tail ID.
     889             :                  * Since the tail ID is updated before the head ID, this
     890             :                  * guarantees that @id_prev_wrap is never ahead of the tail
     891             :                  * ID. This pairs with desc_reserve:D.
     892             :                  *
     893             :                  * Memory barrier involvement:
     894             :                  *
     895             :                  * If desc_reserve:A reads from desc_reserve:D, then
     896             :                  * desc_reserve:C reads from desc_push_tail:B.
     897             :                  *
     898             :                  * Relies on:
     899             :                  *
     900             :                  * MB from desc_push_tail:B to desc_reserve:D
     901             :                  *    matching
     902             :                  * RMB from desc_reserve:A to desc_reserve:C
     903             :                  *
     904             :                  * Note: desc_push_tail:B and desc_reserve:D can be different
     905             :                  *       CPUs. However, the desc_reserve:D CPU (which performs
     906             :                  *       the full memory barrier) must have previously seen
     907             :                  *       desc_push_tail:B.
     908             :                  */
     909         269 :                 smp_rmb(); /* LMM(desc_reserve:B) */
     910             : 
     911         269 :                 if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id
     912             :                                                     )) { /* LMM(desc_reserve:C) */
     913             :                         /*
     914             :                          * Make space for the new descriptor by
     915             :                          * advancing the tail.
     916             :                          */
     917           0 :                         if (!desc_push_tail(rb, id_prev_wrap))
     918             :                                 return false;
     919             :                 }
     920             : 
     921             :                 /*
     922             :                  * 1. Guarantee the tail ID is read before validating the
     923             :                  *    recycled descriptor state. A read memory barrier is
     924             :                  *    sufficient for this. This pairs with desc_push_tail:B.
     925             :                  *
     926             :                  *    Memory barrier involvement:
     927             :                  *
     928             :                  *    If desc_reserve:C reads from desc_push_tail:B, then
     929             :                  *    desc_reserve:E reads from desc_make_reusable:A.
     930             :                  *
     931             :                  *    Relies on:
     932             :                  *
     933             :                  *    MB from desc_make_reusable:A to desc_push_tail:B
     934             :                  *       matching
     935             :                  *    RMB from desc_reserve:C to desc_reserve:E
     936             :                  *
     937             :                  *    Note: desc_make_reusable:A and desc_push_tail:B can be
     938             :                  *          different CPUs. However, the desc_push_tail:B CPU
     939             :                  *          (which performs the full memory barrier) must have
     940             :                  *          previously seen desc_make_reusable:A.
     941             :                  *
     942             :                  * 2. Guarantee the tail ID is stored before storing the head
     943             :                  *    ID. This pairs with desc_reserve:B.
     944             :                  *
     945             :                  * 3. Guarantee any data ring tail changes are stored before
     946             :                  *    recycling the descriptor. Data ring tail changes can
     947             :                  *    happen via desc_push_tail()->data_push_tail(). A full
     948             :                  *    memory barrier is needed since another CPU may have
     949             :                  *    pushed the data ring tails. This pairs with
     950             :                  *    data_push_tail:B.
     951             :                  *
     952             :                  * 4. Guarantee a new tail ID is stored before recycling the
     953             :                  *    descriptor. A full memory barrier is needed since
     954             :                  *    another CPU may have pushed the tail ID. This pairs
     955             :                  *    with desc_push_tail:C and this also pairs with
     956             :                  *    prb_first_seq:C.
     957             :                  *
     958             :                  * 5. Guarantee the head ID is stored before trying to
     959             :                  *    finalize the previous descriptor. This pairs with
     960             :                  *    _prb_commit:B.
     961             :                  */
     962         269 :         } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id,
     963         269 :                                           id)); /* LMM(desc_reserve:D) */
     964             : 
     965         269 :         desc = to_desc(desc_ring, id);
     966             : 
     967             :         /*
     968             :          * If the descriptor has been recycled, verify the old state val.
     969             :          * See "ABA Issues" about why this verification is performed.
     970             :          */
     971         269 :         prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */
     972         269 :         if (prev_state_val &&
     973           0 :             get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
     974           0 :                 WARN_ON_ONCE(1);
     975           0 :                 return false;
     976             :         }
     977             : 
     978             :         /*
     979             :          * Assign the descriptor a new ID and set its state to reserved.
     980             :          * See "ABA Issues" about why cmpxchg() instead of set() is used.
     981             :          *
     982             :          * Guarantee the new descriptor ID and state is stored before making
     983             :          * any other changes. A write memory barrier is sufficient for this.
     984             :          * This pairs with desc_read:D.
     985             :          */
     986         538 :         if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val,
     987             :                         DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
     988           0 :                 WARN_ON_ONCE(1);
     989           0 :                 return false;
     990             :         }
     991             : 
     992             :         /* Now data in @desc can be modified: LMM(desc_reserve:G) */
     993             : 
     994         269 :         *id_out = id;
     995         269 :         return true;
     996             : }
     997             : 
     998             : /* Determine the end of a data block. */
     999         307 : static unsigned long get_next_lpos(struct prb_data_ring *data_ring,
    1000             :                                    unsigned long lpos, unsigned int size)
    1001             : {
    1002         307 :         unsigned long begin_lpos;
    1003         307 :         unsigned long next_lpos;
    1004             : 
    1005         307 :         begin_lpos = lpos;
    1006         307 :         next_lpos = lpos + size;
    1007             : 
    1008             :         /* First check if the data block does not wrap. */
    1009         307 :         if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos))
    1010             :                 return next_lpos;
    1011             : 
    1012             :         /* Wrapping data blocks store their data at the beginning. */
    1013           0 :         return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size);
    1014             : }
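A brief worked example of the wrap handling above may help; the concrete sizes are illustrative assumptions, not values from this configuration:

        /*
         * Assume DATA_SIZE(data_ring) == 256, @lpos == 240 and @size == 32.
         * The unwrapped end (240 + 32 == 272) falls in the next wrap, so the
         * data is stored at the beginning of that wrap and get_next_lpos()
         * returns DATA_THIS_WRAP_START_LPOS(data_ring, 272) + 32 == 288.
         * The 16 bytes between lpos 240 and the wrap boundary become unusable
         * trailing space. Without a wrap (e.g. @lpos == 16), the function
         * simply returns @lpos + @size.
         */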
    1015             : 
    1016             : /*
    1017             :  * Allocate a new data block, invalidating the oldest data block(s)
    1018             :  * if necessary. This function also associates the data block with
    1019             :  * a specified descriptor.
    1020             :  */
    1021         269 : static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,
    1022             :                         struct prb_data_blk_lpos *blk_lpos, unsigned long id)
    1023             : {
    1024         269 :         struct prb_data_ring *data_ring = &rb->text_data_ring;
    1025         269 :         struct prb_data_block *blk;
    1026         269 :         unsigned long begin_lpos;
    1027         269 :         unsigned long next_lpos;
    1028             : 
    1029         269 :         if (size == 0) {
    1030             :                 /* Specify a data-less block. */
    1031           0 :                 blk_lpos->begin = NO_LPOS;
    1032           0 :                 blk_lpos->next = NO_LPOS;
    1033           0 :                 return NULL;
    1034             :         }
    1035             : 
    1036         269 :         size = to_blk_size(size);
    1037             : 
    1038         269 :         begin_lpos = atomic_long_read(&data_ring->head_lpos);
    1039             : 
    1040         269 :         do {
    1041         269 :                 next_lpos = get_next_lpos(data_ring, begin_lpos, size);
    1042             : 
    1043         269 :                 if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring))) {
    1044             :                         /* Failed to allocate, specify a data-less block. */
    1045           0 :                         blk_lpos->begin = FAILED_LPOS;
    1046           0 :                         blk_lpos->next = FAILED_LPOS;
    1047           0 :                         return NULL;
    1048             :                 }
    1049             : 
    1050             :                 /*
    1051             :                  * 1. Guarantee any descriptor states that have transitioned
    1052             :                  *    to reusable are stored before modifying the newly
    1053             :                  *    allocated data area. A full memory barrier is needed
    1054             :                  *    since other CPUs may have made the descriptor states
    1055             :                  *    reusable. See data_push_tail:A about why the reusable
    1056             :                  *    states are visible. This pairs with desc_read:D.
    1057             :                  *
    1058             :                  * 2. Guarantee any updated tail lpos is stored before
    1059             :                  *    modifying the newly allocated data area. Another CPU may
    1060             :                  *    be in data_make_reusable() and is reading a block ID
    1061             :                  *    from this area. data_make_reusable() can handle reading
    1062             :                  *    a garbage block ID value, but then it must be able to
    1063             :                  *    load a new tail lpos. A full memory barrier is needed
    1064             :                  *    since other CPUs may have updated the tail lpos. This
    1065             :                  *    pairs with data_push_tail:B.
    1066             :                  */
    1067         269 :         } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos,
    1068         269 :                                           next_lpos)); /* LMM(data_alloc:A) */
    1069             : 
    1070         269 :         blk = to_block(data_ring, begin_lpos);
    1071         269 :         blk->id = id; /* LMM(data_alloc:B) */
    1072             : 
    1073         269 :         if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) {
    1074             :                 /* Wrapping data blocks store their data at the beginning. */
    1075           0 :                 blk = to_block(data_ring, 0);
    1076             : 
    1077             :                 /*
    1078             :                  * Store the ID on the wrapped block for consistency.
    1079             :                  * The printk_ringbuffer does not actually use it.
    1080             :                  */
    1081           0 :                 blk->id = id;
    1082             :         }
    1083             : 
    1084         269 :         blk_lpos->begin = begin_lpos;
    1085         269 :         blk_lpos->next = next_lpos;
    1086             : 
    1087         269 :         return &blk->data[0];
    1088             : }
    1089             : 
    1090             : /*
    1091             :  * Try to resize an existing data block associated with the descriptor
    1092             :  * specified by @id. If the resized data block should become wrapped, it
    1093             :  * copies the old data to the new data block. If @size yields a data block
     1094             :  * with the same or a smaller size, the data block is left as is.
    1095             :  *
     1096             :  * Fail if this is not the last allocated data block, if there is not
     1097             :  * enough space, or if it is not possible to make enough space.
    1098             :  *
    1099             :  * Return a pointer to the beginning of the entire data buffer or NULL on
    1100             :  * failure.
    1101             :  */
    1102          38 : static char *data_realloc(struct printk_ringbuffer *rb, unsigned int size,
    1103             :                           struct prb_data_blk_lpos *blk_lpos, unsigned long id)
    1104             : {
    1105          38 :         struct prb_data_ring *data_ring = &rb->text_data_ring;
    1106          38 :         struct prb_data_block *blk;
    1107          38 :         unsigned long head_lpos;
    1108          38 :         unsigned long next_lpos;
    1109          38 :         bool wrapped;
    1110             : 
    1111             :         /* Reallocation only works if @blk_lpos is the newest data block. */
    1112          38 :         head_lpos = atomic_long_read(&data_ring->head_lpos);
    1113          38 :         if (head_lpos != blk_lpos->next)
    1114             :                 return NULL;
    1115             : 
    1116             :         /* Keep track if @blk_lpos was a wrapping data block. */
    1117          38 :         wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next));
    1118             : 
    1119          38 :         size = to_blk_size(size);
    1120             : 
    1121          38 :         next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size);
    1122             : 
    1123             :         /* If the data block does not increase, there is nothing to do. */
    1124          38 :         if (head_lpos - next_lpos < DATA_SIZE(data_ring)) {
    1125          16 :                 if (wrapped)
    1126           0 :                         blk = to_block(data_ring, 0);
    1127             :                 else
    1128          16 :                         blk = to_block(data_ring, blk_lpos->begin);
    1129          16 :                 return &blk->data[0];
    1130             :         }
    1131             : 
    1132          22 :         if (!data_push_tail(rb, next_lpos - DATA_SIZE(data_ring)))
    1133             :                 return NULL;
    1134             : 
    1135             :         /* The memory barrier involvement is the same as data_alloc:A. */
    1136          44 :         if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos,
    1137             :                                      next_lpos)) { /* LMM(data_realloc:A) */
    1138             :                 return NULL;
    1139             :         }
    1140             : 
    1141          22 :         blk = to_block(data_ring, blk_lpos->begin);
    1142             : 
    1143          22 :         if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) {
    1144           0 :                 struct prb_data_block *old_blk = blk;
    1145             : 
    1146             :                 /* Wrapping data blocks store their data at the beginning. */
    1147           0 :                 blk = to_block(data_ring, 0);
    1148             : 
    1149             :                 /*
    1150             :                  * Store the ID on the wrapped block for consistency.
    1151             :                  * The printk_ringbuffer does not actually use it.
    1152             :                  */
    1153           0 :                 blk->id = id;
    1154             : 
    1155           0 :                 if (!wrapped) {
    1156             :                         /*
    1157             :                          * Since the allocated space is now in the newly
    1158             :                          * created wrapping data block, copy the content
    1159             :                          * from the old data block.
    1160             :                          */
    1161           0 :                         memcpy(&blk->data[0], &old_blk->data[0],
    1162           0 :                                (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id));
    1163             :                 }
    1164             :         }
    1165             : 
    1166          22 :         blk_lpos->next = next_lpos;
    1167             : 
    1168          22 :         return &blk->data[0];
    1169             : }
    1170             : 
    1171             : /* Return the number of bytes used by a data block. */
    1172         307 : static unsigned int space_used(struct prb_data_ring *data_ring,
    1173             :                                struct prb_data_blk_lpos *blk_lpos)
    1174             : {
    1175             :         /* Data-less blocks take no space. */
    1176         307 :         if (BLK_DATALESS(blk_lpos))
    1177             :                 return 0;
    1178             : 
    1179         307 :         if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) {
    1180             :                 /* Data block does not wrap. */
    1181         307 :                 return (DATA_INDEX(data_ring, blk_lpos->next) -
    1182         307 :                         DATA_INDEX(data_ring, blk_lpos->begin));
    1183             :         }
    1184             : 
    1185             :         /*
    1186             :          * For wrapping data blocks, the trailing (wasted) space is
    1187             :          * also counted.
    1188             :          */
    1189           0 :         return (DATA_INDEX(data_ring, blk_lpos->next) +
    1190           0 :                 DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin));
    1191             : }
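Continuing the illustrative numbers from the get_next_lpos() sketch above (assumed values only):

        /*
         * For a wrapping block with @begin == 240, @next == 288 and
         * DATA_SIZE(data_ring) == 256, space_used() returns
         * DATA_INDEX(288) + 256 - DATA_INDEX(240) == 32 + 256 - 240 == 48:
         * the 32 bytes stored at the start of the new wrap plus the 16
         * wasted bytes at the end of the previous wrap. A non-wrapping
         * block simply yields DATA_INDEX(@next) - DATA_INDEX(@begin).
         */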
    1192             : 
    1193             : /*
    1194             :  * Given @blk_lpos, return a pointer to the writer data from the data block
    1195             :  * and calculate the size of the data part. A NULL pointer is returned if
    1196             :  * @blk_lpos specifies values that could never be legal.
    1197             :  *
    1198             :  * This function (used by readers) performs strict validation on the lpos
    1199             :  * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
    1200             :  * triggered if an internal error is detected.
    1201             :  */
    1202         934 : static const char *get_data(struct prb_data_ring *data_ring,
    1203             :                             struct prb_data_blk_lpos *blk_lpos,
    1204             :                             unsigned int *data_size)
    1205             : {
    1206         934 :         struct prb_data_block *db;
    1207             : 
    1208             :         /* Data-less data block description. */
    1209         934 :         if (BLK_DATALESS(blk_lpos)) {
    1210           0 :                 if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
    1211           0 :                         *data_size = 0;
    1212           0 :                         return "";
    1213             :                 }
    1214             :                 return NULL;
    1215             :         }
    1216             : 
    1217             :         /* Regular data block: @begin less than @next and in same wrap. */
    1218         934 :         if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) &&
    1219             :             blk_lpos->begin < blk_lpos->next) {
    1220         934 :                 db = to_block(data_ring, blk_lpos->begin);
    1221         934 :                 *data_size = blk_lpos->next - blk_lpos->begin;
    1222             : 
    1223             :         /* Wrapping data block: @begin is one wrap behind @next. */
    1224           0 :         } else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
    1225             :                    DATA_WRAPS(data_ring, blk_lpos->next)) {
    1226           0 :                 db = to_block(data_ring, 0);
    1227           0 :                 *data_size = DATA_INDEX(data_ring, blk_lpos->next);
    1228             : 
    1229             :         /* Illegal block description. */
    1230             :         } else {
    1231           0 :                 WARN_ON_ONCE(1);
    1232           0 :                 return NULL;
    1233             :         }
    1234             : 
    1235             :         /* A valid data block will always be aligned to the ID size. */
    1236         934 :         if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) ||
    1237         934 :             WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) {
    1238             :                 return NULL;
    1239             :         }
    1240             : 
    1241             :         /* A valid data block will always have at least an ID. */
    1242         934 :         if (WARN_ON_ONCE(*data_size < sizeof(db->id)))
    1243             :                 return NULL;
    1244             : 
    1245             :         /* Subtract block ID space from size to reflect data size. */
    1246         934 :         *data_size -= sizeof(db->id);
    1247             : 
    1248         934 :         return &db->data[0];
    1249             : }
    1250             : 
    1251             : /*
    1252             :  * Attempt to transition the newest descriptor from committed back to reserved
    1253             :  * so that the record can be modified by a writer again. This is only possible
    1254             :  * if the descriptor is not yet finalized and the provided @caller_id matches.
    1255             :  */
    1256          40 : static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
    1257             :                                          u32 caller_id, unsigned long *id_out)
    1258             : {
    1259          40 :         unsigned long prev_state_val;
    1260          40 :         enum desc_state d_state;
    1261          40 :         struct prb_desc desc;
    1262          40 :         struct prb_desc *d;
    1263          40 :         unsigned long id;
    1264          40 :         u32 cid;
    1265             : 
    1266          40 :         id = atomic_long_read(&desc_ring->head_id);
    1267             : 
    1268             :         /*
     1269             :          * To reduce unnecessary reopening, first check if the descriptor
    1270             :          * state and caller ID are correct.
    1271             :          */
    1272          40 :         d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
    1273          40 :         if (d_state != desc_committed || cid != caller_id)
    1274             :                 return NULL;
    1275             : 
    1276          38 :         d = to_desc(desc_ring, id);
    1277             : 
    1278          38 :         prev_state_val = DESC_SV(id, desc_committed);
    1279             : 
    1280             :         /*
    1281             :          * Guarantee the reserved state is stored before reading any
    1282             :          * record data. A full memory barrier is needed because @state_var
    1283             :          * modification is followed by reading. This pairs with _prb_commit:B.
    1284             :          *
    1285             :          * Memory barrier involvement:
    1286             :          *
    1287             :          * If desc_reopen_last:A reads from _prb_commit:B, then
    1288             :          * prb_reserve_in_last:A reads from _prb_commit:A.
    1289             :          *
    1290             :          * Relies on:
    1291             :          *
    1292             :          * WMB from _prb_commit:A to _prb_commit:B
    1293             :          *    matching
     1294             :          * MB from desc_reopen_last:A to prb_reserve_in_last:A
    1295             :          */
    1296          76 :         if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
    1297             :                         DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */
    1298             :                 return NULL;
    1299             :         }
    1300             : 
    1301          38 :         *id_out = id;
    1302          38 :         return d;
    1303             : }
    1304             : 
    1305             : /**
    1306             :  * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer
    1307             :  *                         used by the newest record.
    1308             :  *
    1309             :  * @e:         The entry structure to setup.
    1310             :  * @rb:        The ringbuffer to re-reserve and extend data in.
    1311             :  * @r:         The record structure to allocate buffers for.
    1312             :  * @caller_id: The caller ID of the caller (reserving writer).
    1313             :  * @max_size:  Fail if the extended size would be greater than this.
    1314             :  *
    1315             :  * This is the public function available to writers to re-reserve and extend
    1316             :  * data.
    1317             :  *
    1318             :  * The writer specifies the text size to extend (not the new total size) by
    1319             :  * setting the @text_buf_size field of @r. To ensure proper initialization
    1320             :  * of @r, prb_rec_init_wr() should be used.
    1321             :  *
    1322             :  * This function will fail if @caller_id does not match the caller ID of the
    1323             :  * newest record. In that case the caller must reserve new data using
    1324             :  * prb_reserve().
    1325             :  *
    1326             :  * Context: Any context. Disables local interrupts on success.
    1327             :  * Return: true if text data could be extended, otherwise false.
    1328             :  *
    1329             :  * On success:
    1330             :  *
    1331             :  *   - @r->text_buf points to the beginning of the entire text buffer.
    1332             :  *
    1333             :  *   - @r->text_buf_size is set to the new total size of the buffer.
    1334             :  *
    1335             :  *   - @r->info is not touched so that @r->info->text_len could be used
    1336             :  *     to append the text.
    1337             :  *
    1338             :  *   - prb_record_text_space() can be used on @e to query the new
    1339             :  *     actually used space.
    1340             :  *
    1341             :  * Important: All @r->info fields will already be set with the current values
    1342             :  *            for the record. I.e. @r->info->text_len will be less than
    1343             :  *            @text_buf_size. Writers can use @r->info->text_len to know
    1344             :  *            where concatenation begins and writers should update
    1345             :  *            @r->info->text_len after concatenating.
    1346             :  */
    1347          40 : bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
    1348             :                          struct printk_record *r, u32 caller_id, unsigned int max_size)
    1349             : {
    1350          40 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
    1351          40 :         struct printk_info *info;
    1352          40 :         unsigned int data_size;
    1353          40 :         struct prb_desc *d;
    1354          40 :         unsigned long id;
    1355             : 
    1356          80 :         local_irq_save(e->irqflags);
    1357             : 
    1358             :         /* Transition the newest descriptor back to the reserved state. */
    1359          40 :         d = desc_reopen_last(desc_ring, caller_id, &id);
    1360          40 :         if (!d) {
    1361           2 :                 local_irq_restore(e->irqflags);
    1362           2 :                 goto fail_reopen;
    1363             :         }
    1364             : 
    1365             :         /* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */
    1366             : 
    1367          38 :         info = to_info(desc_ring, id);
    1368             : 
    1369             :         /*
    1370             :          * Set the @e fields here so that prb_commit() can be used if
    1371             :          * anything fails from now on.
    1372             :          */
    1373          38 :         e->rb = rb;
    1374          38 :         e->id = id;
    1375             : 
    1376             :         /*
    1377             :          * desc_reopen_last() checked the caller_id, but there was no
    1378             :          * exclusive access at that point. The descriptor may have
    1379             :          * changed since then.
    1380             :          */
    1381          38 :         if (caller_id != info->caller_id)
    1382           0 :                 goto fail;
    1383             : 
    1384          38 :         if (BLK_DATALESS(&d->text_blk_lpos)) {
    1385           0 :                 if (WARN_ON_ONCE(info->text_len != 0)) {
    1386           0 :                         pr_warn_once("wrong text_len value (%hu, expecting 0)\n",
    1387             :                                      info->text_len);
    1388           0 :                         info->text_len = 0;
    1389             :                 }
    1390             : 
    1391           0 :                 if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
    1392           0 :                         goto fail;
    1393             : 
    1394           0 :                 if (r->text_buf_size > max_size)
    1395           0 :                         goto fail;
    1396             : 
    1397           0 :                 r->text_buf = data_alloc(rb, r->text_buf_size,
    1398             :                                          &d->text_blk_lpos, id);
    1399             :         } else {
    1400          38 :                 if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size))
    1401           0 :                         goto fail;
    1402             : 
    1403             :                 /*
    1404             :                  * Increase the buffer size to include the original size. If
    1405             :                  * the meta data (@text_len) is not sane, use the full data
    1406             :                  * block size.
    1407             :                  */
    1408          38 :                 if (WARN_ON_ONCE(info->text_len > data_size)) {
    1409           0 :                         pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n",
    1410             :                                      info->text_len, data_size);
    1411           0 :                         info->text_len = data_size;
    1412             :                 }
    1413          38 :                 r->text_buf_size += info->text_len;
    1414             : 
    1415          38 :                 if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
    1416           0 :                         goto fail;
    1417             : 
    1418          38 :                 if (r->text_buf_size > max_size)
    1419           0 :                         goto fail;
    1420             : 
    1421          38 :                 r->text_buf = data_realloc(rb, r->text_buf_size,
    1422             :                                            &d->text_blk_lpos, id);
    1423             :         }
    1424          38 :         if (r->text_buf_size && !r->text_buf)
    1425           0 :                 goto fail;
    1426             : 
    1427          38 :         r->info = info;
    1428             : 
    1429          38 :         e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);
    1430             : 
    1431          38 :         return true;
    1432           0 : fail:
    1433           0 :         prb_commit(e);
    1434             :         /* prb_commit() re-enabled interrupts. */
    1435           2 : fail_reopen:
    1436             :         /* Make it clear to the caller that the re-reserve failed. */
    1437           2 :         memset(r, 0, sizeof(*r));
    1438           2 :         return false;
    1439             : }
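As a rough usage sketch (hedged: the ringbuffer my_rb, the caller_id value and the 32-byte limit are assumptions for illustration, not taken from this file), a writer extending the newest record might look like:

        struct prb_reserved_entry e;
        struct printk_record r;

        /* Ask for 5 additional bytes of text space. */
        prb_rec_init_wr(&r, 5);

        /* Try to extend the newest record, but never beyond 32 bytes total. */
        if (prb_reserve_in_last(&e, &my_rb, &r, caller_id, 32)) {
                /* Concatenation begins at the current @text_len. */
                snprintf(&r.text_buf[r.info->text_len],
                         r.text_buf_size - r.info->text_len, "hello");
                r.info->text_len += 5;
                prb_final_commit(&e);
        } else {
                /* Extending failed; fall back to prb_reserve() for a new record. */
        }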
    1440             : 
    1441             : /*
    1442             :  * Attempt to finalize a specified descriptor. If this fails, the descriptor
    1443             :  * is either already final or it will finalize itself when the writer commits.
    1444             :  */
    1445         268 : static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
    1446             : {
    1447         268 :         unsigned long prev_state_val = DESC_SV(id, desc_committed);
    1448         268 :         struct prb_desc *d = to_desc(desc_ring, id);
    1449             : 
    1450         536 :         atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
    1451         268 :                         DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
    1452         268 : }
    1453             : 
    1454             : /**
    1455             :  * prb_reserve() - Reserve space in the ringbuffer.
    1456             :  *
    1457             :  * @e:  The entry structure to setup.
    1458             :  * @rb: The ringbuffer to reserve data in.
    1459             :  * @r:  The record structure to allocate buffers for.
    1460             :  *
    1461             :  * This is the public function available to writers to reserve data.
    1462             :  *
    1463             :  * The writer specifies the text size to reserve by setting the
    1464             :  * @text_buf_size field of @r. To ensure proper initialization of @r,
    1465             :  * prb_rec_init_wr() should be used.
    1466             :  *
    1467             :  * Context: Any context. Disables local interrupts on success.
    1468             :  * Return: true if at least text data could be allocated, otherwise false.
    1469             :  *
    1470             :  * On success, the fields @info and @text_buf of @r will be set by this
    1471             :  * function and should be filled in by the writer before committing. Also
    1472             :  * on success, prb_record_text_space() can be used on @e to query the actual
    1473             :  * space used for the text data block.
    1474             :  *
    1475             :  * Important: @info->text_len needs to be set correctly by the writer in
    1476             :  *            order for data to be readable and/or extended. Its value
    1477             :  *            is initialized to 0.
    1478             :  */
    1479         269 : bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
    1480             :                  struct printk_record *r)
    1481             : {
    1482         269 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
    1483         269 :         struct printk_info *info;
    1484         269 :         struct prb_desc *d;
    1485         269 :         unsigned long id;
    1486         269 :         u64 seq;
    1487             : 
    1488         269 :         if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
    1489           0 :                 goto fail;
    1490             : 
    1491             :         /*
    1492             :          * Descriptors in the reserved state act as blockers to all further
    1493             :          * reservations once the desc_ring has fully wrapped. Disable
    1494             :          * interrupts during the reserve/commit window in order to minimize
    1495             :          * the likelihood of this happening.
    1496             :          */
    1497         538 :         local_irq_save(e->irqflags);
    1498             : 
    1499         269 :         if (!desc_reserve(rb, &id)) {
    1500             :                 /* Descriptor reservation failures are tracked. */
    1501           0 :                 atomic_long_inc(&rb->fail);
    1502           0 :                 local_irq_restore(e->irqflags);
    1503           0 :                 goto fail;
    1504             :         }
    1505             : 
    1506         269 :         d = to_desc(desc_ring, id);
    1507         269 :         info = to_info(desc_ring, id);
    1508             : 
    1509             :         /*
    1510             :          * All @info fields (except @seq) are cleared and must be filled in
    1511             :          * by the writer. Save @seq before clearing because it is used to
    1512             :          * determine the new sequence number.
    1513             :          */
    1514         269 :         seq = info->seq;
    1515         269 :         memset(info, 0, sizeof(*info));
    1516             : 
    1517             :         /*
    1518             :          * Set the @e fields here so that prb_commit() can be used if
    1519             :          * text data allocation fails.
    1520             :          */
    1521         269 :         e->rb = rb;
    1522         269 :         e->id = id;
    1523             : 
    1524             :         /*
    1525             :          * Initialize the sequence number if it has "never been set".
    1526             :          * Otherwise just increment it by a full wrap.
    1527             :          *
    1528             :          * @seq is considered "never been set" if it has a value of 0,
    1529             :          * _except_ for @infos[0], which was specially setup by the ringbuffer
    1530             :          * initializer and therefore is always considered as set.
    1531             :          *
    1532             :          * See the "Bootstrap" comment block in printk_ringbuffer.h for
    1533             :          * details about how the initializer bootstraps the descriptors.
    1534             :          */
    1535         269 :         if (seq == 0 && DESC_INDEX(desc_ring, id) != 0)
    1536         268 :                 info->seq = DESC_INDEX(desc_ring, id);
    1537             :         else
    1538           1 :                 info->seq = seq + DESCS_COUNT(desc_ring);
    1539             : 
    1540             :         /*
    1541             :          * New data is about to be reserved. Once that happens, previous
    1542             :          * descriptors are no longer able to be extended. Finalize the
    1543             :          * previous descriptor now so that it can be made available to
    1544             :          * readers. (For seq==0 there is no previous descriptor.)
    1545             :          */
    1546         269 :         if (info->seq > 0)
    1547         268 :                 desc_make_final(desc_ring, DESC_ID(id - 1));
    1548             : 
    1549         269 :         r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);
    1550             :         /* If text data allocation fails, a data-less record is committed. */
    1551         269 :         if (r->text_buf_size && !r->text_buf) {
    1552           0 :                 prb_commit(e);
    1553             :                 /* prb_commit() re-enabled interrupts. */
    1554           0 :                 goto fail;
    1555             :         }
    1556             : 
    1557         269 :         r->info = info;
    1558             : 
    1559             :         /* Record full text space used by record. */
    1560         269 :         e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);
    1561             : 
    1562         269 :         return true;
    1563           0 : fail:
    1564             :         /* Make it clear to the caller that the reserve failed. */
    1565           0 :         memset(r, 0, sizeof(*r));
    1566           0 :         return false;
    1567             : }
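A minimal writer-side sketch (hedged: my_rb is assumed to be a ringbuffer defined elsewhere, e.g. via DEFINE_PRINTKRB(); error handling is omitted):

        struct prb_reserved_entry e;
        struct printk_record r;

        /* Reserve space for up to 32 bytes of text. */
        prb_rec_init_wr(&r, 32);

        if (prb_reserve(&e, &my_rb, &r)) {
                snprintf(r.text_buf, r.text_buf_size, "hello");
                /* @text_len must be set for the data to be readable/extendable. */
                r.info->text_len = strlen(r.text_buf);
                /* Commit and finalize, making the record available to readers. */
                prb_final_commit(&e);
        }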
    1568             : 
    1569             : /* Commit the data (possibly finalizing it) and restore interrupts. */
    1570         307 : static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val)
    1571             : {
    1572         307 :         struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
    1573         307 :         struct prb_desc *d = to_desc(desc_ring, e->id);
    1574         307 :         unsigned long prev_state_val = DESC_SV(e->id, desc_reserved);
    1575             : 
    1576             :         /* Now the writer has finished all writing: LMM(_prb_commit:A) */
    1577             : 
    1578             :         /*
    1579             :          * Set the descriptor as committed. See "ABA Issues" about why
    1580             :          * cmpxchg() instead of set() is used.
    1581             :          *
     1582             :          * 1. Guarantee all record data is stored before the descriptor state
    1583             :          *    is stored as committed. A write memory barrier is sufficient
    1584             :          *    for this. This pairs with desc_read:B and desc_reopen_last:A.
    1585             :          *
    1586             :          * 2. Guarantee the descriptor state is stored as committed before
    1587             :          *    re-checking the head ID in order to possibly finalize this
    1588             :          *    descriptor. This pairs with desc_reserve:D.
    1589             :          *
    1590             :          *    Memory barrier involvement:
    1591             :          *
    1592             :          *    If prb_commit:A reads from desc_reserve:D, then
    1593             :          *    desc_make_final:A reads from _prb_commit:B.
    1594             :          *
    1595             :          *    Relies on:
    1596             :          *
     1597             :          *    MB from _prb_commit:B to prb_commit:A
     1598             :          *       matching
     1599             :          *    MB from desc_reserve:D to desc_make_final:A
    1600             :          */
    1601         307 :         if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
    1602         307 :                         DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */
    1603           0 :                 WARN_ON_ONCE(1);
    1604             :         }
    1605             : 
    1606             :         /* Restore interrupts, the reserve/commit window is finished. */
    1607         307 :         local_irq_restore(e->irqflags);
    1608         307 : }
    1609             : 
    1610             : /**
    1611             :  * prb_commit() - Commit (previously reserved) data to the ringbuffer.
    1612             :  *
    1613             :  * @e: The entry containing the reserved data information.
    1614             :  *
    1615             :  * This is the public function available to writers to commit data.
    1616             :  *
    1617             :  * Note that the data is not yet available to readers until it is finalized.
    1618             :  * Finalizing happens automatically when space for the next record is
    1619             :  * reserved.
    1620             :  *
    1621             :  * See prb_final_commit() for a version of this function that finalizes
    1622             :  * immediately.
    1623             :  *
    1624             :  * Context: Any context. Enables local interrupts.
    1625             :  */
    1626          48 : void prb_commit(struct prb_reserved_entry *e)
    1627             : {
    1628          48 :         struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
    1629          48 :         unsigned long head_id;
    1630             : 
    1631          48 :         _prb_commit(e, desc_committed);
    1632             : 
    1633             :         /*
    1634             :          * If this descriptor is no longer the head (i.e. a new record has
    1635             :          * been allocated), extending the data for this record is no longer
    1636             :          * allowed and therefore it must be finalized.
    1637             :          */
    1638          48 :         head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
    1639          48 :         if (head_id != e->id)
    1640           0 :                 desc_make_final(desc_ring, e->id);
    1641          48 : }
    1642             : 
    1643             : /**
    1644             :  * prb_final_commit() - Commit and finalize (previously reserved) data to
    1645             :  *                      the ringbuffer.
    1646             :  *
    1647             :  * @e: The entry containing the reserved data information.
    1648             :  *
    1649             :  * This is the public function available to writers to commit+finalize data.
    1650             :  *
    1651             :  * By finalizing, the data is made immediately available to readers.
    1652             :  *
    1653             :  * This function should only be used if there are no intentions of extending
    1654             :  * this data using prb_reserve_in_last().
    1655             :  *
    1656             :  * Context: Any context. Enables local interrupts.
    1657             :  */
    1658         259 : void prb_final_commit(struct prb_reserved_entry *e)
    1659             : {
    1660         259 :         _prb_commit(e, desc_finalized);
    1661         259 : }
    1662             : 
    1663             : /*
     1664             :  * Count the number of lines in the provided text. All text has at least 1 line
    1665             :  * (even if @text_size is 0). Each '\n' processed is counted as an additional
    1666             :  * line.
    1667             :  */
    1668           0 : static unsigned int count_lines(const char *text, unsigned int text_size)
    1669             : {
    1670           0 :         unsigned int next_size = text_size;
    1671           0 :         unsigned int line_count = 1;
    1672           0 :         const char *next = text;
    1673             : 
    1674           0 :         while (next_size) {
    1675           0 :                 next = memchr(next, '\n', next_size);
    1676           0 :                 if (!next)
    1677             :                         break;
    1678           0 :                 line_count++;
    1679           0 :                 next++;
    1680           0 :                 next_size = text_size - (next - text);
    1681             :         }
    1682             : 
    1683           0 :         return line_count;
    1684             : }
    1685             : 
    1686             : /*
    1687             :  * Given @blk_lpos, copy an expected @len of data into the provided buffer.
    1688             :  * If @line_count is provided, count the number of lines in the data.
    1689             :  *
    1690             :  * This function (used by readers) performs strict validation on the data
    1691             :  * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
    1692             :  * triggered if an internal error is detected.
    1693             :  */
    1694         964 : static bool copy_data(struct prb_data_ring *data_ring,
    1695             :                       struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf,
    1696             :                       unsigned int buf_size, unsigned int *line_count)
    1697             : {
    1698         964 :         unsigned int data_size;
    1699         964 :         const char *data;
    1700             : 
    1701             :         /* Caller might not want any data. */
    1702         964 :         if ((!buf || !buf_size) && !line_count)
    1703             :                 return true;
    1704             : 
    1705         896 :         data = get_data(data_ring, blk_lpos, &data_size);
    1706         896 :         if (!data)
    1707             :                 return false;
    1708             : 
    1709             :         /*
     1710             :          * The actual size cannot be less than the expected size. It can be
     1711             :          * larger than expected because of the trailing alignment padding.
    1712             :          *
    1713             :          * Note that invalid @len values can occur because the caller loads
    1714             :          * the value during an allowed data race.
    1715             :          */
    1716         896 :         if (data_size < (unsigned int)len)
    1717             :                 return false;
    1718             : 
    1719             :         /* Caller interested in the line count? */
    1720         896 :         if (line_count)
    1721           0 :                 *line_count = count_lines(data, len);
    1722             : 
    1723             :         /* Caller interested in the data content? */
    1724         896 :         if (!buf || !buf_size)
    1725             :                 return true;
    1726             : 
    1727         896 :         data_size = min_t(u16, buf_size, len);
    1728             : 
    1729         896 :         memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
    1730         896 :         return true;
    1731             : }
    1732             : 
    1733             : /*
    1734             :  * This is an extended version of desc_read(). It gets a copy of a specified
    1735             :  * descriptor. However, it also verifies that the record is finalized and has
    1736             :  * the sequence number @seq. On success, 0 is returned.
    1737             :  *
    1738             :  * Error return values:
    1739             :  * -EINVAL: A finalized record with sequence number @seq does not exist.
    1740             :  * -ENOENT: A finalized record with sequence number @seq exists, but its data
    1741             :  *          is not available. This is a valid record, so readers should
    1742             :  *          continue with the next record.
    1743             :  */
    1744        3010 : static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring,
    1745             :                                    unsigned long id, u64 seq,
    1746             :                                    struct prb_desc *desc_out)
    1747             : {
    1748        3010 :         struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos;
    1749        3010 :         enum desc_state d_state;
    1750        3010 :         u64 s;
    1751             : 
    1752        3010 :         d_state = desc_read(desc_ring, id, desc_out, &s, NULL);
    1753             : 
    1754             :         /*
    1755             :          * An unexpected @id (desc_miss) or @seq mismatch means the record
    1756             :          * does not exist. A descriptor in the reserved or committed state
    1757             :          * means the record does not yet exist for the reader.
    1758             :          */
    1759        3010 :         if (d_state == desc_miss ||
    1760        2045 :             d_state == desc_reserved ||
    1761        1949 :             d_state == desc_committed ||
    1762        1949 :             s != seq) {
    1763             :                 return -EINVAL;
    1764             :         }
    1765             : 
    1766             :         /*
    1767             :          * A descriptor in the reusable state may no longer have its data
    1768             :          * available; report it as existing but with lost data. Or the record
     1769             :          * may itself have been stored with lost data (a FAILED_LPOS block).
    1770             :          */
    1771        1949 :         if (d_state == desc_reusable ||
    1772        1949 :             (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) {
    1773           0 :                 return -ENOENT;
    1774             :         }
    1775             : 
    1776             :         return 0;
    1777             : }
    1778             : 
    1779             : /*
    1780             :  * Copy the ringbuffer data from the record with @seq to the provided
    1781             :  * @r buffer. On success, 0 is returned.
    1782             :  *
    1783             :  * See desc_read_finalized_seq() for error return values.
    1784             :  */
    1785        2046 : static int prb_read(struct printk_ringbuffer *rb, u64 seq,
    1786             :                     struct printk_record *r, unsigned int *line_count)
    1787             : {
    1788        2046 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
    1789        2046 :         struct printk_info *info = to_info(desc_ring, seq);
    1790        2046 :         struct prb_desc *rdesc = to_desc(desc_ring, seq);
    1791        2046 :         atomic_long_t *state_var = &rdesc->state_var;
    1792        2046 :         struct prb_desc desc;
    1793        2046 :         unsigned long id;
    1794        2046 :         int err;
    1795             : 
    1796             :         /* Extract the ID, used to specify the descriptor to read. */
    1797        2046 :         id = DESC_ID(atomic_long_read(state_var));
    1798             : 
    1799             :         /* Get a local copy of the correct descriptor (if available). */
    1800        2046 :         err = desc_read_finalized_seq(desc_ring, id, seq, &desc);
    1801             : 
    1802             :         /*
    1803             :          * If @r is NULL, the caller is only interested in the availability
    1804             :          * of the record.
    1805             :          */
    1806        2046 :         if (err || !r)
    1807             :                 return err;
    1808             : 
    1809             :         /* If requested, copy meta data. */
    1810         964 :         if (r->info)
    1811         964 :                 memcpy(r->info, info, sizeof(*(r->info)));
    1812             : 
    1813             :         /* Copy text data. If it fails, this is a data-less record. */
    1814         964 :         if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len,
    1815             :                        r->text_buf, r->text_buf_size, line_count)) {
    1816             :                 return -ENOENT;
    1817             :         }
    1818             : 
    1819             :         /* Ensure the record is still finalized and has the same @seq. */
    1820         964 :         return desc_read_finalized_seq(desc_ring, id, seq, &desc);
    1821             : }
    1822             : 
    1823             : /* Get the sequence number of the tail descriptor. */
    1824        1061 : static u64 prb_first_seq(struct printk_ringbuffer *rb)
    1825             : {
    1826        1061 :         struct prb_desc_ring *desc_ring = &rb->desc_ring;
    1827        1061 :         enum desc_state d_state;
    1828        1061 :         struct prb_desc desc;
    1829        1061 :         unsigned long id;
    1830        1061 :         u64 seq;
    1831             : 
    1832        1061 :         for (;;) {
    1833        1061 :                 id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */
    1834             : 
    1835        1061 :                 d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */
    1836             : 
    1837             :                 /*
    1838             :                  * This loop will not be infinite because the tail is
    1839             :                  * _always_ in the finalized or reusable state.
    1840             :                  */
    1841        1061 :                 if (d_state == desc_finalized || d_state == desc_reusable)
    1842             :                         break;
    1843             : 
    1844             :                 /*
    1845             :                  * Guarantee the last state load from desc_read() is before
    1846             :                  * reloading @tail_id in order to see a new tail in the case
    1847             :                  * that the descriptor has been recycled. This pairs with
    1848             :                  * desc_reserve:D.
    1849             :                  *
    1850             :                  * Memory barrier involvement:
    1851             :                  *
    1852             :                  * If prb_first_seq:B reads from desc_reserve:F, then
    1853             :                  * prb_first_seq:A reads from desc_push_tail:B.
    1854             :                  *
    1855             :                  * Relies on:
    1856             :                  *
    1857             :                  * MB from desc_push_tail:B to desc_reserve:F
    1858             :                  *    matching
    1859             :                  * RMB prb_first_seq:B to prb_first_seq:A
    1860             :                  */
    1861           0 :                 smp_rmb(); /* LMM(prb_first_seq:C) */
    1862             :         }
    1863             : 
    1864        1061 :         return seq;
    1865             : }
    1866             : 
    1867             : /*
     1868             :  * Non-blocking read of a record. On success, @seq is updated to the record
     1869             :  * that was actually read (which may be later than the record requested).
    1870             :  *
    1871             :  * See the description of prb_read_valid() and prb_read_valid_info()
    1872             :  * for details.
    1873             :  */
    1874        2046 : static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
    1875             :                             struct printk_record *r, unsigned int *line_count)
    1876             : {
    1877        2046 :         u64 tail_seq;
    1878        2046 :         int err;
    1879             : 
    1880        2046 :         while ((err = prb_read(rb, *seq, r, line_count))) {
    1881        1061 :                 tail_seq = prb_first_seq(rb);
    1882             : 
    1883        1061 :                 if (*seq < tail_seq) {
    1884             :                         /*
    1885             :                          * Behind the tail. Catch up and try again. This
    1886             :                          * can happen for -ENOENT and -EINVAL cases.
    1887             :                          */
    1888           0 :                         *seq = tail_seq;
    1889             : 
    1890        1061 :                 } else if (err == -ENOENT) {
    1891             :                         /* Record exists, but no data available. Skip. */
    1892           0 :                         (*seq)++;
    1893             : 
    1894             :                 } else {
    1895             :                         /* Non-existent/non-finalized record. Must stop. */
    1896             :                         return false;
    1897             :                 }
    1898             :         }
    1899             : 
    1900             :         return true;
    1901             : }
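/*
 * Editor's note: an illustrative trace of the loop above (the values are
 * hypothetical). A reader requests seq 100, but the ringbuffer has wrapped
 * and records 100-149 have been overwritten:
 *
 *   - prb_read() fails for seq 100 because its descriptor was recycled.
 *   - prb_first_seq() reports the tail is now at seq 150, so *seq is
 *     caught up to 150 and the read is retried.
 *   - If record 150 happened to be a data-less record (-ENOENT), *seq
 *     would simply be incremented to 151 and retried.
 *   - The loop only returns false once it reaches a record that does not
 *     yet exist or is not yet finalized.
 */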
    1902             : 
    1903             : /**
    1904             :  * prb_read_valid() - Non-blocking read of a requested record or (if gone)
    1905             :  *                    the next available record.
    1906             :  *
    1907             :  * @rb:  The ringbuffer to read from.
    1908             :  * @seq: The sequence number of the record to read.
    1909             :  * @r:   A record data buffer to store the read record to.
    1910             :  *
    1911             :  * This is the public function available to readers to read a record.
    1912             :  *
    1913             :  * The reader provides the @info and @text_buf buffers of @r to be
    1914             :  * filled in. Any of the buffer pointers can be set to NULL if the reader
    1915             :  * is not interested in that data. To ensure proper initialization of @r,
    1916             :  * prb_rec_init_rd() should be used.
    1917             :  *
    1918             :  * Context: Any context.
    1919             :  * Return: true if a record was read, otherwise false.
    1920             :  *
    1921             :  * On success, the reader must check r->info.seq to see which record was
    1922             :  * actually read. This allows the reader to detect dropped records.
    1923             :  *
    1924             :  * Failure means @seq refers to a not yet written record.
    1925             :  */
    1926        1952 : bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
    1927             :                     struct printk_record *r)
    1928             : {
    1929        1952 :         return _prb_read_valid(rb, &seq, r, NULL);
    1930             : }
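/*
 * Editor's sketch (not part of the kernel source): a typical reader loop
 * built on prb_read_valid(). The ringbuffer instance (my_rb) and the
 * 128-byte text buffer are arbitrary placeholders; the text is terminated
 * manually because at most info.text_len bytes are copied, with no
 * guaranteed NUL terminator.
 *
 *	struct printk_info info;
 *	struct printk_record r;
 *	char text_buf[128];
 *	unsigned int len;
 *	u64 seq;
 *
 *	prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf));
 *
 *	for (seq = prb_first_valid_seq(&my_rb);
 *	     prb_read_valid(&my_rb, seq, &r);
 *	     seq = info.seq + 1) {
 *		if (info.seq != seq)
 *			pr_warn("lost %llu records\n", info.seq - seq);
 *
 *		len = min_t(unsigned int, info.text_len, sizeof(text_buf) - 1);
 *		text_buf[len] = '\0';
 *		pr_info("%llu: %s\n", info.seq, text_buf);
 *	}
 */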
    1931             : 
    1932             : /**
    1933             :  * prb_read_valid_info() - Non-blocking read of meta data for a requested
    1934             :  *                         record or (if gone) the next available record.
    1935             :  *
    1936             :  * @rb:         The ringbuffer to read from.
    1937             :  * @seq:        The sequence number of the record to read.
    1938             :  * @info:       A buffer to store the read record meta data to.
    1939             :  * @line_count: A buffer to store the number of lines in the record text.
    1940             :  *
    1941             :  * This is the public function available to readers to read only the
    1942             :  * meta data of a record.
    1943             :  *
     1944             :  * The reader provides the @info buffer and @line_count variable to be
     1945             :  * filled in. Either pointer can be set to NULL if the reader is not
     1946             :  * interested in that data.
    1947             :  *
    1948             :  * Context: Any context.
    1949             :  * Return: true if a record's meta data was read, otherwise false.
    1950             :  *
    1951             :  * On success, the reader must check info->seq to see which record meta data
    1952             :  * was actually read. This allows the reader to detect dropped records.
    1953             :  *
    1954             :  * Failure means @seq refers to a not yet written record.
    1955             :  */
    1956          76 : bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
    1957             :                          struct printk_info *info, unsigned int *line_count)
    1958             : {
    1959          76 :         struct printk_record r;
    1960             : 
    1961          76 :         prb_rec_init_rd(&r, info, NULL, 0);
    1962             : 
    1963          76 :         return _prb_read_valid(rb, &seq, &r, line_count);
    1964             : }
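/*
 * Editor's sketch (not part of the kernel source): count the records and
 * text lines currently readable, without copying any text data. "my_rb"
 * is a placeholder ringbuffer instance.
 *
 *	struct printk_info info;
 *	unsigned int line_count;
 *	unsigned int lines = 0;
 *	unsigned int records = 0;
 *	u64 seq = 0;
 *
 *	while (prb_read_valid_info(&my_rb, seq, &info, &line_count)) {
 *		records++;
 *		lines += line_count;
 *		seq = info.seq + 1;
 *	}
 *
 *	pr_info("%u records, %u text lines\n", records, lines);
 */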
    1965             : 
    1966             : /**
    1967             :  * prb_first_valid_seq() - Get the sequence number of the oldest available
    1968             :  *                         record.
    1969             :  *
    1970             :  * @rb: The ringbuffer to get the sequence number from.
    1971             :  *
    1972             :  * This is the public function available to readers to see what the
    1973             :  * first/oldest valid sequence number is.
    1974             :  *
    1975             :  * This provides readers a starting point to begin iterating the ringbuffer.
    1976             :  *
    1977             :  * Context: Any context.
     1978             :  * Return: The sequence number of the first/oldest record, or 0 if the
     1979             :  *         ringbuffer is empty.
    1980             :  */
    1981          18 : u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
    1982             : {
    1983          18 :         u64 seq = 0;
    1984             : 
    1985          18 :         if (!_prb_read_valid(rb, &seq, NULL, NULL))
    1986             :                 return 0;
    1987             : 
    1988          18 :         return seq;
    1989             : }
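/*
 * Editor's sketch (not part of the kernel source): together with
 * prb_next_seq() (documented below), this bounds the sequence numbers
 * currently visible to readers. "my_rb" is a placeholder instance.
 *
 *	u64 first = prb_first_valid_seq(&my_rb);
 *	u64 next = prb_next_seq(&my_rb);
 *
 *	pr_info("readable seq range: [%llu, %llu)\n", first, next);
 */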
    1990             : 
    1991             : /**
    1992             :  * prb_next_seq() - Get the sequence number after the last available record.
    1993             :  *
    1994             :  * @rb:  The ringbuffer to get the sequence number from.
    1995             :  *
     1996             :  * This is the public function available to readers to see what sequence
     1997             :  * number the next (not yet available) record will have.
    1998             :  *
    1999             :  * This provides readers a sequence number to jump to if all currently
    2000             :  * available records should be skipped.
    2001             :  *
    2002             :  * Context: Any context.
    2003             :  * Return: The sequence number of the next newest (not yet available) record
    2004             :  *         for readers.
    2005             :  */
    2006           0 : u64 prb_next_seq(struct printk_ringbuffer *rb)
    2007             : {
    2008           0 :         u64 seq = 0;
    2009             : 
    2010             :         /* Search forward from the oldest descriptor. */
    2011           0 :         while (_prb_read_valid(rb, &seq, NULL, NULL))
    2012           0 :                 seq++;
    2013             : 
    2014           0 :         return seq;
    2015             : }
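/*
 * Editor's sketch (not part of the kernel source): skip everything
 * currently in the ringbuffer and process only records appended
 * afterwards ("tail" behaviour). The record buffer @r is assumed to be
 * initialized with prb_rec_init_rd() as in the prb_read_valid() sketch
 * above; how the reader waits for new records is outside the scope of
 * this file.
 *
 *	u64 seq = prb_next_seq(&my_rb);
 *
 *	for (;;) {
 *		while (prb_read_valid(&my_rb, seq, &r)) {
 *			// process the record in r ...
 *			seq = r.info->seq + 1;
 *		}
 *		// wait/poll until new records are finalized ...
 *	}
 */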
    2016             : 
    2017             : /**
    2018             :  * prb_init() - Initialize a ringbuffer to use provided external buffers.
    2019             :  *
    2020             :  * @rb:       The ringbuffer to initialize.
    2021             :  * @text_buf: The data buffer for text data.
     2022             :  * @textbits: The size of @text_buf as a power-of-2 exponent (1 << @textbits bytes).
    2023             :  * @descs:    The descriptor buffer for ringbuffer records.
     2024             :  * @descbits: The count of @descs items as a power-of-2 exponent (1 << @descbits items).
    2025             :  * @infos:    The printk_info buffer for ringbuffer records.
    2026             :  *
    2027             :  * This is the public function available to writers to setup a ringbuffer
    2028             :  * during runtime using provided buffers.
    2029             :  *
    2030             :  * This must match the initialization of DEFINE_PRINTKRB().
    2031             :  *
    2032             :  * Context: Any context.
    2033             :  */
    2034           0 : void prb_init(struct printk_ringbuffer *rb,
    2035             :               char *text_buf, unsigned int textbits,
    2036             :               struct prb_desc *descs, unsigned int descbits,
    2037             :               struct printk_info *infos)
    2038             : {
    2039           0 :         memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0]));
    2040           0 :         memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0]));
    2041             : 
    2042           0 :         rb->desc_ring.count_bits = descbits;
    2043           0 :         rb->desc_ring.descs = descs;
    2044           0 :         rb->desc_ring.infos = infos;
    2045           0 :         atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
    2046           0 :         atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));
    2047             : 
    2048           0 :         rb->text_data_ring.size_bits = textbits;
    2049           0 :         rb->text_data_ring.data = text_buf;
    2050           0 :         atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits));
    2051           0 :         atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits));
    2052             : 
    2053           0 :         atomic_long_set(&rb->fail, 0);
    2054             : 
    2055           0 :         atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits));
    2056           0 :         descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS;
    2057           0 :         descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS;
    2058             : 
    2059           0 :         infos[0].seq = -(u64)_DESCS_COUNT(descbits);
    2060           0 :         infos[_DESCS_COUNT(descbits) - 1].seq = 0;
    2061           0 : }
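/*
 * Editor's sketch (not part of the kernel source): runtime setup of a
 * small ringbuffer from caller-provided buffers. The names and sizes
 * (32 descriptors, 4 KiB of text data) are arbitrary placeholders;
 * DEFINE_PRINTKRB() in printk_ringbuffer.h is the build-time equivalent.
 *
 *	#define MY_DESC_BITS 5
 *	#define MY_TEXT_BITS 12
 *
 *	static struct prb_desc my_descs[1 << MY_DESC_BITS];
 *	static struct printk_info my_infos[1 << MY_DESC_BITS];
 *	static char my_text[1 << MY_TEXT_BITS];
 *	static struct printk_ringbuffer my_rb;
 *
 *	prb_init(&my_rb, &my_text[0], MY_TEXT_BITS,
 *		 &my_descs[0], MY_DESC_BITS, &my_infos[0]);
 */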
    2062             : 
    2063             : /**
    2064             :  * prb_record_text_space() - Query the full actual used ringbuffer space for
    2065             :  *                           the text data of a reserved entry.
    2066             :  *
    2067             :  * @e: The successfully reserved entry to query.
    2068             :  *
    2069             :  * This is the public function available to writers to see how much actual
    2070             :  * space is used in the ringbuffer to store the text data of the specified
    2071             :  * entry.
    2072             :  *
    2073             :  * This function is only valid if @e has been successfully reserved using
    2074             :  * prb_reserve().
    2075             :  *
    2076             :  * Context: Any context.
    2077             :  * Return: The size in bytes used by the text data of the associated record.
    2078             :  */
    2079           0 : unsigned int prb_record_text_space(struct prb_reserved_entry *e)
    2080             : {
    2081           0 :         return e->text_space;
    2082             : }
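/*
 * Editor's sketch (not part of the kernel source): a writer checking how
 * much ringbuffer space its text actually consumed. prb_rec_init_wr() and
 * prb_final_commit() are assumed to be the writer-side helpers declared
 * in printk_ringbuffer.h; "my_rb" is a placeholder instance.
 *
 *	struct prb_reserved_entry e;
 *	struct printk_record r;
 *	const char *text = "example message";
 *
 *	prb_rec_init_wr(&r, strlen(text) + 1);
 *
 *	if (prb_reserve(&e, &my_rb, &r)) {
 *		memcpy(r.text_buf, text, strlen(text) + 1);
 *		r.info->text_len = strlen(text);
 *
 *		pr_debug("record uses %u bytes of text space\n",
 *			 prb_record_text_space(&e));
 *
 *		prb_final_commit(&e);
 *	}
 */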

Generated by: LCOV version 1.14