[PATCH printk v4 2/6] printk: ringbuffer: add BLK_DATALESS() macro

2020-09-08 Thread John Ogness
Rather than continually needing to explicitly check @begin and @next
to identify a dataless block, introduce and use a BLK_DATALESS()
macro.

Signed-off-by: John Ogness 
Reviewed-by: Petr Mladek 
---
 kernel/printk/printk_ringbuffer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index aa6e31a27601..6ee5ebce1450 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -266,6 +266,8 @@
 
 /* Determine if a logical position refers to a data-less block. */
 #define LPOS_DATALESS(lpos)((lpos) & 1UL)
+#define BLK_DATALESS(blk)  (LPOS_DATALESS((blk)->begin) && \
+LPOS_DATALESS((blk)->next))
 
 /* Get the logical position at index 0 of the current wrap. */
 #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \
@@ -1021,7 +1023,7 @@ static unsigned int space_used(struct prb_data_ring 
*data_ring,
   struct prb_data_blk_lpos *blk_lpos)
 {
/* Data-less blocks take no space. */
-   if (LPOS_DATALESS(blk_lpos->begin))
+   if (BLK_DATALESS(blk_lpos))
return 0;
 
if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, 
blk_lpos->next)) {
@@ -1054,7 +1056,7 @@ static const char *get_data(struct prb_data_ring 
*data_ring,
struct prb_data_block *db;
 
/* Data-less data block description. */
-   if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) {
+   if (BLK_DATALESS(blk_lpos)) {
if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
*data_size = 0;
return "";
-- 
2.20.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH printk v4 6/6] printk: reimplement log_cont using record extension

2020-09-08 Thread John Ogness
Use the record extending feature of the ringbuffer to implement
continuous messages. This preserves the existing continuous message
behavior.

Signed-off-by: John Ogness 
Reviewed-by: Petr Mladek 
---
 kernel/printk/printk.c | 98 +-
 1 file changed, 20 insertions(+), 78 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 964b5701688f..9a2e23191576 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -535,7 +535,10 @@ static int log_store(u32 caller_id, int facility, int 
level,
r.info->caller_id = caller_id;
 
/* insert message */
-   prb_commit(&e);
+   if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE))
+   prb_commit(&e);
+   else
+   prb_final_commit(&e);
 
return (text_len + trunc_msg_len);
 }
@@ -1084,7 +1087,7 @@ static unsigned int __init add_to_rb(struct 
printk_ringbuffer *rb,
dest_r.info->ts_nsec = r->info->ts_nsec;
dest_r.info->caller_id = r->info->caller_id;
 
-   prb_commit();
+   prb_final_commit();
 
return prb_record_text_space();
 }
@@ -1884,87 +1887,26 @@ static inline u32 printk_caller_id(void)
0x8000 + raw_smp_processor_id();
 }
 
-/*
- * Continuation lines are buffered, and not committed to the record buffer
- * until the line is complete, or a race forces it. The line fragments
- * though, are printed immediately to the consoles to ensure everything has
- * reached the console in case of a kernel crash.
- */
-static struct cont {
-   char buf[LOG_LINE_MAX];
-   size_t len; /* length == 0 means unused buffer */
-   u32 caller_id;  /* printk_caller_id() of first print */
-   u64 ts_nsec;/* time of first print */
-   u8 level;   /* log level of first message */
-   u8 facility;/* log facility of first message */
-   enum log_flags flags;   /* prefix, newline flags */
-} cont;
-
-static void cont_flush(void)
-{
-   if (cont.len == 0)
-   return;
-
-   log_store(cont.caller_id, cont.facility, cont.level, cont.flags,
- cont.ts_nsec, NULL, 0, cont.buf, cont.len);
-   cont.len = 0;
-}
-
-static bool cont_add(u32 caller_id, int facility, int level,
-enum log_flags flags, const char *text, size_t len)
-{
-   /* If the line gets too long, split it up in separate records. */
-   if (cont.len + len > sizeof(cont.buf)) {
-   cont_flush();
-   return false;
-   }
-
-   if (!cont.len) {
-   cont.facility = facility;
-   cont.level = level;
-   cont.caller_id = caller_id;
-   cont.ts_nsec = local_clock();
-   cont.flags = flags;
-   }
-
-   memcpy(cont.buf + cont.len, text, len);
-   cont.len += len;
-
-   // The original flags come from the first line,
-   // but later continuations can add a newline.
-   if (flags & LOG_NEWLINE) {
-   cont.flags |= LOG_NEWLINE;
-   cont_flush();
-   }
-
-   return true;
-}
-
 static size_t log_output(int facility, int level, enum log_flags lflags, const 
char *dict, size_t dictlen, char *text, size_t text_len)
 {
const u32 caller_id = printk_caller_id();
 
-   /*
-* If an earlier line was buffered, and we're a continuation
-* write from the same context, try to add it to the buffer.
-*/
-   if (cont.len) {
-   if (cont.caller_id == caller_id && (lflags & LOG_CONT)) {
-   if (cont_add(caller_id, facility, level, lflags, text, 
text_len))
-   return text_len;
-   }
-   /* Otherwise, make sure it's flushed */
-   cont_flush();
-   }
-
-   /* Skip empty continuation lines that couldn't be added - they just 
flush */
-   if (!text_len && (lflags & LOG_CONT))
-   return 0;
-
-   /* If it doesn't end in a newline, try to buffer the current line */
-   if (!(lflags & LOG_NEWLINE)) {
-   if (cont_add(caller_id, facility, level, lflags, text, 
text_len))
+   if (lflags & LOG_CONT) {
+   struct prb_reserved_entry e;
+   struct printk_record r;
+
+   prb_rec_init_wr(&r, text_len, 0);
+   if (prb_reserve_in_last(&e, prb, &r, caller_id)) {
+   memcpy(&r.text_buf[r.info->text_len], text, text_len);
+   r.info->text_len += text_len;
+   if (lflags & LOG_NEWLINE) {
+   r.info->flags |= LOG_NEWLINE;
+   prb_final_commit(&e);
+   } else {
+   prb_commit(&e);
+   }
return text_len;
+   }
}
 
/* Store it in the 

[PATCH printk v4 1/6] printk: ringbuffer: relocate get_data()

2020-09-08 Thread John Ogness
Move the internal get_data() function as-is above prb_reserve() so
that a later change can make use of the static function.

Signed-off-by: John Ogness 
Reviewed-by: Petr Mladek 
---
 kernel/printk/printk_ringbuffer.c | 116 +++---
 1 file changed, 58 insertions(+), 58 deletions(-)

diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index 0659b50872b5..aa6e31a27601 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -1038,6 +1038,64 @@ static unsigned int space_used(struct prb_data_ring 
*data_ring,
DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin));
 }
 
+/*
+ * Given @blk_lpos, return a pointer to the writer data from the data block
+ * and calculate the size of the data part. A NULL pointer is returned if
+ * @blk_lpos specifies values that could never be legal.
+ *
+ * This function (used by readers) performs strict validation on the lpos
+ * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
+ * triggered if an internal error is detected.
+ */
+static const char *get_data(struct prb_data_ring *data_ring,
+   struct prb_data_blk_lpos *blk_lpos,
+   unsigned int *data_size)
+{
+   struct prb_data_block *db;
+
+   /* Data-less data block description. */
+   if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) {
+   if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
+   *data_size = 0;
+   return "";
+   }
+   return NULL;
+   }
+
+   /* Regular data block: @begin less than @next and in same wrap. */
+   if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, 
blk_lpos->next) &&
+   blk_lpos->begin < blk_lpos->next) {
+   db = to_block(data_ring, blk_lpos->begin);
+   *data_size = blk_lpos->next - blk_lpos->begin;
+
+   /* Wrapping data block: @begin is one wrap behind @next. */
+   } else if (DATA_WRAPS(data_ring, blk_lpos->begin + 
DATA_SIZE(data_ring)) ==
+  DATA_WRAPS(data_ring, blk_lpos->next)) {
+   db = to_block(data_ring, 0);
+   *data_size = DATA_INDEX(data_ring, blk_lpos->next);
+
+   /* Illegal block description. */
+   } else {
+   WARN_ON_ONCE(1);
+   return NULL;
+   }
+
+   /* A valid data block will always be aligned to the ID size. */
+   if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, 
sizeof(db->id))) ||
+   WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, 
sizeof(db->id {
+   return NULL;
+   }
+
+   /* A valid data block will always have at least an ID. */
+   if (WARN_ON_ONCE(*data_size < sizeof(db->id)))
+   return NULL;
+
+   /* Subtract block ID space from size to reflect data size. */
+   *data_size -= sizeof(db->id);
+
+   return >data[0];
+}
+
 /**
  * prb_reserve() - Reserve space in the ringbuffer.
  *
@@ -1192,64 +1250,6 @@ void prb_commit(struct prb_reserved_entry *e)
local_irq_restore(e->irqflags);
 }
 
-/*
- * Given @blk_lpos, return a pointer to the writer data from the data block
- * and calculate the size of the data part. A NULL pointer is returned if
- * @blk_lpos specifies values that could never be legal.
- *
- * This function (used by readers) performs strict validation on the lpos
- * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
- * triggered if an internal error is detected.
- */
-static const char *get_data(struct prb_data_ring *data_ring,
-   struct prb_data_blk_lpos *blk_lpos,
-   unsigned int *data_size)
-{
-   struct prb_data_block *db;
-
-   /* Data-less data block description. */
-   if (LPOS_DATALESS(blk_lpos->begin) && LPOS_DATALESS(blk_lpos->next)) {
-   if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) {
-   *data_size = 0;
-   return "";
-   }
-   return NULL;
-   }
-
-   /* Regular data block: @begin less than @next and in same wrap. */
-   if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, 
blk_lpos->next) &&
-   blk_lpos->begin < blk_lpos->next) {
-   db = to_block(data_ring, blk_lpos->begin);
-   *data_size = blk_lpos->next - blk_lpos->begin;
-
-   /* Wrapping data block: @begin is one wrap behind @next. */
-   } else if (DATA_WRAPS(data_ring, blk_lpos->begin + 
DATA_SIZE(data_ring)) ==
-  DATA_WRAPS(data_ring, blk_lpos->next)) {
-   db = to_block(data_ring, 0);
-   *data_size = DATA_INDEX(data_ring, blk_lpos->next);
-
-   /* Illegal block description. */
-   } else {
-   WARN_ON_ONCE(1);
-   return NULL;
- 

[PATCH printk v4 5/6] printk: ringbuffer: add finalization/extension support

2020-09-08 Thread John Ogness
Add support for extending the newest data block. For this, introduce
a new finalization state (desc_finalized) denoting a committed
descriptor that cannot be extended.

Until a record is finalized, a writer can reopen that record to
append new data. Reopening a record means transitioning from the
desc_committed state back to the desc_reserved state.

A writer can explicitly finalize a record if there is no intention
of extending it. Also, records are automatically finalized when a
new record is reserved. This relieves writers of needing to
explicitly finalize while also making such records available to
readers sooner. (Readers can only traverse finalized records.)

Four new memory barrier pairs are introduced. Two of them are
insignificant additions (data_realloc:A/desc_read:D and
data_realloc:A/data_push_tail:B) because they are alternate path
memory barriers that exactly match the purpose, pairing, and
context of the two existing memory barrier pairs they provide an
alternate path for. The other two new memory barrier pairs are
significant additions:

desc_reopen_last:A / _prb_commit:B - When reopening a descriptor,
ensure the state transitions back to desc_reserved before
fully trusting the descriptor data.

_prb_commit:B / desc_reserve:D - When committing a descriptor,
ensure the state transitions to desc_committed before checking
the head ID to see if the descriptor needs to be finalized.

Signed-off-by: John Ogness 
---
 Documentation/admin-guide/kdump/gdbmacros.txt |   3 +-
 kernel/printk/printk_ringbuffer.c | 541 --
 kernel/printk/printk_ringbuffer.h |   6 +-
 scripts/gdb/linux/dmesg.py|   3 +-
 4 files changed, 491 insertions(+), 62 deletions(-)

diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt 
b/Documentation/admin-guide/kdump/gdbmacros.txt
index 8f533b751c46..94fabb165abf 100644
--- a/Documentation/admin-guide/kdump/gdbmacros.txt
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
@@ -297,6 +297,7 @@ end
 define dmesg
# definitions from kernel/printk/printk_ringbuffer.h
set var $desc_committed = 1
+   set var $desc_finalized = 2
set var $desc_sv_bits = sizeof(long) * 8
set var $desc_flags_shift = $desc_sv_bits - 2
set var $desc_flags_mask = 3 << $desc_flags_shift
@@ -313,7 +314,7 @@ define dmesg
 
# skip non-committed record
set var $state = 3 & ($desc->state_var.counter >> 
$desc_flags_shift)
-   if ($state == $desc_committed)
+   if ($state == $desc_committed || $state == $desc_finalized)
dump_record $desc $prev_flags
set var $prev_flags = $desc->info.flags
end
diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index 911fbe150e9a..f1fab8c82819 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -46,20 +46,26 @@
  * into a single descriptor field named @state_var, allowing ID and state to
  * be synchronously and atomically updated.
  *
- * Descriptors have three states:
+ * Descriptors have four states:
  *
  *   reserved
  * A writer is modifying the record.
  *
  *   committed
- * The record and all its data are complete and available for reading.
+ * The record and all its data are written. A writer can reopen the
+ * descriptor (transitioning it back to reserved), but in the committed
+ * state the data is consistent.
+ *
+ *   finalized
+ * The record and all its data are complete and available for reading. A
+ * writer cannot reopen the descriptor.
  *
  *   reusable
  * The record exists, but its text and/or dictionary data may no longer
  * be available.
  *
  * Querying the @state_var of a record requires providing the ID of the
- * descriptor to query. This can yield a possible fourth (pseudo) state:
+ * descriptor to query. This can yield a possible fifth (pseudo) state:
  *
  *   miss
  * The descriptor being queried has an unexpected ID.
@@ -79,6 +85,28 @@
  * committed or reusable queried state. This makes it possible that a valid
  * sequence number of the tail is always available.
  *
+ * Descriptor Finalization
+ * ~~~~~~~~~~~~~~~~~~~~~~~
+ * When a writer calls the commit function prb_commit(), record data is
+ * fully stored and is consistent within the ringbuffer. However, a writer can
+ * reopen that record, claiming exclusive access (as with prb_reserve()), and
+ * modify that record. When finished, the writer must again commit the record.
+ *
+ * In order for a record to be made available to readers (and also become
+ * recyclable for writers), it must be finalized. A finalized record cannot be
+ * reopened and can never become "unfinalized". Record finalization can occur
+ * in three different scenarios:
+ *
+ *   1) A writer can simultaneously commit and finalize its record by calling
+ *  prb_final_commit() 

[PATCH printk v4 4/6] printk: ringbuffer: change representation of states

2020-09-08 Thread John Ogness
Rather than deriving the state by evaluating bits within the flags
area of the state variable, assign the states explicit values and
set those values in the flags area. Introduce macros to make it
simple to read and write state values for the state variable.

Although the functionality is preserved, the binary representation
for the states is changed.

Signed-off-by: John Ogness 
---
 Documentation/admin-guide/kdump/gdbmacros.txt | 12 ---
 kernel/printk/printk_ringbuffer.c | 28 +
 kernel/printk/printk_ringbuffer.h | 31 ---
 scripts/gdb/linux/dmesg.py| 11 ---
 4 files changed, 41 insertions(+), 41 deletions(-)

diff --git a/Documentation/admin-guide/kdump/gdbmacros.txt 
b/Documentation/admin-guide/kdump/gdbmacros.txt
index 7adece30237e..8f533b751c46 100644
--- a/Documentation/admin-guide/kdump/gdbmacros.txt
+++ b/Documentation/admin-guide/kdump/gdbmacros.txt
@@ -295,9 +295,12 @@ document dump_record
 end
 
 define dmesg
-   set var $desc_committed = 1UL << ((sizeof(long) * 8) - 1)
-   set var $flags_mask = 3UL << ((sizeof(long) * 8) - 2)
-   set var $id_mask = ~$flags_mask
+   # definitions from kernel/printk/printk_ringbuffer.h
+   set var $desc_committed = 1
+   set var $desc_sv_bits = sizeof(long) * 8
+   set var $desc_flags_shift = $desc_sv_bits - 2
+   set var $desc_flags_mask = 3 << $desc_flags_shift
+   set var $id_mask = ~$desc_flags_mask
 
set var $desc_count = 1U << prb->desc_ring.count_bits
set var $prev_flags = 0
@@ -309,7 +312,8 @@ define dmesg
set var $desc = >desc_ring.descs[$id % $desc_count]
 
# skip non-committed record
-   if (($desc->state_var.counter & $flags_mask) == $desc_committed)
+   set var $state = 3 & ($desc->state_var.counter >> 
$desc_flags_shift)
+   if ($state == $desc_committed)
dump_record $desc $prev_flags
set var $prev_flags = $desc->info.flags
end
diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index 82347abb22a5..911fbe150e9a 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -348,14 +348,6 @@ static bool data_check_size(struct prb_data_ring 
*data_ring, unsigned int size)
return true;
 }
 
-/* The possible responses of a descriptor state-query. */
-enum desc_state {
-   desc_miss,  /* ID mismatch */
-   desc_reserved,  /* reserved, in use by writer */
-   desc_committed, /* committed, writer is done */
-   desc_reusable,  /* free, not yet used by any writer */
-};
-
 /* Query the state of a descriptor. */
 static enum desc_state get_desc_state(unsigned long id,
  unsigned long state_val)
@@ -363,13 +355,7 @@ static enum desc_state get_desc_state(unsigned long id,
if (id != DESC_ID(state_val))
return desc_miss;
 
-   if (state_val & DESC_REUSE_MASK)
-   return desc_reusable;
-
-   if (state_val & DESC_COMMITTED_MASK)
-   return desc_committed;
-
-   return desc_reserved;
+   return DESC_STATE(state_val);
 }
 
 /*
@@ -467,8 +453,8 @@ static enum desc_state desc_read(struct prb_desc_ring 
*desc_ring,
 static void desc_make_reusable(struct prb_desc_ring *desc_ring,
   unsigned long id)
 {
-   unsigned long val_committed = id | DESC_COMMITTED_MASK;
-   unsigned long val_reusable = val_committed | DESC_REUSE_MASK;
+   unsigned long val_committed = DESC_SV(id, desc_committed);
+   unsigned long val_reusable = DESC_SV(id, desc_reusable);
struct prb_desc *desc = to_desc(desc_ring, id);
atomic_long_t *state_var = >state_var;
 
@@ -904,7 +890,7 @@ static bool desc_reserve(struct printk_ringbuffer *rb, 
unsigned long *id_out)
 */
prev_state_val = atomic_long_read(>state_var); /* 
LMM(desc_reserve:E) */
if (prev_state_val &&
-   prev_state_val != (id_prev_wrap | DESC_COMMITTED_MASK | 
DESC_REUSE_MASK)) {
+   get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) {
WARN_ON_ONCE(1);
return false;
}
@@ -918,7 +904,7 @@ static bool desc_reserve(struct printk_ringbuffer *rb, 
unsigned long *id_out)
 * This pairs with desc_read:D.
 */
if (!atomic_long_try_cmpxchg(>state_var, _state_val,
-id | 0)) { /* LMM(desc_reserve:F) */
+   DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */
WARN_ON_ONCE(1);
return false;
}
@@ -1237,7 +1223,7 @@ void prb_commit(struct prb_reserved_entry *e)
 {
struct prb_desc_ring *desc_ring = >rb->desc_ring;
struct prb_desc *d = to_desc(desc_ring, e->id);
-   unsigned long prev_state_val = e->id | 0;
+   

[PATCH printk v4 3/6] printk: ringbuffer: clear initial reserved fields

2020-09-08 Thread John Ogness
prb_reserve() will set some meta data values and leave others
uninitialized (or rather, containing the values of the previous
wrap). Simplify the API by always clearing out all the fields.
Only the sequence number is filled in. The caller is now
responsible for filling in the rest of the meta data fields.
In particular, for correctly filling in text and dict lengths.

Signed-off-by: John Ogness 
---
 kernel/printk/printk.c| 12 
 kernel/printk/printk_ringbuffer.c | 30 ++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index fec71229169e..964b5701688f 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -520,8 +520,11 @@ static int log_store(u32 caller_id, int facility, int 
level,
memcpy(_buf[0], text, text_len);
if (trunc_msg_len)
memcpy(_buf[text_len], trunc_msg, trunc_msg_len);
-   if (r.dict_buf)
+   r.info->text_len = text_len + trunc_msg_len;
+   if (r.dict_buf) {
memcpy(_buf[0], dict, dict_len);
+   r.info->dict_len = dict_len;
+   }
r.info->facility = facility;
r.info->level = level & 7;
r.info->flags = flags & 0x1f;
@@ -1069,10 +1072,11 @@ static unsigned int __init add_to_rb(struct 
printk_ringbuffer *rb,
if (!prb_reserve(, rb, _r))
return 0;
 
-   memcpy(_r.text_buf[0], >text_buf[0], dest_r.text_buf_size);
+   memcpy(_r.text_buf[0], >text_buf[0], r->info->text_len);
+   dest_r.info->text_len = r->info->text_len;
if (dest_r.dict_buf) {
-   memcpy(_r.dict_buf[0], >dict_buf[0],
-  dest_r.dict_buf_size);
+   memcpy(_r.dict_buf[0], >dict_buf[0], r->info->dict_len);
+   dest_r.info->dict_len = r->info->dict_len;
}
dest_r.info->facility = r->info->facility;
dest_r.info->level = r->info->level;
diff --git a/kernel/printk/printk_ringbuffer.c 
b/kernel/printk/printk_ringbuffer.c
index 6ee5ebce1450..82347abb22a5 100644
--- a/kernel/printk/printk_ringbuffer.c
+++ b/kernel/printk/printk_ringbuffer.c
@@ -146,10 +146,13 @@
  *
  * if (prb_reserve(, _rb, )) {
  * snprintf(r.text_buf, r.text_buf_size, "%s", textstr);
+ * r.info->text_len = strlen(textstr);
  *
  * // dictionary allocation may have failed
- * if (r.dict_buf)
+ * if (r.dict_buf) {
  * snprintf(r.dict_buf, r.dict_buf_size, "%s", dictstr);
+ * r.info->dict_len = strlen(dictstr);
+ * }
  *
  * r.info->ts_nsec = local_clock();
  *
@@ -1125,9 +1128,9 @@ static const char *get_data(struct prb_data_ring 
*data_ring,
  * @dict_buf_size is set to 0. Writers must check this before writing to
  * dictionary space.
  *
- * @info->text_len and @info->dict_len will already be set to @text_buf_size
- * and @dict_buf_size, respectively. If dictionary space reservation fails,
- * @info->dict_len is set to 0.
+ * Important: @info->text_len and @info->dict_len need to be set correctly by
+ *the writer in order for data to be readable and/or extended.
+ *Their values are initialized to 0.
  */
 bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
 struct printk_record *r)
@@ -1135,6 +1138,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct 
printk_ringbuffer *rb,
struct prb_desc_ring *desc_ring = >desc_ring;
struct prb_desc *d;
unsigned long id;
+   u64 seq;
 
if (!data_check_size(>text_data_ring, r->text_buf_size))
goto fail;
@@ -1159,6 +1163,14 @@ bool prb_reserve(struct prb_reserved_entry *e, struct 
printk_ringbuffer *rb,
 
d = to_desc(desc_ring, id);
 
+   /*
+* All @info fields (except @seq) are cleared and must be filled in
+* by the writer. Save @seq before clearing because it is used to
+* determine the new sequence number.
+*/
+   seq = d->info.seq;
+   memset(>info, 0, sizeof(d->info));
+
/*
 * Set the @e fields here so that prb_commit() can be used if
 * text data allocation fails.
@@ -1177,17 +1189,15 @@ bool prb_reserve(struct prb_reserved_entry *e, struct 
printk_ringbuffer *rb,
 * See the "Bootstrap" comment block in printk_ringbuffer.h for
 * details about how the initializer bootstraps the descriptors.
 */
-   if (d->info.seq == 0 && DESC_INDEX(desc_ring, id) != 0)
+   if (seq == 0 && DESC_INDEX(desc_ring, id) != 0)
d->info.seq = DESC_INDEX(desc_ring, id);
else
-   d->info.seq += DESCS_COUNT(desc_ring);
+   d->info.seq = seq + DESCS_COUNT(desc_ring);
 
r->text_buf = data_alloc(rb, >text_data_ring, r->text_buf_size,
 >text_blk_lpos, id);
/* If text data 

[PATCH printk v4 0/6] printk: reimplement LOG_CONT handling

2020-09-08 Thread John Ogness
Hello,

Here is v4 for the second series to rework the printk subsystem.
(The v3 is here [0].) This series implements a new ringbuffer
feature that allows the last record to be extended. Petr Mladek
provided the initial proof of concept [1] for this.

Using the record extension feature, LOG_CONT is re-implemented
in a way that exactly preserves its behavior, but avoids the
need for an extra buffer. In particular, it avoids the need for
any synchronization that such a buffer requires.

This series deviates from the agreements [2] made at the meeting
during LPC2019 in Lisbon. The test results of the v1 series,
which implemented LOG_CONT as agreed upon, showed that the
effects on existing userspace tools using /dev/kmsg (journalctl,
dmesg) were not acceptable [3].

Patch 5 introduces *four* new memory barrier pairs. Two of them
are insignificant additions (data_realloc:A/desc_read:D and
data_realloc:A/data_push_tail:B) because they are alternate path
memory barriers that exactly match the purpose and context of
the two existing memory barrier pairs they provide an alternate
path for. The other two new memory barrier pairs are significant
additions:

desc_reopen_last:A / _prb_commit:B - When reopening a descriptor,
ensure the state transitions back to desc_reserved before
fully trusting the descriptor data.

_prb_commit:B / desc_reserve:D - When committing a descriptor,
ensure the state transitions to desc_committed before checking
the head ID to see if the descriptor needs to be finalized.

The test module used to test the ringbuffer is available
here [4].

The series is based on the printk-rework branch of the printk git
tree:
e60768311af8 ("scripts/gdb: update for lockless printk ringbuffer")

The list of changes since v3:

printk_ringbuffer
=================

- move enum desc_state definition to printk_ringbuffer.h

- change enum desc_state to define the exact state values used
  in the state variable

- add DESC_STATE() macro to retrieve the state from the state
  variable

- add DESC_SV() macro to build a state variable value given an
  ID and state

- get_desc_state(): simply return the state value rather than
  processing state flags

- desc_finalized is now a queried state instead of a state flag

- desc_read(): always return a set @state_var, even if the
  descriptor is in an inconsistent state (desc_reopen_last()
  relies on this)

- change state logic that tested for desc_committed to now test
  for desc_finalized, since this is the new state directly
  preceding desc_reusable

- data_realloc(): add a check if the data block should shrink
  (and in that case, do not modify the data block, i.e. data
  blocks will never shrink)

- prb_reserve_in_last(): add WARN_ON for unexpected @text_len
  value

- prb_reserve(): save a copy of @seq and use memset() to
  clear @info

- desc_read_committed_seq(): rename function to
  desc_read_finalized_seq() since desc_finalized is the desired
  state for readers

- documentation: update state and finalization descriptions

printk.c
========

- use @text_len and @dict_len for memcpy() size

gdb scripts
===========

- update to use new state representation

John Ogness

[0] https://lkml.kernel.org/r/20200831011058.6286-1-john.ogn...@linutronix.de
[1] https://lkml.kernel.org/r/20200812163908.GH12903@alley
[2] https://lkml.kernel.org/r/87k1acz5rx@linutronix.de
[3] https://lkml.kernel.org/r/20200811160551.GC12903@alley
[4] https://github.com/Linutronix/prb-test.git


John Ogness (6):
  printk: ringbuffer: relocate get_data()
  printk: ringbuffer: add BLK_DATALESS() macro
  printk: ringbuffer: clear initial reserved fields
  printk: ringbuffer: change representation of states
  printk: ringbuffer: add finalization/extension support
  printk: reimplement log_cont using record extension

 Documentation/admin-guide/kdump/gdbmacros.txt |  13 +-
 kernel/printk/printk.c| 110 +--
 kernel/printk/printk_ringbuffer.c | 695 ++
 kernel/printk/printk_ringbuffer.h |  35 +-
 scripts/gdb/linux/dmesg.py|  12 +-
 5 files changed, 624 insertions(+), 241 deletions(-)

-- 
2.20.1


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec


[PATCH v2 2/7] kernel/resource: move and rename IORESOURCE_MEM_DRIVER_MANAGED

2020-09-08 Thread David Hildenbrand
IORESOURCE_MEM_DRIVER_MANAGED currently uses an unused PnP bit, which is
always set to 0 by hardware. This is far from beautiful (and confusing),
and the bit only applies to SYSRAM. So let's move it out of the
bus-specific (PnP) defined bits.

We'll add another SYSRAM specific bit soon. If we ever need more bits for
other purposes, we can steal some from "desc", or reshuffle/regroup what we
have.

Cc: Andrew Morton 
Cc: Michal Hocko 
Cc: Dan Williams 
Cc: Jason Gunthorpe 
Cc: Kees Cook 
Cc: Ard Biesheuvel 
Cc: Pankaj Gupta 
Cc: Baoquan He 
Cc: Wei Yang 
Cc: Eric Biederman 
Cc: Thomas Gleixner 
Cc: Greg Kroah-Hartman 
Cc: kexec@lists.infradead.org
Signed-off-by: David Hildenbrand 
---
 include/linux/ioport.h | 4 +++-
 kernel/kexec_file.c| 2 +-
 mm/memory_hotplug.c| 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 52a91f5fa1a36..d7620d7c941a0 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -58,6 +58,9 @@ struct resource {
 #define IORESOURCE_EXT_TYPE_BITS 0x0100/* Resource extended types */
 #define IORESOURCE_SYSRAM  0x0100  /* System RAM (modifier) */
 
+/* IORESOURCE_SYSRAM specific bits. */
+#define IORESOURCE_SYSRAM_DRIVER_MANAGED   0x0200 /* Always detected 
via a driver. */
+
 #define IORESOURCE_EXCLUSIVE   0x0800  /* Userland may not map this 
resource */
 
 #define IORESOURCE_DISABLED0x1000
@@ -103,7 +106,6 @@ struct resource {
 #define IORESOURCE_MEM_32BIT   (3<<3)
 #define IORESOURCE_MEM_SHADOWABLE  (1<<5)  /* dup: IORESOURCE_SHADOWABLE */
 #define IORESOURCE_MEM_EXPANSIONROM(1<<6)
-#define IORESOURCE_MEM_DRIVER_MANAGED  (1<<7)
 
 /* PnP I/O specific bits (IORESOURCE_BITS) */
 #define IORESOURCE_IO_16BIT_ADDR   (1<<0)
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index ca40bef75a616..dfeeed1aed084 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -520,7 +520,7 @@ static int locate_mem_hole_callback(struct resource *res, 
void *arg)
/* Returning 0 will take to next memory range */
 
/* Don't use memory that will be detected and handled by a driver. */
-   if (res->flags & IORESOURCE_MEM_DRIVER_MANAGED)
+   if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
return 0;
 
if (sz < kbuf->memsz)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 4c47b68a9f4b5..8e1cd18b5cf14 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -105,7 +105,7 @@ static struct resource *register_memory_resource(u64 start, 
u64 size,
unsigned long flags =  IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
if (strcmp(resource_name, "System RAM"))
-   flags |= IORESOURCE_MEM_DRIVER_MANAGED;
+   flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED;
 
/*
 * Make sure value parsed from 'mem=' only restricts memory adding
@@ -1160,7 +1160,7 @@ EXPORT_SYMBOL_GPL(add_memory);
  *
  * For this memory, no entries in /sys/firmware/memmap ("raw firmware-provided
  * memory map") are created. Also, the created memory resource is flagged
- * with IORESOURCE_MEM_DRIVER_MANAGED, so in-kernel users can special-case
+ * with IORESOURCE_SYSRAM_DRIVER_MANAGED, so in-kernel users can special-case
  * this memory as well (esp., not place kexec images onto it).
  *
  * The resource_name (visible via /proc/iomem) has to have the format
-- 
2.26.2


___
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec