[PATCH v2 5/5] export_report: Use new version info format

2023-11-17 Thread Matthew Maurer
The new version info format has a superset of symbols in the old format.
Since this is a tool for in-tree modules, we don't need to parse the old
one with this tool any longer.

Signed-off-by: Matthew Maurer 
---
 scripts/export_report.pl | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index dcef915405f3..6a37df6f947f 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -114,31 +114,29 @@ foreach my $thismod (@allcfiles) {
}
 
my $state=0;
+   # State map:
+   # 0 - Looking for names
+   # 1 - Scanning names
+   # 2 - Done
while ( <$module> ) {
chomp;
if ($state == 0) {
-   $state = 1 if ($_ =~ /static const struct 
modversion_info/);
+   $state = 1 if ($_ =~ /__used 
__section\("__version_ext_names"\)/);
next;
}
if ($state == 1) {
-   $state = 2 if ($_ =~ /__used 
__section\("__versions"\)/);
-   next;
-   }
-   if ($state == 2) {
-   if ( $_ =~ /};/ ) {
-   $state = 3;
-   next;
-   }
-   if ( $_ !~ /0x[0-9a-f]+,/ ) {
+   if ( $_ =~ /;/ ) {
+   $state = 2;
next;
}
-   my $sym = (split /([,"])/,)[4];
+   $_ =~ /"(.*)\\0"/;
+   my $sym = $1;
my ($module, $value, $symbol, $gpl) = @{$SYMBOL{$sym}};
$SYMBOL{ $sym } =  [ $module, $value+1, $symbol, $gpl];
push(@{$MODULE{$thismod}} , $sym);
}
}
-   if ($state != 3) {
+   if ($state != 2) {
warn "WARNING:$thismod is not built with CONFIG_MODVERSIONS 
enabled\n";
$modversion_warnings++;
}
-- 
2.43.0.rc0.421.g78406f8d94-goog



[PATCH v2 4/5] rust: Allow MODVERSIONS

2023-11-17 Thread Matthew Maurer
With variable length symbol names from extended modversions, we can
enable MODVERSIONS alongside RUST due to support for its longer symbol
names.

Signed-off-by: Matthew Maurer 
---
 init/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index 9ffb103fc927..6cac5b4db8f6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1885,7 +1885,6 @@ config RUST
bool "Rust support"
depends on HAVE_RUST
depends on RUST_IS_AVAILABLE
-   depends on !MODVERSIONS
depends on !GCC_PLUGINS
depends on !RANDSTRUCT
depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
-- 
2.43.0.rc0.421.g78406f8d94-goog



[PATCH v2 3/5] modpost: Extended modversion support

2023-11-17 Thread Matthew Maurer
Adds a new format for modversions which stores each field in a separate
ELF section. This initially adds support for variable length names, but
could later be used to add additional fields to modversions in a
backwards compatible way if needed.

Since PPC munges its version records to strip leading dots, we reproduce
the munging for the new format. Other architectures do not appear to
have architecture-specific usage of this information.

Signed-off-by: Matthew Maurer 
---
 arch/powerpc/kernel/module_64.c | 25 -
 kernel/module/internal.h| 11 
 kernel/module/main.c| 92 ++---
 kernel/module/version.c | 43 +++
 scripts/mod/modpost.c   | 37 +++--
 5 files changed, 195 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 7112adc597a8..bff03627014c 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* FIXME: We don't do .init separately.  To do this, we'd need to have
a separate r2 value in the init and core section, and stub between
@@ -355,6 +356,24 @@ static void dedotify_versions(struct modversion_info *vers,
}
 }
 
+static void dedotify_ext_version_names(char *str_seq, unsigned long size)
+{
+   unsigned long out = 0;
+   unsigned long in;
+   char last = '\0';
+
+   for (in = 0; in < size; in++) {
+   if (last == '\0')
+   /* Skip all leading dots */
+   if (str_seq[in] == '.')
+   continue;
+   last = str_seq[in];
+   str_seq[out++] = last;
+   }
+   /* Zero the trailing portion of the names table for robustness */
+   bzero(&str_seq[out], size - out);
+}
+
 /*
  * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
  * seem to be defined (value set later).
@@ -424,10 +443,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
me->arch.toc_section = i;
if (sechdrs[i].sh_addralign < 8)
sechdrs[i].sh_addralign = 8;
-   }
-   else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+   } else if (strcmp(secstrings + sechdrs[i].sh_name, 
"__versions") == 0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
  sechdrs[i].sh_size);
+   else if (strcmp(secstrings + sechdrs[i].sh_name, 
"__version_ext_names") == 0)
+   dedotify_ext_version_names((void *)hdr + 
sechdrs[i].sh_offset,
+  sechdrs[i].sh_size);
 
if (sechdrs[i].sh_type == SHT_SYMTAB)
dedotify((void *)hdr + sechdrs[i].sh_offset,
diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index d8dc52eb9c82..35d89c3f657c 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -86,6 +86,8 @@ struct load_info {
unsigned int vers;
unsigned int info;
unsigned int pcpu;
+   unsigned int vers_ext_crc;
+   unsigned int vers_ext_name;
} index;
 };
 
@@ -389,6 +391,15 @@ void module_layout(struct module *mod, struct 
modversion_info *ver, struct kerne
   struct kernel_symbol *ks, struct tracepoint * const *tp);
 int check_modstruct_version(const struct load_info *info, struct module *mod);
 int same_magic(const char *amagic, const char *bmagic, bool has_crcs);
+struct modversion_info_ext {
+   size_t remaining;
+   const s32 *crc;
+   const char *name;
+};
+void modversion_ext_start(const struct load_info *info, struct 
modversion_info_ext *ver);
+void modversion_ext_advance(struct modversion_info_ext *ver);
+#define for_each_modversion_info_ext(ver, info) \
+   for (modversion_ext_start(info, &ver); ver.remaining > 0; 
modversion_ext_advance(&ver))
 #else /* !CONFIG_MODVERSIONS */
 static inline int check_version(const struct load_info *info,
const char *symname,
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 8b2848b3183a..2b175b7953e7 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -2021,6 +2021,82 @@ static int elf_validity_cache_index_str(struct load_info 
*info)
return 0;
 }
 
+/**
+ * elf_validity_cache_index_versions() - Validate and cache version indices
+ * @info:  Load info to cache version indices in.
+ * Must have &load_info->sechdrs and &load_info->secstrings populated.
+ * @flags: Load flags, relevant to suppress version loading, see
+ * uapi/linux/module.h
+ *
+ * If we're ignoring modversions based on @flags, zero all version indices
+ * and return validity. Othewrise check:
+ *
+ * * If "__version_ext_crcs

[PATCH v2 2/5] modules: Refactor + kdoc elf_validity_cached_copy

2023-11-17 Thread Matthew Maurer
Functionality is almost identical, just structured for better
documentation and readability. Changes:

* Section names are checked for *all* non-SHT_NULL sections, not just
  SHF_ALLOC sections. We have code that accesses section names of
  non-SHF_ALLOC sections (see find_any_sec for example)
* The section name check occurs *before* strcmping on section names.
  Previously, it was possible to use an out-of-bounds offset to strcmp
  against ".modinfo" or ".gnu.linkonce.this_module"
* strtab is checked for NUL lead+termination and nonzero size
* The symbol table is swept to ensure offsets are inbounds of strtab

While some of these oversights would normally be worrying, all of the
potentially unverified accesses occur after signature check, and only in
response to a user who can load a kernel module.

Signed-off-by: Matthew Maurer 
---
 kernel/module/internal.h |   7 +-
 kernel/module/main.c | 585 +--
 2 files changed, 444 insertions(+), 148 deletions(-)

diff --git a/kernel/module/internal.h b/kernel/module/internal.h
index c8b7b4dcf782..d8dc52eb9c82 100644
--- a/kernel/module/internal.h
+++ b/kernel/module/internal.h
@@ -80,7 +80,12 @@ struct load_info {
unsigned int used_pages;
 #endif
struct {
-   unsigned int sym, str, mod, vers, info, pcpu;
+   unsigned int sym;
+   unsigned int str;
+   unsigned int mod;
+   unsigned int vers;
+   unsigned int info;
+   unsigned int pcpu;
} index;
 };
 
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 98fedfdb8db5..8b2848b3183a 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -193,6 +193,38 @@ static unsigned int find_sec(const struct load_info *info, 
const char *name)
return 0;
 }
 
+/**
+ * find_any_unique_sec() - Find a unique section index by name
+ * @info: Load info for the module to scan
+ * @name: Name of the section we're looking for
+ *
+ * Locates a unique section by name. Ignores SHF_ALLOC.
+ *
+ * Return: Section index if found uniquely, zero if absent, negative count
+ * of total instances if multiple were found.
+ */
+static int find_any_unique_sec(const struct load_info *info, const char *name)
+{
+   unsigned int idx;
+   unsigned int count = 0;
+   int i;
+
+   for (i = 1; i < info->hdr->e_shnum; i++) {
+   if (strcmp(info->secstrings + info->sechdrs[i].sh_name,
+  name) == 0) {
+   count++;
+   idx = i;
+   }
+   }
+   if (count == 1) {
+   return idx;
+   } else if (count == 0) {
+   return 0;
+   } else {
+   return -count;
+   }
+}
+
 /* Find a module section, or NULL. */
 static void *section_addr(const struct load_info *info, const char *name)
 {
@@ -1627,7 +1659,7 @@ bool __weak module_exit_section(const char *name)
return strstarts(name, ".exit");
 }
 
-static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr)
+static int validate_section_offset(const struct load_info *info, Elf_Shdr 
*shdr)
 {
 #if defined(CONFIG_64BIT)
unsigned long long secend;
@@ -1646,62 +1678,80 @@ static int validate_section_offset(struct load_info 
*info, Elf_Shdr *shdr)
return 0;
 }
 
-/*
- * Check userspace passed ELF module against our expectations, and cache
- * useful variables for further processing as we go.
- *
- * This does basic validity checks against section offsets and sizes, the
- * section name string table, and the indices used for it (sh_name).
+/**
+ * elf_validity_ehdr() - Checks an ELF header for module validity
+ * @info: Load info containing the ELF header to check
  *
- * As a last step, since we're already checking the ELF sections we cache
- * useful variables which will be used later for our convenience:
+ * Checks whether an ELF header could belong to a valid module. Checks:
  *
- * o pointers to section headers
- * o cache the modinfo symbol section
- * o cache the string symbol section
- * o cache the module section
+ * * ELF header is within the data the user provided
+ * * ELF magic is present
+ * * It is relocatable (not final linked, not core file, etc.)
+ * * The header's machine type matches what the architecture expects.
+ * * Optional arch-specific hook for other properties
+ *   - module_elf_check_arch() is currently only used by PPC to check
+ *   ELF ABI version, but may be used by others in the future.
  *
- * As a last step we set info->mod to the temporary copy of the module in
- * info->hdr. The final one will be allocated in move_module(). Any
- * modifications we make to our copy of the module will be carried over
- * to the final minted module.
+ * Return: %0 if valid, %-ENOEXEC on failure.
  */
-static int elf_validity_cache_copy(struct load_info *info, int flags)
+static int elf_validity_ehdr(const struct load_info 

[PATCH v2 1/5] export_report: Rehabilitate script

2023-11-17 Thread Matthew Maurer
* modules.order has .o files when in a build dir, support this
* .mod.c source layout has changed, update regexes to match
* Add a stage 3, to be more robust against additional .mod.c content

Signed-off-by: Matthew Maurer 
---
 scripts/export_report.pl | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/export_report.pl b/scripts/export_report.pl
index feb3d5542a62..dcef915405f3 100755
--- a/scripts/export_report.pl
+++ b/scripts/export_report.pl
@@ -55,6 +55,7 @@ sub collectcfiles {
 open my $fh, '< modules.order' or die "cannot open modules.order: $!\n";
 while (<$fh>) {
s/\.ko$/.mod.c/;
+   s/\.o$/.mod.c/;
push (@file, $_)
 }
 close($fh);
@@ -120,10 +121,14 @@ foreach my $thismod (@allcfiles) {
next;
}
if ($state == 1) {
-   $state = 2 if ($_ =~ 
/__attribute__\(\(section\("__versions"\)\)\)/);
+   $state = 2 if ($_ =~ /__used 
__section\("__versions"\)/);
next;
}
if ($state == 2) {
+   if ( $_ =~ /};/ ) {
+   $state = 3;
+   next;
+   }
if ( $_ !~ /0x[0-9a-f]+,/ ) {
next;
}
@@ -133,7 +138,7 @@ foreach my $thismod (@allcfiles) {
push(@{$MODULE{$thismod}} , $sym);
}
}
-   if ($state != 2) {
+   if ($state != 3) {
warn "WARNING:$thismod is not built with CONFIG_MODVERSIONS 
enabled\n";
$modversion_warnings++;
}
-- 
2.43.0.rc0.421.g78406f8d94-goog



[PATCH v2 0/5] MODVERSIONS + RUST Redux

2023-11-17 Thread Matthew Maurer
The goal of this patch series is to allow MODVERSIONS and RUST to be
enabled simultaneously. The primary issue with doing this at the moment
is that Rust uses some extremely long symbol names - for those
unfamiliar with Rust, it may be helpful to think of some of the mangled
C++ names you may have seen in binaries in the past.

Previously, Gary Guo attempted to accomplish this by modifying the
existing modversion format [1] to support variable-length symbol names.
This was unfortunately considered to be a potential userspace break
because kmod tools inspect this kernel module metadata. Masahiro Yamada
suggested [2] that this could instead be done with a section per-field.
This gives us the ability to be more flexible with this format in the
future, as a new field or additional information will be in a new
section which userspace tools will not yet attempt to read.

In the previous version of this patchset, Luis Chamberlain suggested [3]
I move validation out of the version checking and into the elf validity
checker, and also add kernel-docs over there. I found
elf_validity_cached_copy to be fairly dense and difficult to directly
describe, so I refactored it into easier to explain pieces. In the
process, I found a few missing checks and added those as well. See
[PATCH 2/5] for more details. If this is too much, I'm more than happy
to drop this patch from the series in favor of just adding the
kernel-doc to the original code, but figured I'd offer it up in case the
added clarity and checks were valuable.

[1] https://lore.kernel.org/lkml/2023061155.1349375-1-g...@garyguo.net/
[2] 
https://lore.kernel.org/lkml/CAK7LNATsuszFR7JB5ZkqVS1W=hWr9=e7btf+mvgj+nxt3az...@mail.gmail.com/
[3] https://lore.kernel.org/lkml/zvznh%2fpa5hivr...@bombadil.infradead.org/

Matthew Maurer (5):
  export_report: Rehabilitate script
  modules: Refactor + kdoc elf_validity_cached_copy
  modpost: Extended modversion support
  rust: Allow MODVERSIONS
  export_report: Use new version info format

 arch/powerpc/kernel/module_64.c |  25 +-
 init/Kconfig|   1 -
 kernel/module/internal.h|  18 +-
 kernel/module/main.c| 663 +---
 kernel/module/version.c |  43 +++
 scripts/export_report.pl|  17 +-
 scripts/mod/modpost.c   |  37 +-
 7 files changed, 642 insertions(+), 162 deletions(-)

-- 
2.43.0.rc0.421.g78406f8d94-goog



[PATCH v4 04/13] powerpc/rtas: Factor out function descriptor lookup

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Move the function descriptor table lookup out of rtas_function_token()
into a separate routine for use in new code to follow. No functional
change.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 31 +++
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f0051881348a..1fc0b3fffdd1 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -469,29 +469,36 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
 static DEFINE_RAW_SPINLOCK(rtas_lock);
 static struct rtas_args rtas_args;
 
-/**
- * rtas_function_token() - RTAS function token lookup.
- * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
- *
- * Context: Any context.
- * Return: the token value for the function if implemented by this platform,
- * otherwise RTAS_UNKNOWN_SERVICE.
- */
-s32 rtas_function_token(const rtas_fn_handle_t handle)
+static struct rtas_function *rtas_function_lookup(const rtas_fn_handle_t 
handle)
 {
const size_t index = handle.index;
const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
 
if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
/*
 * Various drivers attempt token lookups on non-RTAS
 * platforms.
 */
if (!rtas.dev)
-   return RTAS_UNKNOWN_SERVICE;
+   return NULL;
+
+   return &rtas_function_table[index];
+}
+
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ * otherwise RTAS_UNKNOWN_SERVICE.
+ */
+s32 rtas_function_token(const rtas_fn_handle_t handle)
+{
+   const struct rtas_function *func = rtas_function_lookup(handle);
 
-   return rtas_function_table[index].token;
+   return func ? func->token : RTAS_UNKNOWN_SERVICE;
 }
 EXPORT_SYMBOL_GPL(rtas_function_token);
 

-- 
2.41.0



[PATCH v4 02/13] powerpc/rtas: Fall back to linear search on failed token->function lookup

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Enabling any of the powerpc:rtas_* tracepoints at boot is likely to
result in an oops on RTAS platforms. For example, booting a QEMU
pseries model with 'trace_event=powerpc:rtas_input' in the command
line leads to:

  BUG: Kernel NULL pointer dereference on read at 0x0008
  Oops: Kernel access of bad area, sig: 7 [#1]
  NIP [c004231c] do_enter_rtas+0x1bc/0x460
  LR [c004231c] do_enter_rtas+0x1bc/0x460
  Call Trace:
do_enter_rtas+0x1bc/0x460 (unreliable)
rtas_call+0x22c/0x4a0
rtas_get_boot_time+0x80/0x14c
read_persistent_clock64+0x124/0x150
read_persistent_wall_and_boot_offset+0x28/0x58
timekeeping_init+0x70/0x348
start_kernel+0xa0c/0xc1c
start_here_common+0x1c/0x20

(This is preceded by a warning for the failed lookup in
rtas_token_to_function().)

This happens when __do_enter_rtas_trace() attempts a token to function
descriptor lookup before the xarray containing the mappings has been
set up.

Fall back to linear scan of the table if rtas_token_to_function_xarray
is empty.

Signed-off-by: Nathan Lynch 
Fixes: 24098f580e2b ("powerpc/rtas: add tracepoints around RTAS entry")
---
 arch/powerpc/kernel/rtas.c | 18 ++
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 1ad1869e2e96..f0051881348a 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -557,11 +557,21 @@ static const struct rtas_function 
*rtas_token_to_function(s32 token)
return NULL;
 
func = xa_load(&rtas_token_to_function_xarray, token);
+   if (func)
+   return func;
+   /*
+* Fall back to linear scan in case the reverse mapping hasn't
+* been initialized yet.
+*/
+   if (xa_empty(&rtas_token_to_function_xarray)) {
+   for_each_rtas_function(func) {
+   if (func->token == token)
+   return func;
+   }
+   }
 
-   if (WARN_ONCE(!func, "unexpected failed lookup for token %d", token))
-   return NULL;
-
-   return func;
+   WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+   return NULL;
 }
 
 /* This is here deliberately so it's only used in this file */

-- 
2.41.0



[PATCH v4 08/13] powerpc/uapi: Export papr-miscdev.h header

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Allocate one identifying code (the first column of the ioctl-number
table) for the collection of PAPR miscdev drivers to share.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/uapi/asm/papr-miscdev.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h 
b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index ..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+   PAPR_MISCDEV_IOC_ID = 0xb2,
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */

-- 
2.41.0



[PATCH v4 06/13] powerpc/rtas: Serialize firmware activation sequences

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Use the function lock API to prevent interleaving call sequences of
the ibm,activate-firmware RTAS function, which typically requires
multiple calls to complete the update. While the spec does not
specifically prohibit interleaved sequences, there's almost certainly
no advantage to allowing them.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 52f2242d0c28..e38ba05ad613 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1753,10 +1753,14 @@ void rtas_activate_firmware(void)
return;
}
 
+   rtas_function_lock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
do {
fwrc = rtas_call(token, 0, 1, NULL);
} while (rtas_busy_delay(fwrc));
 
+   rtas_function_unlock(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+
if (fwrc)
pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
 }

-- 
2.41.0



[PATCH v4 09/13] powerpc/pseries: Add papr-vpd character driver for VPD retrieval

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

PowerVM LPARs may retrieve Vital Product Data (VPD) for system
components using the ibm,get-vpd RTAS function.

We can expose this to user space with a /dev/papr-vpd character
device, where the programming model is:

  struct papr_location_code plc = { .str = "", }; /* obtain all VPD */
  int devfd = open("/dev/papr-vpd", O_RDONLY);
  int vpdfd = ioctl(devfd, PAPR_VPD_CREATE_HANDLE, &plc);
  size_t size = lseek(vpdfd, 0, SEEK_END);
  char *buf = malloc(size);
  pread(devfd, buf, size, 0);

When a file descriptor is obtained from ioctl(PAPR_VPD_CREATE_HANDLE),
the file contains the result of a complete ibm,get-vpd sequence. The
file contents are immutable from the POV of user space. To get a new
view of the VPD, the client must create a new handle.

This design choice insulates user space from most of the complexities
that ibm,get-vpd brings:

* ibm,get-vpd must be called more than once to obtain complete
  results.

* Only one ibm,get-vpd call sequence should be in progress at a time;
  interleaved sequences will disrupt each other. Callers must have a
  protocol for serializing their use of the function.

* A call sequence in progress may receive a "VPD changed, try again"
  status, requiring the client to abandon the sequence and start
  over.

The memory required for the VPD buffers seems acceptable, around 20KB
for all VPD on one of my systems. And the value of the
/rtas/ibm,vpd-size DT property (the estimated maximum size of VPD) is
consistently 300KB across various systems I've checked.

I've implemented support for this new ABI in the rtas_get_vpd()
function in librtas, which the vpdupdate command currently uses to
populate its VPD database. I've verified that an unmodified vpdupdate
binary generates an identical database when using a librtas.so that
prefers the new ABI.

Note that the driver needs to serialize its call sequences with legacy
sys_rtas(ibm,get-vpd) callers, so it exposes its internal lock for
sys_rtas.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-vpd.c  | 536 +
 4 files changed, 561 insertions(+)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 4ea5b837399a..a950545bf7cd 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -349,6 +349,8 @@ Code  Seq#Include File  
 Comments
  

 0xB1  00-1F  PPPoX
  

+0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h 
b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index ..b62e4f897a70
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include 
+#include 
+
+struct papr_location_code {
+   /*
+* PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+* Restrictions. 79 characters plus nul.
+*/
+   char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct 
papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/platforms/pseries/Makefile 
b/arch/powerpc/platforms/pseries/Makefile
index 1476c5e4433c..f936962a2946 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG
 
 obj-y  := lpar.o hvCall.o nvram.o reconfig.o \
   of_helpers.o rtas-work-area.o papr-sysparm.o \
+  papr-vpd.o \
   setup.o iommu.o event_sources.o ras.o \
   firmware.o power.o dlpar.o mobility.o rng.o \
   pci.o pci_dlpar.o eeh_pseries.o msi.o \
diff --git a/arch/powerpc/platforms/pseries/papr-vpd.c 
b/arch/powerpc/platforms/pseries/papr-vpd.c
new file mode 100644
i

[PATCH v4 11/13] powerpc/pseries/papr-sysparm: Expose character device to user space

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Until now the papr_sysparm APIs have been kernel-internal. But user
space needs access to PAPR system parameters too. The only method
available to user space today to get or set system parameters is using
sys_rtas() and /dev/mem to pass RTAS-addressable buffers between user
space and firmware. This is incompatible with lockdown and should be
deprecated.

So provide an alternative ABI to user space in the form of a
/dev/papr-sysparm character device with just two ioctl commands (get
and set). The data payloads involved are small enough to fit in the
ioctl argument buffer, making the code relatively simple.

Exposing the system parameters through sysfs has been considered but
it would be too awkward:

* The kernel currently does not have to contain an exhaustive list of
  defined system parameters. This is a convenient property to maintain
  because we don't have to update the kernel whenever a new parameter
  is added to PAPR. Exporting a named attribute in sysfs for each
  parameter would negate this.

* Some system parameters are text-based and some are not.

* Retrieval of at least one system parameter requires input data,
  which a simple read-oriented interface can't support.

Signed-off-by: Nathan Lynch 
---
 Documentation/userspace-api/ioctl/ioctl-number.rst |   2 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 ++-
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 154 -
 4 files changed, 223 insertions(+), 8 deletions(-)

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst 
b/Documentation/userspace-api/ioctl/ioctl-number.rst
index a950545bf7cd..d8b6cb1a3636 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -351,6 +351,8 @@ Code  Seq#Include File  
 Comments
  

 0xB2  00 arch/powerpc/include/uapi/asm/papr-vpd.h
powerpc/pseries VPD API
  

+0xB2  01-02  arch/powerpc/include/uapi/asm/papr-sysparm.h
powerpc/pseries system parameter API
+ 

 0xB3  00 linux/mmc/ioctl.h
 0xB4  00-0F  linux/gpio.h

 0xB5  00-0F  uapi/linux/rpmsg.h  

diff --git a/arch/powerpc/include/asm/papr-sysparm.h 
b/arch/powerpc/include/asm/papr-sysparm.h
index f5fdbd8ae9db..0dbbff59101d 100644
--- a/arch/powerpc/include/asm/papr-sysparm.h
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -2,8 +2,10 @@
 #ifndef _ASM_POWERPC_PAPR_SYSPARM_H
 #define _ASM_POWERPC_PAPR_SYSPARM_H
 
+#include 
+
 typedef struct {
-   const u32 token;
+   u32 token;
 } papr_sysparm_t;
 
 #define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
@@ -20,11 +22,14 @@ typedef struct {
 #define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRSmk_papr_sysparm(50)
 #define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
 
-enum {
-   PAPR_SYSPARM_MAX_INPUT  = 1024,
-   PAPR_SYSPARM_MAX_OUTPUT = 4000,
-};
-
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter 
functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
 struct papr_sysparm_buf {
__be16 len;
char val[PAPR_SYSPARM_MAX_OUTPUT];
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h 
b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index ..9f9a0f267ea5
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include 
+#include 
+#include 
+
+enum {
+   PAPR_SYSPARM_MAX_INPUT  = 1024,
+   PAPR_SYSPARM_MAX_OUTPUT = 4000,
+};
+
+struct papr_sysparm_io_block {
+   __u32 parameter;
+   __u16 length;
+   char data[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to pa

[PATCH v4 12/13] powerpc/selftests: Add test for papr-vpd

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add selftests for /dev/papr-vpd, exercising the common expected use
cases:

* Retrieve all VPD by passing an empty location code.
* Retrieve the "system VPD" by passing a location code derived from DT
  root node properties, as done by the vpdupdate command.

The tests also verify that certain intended properties of the driver
hold:

* Passing an unterminated location code to PAPR_VPD_CREATE_HANDLE gets
  EINVAL.
* Passing a NULL location code pointer to PAPR_VPD_CREATE_HANDLE gets
  EFAULT.
* Closing the device node without first issuing a
  PAPR_VPD_CREATE_HANDLE command to it succeeds.
* Releasing a handle without first consuming any data from it
  succeeds.
* Re-reading the contents of a handle returns the same data as the
  first time.

Some minimal validation of the returned data is performed.

The tests are skipped on systems where the papr-vpd driver does not
initialize, making this useful only on PowerVM LPARs at this point.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../testing/selftests/powerpc/papr_vpd/.gitignore  |   1 +
 tools/testing/selftests/powerpc/papr_vpd/Makefile  |  12 +
 .../testing/selftests/powerpc/papr_vpd/papr_vpd.c  | 352 +
 4 files changed, 366 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 7ea42fa02eab..05fc68d446c2 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -32,6 +32,7 @@ SUB_DIRS = alignment  \
   vphn \
   math \
   papr_attributes  \
+  papr_vpd \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore 
b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index ..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile 
b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index ..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c 
b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index ..98cbb9109ee6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   off_t size;
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size = lseek(fd, 0, SEEK_END);
+   FAIL_IF(size <= 0);
+
+   void *buf = malloc((size_t)size);
+   FAIL_IF(!buf);
+
+   ssize_t consumed = pread(fd, buf, size, 0);
+   FAIL_IF(consumed != size);
+
+   /* Ensure EOF */
+   FAIL_IF(read(fd, buf, size) != 0);
+   FAIL_IF(close(fd));
+
+   /* Verify that the buffer looks like VPD */
+   static const char needle[] = "System VPD";
+   FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+   return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+   struct papr_location_code lc = { .str = "", };
+   int fd;
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   errno = 0;
+   fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+   FAIL_IF(errno != 0);
+   FAIL_IF(fd < 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   size_t consumed = 0;
+   while (1) {
+   ssize_t res;
+   char c;
+
+   errno = 0;
+   res = read(fd, &c, sizeof(c));
+   FAIL_IF(res > sizeof(c))

[PATCH v4 13/13] powerpc/selftests: Add test for papr-sysparm

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Consistently testing system parameter access is a bit difficult by
nature -- the set of parameters available depends on the model and
system configuration, and updating a parameter should be considered a
destructive operation reserved for the admin.

So we validate some of the error paths and retrieve the SPLPAR
characteristics string, but not much else.

Signed-off-by: Nathan Lynch 
---
 tools/testing/selftests/powerpc/Makefile   |   1 +
 .../selftests/powerpc/papr_sysparm/.gitignore  |   1 +
 .../selftests/powerpc/papr_sysparm/Makefile|  12 ++
 .../selftests/powerpc/papr_sysparm/papr_sysparm.c  | 164 +
 4 files changed, 178 insertions(+)

diff --git a/tools/testing/selftests/powerpc/Makefile 
b/tools/testing/selftests/powerpc/Makefile
index 05fc68d446c2..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -33,6 +33,7 @@ SUB_DIRS = alignment  \
   math \
   papr_attributes  \
   papr_vpd \
+  papr_sysparm \
   ptrace   \
   security \
   mce
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore 
b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index ..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile 
b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index ..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+   $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c 
b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index ..fc25c03e8bc7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_splpar(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 20, // SPLPAR characteristics
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+   FAIL_IF(sp.length == 0);
+   FAIL_IF(sp.length > sizeof(sp.data));
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int get_bad_parameter(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = UINT32_MAX, // there are only ~60 specified 
parameters
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+   FAIL_IF(errno != EOPNOTSUPP);
+
+   // Ensure the buffer is unchanged
+   FAIL_IF(sp.length != 0);
+   for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+   FAIL_IF(sp.data[i] != 0);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+   FAIL_IF(errno != EFAULT);
+
+   FAIL_IF(close(devfd) != 0);
+
+   return 0;
+}
+
+static int check_efault_get(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+   return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+   struct papr_sysparm_io_block sp = {
+   .parameter = 0, // HMC0, not a settable parameter
+   };
+   const int devfd = open(DEVPATH, O_RDONLY);
+
+   SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+   DEVPATH " not present");
+
+   FAIL_IF(devfd < 0);
+
+   // Ensure expected error
+   FAIL_IF(ioctl(devfd, PAPR_

[PATCH v4 01/13] powerpc/rtas: Add for_each_rtas_function() iterator

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Add a convenience macro for iterating over every element of the
internal function table and convert the one site that can use it. An
additional user of the macro is anticipated in changes to follow.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index eddc031c4b95..1ad1869e2e96 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -454,6 +454,11 @@ static struct rtas_function rtas_function_table[] 
__ro_after_init = {
},
 };
 
+#define for_each_rtas_function(funcp)   \
+   for (funcp = &rtas_function_table[0];   \
+funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+++funcp)
+
 /*
  * Nearly all RTAS calls need to be serialized. All uses of the
  * default rtas_args block must hold rtas_lock.
@@ -525,10 +530,10 @@ static DEFINE_XARRAY(rtas_token_to_function_xarray);
 
 static int __init rtas_token_to_function_xarray_init(void)
 {
+   const struct rtas_function *func;
int err = 0;
 
-   for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
-   const struct rtas_function *func = &rtas_function_table[i];
+   for_each_rtas_function(func) {
const s32 token = func->token;
 
if (token == RTAS_UNKNOWN_SERVICE)

-- 
2.41.0



[PATCH v4 00/13] powerpc/pseries: New character devices for system parameters and VPD

2023-11-17 Thread Nathan Lynch via B4 Relay
Add character devices that expose PAPR-specific system parameters and
VPD to user space.

The problem: important platform features are enabled on Linux VMs
through the powerpc-specific rtas() syscall in combination with
writeable mappings of /dev/mem. In typical usage, this is encapsulated
behind APIs provided by the librtas library. This paradigm is
incompatible with lockdown, which prohibits /dev/mem access. It also
is too low-level in many cases: a single logical operation may require
multiple sys_rtas() calls in succession to complete. This carries the
risk that a process may exit while leaving an operation unfinished. It
also means that callers must coordinate their use of the syscall for
functions that cannot tolerate multiple concurrent clients, such as
ibm,get-vpd.

The solution presented here is to add a pair of small pseries-specific
"drivers," one for VPD and one for system parameters. The new drivers
expose these facilities to user space in ways that are compatible with
lockdown and require no coordination between their clients.

Since the ibm,get-vpd call sequence performed by the papr-vpd driver
must be serialized against all other uses of the function, the series
begins by adding some new APIs to the core RTAS support code for this
purpose.

Both drivers could potentially support poll() methods to notify
clients of changes to parameters or VPD that happen due to partition
migration and other events. But that should be safe to leave for
later, assuming there's any interest.

I have made changes to librtas to prefer the new interfaces and
verified that existing clients work correctly with the new code. A
draft PR for that work is here:

https://github.com/ibm-power-utilities/librtas/pull/36

The user-space ABI has not changed since v1 of this series.

I expect to propose at least one more small driver in this style for
platform dump retrieval in a separate submission in the future.

---
Changes in v4:
- Fix latent issue in rtas_token_to_function() which causes boot-time
  crashes.
- More small preparatory changes: a function table iterator and
  additional symbolic constants for RTAS function return values.
- Use symbolic constants for ibm,get-vpd statuses in papr-vpd.c.
- Add commentary to papr_vpd_ioc_create_handle() explaining choice to
  retrieve all VPD at file handle creation time instead of deferring
  it to the read handler.
- Rebase on current powerpc/next.
- Link to v3: 
https://lore.kernel.org/r/20231025-papr-sys_rtas-vs-lockdown-v3-0-5eb04559e...@linux.ibm.com

Changes in v3:
- Add new rtas_function_lock()/unlock() APIs and convert existing code
  to use them.
- Convert papr-vpd to use rtas_function_lock()/unlock() instead of
  having sys_rtas() obtain a driver-private mutex.
- Rebase on current powerpc/next.
- Link to v2: 
https://lore.kernel.org/r/20231013-papr-sys_rtas-vs-lockdown-v2-0-ead01ce01...@linux.ibm.com

Changes in v2:
- Fix unused-but-set variable warning in papr-sysparm code.
- Rebase on powerpc/next branch.
- Link to v1: 
https://lore.kernel.org/r/20231006-papr-sys_rtas-vs-lockdown-v1-0-3a36bfb66...@linux.ibm.com

Changes in v1 vs initial RFC:
- Add papr-sysparm driver and tests.
- Add a papr-miscdev.h uapi header.
- Prevent sys_rtas() from interfering with papr-vpd call sequences.
- Handle -4 ("VPD changed") status in papr-vpd.
- Include string_helpers.h in papr-vpd.c, per Michal Suchánek
- Link to RFC: 
https://lore.kernel.org/r/20230822-papr-sys_rtas-vs-lockdown-v1-0-932623cf3...@linux.ibm.com

---
Nathan Lynch (13):
  powerpc/rtas: Add for_each_rtas_function() iterator
  powerpc/rtas: Fall back to linear search on failed token->function lookup
  powerpc/rtas: Add function return status constants
  powerpc/rtas: Factor out function descriptor lookup
  powerpc/rtas: Facilitate high-level call sequences
  powerpc/rtas: Serialize firmware activation sequences
  powerpc/rtas: Warn if per-function lock isn't held
  powerpc/uapi: Export papr-miscdev.h header
  powerpc/pseries: Add papr-vpd character driver for VPD retrieval
  powerpc/pseries/papr-sysparm: Validate buffer object lengths
  powerpc/pseries/papr-sysparm: Expose character device to user space
  powerpc/selftests: Add test for papr-vpd
  powerpc/selftests: Add test for papr-sysparm

 Documentation/userspace-api/ioctl/ioctl-number.rst |   4 +
 arch/powerpc/include/asm/papr-sysparm.h|  17 +-
 arch/powerpc/include/asm/rtas.h|  27 +-
 arch/powerpc/include/uapi/asm/papr-miscdev.h   |   9 +
 arch/powerpc/include/uapi/asm/papr-sysparm.h   |  58 +++
 arch/powerpc/include/uapi/asm/papr-vpd.h   |  22 +
 arch/powerpc/kernel/rtas.c | 182 +--
 arch/powerpc/platforms/pseries/Makefile|   1 +
 arch/powerpc/platforms/pseries/papr-sysparm.c  | 201 +++-
 arch/powerpc/platforms/pseries/papr-vpd.c  | 536 +
 tools/testing/selftests/powerpc/Makefile  

[PATCH v4 05/13] powerpc/rtas: Facilitate high-level call sequences

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

On RTAS platforms there is a general restriction that the OS must not
enter RTAS on more than one CPU at a time. This low-level
serialization requirement is satisfied by holding a spin
lock (rtas_lock) across most RTAS function invocations.

However, some pseries RTAS functions require multiple successive calls
to complete a logical operation. Beginning a new call sequence for such a
function may disrupt any other sequences of that function already in
progress. Safe and reliable use of these functions effectively
requires higher-level serialization beyond what is already done at the
level of RTAS entry and exit.

Where a sequence-based RTAS function is invoked only through
sys_rtas(), with no in-kernel users, there is no issue as far as the
kernel is concerned. User space is responsible for appropriately
serializing its call sequences. (Whether user space code actually
takes measures to prevent sequence interleaving is another matter.)
Examples of such functions currently include ibm,platform-dump and
ibm,get-vpd.

But where a sequence-based RTAS function has both user space and
in-kernel uesrs, there is a hazard. Even if the in-kernel call sites
of such a function serialize their sequences correctly, a user of
sys_rtas() can invoke the same function at any time, potentially
disrupting a sequence in progress.

So in order to prevent disruption of kernel-based RTAS call sequences,
they must serialize not only with themselves but also with sys_rtas()
users, somehow. Preferably without adding global locks or adding more
function-specific hacks to sys_rtas(). This is a prerequisite for
adding an in-kernel call sequence of ibm,get-vpd, which is in a change
to follow.

Note that it has never been feasible for the kernel to prevent
sys_rtas()-based sequences from being disrupted because control
returns to user space on every call. sys_rtas()-based users of these
functions have always been, and continue to be, responsible for
coordinating their call sequences with other users, even those which
may invoke the RTAS functions through less direct means than
sys_rtas(). This is an unavoidable consequence of exposing
sequence-based RTAS functions through sys_rtas().

* Add new rtas_function_lock() and rtas_function_unlock() APIs for use
  with sequence-based RTAS functions.

* Add an optional per-function mutex to struct rtas_function. When this
  member is set, kernel-internal callers of the RTAS function are
  required to guard their call sequences with rtas_function_lock() and
  rtas_function_unlock(). This requirement will be enforced in a later
  change, after all affected call sites are updated.

* Populate the lock members of function table entries where
  serialization of call sequences is known to be necessary, along with
  justifying commentary.

* In sys_rtas(), acquire the per-function mutex when it is present.

There should be no perceivable change introduced here except that
concurrent callers of the same RTAS function via sys_rtas() may block
on a mutex instead of spinning on rtas_lock. Changes to follow will
add rtas_function_lock()/unlock() pairs to kernel-based call
sequences.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |   2 +
 arch/powerpc/kernel/rtas.c  | 101 +++-
 2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b73010583a8d..9a20caba6858 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -421,6 +421,8 @@ static inline bool rtas_function_implemented(const 
rtas_fn_handle_t handle)
 {
return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
 }
+void rtas_function_lock(rtas_fn_handle_t handle);
+void rtas_function_unlock(rtas_fn_handle_t handle);
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 1fc0b3fffdd1..52f2242d0c28 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -70,14 +71,34 @@ struct rtas_filter {
  *ppc64le, and we want to keep it that way. It does
  *not make sense for this to be set when @filter
  *is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ *should be set for functions that require multiple calls in
+ *sequence to complete a single operation, and such sequences
+ *will disrupt each other if allowed to interleave. Users of
+ *this function are required to hold the associated lock for
+ *the duration of the call sequence. Add an explanatory
+ *comment to the function table entry if setting this member.
  */
 stru

[PATCH v4 03/13] powerpc/rtas: Add function return status constants

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

Not all of the generic RTAS function statuses specified in PAPR have
symbolic constants and descriptions in rtas.h. Fix this, providing a
little more background, slightly updating the existing wording, and
improving the formatting.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index c697c3c74694..b73010583a8d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -201,12 +201,25 @@ typedef struct {
 /* Memory set aside for sys_rtas to use with calls that need a work area. */
 #define RTAS_USER_REGION_SIZE (64 * 1024)
 
-/* RTAS return status codes */
-#define RTAS_HARDWARE_ERROR-1/* Hardware Error */
-#define RTAS_BUSY  -2/* RTAS Busy */
-#define RTAS_INVALID_PARAMETER -3/* Invalid indicator/domain/sensor etc. */
-#define RTAS_EXTENDED_DELAY_MIN9900
-#define RTAS_EXTENDED_DELAY_MAX9905
+/*
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ 7.2.8: "Return Codes". If a function
+ * can return a value in this table then generally it has the meaning
+ * listed here. More extended commentary in the documentation for
+ * rtas_call().
+ *
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
+ */
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified 
error. */
+#define RTAS_BUSY   -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER  -3 /* Invalid indicator/domain/sensor 
etc. */
+#define RTAS_UNEXPECTED_STATE_CHANGE-7 /* Seems limited to EEH and slot 
reset. */
+#define RTAS_EXTENDED_DELAY_MIN   9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX   9905 /* Retry after delaying for ~100s. 
*/
+#define RTAS_ML_ISOLATION_ERROR  -9000 /* Multi-level isolation error. */
 
 /* statuses specific to ibm,suspend-me */
 #define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */

-- 
2.41.0



[PATCH v4 10/13] powerpc/pseries/papr-sysparm: Validate buffer object lengths

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

The ability to get and set system parameters will be exposed to user
space, so let's get a little more strict about malformed
papr_sysparm_buf objects.

* Create accessors for the length field of struct papr_sysparm_buf.
  The length is always stored in MSB order and this is better than
  spreading the necessary conversions all over.

* Reject attempts to submit invalid buffers to RTAS.

* Warn if RTAS returns a buffer with an invalid length, clamping the
  returned length to a safe value that won't overrun the buffer.

These are meant as precautionary measures to mitigate both firmware
and kernel bugs in this area, should they arise, but I am not aware of
any.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/papr-sysparm.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/papr-sysparm.c 
b/arch/powerpc/platforms/pseries/papr-sysparm.c
index fedc61599e6c..a1e7aeac7416 100644
--- a/arch/powerpc/platforms/pseries/papr-sysparm.c
+++ b/arch/powerpc/platforms/pseries/papr-sysparm.c
@@ -23,6 +23,46 @@ void papr_sysparm_buf_free(struct papr_sysparm_buf *buf)
kfree(buf);
 }
 
+static size_t papr_sysparm_buf_get_length(const struct papr_sysparm_buf *buf)
+{
+   return be16_to_cpu(buf->len);
+}
+
+static void papr_sysparm_buf_set_length(struct papr_sysparm_buf *buf, size_t 
length)
+{
+   WARN_ONCE(length > sizeof(buf->val),
+ "bogus length %zu, clamping to safe value", length);
+   length = min(sizeof(buf->val), length);
+   buf->len = cpu_to_be16(length);
+}
+
+/*
+ * For use on buffers returned from ibm,get-system-parameter before
+ * returning them to callers. Ensures the encoded length of valid data
+ * cannot overrun buf->val[].
+ */
+static void papr_sysparm_buf_clamp_length(struct papr_sysparm_buf *buf)
+{
+   papr_sysparm_buf_set_length(buf, papr_sysparm_buf_get_length(buf));
+}
+
+/*
+ * Perform some basic diligence on the system parameter buffer before
+ * submitting it to RTAS.
+ */
+static bool papr_sysparm_buf_can_submit(const struct papr_sysparm_buf *buf)
+{
+   /*
+* Firmware ought to reject buffer lengths that exceed the
+* maximum specified in PAPR, but there's no reason for the
+* kernel to allow them either.
+*/
+   if (papr_sysparm_buf_get_length(buf) > sizeof(buf->val))
+   return false;
+
+   return true;
+}
+
 /**
  * papr_sysparm_get() - Retrieve the value of a PAPR system parameter.
  * @param: PAPR system parameter token as described in
@@ -63,6 +103,9 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));
@@ -77,6 +120,7 @@ int papr_sysparm_get(papr_sysparm_t param, struct 
papr_sysparm_buf *buf)
case 0:
ret = 0;
memcpy(buf, rtas_work_area_raw_buf(work_area), sizeof(*buf));
+   papr_sysparm_buf_clamp_length(buf);
break;
case -3: /* parameter not implemented */
ret = -EOPNOTSUPP;
@@ -115,6 +159,9 @@ int papr_sysparm_set(papr_sysparm_t param, const struct 
papr_sysparm_buf *buf)
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
 
+   if (!papr_sysparm_buf_can_submit(buf))
+   return -EINVAL;
+
work_area = rtas_work_area_alloc(sizeof(*buf));
 
memcpy(rtas_work_area_raw_buf(work_area), buf, sizeof(*buf));

-- 
2.41.0



[PATCH v4 07/13] powerpc/rtas: Warn if per-function lock isn't held

2023-11-17 Thread Nathan Lynch via B4 Relay
From: Nathan Lynch 

If the function descriptor has a populated lock member, then callers
are required to hold it across calls. Now that the firmware activation
sequence is appropriately guarded, we can warn when the requirement
isn't satisfied.

__do_enter_rtas_trace() gets reorganized a bit as a result of
performing the function descriptor lookup unconditionally now.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 21 +
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e38ba05ad613..deb6289fcf9c 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -685,28 +685,25 @@ static void __do_enter_rtas(struct rtas_args *args)
 
 static void __do_enter_rtas_trace(struct rtas_args *args)
 {
-   const char *name = NULL;
+   struct rtas_function *func = 
rtas_token_to_function(be32_to_cpu(args->token));
 
-   if (args == &rtas_args)
-   lockdep_assert_held(&rtas_lock);
/*
-* If the tracepoints that consume the function name aren't
-* active, avoid the lookup.
+* If there is a per-function lock, it must be held by the
+* caller.
 */
-   if ((trace_rtas_input_enabled() || trace_rtas_output_enabled())) {
-   const s32 token = be32_to_cpu(args->token);
-   const struct rtas_function *func = 
rtas_token_to_function(token);
+   if (func->lock)
+   WARN_ON(!mutex_is_locked(func->lock));
 
-   name = func->name;
-   }
+   if (args == &rtas_args)
+   lockdep_assert_held(&rtas_lock);
 
-   trace_rtas_input(args, name);
+   trace_rtas_input(args, func->name);
trace_rtas_ll_entry(args);
 
__do_enter_rtas(args);
 
trace_rtas_ll_exit(args);
-   trace_rtas_output(args, name);
+   trace_rtas_output(args, func->name);
 }
 
 static void do_enter_rtas(struct rtas_args *args)

-- 
2.41.0



[PATCH v4 5/5] RISC-V: Enable SBI based earlycon support

2023-11-17 Thread Anup Patel
Let us enable SBI based earlycon support in defconfigs for both RV32
and RV64 so that "earlycon=sbi" can be used again.

Signed-off-by: Anup Patel 
Reviewed-by: Andrew Jones 
---
 arch/riscv/configs/defconfig  | 1 +
 arch/riscv/configs/rv32_defconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 905881282a7c..eaf34e871e30 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -149,6 +149,7 @@ CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_SH_SCI=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
diff --git a/arch/riscv/configs/rv32_defconfig 
b/arch/riscv/configs/rv32_defconfig
index 89b601e253a6..5721af39afd1 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -66,6 +66,7 @@ CONFIG_INPUT_MOUSEDEV=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_EARLYCON_RISCV_SBI=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
 CONFIG_HW_RANDOM_VIRTIO=y
-- 
2.34.1



[PATCH v4 4/5] tty: Add SBI debug console support to HVC SBI driver

2023-11-17 Thread Anup Patel
From: Atish Patra 

RISC-V SBI specification supports advanced debug console
support via SBI DBCN extension.

Extend the HVC SBI driver to support it.

Signed-off-by: Atish Patra 
Signed-off-by: Anup Patel 
---
 drivers/tty/hvc/Kconfig |  2 +-
 drivers/tty/hvc/hvc_riscv_sbi.c | 59 +
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
index 4f9264d005c0..6e05c5c7bca1 100644
--- a/drivers/tty/hvc/Kconfig
+++ b/drivers/tty/hvc/Kconfig
@@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP
 
 config HVC_RISCV_SBI
bool "RISC-V SBI console support"
-   depends on RISCV_SBI_V01
+   depends on RISCV_SBI
select HVC_DRIVER
help
  This enables support for console output via RISC-V SBI calls, which
diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c b/drivers/tty/hvc/hvc_riscv_sbi.c
index 31f53fa77e4a..697c981221b5 100644
--- a/drivers/tty/hvc/hvc_riscv_sbi.c
+++ b/drivers/tty/hvc/hvc_riscv_sbi.c
@@ -39,21 +39,66 @@ static int hvc_sbi_tty_get(uint32_t vtermno, char *buf, int 
count)
return i;
 }
 
-static const struct hv_ops hvc_sbi_ops = {
+static const struct hv_ops hvc_sbi_v01_ops = {
.get_chars = hvc_sbi_tty_get,
.put_chars = hvc_sbi_tty_put,
 };
 
-static int __init hvc_sbi_init(void)
+static int hvc_sbi_dbcn_tty_put(uint32_t vtermno, const char *buf, int count)
 {
-   return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16));
+   phys_addr_t pa;
+
+   if (is_vmalloc_addr(buf)) {
+   pa = page_to_phys(vmalloc_to_page(buf)) + offset_in_page(buf);
+   if (PAGE_SIZE < (offset_in_page(buf) + count))
+   count = PAGE_SIZE - offset_in_page(buf);
+   } else {
+   pa = __pa(buf);
+   }
+
+   return sbi_debug_console_write(count, pa);
 }
-device_initcall(hvc_sbi_init);
 
-static int __init hvc_sbi_console_init(void)
+static int hvc_sbi_dbcn_tty_get(uint32_t vtermno, char *buf, int count)
 {
-   hvc_instantiate(0, 0, &hvc_sbi_ops);
+   phys_addr_t pa;
+
+   if (is_vmalloc_addr(buf)) {
+   pa = page_to_phys(vmalloc_to_page(buf)) + offset_in_page(buf);
+   if (PAGE_SIZE < (offset_in_page(buf) + count))
+   count = PAGE_SIZE - offset_in_page(buf);
+   } else {
+   pa = __pa(buf);
+   }
+
+   return sbi_debug_console_read(count, pa);
+}
+
+static const struct hv_ops hvc_sbi_dbcn_ops = {
+   .put_chars = hvc_sbi_dbcn_tty_put,
+   .get_chars = hvc_sbi_dbcn_tty_get,
+};
+
+static int __init hvc_sbi_init(void)
+{
+   int err;
+
+   if (sbi_debug_console_available) {
+   err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_dbcn_ops, 256));
+   if (err)
+   return err;
+   hvc_instantiate(0, 0, &hvc_sbi_dbcn_ops);
+   } else {
+   if (IS_ENABLED(CONFIG_RISCV_SBI_V01)) {
+   err = PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_v01_ops, 
256));
+   if (err)
+   return err;
+   hvc_instantiate(0, 0, &hvc_sbi_v01_ops);
+   } else {
+   return -ENODEV;
+   }
+   }
 
return 0;
 }
-console_initcall(hvc_sbi_console_init);
+device_initcall(hvc_sbi_init);
-- 
2.34.1



[PATCH v4 3/5] tty/serial: Add RISC-V SBI debug console based earlycon

2023-11-17 Thread Anup Patel
We extend the existing RISC-V SBI earlycon support to use the new
RISC-V SBI debug console extension.

Signed-off-by: Anup Patel 
Reviewed-by: Andrew Jones 
---
 drivers/tty/serial/Kconfig  |  2 +-
 drivers/tty/serial/earlycon-riscv-sbi.c | 24 
 2 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 732c893c8d16..1f2594b8ab9d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST
 
 config SERIAL_EARLYCON_RISCV_SBI
bool "Early console using RISC-V SBI"
-   depends on RISCV_SBI_V01
+   depends on RISCV_SBI
select SERIAL_CORE
select SERIAL_CORE_CONSOLE
select SERIAL_EARLYCON
diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c 
b/drivers/tty/serial/earlycon-riscv-sbi.c
index 27afb0b74ea7..5351e1e31f45 100644
--- a/drivers/tty/serial/earlycon-riscv-sbi.c
+++ b/drivers/tty/serial/earlycon-riscv-sbi.c
@@ -15,17 +15,33 @@ static void sbi_putc(struct uart_port *port, unsigned char 
c)
sbi_console_putchar(c);
 }
 
-static void sbi_console_write(struct console *con,
- const char *s, unsigned n)
+static void sbi_0_1_console_write(struct console *con,
+ const char *s, unsigned int n)
 {
struct earlycon_device *dev = con->data;
uart_console_write(&dev->port, s, n, sbi_putc);
 }
 
+static void sbi_dbcn_console_write(struct console *con,
+  const char *s, unsigned int n)
+{
+   sbi_debug_console_write(n, __pa(s));
+}
+
 static int __init early_sbi_setup(struct earlycon_device *device,
  const char *opt)
 {
-   device->con->write = sbi_console_write;
-   return 0;
+   int ret = 0;
+
+   if (sbi_debug_console_available) {
+   device->con->write = sbi_dbcn_console_write;
+   } else {
+   if (IS_ENABLED(CONFIG_RISCV_SBI_V01))
+   device->con->write = sbi_0_1_console_write;
+   else
+   ret = -ENODEV;
+   }
+
+   return ret;
 }
 EARLYCON_DECLARE(sbi, early_sbi_setup);
-- 
2.34.1



[PATCH v4 2/5] RISC-V: Add SBI debug console helper routines

2023-11-17 Thread Anup Patel
Let us provide SBI debug console helper routines which can be
shared by serial/earlycon-riscv-sbi.c and hvc/hvc_riscv_sbi.c.

Signed-off-by: Anup Patel 
---
 arch/riscv/include/asm/sbi.h |  5 +
 arch/riscv/kernel/sbi.c  | 43 
 2 files changed, 48 insertions(+)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 66f3933c14f6..ee7aef5f6233 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -334,6 +334,11 @@ static inline unsigned long sbi_mk_version(unsigned long 
major,
 }
 
 int sbi_err_map_linux_errno(int err);
+
+extern bool sbi_debug_console_available;
+int sbi_debug_console_write(unsigned int num_bytes, phys_addr_t base_addr);
+int sbi_debug_console_read(unsigned int num_bytes, phys_addr_t base_addr);
+
 #else /* CONFIG_RISCV_SBI */
 static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return 
-1; }
 static inline void sbi_init(void) {}
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 5a62ed1da453..73a9c22c3945 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -571,6 +571,44 @@ long sbi_get_mimpid(void)
 }
 EXPORT_SYMBOL_GPL(sbi_get_mimpid);
 
+bool sbi_debug_console_available;
+
+int sbi_debug_console_write(unsigned int num_bytes, phys_addr_t base_addr)
+{
+   struct sbiret ret;
+
+   if (!sbi_debug_console_available)
+   return -EOPNOTSUPP;
+
+   if (IS_ENABLED(CONFIG_32BIT))
+   ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+   num_bytes, lower_32_bits(base_addr),
+   upper_32_bits(base_addr), 0, 0, 0);
+   else
+   ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
+   num_bytes, base_addr, 0, 0, 0, 0);
+
+   return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
+int sbi_debug_console_read(unsigned int num_bytes, phys_addr_t base_addr)
+{
+   struct sbiret ret;
+
+   if (!sbi_debug_console_available)
+   return -EOPNOTSUPP;
+
+   if (IS_ENABLED(CONFIG_32BIT))
+   ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+   num_bytes, lower_32_bits(base_addr),
+   upper_32_bits(base_addr), 0, 0, 0);
+   else
+   ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
+   num_bytes, base_addr, 0, 0, 0, 0);
+
+   return ret.error ? sbi_err_map_linux_errno(ret.error) : ret.value;
+}
+
 void __init sbi_init(void)
 {
int ret;
@@ -612,6 +650,11 @@ void __init sbi_init(void)
sbi_srst_reboot_nb.priority = 192;
register_restart_handler(&sbi_srst_reboot_nb);
}
+   if ((sbi_spec_version >= sbi_mk_version(2, 0)) &&
+   (sbi_probe_extension(SBI_EXT_DBCN) > 0)) {
+   pr_info("SBI DBCN extension detected\n");
+   sbi_debug_console_available = true;
+   }
} else {
__sbi_set_timer = __sbi_set_timer_v01;
__sbi_send_ipi  = __sbi_send_ipi_v01;
-- 
2.34.1



[PATCH v4 1/5] RISC-V: Add stubs for sbi_console_putchar/getchar()

2023-11-17 Thread Anup Patel
The functions sbi_console_putchar() and sbi_console_getchar() are
not defined when CONFIG_RISCV_SBI_V01 is disabled so let us add
stub of these functions to avoid "#ifdef" on user side.

Signed-off-by: Anup Patel 
Reviewed-by: Andrew Jones 
---
 arch/riscv/include/asm/sbi.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 0892f4421bc4..66f3933c14f6 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -271,8 +271,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long 
arg0,
unsigned long arg3, unsigned long arg4,
unsigned long arg5);
 
+#ifdef CONFIG_RISCV_SBI_V01
 void sbi_console_putchar(int ch);
 int sbi_console_getchar(void);
+#else
+static inline void sbi_console_putchar(int ch) { }
+static inline int sbi_console_getchar(void) { return -ENOENT; }
+#endif
 long sbi_get_mvendorid(void);
 long sbi_get_marchid(void);
 long sbi_get_mimpid(void);
-- 
2.34.1



[PATCH v4 0/5] RISC-V SBI debug console extension support

2023-11-17 Thread Anup Patel
The SBI v2.0 specification is now frozen. The SBI v2.0 specification defines
SBI debug console (DBCN) extension which replaces the legacy SBI v0.1
functions sbi_console_putchar() and sbi_console_getchar().
(Refer v2.0-rc5 at https://github.com/riscv-non-isa/riscv-sbi-doc/releases)

This series adds support for SBI debug console (DBCN) extension in KVM RISC-V
and Linux RISC-V.

To try these patches with KVM RISC-V, use KVMTOOL from riscv_sbi_dbcn_v1
branch at: https://github.com/avpatel/kvmtool.git

These patches can also be found in the riscv_sbi_dbcn_v4 branch at:
https://github.com/avpatel/linux.git

Changes since v3:
 - Rebased on Linux-6.7-rc1
 - Dropped PATCH1 to PATCH5 of v3 series since these were merged through
   KVM RISC-V tree for Linux-6.7
 - Used proper error code in PATCH1
 - Added new PATCH2 which add common SBI debug console helper functions
 - Updated PATCH3 and PATCH4 to use SBI debug console helper functions

Changes since v2:
 - Rebased on Linux-6.6-rc5
 - Handled page-crossing in PATCH7 of v2 series
 - Addressed Drew's comment in PATCH3 of v2 series
 - Added new PATCH5 to make get-reg-list test aware of SBI DBCN extension

Changes since v1:
 - Remove use of #ifdef from PATCH4 and PATCH5 of the v1 series
 - Improved commit description of PATCH3 in v1 series
 - Introduced new PATCH3 in this series to allow some SBI extensions
   (such as SBI DBCN) do to disabled by default so that older KVM user space
   work fine and newer KVM user space have to explicitly opt-in for emulating
   SBI DBCN.
 - Introduced new PATCH5 in this series which adds inline version of
   sbi_console_getchar() and sbi_console_putchar() for the case where
   CONFIG_RISCV_SBI_V01 is disabled.

Anup Patel (4):
  RISC-V: Add stubs for sbi_console_putchar/getchar()
  RISC-V: Add SBI debug console helper routines
  tty/serial: Add RISC-V SBI debug console based earlycon
  RISC-V: Enable SBI based earlycon support

Atish Patra (1):
  tty: Add SBI debug console support to HVC SBI driver

 arch/riscv/configs/defconfig|  1 +
 arch/riscv/configs/rv32_defconfig   |  1 +
 arch/riscv/include/asm/sbi.h| 10 +
 arch/riscv/kernel/sbi.c | 43 ++
 drivers/tty/hvc/Kconfig |  2 +-
 drivers/tty/hvc/hvc_riscv_sbi.c | 59 ++---
 drivers/tty/serial/Kconfig  |  2 +-
 drivers/tty/serial/earlycon-riscv-sbi.c | 24 --
 8 files changed, 129 insertions(+), 13 deletions(-)

-- 
2.34.1



Re: [PATCH v9 00/27] Add support for QMC HDLC, framer infrastructure and PEF2256 framer

2023-11-17 Thread Jakub Kicinski
On Wed, 15 Nov 2023 15:39:36 +0100 Herve Codina wrote:
>- Removed Patches 6, 7 and 8 (patches applied)
> 
>- Patches 7, 20, 21, 23 (patches 10, 23, 24, 26 in v8)
>  Add 'Acked-by: Jakub Kicinski '

I thought someone (Mark?) asked for the networking stuff to be put 
on a branch. If that's still the preference - is it possible to factor
these out as a standalone series, too?  Will they build on their own?


[RFC - is this a bug?] powerpc/lib/sstep: Asking for some light on this, please. :)

2023-11-17 Thread Gustavo A. R. Silva

Hi all,

I'm trying to fix the following -Wstringop-overflow warnings, and I'd like
to get your feedback on this, please:

In function 'do_byte_reverse',
inlined from 'do_vec_store' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:3510:9:
arch/powerpc/lib/sstep.c:287:23: error: writing 8 bytes into a region of size 0 
[-Werror=stringop-overflow=]
  287 | up[3] = tmp;
  | ~~^
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset [24, 39] into destination 
object 'u' of size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_store' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:722:3,
inlined from 'emulate_loadstore' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:3510:9:
arch/powerpc/lib/sstep.c:289:23: error: writing 8 bytes into a region of size 0 
[-Werror=stringop-overflow=]
  289 | up[2] = byterev_8(up[1]);
  | ~~^~
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:708:11: note: at offset 16 into destination object 'u' 
of size 16
  708 | } u;
  |   ^
In function 'do_byte_reverse',
inlined from 'do_vec_load' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:691:3,
inlined from 'emulate_loadstore' at 
/home/gus/linux/arch/powerpc/lib/sstep.c:3439:9:
arch/powerpc/lib/sstep.c:287:23: error: writing 8 bytes into a region of size 0 
[-Werror=stringop-overflow=]
  287 | up[3] = tmp;
  | ~~^
arch/powerpc/lib/sstep.c: In function 'emulate_loadstore':
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into destination 
object 'u' of size 16
  681 | } u = {};
  |   ^
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into destination 
object 'u' of size 16
arch/powerpc/lib/sstep.c:681:11: note: at offset [24, 39] into destination 
object 'u' of size 16

The origing of the issue seems to be the following calls to function 
`do_vec_store()`, when
`size > 16`, or more precisely when `size == 32`

arch/powerpc/lib/sstep.c:
3436 case LOAD_VMX:
3437 if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
3438 return 0;
3439 err = do_vec_load(op->reg, ea, size, regs, cross_endian);
3440 break;
...
3507 case STORE_VMX:
3508 if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
3509 return 0;
3510 err = do_vec_store(op->reg, ea, size, regs, cross_endian);
3511 break;

Inside `do_vec_store()`, we have that the size of `union u` is 16 bytes:

 702 int size, struct pt_regs *regs,
 703 bool cross_endian)
 704 {
 705 union {
 706 __vector128 v;
 707 u8 b[sizeof(__vector128)];
 708 } u;

and then function `do_byte_reverse()` is called

 721 if (unlikely(cross_endian))
 722 do_byte_reverse(&u.b[ea & 0xf], size);

and if `size == 32`, the following piece of code tries to access
`ptr` outside of its boundaries:

 260 static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
 261 {
 262 switch (nb) {
...
 281 case 32: {
 282 unsigned long *up = (unsigned long *)ptr;
 283 unsigned long tmp;
 284
 285 tmp = byterev_8(up[0]);
 286 up[0] = byterev_8(up[3]);
 287 up[3] = tmp;
 288 tmp = byterev_8(up[2]);
 289 up[2] = byterev_8(up[1]);
 290 up[1] = tmp;
 291 break;
 292 }

The following patch is merely a test to illustrate how the value in `size` 
initially appears
to influence the warnings:

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a4ab8625061a..86786957b736 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -3436,7 +3436,7 @@ int emulate_loadstore(struct pt_regs *regs, struct 
instruction_op *op)
case LOAD_VMX:
if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
return 0;
-   err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+   err = do_vec_load(op->reg, ea, (size > 16) ? 16 : size, regs, 
cross_endian);
break;
 #endif
 #ifdef CONFIG_VSX
@@ -3507,7 +3507,7 @@ int emulate_loadstore(struct pt_regs *regs, struct 
instruction_op *op)
case STORE_VMX:
if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC))
return 0;
-   err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+   err = do_vec_store(op->reg, ea, (size > 16) ? 16 : size

Re: [PATCH v9 10/15] media: uapi: Add V4L2_CTRL_TYPE_FIXED_POINT

2023-11-17 Thread Hans Verkuil
On 17/11/2023 14:07, Sakari Ailus wrote:
> Hi Hans,
> 
> Thank you for the patch.
> 
> On Fri, Nov 17, 2023 at 01:07:44PM +0100, Hans Verkuil wrote:
>> Here is an RFC patch adding support for 'fraction_bits'. It's lacking
>> documentation, but it can be used for testing.
>>
>> It was rather a pain logging fixed point number in a reasonable format,
>> but I think it is OK.
>>
>> In userspace (where you can use floating point) it is a lot easier:
>>
>> printf("%.*g\n", fraction_bits, (double)v * (1.0 / (1ULL << fraction_bits)));
> 
> I wonder if we could add a printk() format specifier for this. Doesn't need
> to be done right now though, just an idea.
> 
>>
>> I decided to only expose fraction_bits in struct v4l2_query_ext_ctrl.
>> I could add it to struct v4l2_queryctrl, but I did not think that was
>> necessary. Other opinions are welcome.
>>
>> In the meantime, let me know if this works for your patch series. If it
>> does, then I can clean this up.
>>
>> Regards,
>>
>>  Hans
>>
>> Signed-off-by: Hans Verkuil 
>> ---
>>  drivers/media/v4l2-core/v4l2-ctrls-api.c  |  1 +
>>  drivers/media/v4l2-core/v4l2-ctrls-core.c | 72 +++
>>  include/media/v4l2-ctrls.h|  7 ++-
>>  include/uapi/linux/videodev2.h| 20 ++-
>>  4 files changed, 85 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c 
>> b/drivers/media/v4l2-core/v4l2-ctrls-api.c
>> index 002ea6588edf..3132df315b17 100644
>> --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c
>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
>> @@ -1101,6 +1101,7 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, 
>> struct v4l2_query_ext_ctr
>>  qc->elems = ctrl->elems;
>>  qc->nr_of_dims = ctrl->nr_of_dims;
>>  memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
>> +qc->fraction_bits = ctrl->fraction_bits;
>>  qc->minimum = ctrl->minimum;
>>  qc->maximum = ctrl->maximum;
>>  qc->default_value = ctrl->default_value;
>> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c 
>> b/drivers/media/v4l2-core/v4l2-ctrls-core.c
>> index a662fb60f73f..0e08a371af5c 100644
>> --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
>> +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
>> @@ -252,12 +252,42 @@ void v4l2_ctrl_type_op_init(const struct v4l2_ctrl 
>> *ctrl, u32 from_idx,
>>  }
>>  EXPORT_SYMBOL(v4l2_ctrl_type_op_init);
>>
>> +static void v4l2_ctrl_log_fp(s64 v, unsigned int fraction_bits)
>> +{
>> +s64 i = v4l2_fp_integer(v, fraction_bits);
>> +s64 f = v4l2_fp_fraction(v, fraction_bits);
>> +
>> +if (!f) {
>> +pr_cont("%lld", i);
>> +} else if (fraction_bits < 20) {
>> +u64 div = 1ULL << fraction_bits;
>> +
>> +if (!i && f < 0)
>> +pr_cont("-%lld/%llu", -f, div);
>> +else if (!i)
>> +pr_cont("%lld/%llu", f, div);
>> +else if (i < 0 || f < 0)
>> +pr_cont("-%lld-%llu/%llu", -i, -f, div);
>> +else
>> +pr_cont("%lld+%llu/%llu", i, f, div);
>> +} else {
>> +if (!i && f < 0)
>> +pr_cont("-%lld/(2^%u)", -f, fraction_bits);
>> +else if (!i)
>> +pr_cont("%lld/(2^%u)", f, fraction_bits);
>> +else if (i < 0 || f < 0)
>> +pr_cont("-%lld-%llu/(2^%u)", -i, -f, fraction_bits);
>> +else
>> +pr_cont("%lld+%llu/(2^%u)", i, f, fraction_bits);
>> +}
>> +}
>> +
>>  void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>>  {
>>  union v4l2_ctrl_ptr ptr = ctrl->p_cur;
>>
>>  if (ctrl->is_array) {
>> -unsigned i;
>> +unsigned int i;
>>
>>  for (i = 0; i < ctrl->nr_of_dims; i++)
>>  pr_cont("[%u]", ctrl->dims[i]);
>> @@ -266,7 +296,10 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>>
>>  switch (ctrl->type) {
>>  case V4L2_CTRL_TYPE_INTEGER:
>> -pr_cont("%d", *ptr.p_s32);
>> +if (!ctrl->fraction_bits)
>> +pr_cont("%d", *ptr.p_s32);
>> +else
>> +v4l2_ctrl_log_fp(*ptr.p_s32, ctrl->fraction_bits);
>>  break;
>>  case V4L2_CTRL_TYPE_BOOLEAN:
>>  pr_cont("%s", *ptr.p_s32 ? "true" : "false");
>> @@ -281,19 +314,31 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl 
>> *ctrl)
>>  pr_cont("0x%08x", *ptr.p_s32);
>>  break;
>>  case V4L2_CTRL_TYPE_INTEGER64:
>> -pr_cont("%lld", *ptr.p_s64);
>> +if (!ctrl->fraction_bits)
>> +pr_cont("%lld", *ptr.p_s64);
>> +else
>> +v4l2_ctrl_log_fp(*ptr.p_s64, ctrl->fraction_bits);
>>  break;
>>  case V4L2_CTRL_TYPE_STRING:
>>  pr_cont("%s", ptr.p_char);
>>  break;
>>  case V4L2_CTRL_TYPE_U8:
>> -pr_con

Re: [PATCH 2/7] kexec_file: print out debugging message if required

2023-11-17 Thread b...@redhat.com
On 11/17/23 at 09:37am, Liu, Yujie wrote:
> Hi Baoquan,
> 
> On Fri, 2023-11-17 at 17:14 +0800, Baoquan He wrote:
> > Hi,
> > 
> > On 11/16/23 at 05:04am, kernel test robot wrote:
> > > Hi Baoquan,
> > > 
> > > kernel test robot noticed the following build errors:
> > > 
> > > [auto build test ERROR on arm64/for-next/core]
> > > [also build test ERROR on tip/x86/core powerpc/next powerpc/fixes 
> > > linus/master v6.7-rc1 next-20231115]
> > > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > > And when submitting patch, we suggest to use '--base' as documented in
> > > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> > > 
> > > url:    
> > > https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/kexec_file-add-kexec_file-flag-to-control-debug-printing/20231114-234003
> > > base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git 
> > > for-next/core
> > > patch link:    
> > > https://lore.kernel.org/r/20231114153253.241262-3-bhe%40redhat.com
> > > patch subject: [PATCH 2/7] kexec_file: print out debugging message if 
> > > required
> > > config: hexagon-comet_defconfig 
> > > (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/config)
> > > compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git 
> > > ae42196bc493ffe877a7e3dff8be32035dea4d07)
> > > reproduce (this is a W=1 build): 
> > > (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/reproduce)
> > > 
> > 
> > Thanks for reporting.
> > 
> > I met below failure when following the steps of provided reproducer.
> > Could anyone help check what's wrong with that?
> 
> Sorry this seems to be a bug in the reproducer. Could you please change
> the compiler parameter to "COMPILER=clang-16" and rerun the command? We
> will fix the issue ASAP.

Here you are. Thanks for your quick response.
--
[root@~ linux]# COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang-16 
~/bin/make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
Compiler will be installed in /root/0day
lftpget -c 
https://cdn.kernel.org/pub/tools/llvm/files/./llvm-16.0.6-x86_64.tar.xz
/root/linux 

tar Jxf /root/0day/./llvm-16.0.6-x86_64.tar.xz -C /root/0day
PATH=/root/0day/llvm-16.0.6-x86_64/bin:/root/.local/bin:/root/bin:/usr/lib64/ccache:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin
make --keep-going LLVM=1 CROSS_COMPILE=hexagon-linux- LLVM_IAS=1 --jobs=128 
KCFLAGS=-Warray-bounds -Wundef -fstrict-flex-arrays=3 -funsigned-char 
-Wenum-conversion W=1 O=build_dir ARCH=hexagon olddefconfig
make[1]: Entering directory '/root/linux/build_dir'
  GEN Makefile
  HOSTCC  scripts/basic/fixdep
  HOSTCC  scripts/kconfig/conf.o
  HOSTCC  scripts/kconfig/confdata.o
  HOSTCC  scripts/kconfig/expr.o
  HOSTCC  scripts/kconfig/lexer.lex.o
  HOSTCC  scripts/kconfig/menu.o
  HOSTCC  scripts/kconfig/parser.tab.o
  HOSTCC  scripts/kconfig/preprocess.o
  HOSTCC  scripts/kconfig/symbol.o
  HOSTCC  scripts/kconfig/util.o
  HOSTLD  scripts/kconfig/conf
#
# configuration written to .config
#
make[1]: Leaving directory '/root/linux/build_dir'

> 
> > [root@~ linux]# COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang 
> > ~/bin/make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
> > Compiler will be installed in /root/0day
> > lftpget -c https://cdn.kernel.org/pub/tools/llvm/files/
> > get1: /pub/tools/llvm/files/: files/: Is a directory
> > Failed to download https://cdn.kernel.org/pub/tools/llvm/files/
> > clang crosstool install failed
> > Install clang compiler failed
> > setup_crosstool failed
> 
> 



Re: [PATCH v3 8/9] tty: Add SBI debug console support to HVC SBI driver

2023-11-17 Thread Anup Patel
On Sat, Oct 21, 2023 at 10:16 PM Greg Kroah-Hartman
 wrote:
>
> On Fri, Oct 20, 2023 at 12:51:39PM +0530, Anup Patel wrote:
> > From: Atish Patra 
> >
> > RISC-V SBI specification supports advanced debug console
> > support via SBI DBCN extension.
> >
> > Extend the HVC SBI driver to support it.
> >
> > Signed-off-by: Atish Patra 
> > Signed-off-by: Anup Patel 
> > ---
> >  drivers/tty/hvc/Kconfig |  2 +-
> >  drivers/tty/hvc/hvc_riscv_sbi.c | 82 ++---
> >  2 files changed, 76 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/tty/hvc/Kconfig b/drivers/tty/hvc/Kconfig
> > index 4f9264d005c0..6e05c5c7bca1 100644
> > --- a/drivers/tty/hvc/Kconfig
> > +++ b/drivers/tty/hvc/Kconfig
> > @@ -108,7 +108,7 @@ config HVC_DCC_SERIALIZE_SMP
> >
> >  config HVC_RISCV_SBI
> >   bool "RISC-V SBI console support"
> > - depends on RISCV_SBI_V01
> > + depends on RISCV_SBI
> >   select HVC_DRIVER
> >   help
> > This enables support for console output via RISC-V SBI calls, which
> > diff --git a/drivers/tty/hvc/hvc_riscv_sbi.c 
> > b/drivers/tty/hvc/hvc_riscv_sbi.c
> > index 31f53fa77e4a..56da1a4b5aca 100644
> > --- a/drivers/tty/hvc/hvc_riscv_sbi.c
> > +++ b/drivers/tty/hvc/hvc_riscv_sbi.c
> > @@ -39,21 +39,89 @@ static int hvc_sbi_tty_get(uint32_t vtermno, char *buf, 
> > int count)
> >   return i;
> >  }
> >
> > -static const struct hv_ops hvc_sbi_ops = {
> > +static const struct hv_ops hvc_sbi_v01_ops = {
> >   .get_chars = hvc_sbi_tty_get,
> >   .put_chars = hvc_sbi_tty_put,
> >  };
> >
> > -static int __init hvc_sbi_init(void)
> > +static int hvc_sbi_dbcn_tty_put(uint32_t vtermno, const char *buf, int 
> > count)
> >  {
> > - return PTR_ERR_OR_ZERO(hvc_alloc(0, 0, &hvc_sbi_ops, 16));
> > + phys_addr_t pa;
> > + struct sbiret ret;
> > +
> > + if (is_vmalloc_addr(buf)) {
> > + pa = page_to_phys(vmalloc_to_page(buf)) + offset_in_page(buf);
> > + if (PAGE_SIZE < (offset_in_page(buf) + count))
> > + count = PAGE_SIZE - offset_in_page(buf);
> > + } else {
> > + pa = __pa(buf);
> > + }
> > +
> > + if (IS_ENABLED(CONFIG_32BIT))
> > + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
> > + count, lower_32_bits(pa), upper_32_bits(pa),
> > + 0, 0, 0);
> > + else
> > + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
> > + count, pa, 0, 0, 0, 0);
>
> Again, you need a helper function here to keep you from having to keep
> this all in sync.

Sure, I will update.

>
> > + if (ret.error)
> > + return 0;
> > +
> > + return count;
> >  }
> > -device_initcall(hvc_sbi_init);
> >
> > -static int __init hvc_sbi_console_init(void)
> > +static int hvc_sbi_dbcn_tty_get(uint32_t vtermno, char *buf, int count)
> >  {
> > - hvc_instantiate(0, 0, &hvc_sbi_ops);
> > + phys_addr_t pa;
> > + struct sbiret ret;
> > +
> > + if (is_vmalloc_addr(buf)) {
> > + pa = page_to_phys(vmalloc_to_page(buf)) + offset_in_page(buf);
> > + if (PAGE_SIZE < (offset_in_page(buf) + count))
> > + count = PAGE_SIZE - offset_in_page(buf);
> > + } else {
> > + pa = __pa(buf);
> > + }
> > +
> > + if (IS_ENABLED(CONFIG_32BIT))
> > + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
> > + count, lower_32_bits(pa), upper_32_bits(pa),
> > + 0, 0, 0);
> > + else
> > + ret = sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_READ,
> > + count, pa, 0, 0, 0, 0);
>
> And here too.

Okay.

>
> thanks,
>
> greg k-h

Regards,
Anup


Re: [PATCH v3 7/9] tty/serial: Add RISC-V SBI debug console based earlycon

2023-11-17 Thread Anup Patel
On Sat, Oct 21, 2023 at 10:16 PM Greg Kroah-Hartman
 wrote:
>
> On Fri, Oct 20, 2023 at 12:51:38PM +0530, Anup Patel wrote:
> > We extend the existing RISC-V SBI earlycon support to use the new
> > RISC-V SBI debug console extension.
> >
> > Signed-off-by: Anup Patel 
> > Reviewed-by: Andrew Jones 
> > ---
> >  drivers/tty/serial/Kconfig  |  2 +-
> >  drivers/tty/serial/earlycon-riscv-sbi.c | 32 +
> >  2 files changed, 29 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
> > index bdc568a4ab66..cec46091a716 100644
> > --- a/drivers/tty/serial/Kconfig
> > +++ b/drivers/tty/serial/Kconfig
> > @@ -87,7 +87,7 @@ config SERIAL_EARLYCON_SEMIHOST
> >
> >  config SERIAL_EARLYCON_RISCV_SBI
> >   bool "Early console using RISC-V SBI"
> > - depends on RISCV_SBI_V01
> > + depends on RISCV_SBI
> >   select SERIAL_CORE
> >   select SERIAL_CORE_CONSOLE
> >   select SERIAL_EARLYCON
> > diff --git a/drivers/tty/serial/earlycon-riscv-sbi.c 
> > b/drivers/tty/serial/earlycon-riscv-sbi.c
> > index 27afb0b74ea7..c21cdef254e7 100644
> > --- a/drivers/tty/serial/earlycon-riscv-sbi.c
> > +++ b/drivers/tty/serial/earlycon-riscv-sbi.c
> > @@ -15,17 +15,41 @@ static void sbi_putc(struct uart_port *port, unsigned 
> > char c)
> >   sbi_console_putchar(c);
> >  }
> >
> > -static void sbi_console_write(struct console *con,
> > -   const char *s, unsigned n)
> > +static void sbi_0_1_console_write(struct console *con,
> > +   const char *s, unsigned int n)
> >  {
> >   struct earlycon_device *dev = con->data;
> >   uart_console_write(&dev->port, s, n, sbi_putc);
> >  }
> >
> > +static void sbi_dbcn_console_write(struct console *con,
> > +const char *s, unsigned int n)
> > +{
> > + phys_addr_t pa = __pa(s);
> > +
> > + if (IS_ENABLED(CONFIG_32BIT))
> > + sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
> > +   n, lower_32_bits(pa), upper_32_bits(pa), 0, 0, 0);
> > + else
> > + sbi_ecall(SBI_EXT_DBCN, SBI_EXT_DBCN_CONSOLE_WRITE,
> > +   n, pa, 0, 0, 0, 0);
>
> This is still a bit hard to follow, and I guarantee it will be a pain to
> maintain over time, trying to keep both calls in sync, right?
>
> Why not fix up sbi_ecall() to get this correct instead?  It should be
> handling phys_addr_t values, not forcing you to do odd bit masking every
> single time you call it, right?  That would make things much easier
> overall, and this patch simpler, as well as the next one.

On RV32 systems, the physical address can be 34bits wide hence
the on RV32 we have to pass physical address as two parameters
whereas on RV64 entier physical address can be passed as single
parameter.

>
> Oh wait, sbi_ecall() is crazy, and just a pass-through, so that's not
> going to work, you need a wrapper function for this mess to do that bit
> twiddeling for you instead of forcing you to do it each time, I guess
> that's what you are trying to do here, but ick, is it correct?

Yes, it is better to have a wrapper function to hide the differences
of RV32 and RV64 systems. I will update.

>
> thanks,
>
> greg k-h
>
> --
> kvm-riscv mailing list
> kvm-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kvm-riscv

Regards,
Anup


Re: [PATCH v9 10/15] media: uapi: Add V4L2_CTRL_TYPE_FIXED_POINT

2023-11-17 Thread Sakari Ailus
Hi Hans,

Thank you for the patch.

On Fri, Nov 17, 2023 at 01:07:44PM +0100, Hans Verkuil wrote:
> Here is an RFC patch adding support for 'fraction_bits'. It's lacking
> documentation, but it can be used for testing.
> 
> It was rather a pain logging fixed point number in a reasonable format,
> but I think it is OK.
> 
> In userspace (where you can use floating point) it is a lot easier:
> 
> printf("%.*g\n", fraction_bits, (double)v * (1.0 / (1ULL << fraction_bits)));

I wonder if we could add a printk() format specifier for this. Doesn't need
to be done right now though, just an idea.

> 
> I decided to only expose fraction_bits in struct v4l2_query_ext_ctrl.
> I could add it to struct v4l2_queryctrl, but I did not think that was
> necessary. Other opinions are welcome.
> 
> In the meantime, let me know if this works for your patch series. If it
> does, then I can clean this up.
> 
> Regards,
> 
>   Hans
> 
> Signed-off-by: Hans Verkuil 
> ---
>  drivers/media/v4l2-core/v4l2-ctrls-api.c  |  1 +
>  drivers/media/v4l2-core/v4l2-ctrls-core.c | 72 +++
>  include/media/v4l2-ctrls.h|  7 ++-
>  include/uapi/linux/videodev2.h| 20 ++-
>  4 files changed, 85 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c 
> b/drivers/media/v4l2-core/v4l2-ctrls-api.c
> index 002ea6588edf..3132df315b17 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
> @@ -1101,6 +1101,7 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, 
> struct v4l2_query_ext_ctr
>   qc->elems = ctrl->elems;
>   qc->nr_of_dims = ctrl->nr_of_dims;
>   memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
> + qc->fraction_bits = ctrl->fraction_bits;
>   qc->minimum = ctrl->minimum;
>   qc->maximum = ctrl->maximum;
>   qc->default_value = ctrl->default_value;
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c 
> b/drivers/media/v4l2-core/v4l2-ctrls-core.c
> index a662fb60f73f..0e08a371af5c 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
> @@ -252,12 +252,42 @@ void v4l2_ctrl_type_op_init(const struct v4l2_ctrl 
> *ctrl, u32 from_idx,
>  }
>  EXPORT_SYMBOL(v4l2_ctrl_type_op_init);
> 
> +static void v4l2_ctrl_log_fp(s64 v, unsigned int fraction_bits)
> +{
> + s64 i = v4l2_fp_integer(v, fraction_bits);
> + s64 f = v4l2_fp_fraction(v, fraction_bits);
> +
> + if (!f) {
> + pr_cont("%lld", i);
> + } else if (fraction_bits < 20) {
> + u64 div = 1ULL << fraction_bits;
> +
> + if (!i && f < 0)
> + pr_cont("-%lld/%llu", -f, div);
> + else if (!i)
> + pr_cont("%lld/%llu", f, div);
> + else if (i < 0 || f < 0)
> + pr_cont("-%lld-%llu/%llu", -i, -f, div);
> + else
> + pr_cont("%lld+%llu/%llu", i, f, div);
> + } else {
> + if (!i && f < 0)
> + pr_cont("-%lld/(2^%u)", -f, fraction_bits);
> + else if (!i)
> + pr_cont("%lld/(2^%u)", f, fraction_bits);
> + else if (i < 0 || f < 0)
> + pr_cont("-%lld-%llu/(2^%u)", -i, -f, fraction_bits);
> + else
> + pr_cont("%lld+%llu/(2^%u)", i, f, fraction_bits);
> + }
> +}
> +
>  void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>  {
>   union v4l2_ctrl_ptr ptr = ctrl->p_cur;
> 
>   if (ctrl->is_array) {
> - unsigned i;
> + unsigned int i;
> 
>   for (i = 0; i < ctrl->nr_of_dims; i++)
>   pr_cont("[%u]", ctrl->dims[i]);
> @@ -266,7 +296,10 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
> 
>   switch (ctrl->type) {
>   case V4L2_CTRL_TYPE_INTEGER:
> - pr_cont("%d", *ptr.p_s32);
> + if (!ctrl->fraction_bits)
> + pr_cont("%d", *ptr.p_s32);
> + else
> + v4l2_ctrl_log_fp(*ptr.p_s32, ctrl->fraction_bits);
>   break;
>   case V4L2_CTRL_TYPE_BOOLEAN:
>   pr_cont("%s", *ptr.p_s32 ? "true" : "false");
> @@ -281,19 +314,31 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>   pr_cont("0x%08x", *ptr.p_s32);
>   break;
>   case V4L2_CTRL_TYPE_INTEGER64:
> - pr_cont("%lld", *ptr.p_s64);
> + if (!ctrl->fraction_bits)
> + pr_cont("%lld", *ptr.p_s64);
> + else
> + v4l2_ctrl_log_fp(*ptr.p_s64, ctrl->fraction_bits);
>   break;
>   case V4L2_CTRL_TYPE_STRING:
>   pr_cont("%s", ptr.p_char);
>   break;
>   case V4L2_CTRL_TYPE_U8:
> - pr_cont("%u", (unsigned)*ptr.p_u8);
> + if (!ctrl->fraction_bits)
> + pr_cont("%u"

Re: [PATCH v3 6/9] RISC-V: Add stubs for sbi_console_putchar/getchar()

2023-11-17 Thread Anup Patel
On Sat, Oct 21, 2023 at 10:05 PM Greg Kroah-Hartman
 wrote:
>
> On Fri, Oct 20, 2023 at 12:51:37PM +0530, Anup Patel wrote:
> > The functions sbi_console_putchar() and sbi_console_getchar() are
> > not defined when CONFIG_RISCV_SBI_V01 is disabled so let us add
> > stub of these functions to avoid "#ifdef" on user side.
> >
> > Signed-off-by: Anup Patel 
> > Reviewed-by: Andrew Jones 
> > ---
> >  arch/riscv/include/asm/sbi.h | 5 +
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
> > index 12dfda6bb924..cbcefa344417 100644
> > --- a/arch/riscv/include/asm/sbi.h
> > +++ b/arch/riscv/include/asm/sbi.h
> > @@ -271,8 +271,13 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned 
> > long arg0,
> >   unsigned long arg3, unsigned long arg4,
> >   unsigned long arg5);
> >
> > +#ifdef CONFIG_RISCV_SBI_V01
> >  void sbi_console_putchar(int ch);
> >  int sbi_console_getchar(void);
> > +#else
> > +static inline void sbi_console_putchar(int ch) { }
> > +static inline int sbi_console_getchar(void) { return -1; }
>
> Why not return a real error, "-1" isn't that :)

As-per SBI spec, the legacy sbi_console_getchar() returns
-1 upon failure hence the code.

Refer, section 5.3 of the latest SBI spec
https://github.com/riscv-non-isa/riscv-sbi-doc/releases/download/commit-fe4562532a9cc57e5743b6466946c5e5c98c73ca/riscv-sbi.pdf

Although, the users of this function only expect a negative
value upon failure so better to return proper error code here.

I will update.

>
> thanks,
>
> greg k-h
>
> --
> kvm-riscv mailing list
> kvm-ri...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kvm-riscv

Regards,
Anup


Re: [PATCH v9 10/15] media: uapi: Add V4L2_CTRL_TYPE_FIXED_POINT

2023-11-17 Thread Laurent Pinchart
Hi Hans,

Thank you for the patch.

On Fri, Nov 17, 2023 at 01:07:44PM +0100, Hans Verkuil wrote:
> Here is an RFC patch adding support for 'fraction_bits'. It's lacking
> documentation, but it can be used for testing.
> 
> It was rather a pain logging fixed point number in a reasonable format,
> but I think it is OK.
> 
> In userspace (where you can use floating point) it is a lot easier:
> 
> printf("%.*g\n", fraction_bits, (double)v * (1.0 / (1ULL << fraction_bits)));
> 
> I decided to only expose fraction_bits in struct v4l2_query_ext_ctrl.
> I could add it to struct v4l2_queryctrl, but I did not think that was
> necessary. Other opinions are welcome.

Agreed.

> In the meantime, let me know if this works for your patch series. If it
> does, then I can clean this up.
> 
> Regards,
> 
>   Hans
> 
> Signed-off-by: Hans Verkuil 
> ---
>  drivers/media/v4l2-core/v4l2-ctrls-api.c  |  1 +
>  drivers/media/v4l2-core/v4l2-ctrls-core.c | 72 +++
>  include/media/v4l2-ctrls.h|  7 ++-
>  include/uapi/linux/videodev2.h| 20 ++-
>  4 files changed, 85 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c 
> b/drivers/media/v4l2-core/v4l2-ctrls-api.c
> index 002ea6588edf..3132df315b17 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-api.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
> @@ -1101,6 +1101,7 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, 
> struct v4l2_query_ext_ctr
>   qc->elems = ctrl->elems;
>   qc->nr_of_dims = ctrl->nr_of_dims;
>   memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
> + qc->fraction_bits = ctrl->fraction_bits;
>   qc->minimum = ctrl->minimum;
>   qc->maximum = ctrl->maximum;
>   qc->default_value = ctrl->default_value;
> diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c 
> b/drivers/media/v4l2-core/v4l2-ctrls-core.c
> index a662fb60f73f..0e08a371af5c 100644
> --- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
> +++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
> @@ -252,12 +252,42 @@ void v4l2_ctrl_type_op_init(const struct v4l2_ctrl 
> *ctrl, u32 from_idx,
>  }
>  EXPORT_SYMBOL(v4l2_ctrl_type_op_init);
> 
> +static void v4l2_ctrl_log_fp(s64 v, unsigned int fraction_bits)
> +{
> + s64 i = v4l2_fp_integer(v, fraction_bits);
> + s64 f = v4l2_fp_fraction(v, fraction_bits);
> +
> + if (!f) {
> + pr_cont("%lld", i);
> + } else if (fraction_bits < 20) {
> + u64 div = 1ULL << fraction_bits;
> +
> + if (!i && f < 0)
> + pr_cont("-%lld/%llu", -f, div);
> + else if (!i)
> + pr_cont("%lld/%llu", f, div);
> + else if (i < 0 || f < 0)
> + pr_cont("-%lld-%llu/%llu", -i, -f, div);
> + else
> + pr_cont("%lld+%llu/%llu", i, f, div);
> + } else {
> + if (!i && f < 0)
> + pr_cont("-%lld/(2^%u)", -f, fraction_bits);
> + else if (!i)
> + pr_cont("%lld/(2^%u)", f, fraction_bits);
> + else if (i < 0 || f < 0)
> + pr_cont("-%lld-%llu/(2^%u)", -i, -f, fraction_bits);
> + else
> + pr_cont("%lld+%llu/(2^%u)", i, f, fraction_bits);
> + }
> +}
> +
>  void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>  {
>   union v4l2_ctrl_ptr ptr = ctrl->p_cur;
> 
>   if (ctrl->is_array) {
> - unsigned i;
> + unsigned int i;
> 
>   for (i = 0; i < ctrl->nr_of_dims; i++)
>   pr_cont("[%u]", ctrl->dims[i]);
> @@ -266,7 +296,10 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
> 
>   switch (ctrl->type) {
>   case V4L2_CTRL_TYPE_INTEGER:
> - pr_cont("%d", *ptr.p_s32);
> + if (!ctrl->fraction_bits)
> + pr_cont("%d", *ptr.p_s32);
> + else
> + v4l2_ctrl_log_fp(*ptr.p_s32, ctrl->fraction_bits);
>   break;
>   case V4L2_CTRL_TYPE_BOOLEAN:
>   pr_cont("%s", *ptr.p_s32 ? "true" : "false");
> @@ -281,19 +314,31 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
>   pr_cont("0x%08x", *ptr.p_s32);
>   break;
>   case V4L2_CTRL_TYPE_INTEGER64:
> - pr_cont("%lld", *ptr.p_s64);
> + if (!ctrl->fraction_bits)
> + pr_cont("%lld", *ptr.p_s64);
> + else
> + v4l2_ctrl_log_fp(*ptr.p_s64, ctrl->fraction_bits);
>   break;
>   case V4L2_CTRL_TYPE_STRING:
>   pr_cont("%s", ptr.p_char);
>   break;
>   case V4L2_CTRL_TYPE_U8:
> - pr_cont("%u", (unsigned)*ptr.p_u8);
> + if (!ctrl->fraction_bits)
> + pr_cont("%u", (unsigned int)*ptr.p_u8);
> + else
> + v4l2_ctrl_log_fp((unsigned int)*ptr.p_u8,

Re: [PATCH v9 10/15] media: uapi: Add V4L2_CTRL_TYPE_FIXED_POINT

2023-11-17 Thread Laurent Pinchart
Hi Tomasz,

On Thu, Nov 16, 2023 at 04:31:41PM +0900, Tomasz Figa wrote:
> On Wed, Nov 15, 2023 at 8:49 PM Laurent Pinchart wrote:
> > On Wed, Nov 15, 2023 at 12:19:31PM +0100, Hans Verkuil wrote:
> > > On 11/15/23 11:55, Laurent Pinchart wrote:
> > > > On Wed, Nov 15, 2023 at 09:09:42AM +0100, Hans Verkuil wrote:
> > > >> On 13/11/2023 13:44, Laurent Pinchart wrote:
> > > >>> On Mon, Nov 13, 2023 at 01:05:12PM +0100, Hans Verkuil wrote:
> > >  On 13/11/2023 12:43, Laurent Pinchart wrote:
> > > > On Mon, Nov 13, 2023 at 11:28:51AM +, Sakari Ailus wrote:
> > > >> On Mon, Nov 13, 2023 at 12:24:14PM +0100, Hans Verkuil wrote:
> > > >>> On 13/11/2023 12:07, Laurent Pinchart wrote:
> > >  On Mon, Nov 13, 2023 at 11:56:49AM +0100, Hans Verkuil wrote:
> > > > On 13/11/2023 11:42, Laurent Pinchart wrote:
> > > >> On Mon, Nov 13, 2023 at 11:29:09AM +0100, Hans Verkuil wrote:
> > > >>> On 10/11/2023 06:48, Shengjiu Wang wrote:
> > >  Fixed point controls are used by the user to configure
> > >  a fixed point value in 64bits, which Q31.32 format.
> > > 
> > >  Signed-off-by: Shengjiu Wang 
> > > >>>
> > > >>> This patch adds a new control type. This is something that 
> > > >>> also needs to be
> > > >>> tested by v4l2-compliance, and for that we need to add 
> > > >>> support for this to
> > > >>> one of the media test-drivers. The best place for that is the 
> > > >>> vivid driver,
> > > >>> since that has already a bunch of test controls for other 
> > > >>> control types.
> > > >>>
> > > >>> See e.g. VIVID_CID_INTEGER64 in vivid-ctrls.c.
> > > >>>
> > > >>> Can you add a patch adding a fixed point test control to 
> > > >>> vivid?
> > > >>
> > > >> I don't think V4L2_CTRL_TYPE_FIXED_POINT is a good idea. This 
> > > >> seems to
> > > >> relate more to units than control types. We have lots of 
> > > >> fixed-point
> > > >> values in controls already, using the 32-bit and 64-bit 
> > > >> integer control
> > > >> types. They use various locations for the decimal point, 
> > > >> depending on
> > > >> the control. If we want to make this more explicit to users, 
> > > >> we should
> > > >> work on adding unit support to the V4L2 controls.
> > > >
> > > > "Fixed Point" is not a unit, it's a type. 'Db', 'Hz' etc. are 
> > > > units.
> > > 
> > >  It's not a unit, but I think it's related to units. My point is 
> > >  that,
> > >  without units support, I don't see why we need a formal 
> > >  definition of
> > >  fixed-point types, and why this series couldn't just use
> > >  VIVID_CID_INTEGER64. Drivers already interpret 
> > >  VIVID_CID_INTEGER64
> > >  values as they see fit.
> > > >>>
> > > >>> They do? That's new to me. A quick grep for 
> > > >>> V4L2_CTRL_TYPE_INTEGER64
> > > >>> (I assume you meant that rather than VIVID_CID_INTEGER64) shows 
> > > >>> that it
> > > >
> > > > Yes, I meant V4L2_CTRL_TYPE_INTEGER64. Too hasty copy & paste :-)
> > > >
> > > >>> is always interpreted as a 64 bit integer and nothing else. As it 
> > > >>> should.
> > > >
> > > > The most common case for control handling in drivers is taking the
> > > > integer value and converting it to a register value, using
> > > > device-specific encoding of the register value. It can be a 
> > > > fixed-point
> > > > format or something else, depending on the device. My point is that
> > > > drivers routinely convert a "plain" integer to something else, and 
> > > > that
> > > > has never been considered as a cause of concern. I don't see why it
> > > > would be different in this series.
> > > >
> > > >>> And while we do not have support for units (other than the 
> > > >>> documentation),
> > > >>> we do have type support in the form of V4L2_CTRL_TYPE_*.
> > > >>>
> > > > A quick "git grep -i "fixed point" 
> > > > Documentation/userspace-api/media/'
> > > > only shows a single driver specific control (dw100.rst).
> > > >
> > > > I'm not aware of other controls in mainline that use fixed 
> > > > point.
> > > 
> > >  The analog gain control for sensors for instance.
> > > >>>
> > > >>> Not really. The documentation is super vague:
> > > >>>
> > > >>> V4L2_CID_ANALOGUE_GAIN (integer)
> > > >>>
> > > >>>   Analogue gain is gain affecting all colour components in 
> > > >>> the pixel matrix. The
> > > >>>   gain operation is performed in the analogue domain before 
> > > >>> A/D conversion.
> > > >>>
> > > >>> And the integer is just a

Re: [PATCH v9 10/15] media: uapi: Add V4L2_CTRL_TYPE_FIXED_POINT

2023-11-17 Thread Hans Verkuil
Here is an RFC patch adding support for 'fraction_bits'. It's lacking
documentation, but it can be used for testing.

It was rather a pain logging fixed point number in a reasonable format,
but I think it is OK.

In userspace (where you can use floating point) it is a lot easier:

printf("%.*g\n", fraction_bits, (double)v * (1.0 / (1ULL << fraction_bits)));

I decided to only expose fraction_bits in struct v4l2_query_ext_ctrl.
I could add it to struct v4l2_queryctrl, but I did not think that was
necessary. Other opinions are welcome.

In the meantime, let me know if this works for your patch series. If it
does, then I can clean this up.

Regards,

Hans

Signed-off-by: Hans Verkuil 
---
 drivers/media/v4l2-core/v4l2-ctrls-api.c  |  1 +
 drivers/media/v4l2-core/v4l2-ctrls-core.c | 72 +++
 include/media/v4l2-ctrls.h|  7 ++-
 include/uapi/linux/videodev2.h| 20 ++-
 4 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/drivers/media/v4l2-core/v4l2-ctrls-api.c 
b/drivers/media/v4l2-core/v4l2-ctrls-api.c
index 002ea6588edf..3132df315b17 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-api.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-api.c
@@ -1101,6 +1101,7 @@ int v4l2_query_ext_ctrl(struct v4l2_ctrl_handler *hdl, 
struct v4l2_query_ext_ctr
qc->elems = ctrl->elems;
qc->nr_of_dims = ctrl->nr_of_dims;
memcpy(qc->dims, ctrl->dims, qc->nr_of_dims * sizeof(qc->dims[0]));
+   qc->fraction_bits = ctrl->fraction_bits;
qc->minimum = ctrl->minimum;
qc->maximum = ctrl->maximum;
qc->default_value = ctrl->default_value;
diff --git a/drivers/media/v4l2-core/v4l2-ctrls-core.c 
b/drivers/media/v4l2-core/v4l2-ctrls-core.c
index a662fb60f73f..0e08a371af5c 100644
--- a/drivers/media/v4l2-core/v4l2-ctrls-core.c
+++ b/drivers/media/v4l2-core/v4l2-ctrls-core.c
@@ -252,12 +252,42 @@ void v4l2_ctrl_type_op_init(const struct v4l2_ctrl *ctrl, 
u32 from_idx,
 }
 EXPORT_SYMBOL(v4l2_ctrl_type_op_init);

+static void v4l2_ctrl_log_fp(s64 v, unsigned int fraction_bits)
+{
+   s64 i = v4l2_fp_integer(v, fraction_bits);
+   s64 f = v4l2_fp_fraction(v, fraction_bits);
+
+   if (!f) {
+   pr_cont("%lld", i);
+   } else if (fraction_bits < 20) {
+   u64 div = 1ULL << fraction_bits;
+
+   if (!i && f < 0)
+   pr_cont("-%lld/%llu", -f, div);
+   else if (!i)
+   pr_cont("%lld/%llu", f, div);
+   else if (i < 0 || f < 0)
+   pr_cont("-%lld-%llu/%llu", -i, -f, div);
+   else
+   pr_cont("%lld+%llu/%llu", i, f, div);
+   } else {
+   if (!i && f < 0)
+   pr_cont("-%lld/(2^%u)", -f, fraction_bits);
+   else if (!i)
+   pr_cont("%lld/(2^%u)", f, fraction_bits);
+   else if (i < 0 || f < 0)
+   pr_cont("-%lld-%llu/(2^%u)", -i, -f, fraction_bits);
+   else
+   pr_cont("%lld+%llu/(2^%u)", i, f, fraction_bits);
+   }
+}
+
 void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
 {
union v4l2_ctrl_ptr ptr = ctrl->p_cur;

if (ctrl->is_array) {
-   unsigned i;
+   unsigned int i;

for (i = 0; i < ctrl->nr_of_dims; i++)
pr_cont("[%u]", ctrl->dims[i]);
@@ -266,7 +296,10 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)

switch (ctrl->type) {
case V4L2_CTRL_TYPE_INTEGER:
-   pr_cont("%d", *ptr.p_s32);
+   if (!ctrl->fraction_bits)
+   pr_cont("%d", *ptr.p_s32);
+   else
+   v4l2_ctrl_log_fp(*ptr.p_s32, ctrl->fraction_bits);
break;
case V4L2_CTRL_TYPE_BOOLEAN:
pr_cont("%s", *ptr.p_s32 ? "true" : "false");
@@ -281,19 +314,31 @@ void v4l2_ctrl_type_op_log(const struct v4l2_ctrl *ctrl)
pr_cont("0x%08x", *ptr.p_s32);
break;
case V4L2_CTRL_TYPE_INTEGER64:
-   pr_cont("%lld", *ptr.p_s64);
+   if (!ctrl->fraction_bits)
+   pr_cont("%lld", *ptr.p_s64);
+   else
+   v4l2_ctrl_log_fp(*ptr.p_s64, ctrl->fraction_bits);
break;
case V4L2_CTRL_TYPE_STRING:
pr_cont("%s", ptr.p_char);
break;
case V4L2_CTRL_TYPE_U8:
-   pr_cont("%u", (unsigned)*ptr.p_u8);
+   if (!ctrl->fraction_bits)
+   pr_cont("%u", (unsigned int)*ptr.p_u8);
+   else
+   v4l2_ctrl_log_fp((unsigned int)*ptr.p_u8, 
ctrl->fraction_bits);
break;
case V4L2_CTRL_TYPE_U16:
-   pr_cont("%u", (unsigned)*ptr.p_u16);
+   if (!ctrl->fraction_bits)
+   pr_cont("%u", (unsigne

Re: [PATCH] powerpc: Restrict ARCH_HIBERNATION_POSSIBLE to supported configurations

2023-11-17 Thread Vishal Chourasia


On 17/11/23 4:52 am, Michael Ellerman wrote:
> Vishal Chourasia  writes:
>> On 15/11/23 5:46 pm, Aneesh Kumar K V wrote:
>>> On 11/15/23 5:23 PM, Vishal Chourasia wrote:
 On 15/11/23 1:39 pm, Aneesh Kumar K.V wrote:
> Vishal Chourasia  writes:
>
>> This patch modifies the ARCH_HIBERNATION_POSSIBLE option to ensure that 
>> it
>> correctly depends on these PowerPC configurations being enabled. As a 
>> result,
>> it prevents the HOTPLUG_CPU from being selected when the required 
>> dependencies
>> are not satisfied.
>>
>> This change aligns the dependency tree with the expected hardware 
>> support for
>> CPU hot-plugging under PowerPC architectures, ensuring that the kernel
>> configuration steps do not lead to inconsistent states.
>>
>> Signed-off-by: Vishal Chourasia 
>> ---
>> During the configuration process with 'make randconfig' followed by
>> 'make olddefconfig', we observed a warning indicating an unmet direct
>> dependency for the HOTPLUG_CPU option. The dependency in question 
>> relates to
>> various PowerPC configurations (PPC_PSERIES, PPC_PMAC, PPC_POWERNV,
>> FSL_SOC_BOOKE) which were not enabled, yet the HOTPLUG_CPU was 
>> beingDuring the configuration process with 'make randconfig' followed by
>> 'make olddefconfig', we observed a warning indicating an unmet direct
>> dependency for the HOTPLUG_CPU option. The dependency in question 
>> relates to
>> various PowerPC configurations (PPC_PSERIES, PPC_PMAC, PPC_POWERNV,
>> FSL_SOC_BOOKE) which were not enabled, yet the HOTPLUG_CPU was being
>> erroneously selected due to an implicit assumption by the PM_SLEEP_SMP 
>> option.
>> This misalignment in dependencies could potentially lead to inconsistent 
>> kernel
>> configuration states, especially when considering the necessary hardware
>> support for CPU hot-plugging on PowerPC platforms. The patch aims to 
>> correct
>> this by ensuring that ARCH_HIBERNATION_POSSIBLE is contingent upon the
>> appropriate PowerPC configurations being active.
>>
>> steps to reproduce (before applying the patch):
>>
>> Run 'make pseries_le_defconfig'
>> Run 'make menuconfig'
>> Enable hibernation [ Kernel options -> Hibernation (aka 'suspend to 
>> disk') ] 
>> Disable [ Platform support -> IBM PowerNV (Non-Virtualized) platform 
>> support ]
>> Disable [ Platform support -> IBM pSeries & new (POWER5-based) iSeries ]
>> Enable SMP [ Processor support -> Symmetric multi-processing support ]
>> Save the config
>> Run 'make olddefconfig'
>> erroneously selected due to an implicit assumption by the PM_SLEEP_SMP 
>> option.
>> This misalignment in dependencies could potentially lead to inconsistent 
>> kernel
>> configuration states, especially when considering the necessary hardware
>> support for CPU hot-plugging on PowerPC platforms. The patch aims to 
>> correct
>> this by ensuring that ARCH_HIBERNATION_POSSIBLE is contingent upon the
>> appropriate PowerPC configurations being active.
>>
>> steps to reproduce (before applying the patch):
>>
>> Run 'make pseries_le_defconfig'
>> Run 'make menuconfig'
>> Enable hibernation [ Kernel options -> Hibernation (aka 'suspend to 
>> disk') ] 
>> Disable [ Platform support -> IBM PowerNV (Non-Virtualized) platform 
>> support ]
>> Disable [ Platform support -> IBM pSeries & new (POWER5-based) iSeries ]
>> Enable SMP [ Processor support -> Symmetric multi-processing support ]
>> Save the config
>> Run 'make olddefconfig'
>>
>>  arch/powerpc/Kconfig | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 6f105ee4f3cf..bf99ff9869f6 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -380,8 +380,9 @@ config DEFAULT_UIMAGE
>>Used to allow a board to specify it wants a uImage built by 
>> default
>>  
>>  config ARCH_HIBERNATION_POSSIBLE
>> -bool
>> -default y
>> +def_bool y
>> +depends on PPC_PSERIES || \
>> +PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE
>>  
>>  config ARCH_SUSPEND_POSSIBLE
>>  def_bool y
>>
> I am wondering whether it should be switched to using select from
> config PPC? 
>
> selecting ARCH_HIBERNATION_POSSIBLE based on value of config PPC
> will not guarantee config PPC_PSERIES being set
>
> PPC_PSERIES can be set to N, even when config PPC is set.
>> I understand what you meant before. Having ARCH_HIBERNATION_POSSIBLE under 
>> config PPC makes more sense.
> grep -A 5 -i "config ppc_pseries" arch/powerpc/platforms/pseries/Kconfig
> config PPC_PSERIES
>     depends on PPC64 && 

[PATCH net-next 00/10] net*: Convert to platform remove callback returning void

2023-11-17 Thread Uwe Kleine-König
Hello,

this series converts the platform drivers below drivers/net that are not
covered in the two other series converting drivers/net/ethernet and
drivers/net/wireless. I put them all in a single series even though they
are not maintained together. I thought that to be better than sending
them out individually, I hope you agree.

See commit 5c5a7680e67b ("platform: Provide a remove callback that
returns no value") for an extended explanation and the eventual goal.
The TL;DR; is to make it harder for driver authors to leak resources
without noticing.

The first patch is a fix, but I don't think it's worth to add that to
stable, it was broken since v5.7-rc1 and nobody seems to have hit the
problem.

Best regards
Uwe

Uwe Kleine-König (10):
  net: ipa: Don't error out in .remove()
  net: ipa: Convert to platform remove callback returning void
  net: fjes: Convert to platform remove callback returning void
  net: pcs: rzn1-miic: Convert to platform remove callback returning
void
  net: sfp: Convert to platform remove callback returning void
  net: wan/fsl_ucc_hdlc: Convert to platform remove callback returning
void
  net: wan/ixp4xx_hss: Convert to platform remove callback returning
void
  net: wwan: qcom_bam_dmux: Convert to platform remove callback
returning void
  ieee802154: fakelb: Convert to platform remove callback returning void
  ieee802154: hwsim: Convert to platform remove callback returning void

 drivers/net/fjes/fjes_main.c |  6 ++
 drivers/net/ieee802154/fakelb.c  |  5 ++---
 drivers/net/ieee802154/mac802154_hwsim.c |  6 ++
 drivers/net/ipa/ipa_main.c   | 20 +---
 drivers/net/pcs/pcs-rzn1-miic.c  |  6 ++
 drivers/net/phy/sfp.c|  6 ++
 drivers/net/wan/fsl_ucc_hdlc.c   |  6 ++
 drivers/net/wan/ixp4xx_hss.c |  5 ++---
 drivers/net/wwan/qcom_bam_dmux.c |  6 ++
 9 files changed, 21 insertions(+), 45 deletions(-)

base-commit: eff99d8edbed7918317331ebd1e365d8e955d65e
-- 
2.42.0



[PATCH net-next 06/10] net: wan/fsl_ucc_hdlc: Convert to platform remove callback returning void

2023-11-17 Thread Uwe Kleine-König
The .remove() callback for a platform driver returns an int which makes
many driver authors wrongly assume it's possible to do error handling by
returning an error code. However the value returned is ignored (apart
from emitting a warning) and this typically results in resource leaks.

To improve here there is a quest to make the remove callback return
void. In the first step of this quest all drivers are converted to
.remove_new(), which already returns void. Eventually after all drivers
are converted, .remove_new() will be renamed to .remove().

Trivially convert this driver from always returning zero in the remove
callback to the void returning variant.

Signed-off-by: Uwe Kleine-König 
---
 drivers/net/wan/fsl_ucc_hdlc.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index fd50bb313b92..605e70f7baac 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -1259,7 +1259,7 @@ static int ucc_hdlc_probe(struct platform_device *pdev)
return ret;
 }
 
-static int ucc_hdlc_remove(struct platform_device *pdev)
+static void ucc_hdlc_remove(struct platform_device *pdev)
 {
struct ucc_hdlc_private *priv = dev_get_drvdata(&pdev->dev);
 
@@ -1277,8 +1277,6 @@ static int ucc_hdlc_remove(struct platform_device *pdev)
kfree(priv);
 
dev_info(&pdev->dev, "UCC based hdlc module removed\n");
-
-   return 0;
 }
 
 static const struct of_device_id fsl_ucc_hdlc_of_match[] = {
@@ -1292,7 +1290,7 @@ MODULE_DEVICE_TABLE(of, fsl_ucc_hdlc_of_match);
 
 static struct platform_driver ucc_hdlc_driver = {
.probe  = ucc_hdlc_probe,
-   .remove = ucc_hdlc_remove,
+   .remove_new = ucc_hdlc_remove,
.driver = {
.name   = DRV_NAME,
.pm = HDLC_PM_OPS,
-- 
2.42.0



Re: [PATCH v4 1/5] crypto: mxs-dcp: Add support for hardware-bound keys

2023-11-17 Thread Herbert Xu
On Tue, Oct 24, 2023 at 06:20:15PM +0200, David Gstir wrote:
> DCP is capable of performing AES with two hardware-bound keys:
> 
> - The one-time programmable (OTP) key which is burnt via on-chip fuses
> - The unique key (UK) which is derived from the OTP key
> 
> In addition to the two hardware-bound keys, DCP also supports
> storing keys in 4 dedicated key slots within its secure memory area
> (internal SRAM).
> 
> These keys are not stored in main memory and are therefore
> not directly accessible by the operating system. To use them
> for AES operations, a one-byte key reference has to supplied
> with the DCP operation descriptor in the control register.
> 
> This adds support for using any of these 6 keys through the crypto API
> via their key reference after they have been set up. The main purpose
> is to add support for DCP-backed trusted keys. Other use cases are
> possible too (see similar existing paes implementations), but these
> should carefully be evaluated as e.g. enabling AF_ALG will give
> userspace full access to use keys. In scenarios with untrustworthy
> userspace, this will enable en-/decryption oracles.
> 
> Co-developed-by: Richard Weinberger 
> Signed-off-by: Richard Weinberger 
> Co-developed-by: David Oberhollenzer 
> Signed-off-by: David Oberhollenzer 
> Signed-off-by: David Gstir 
> ---
>  drivers/crypto/mxs-dcp.c | 104 ++-
>  include/soc/fsl/dcp.h|  17 +++
>  2 files changed, 110 insertions(+), 11 deletions(-)
>  create mode 100644 include/soc/fsl/dcp.h

Acked-by: Herbert Xu 
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH 2/7] kexec_file: print out debugging message if required

2023-11-17 Thread Liu, Yujie
Hi Baoquan,

On Fri, 2023-11-17 at 17:14 +0800, Baoquan He wrote:
> Hi,
> 
> On 11/16/23 at 05:04am, kernel test robot wrote:
> > Hi Baoquan,
> > 
> > kernel test robot noticed the following build errors:
> > 
> > [auto build test ERROR on arm64/for-next/core]
> > [also build test ERROR on tip/x86/core powerpc/next powerpc/fixes 
> > linus/master v6.7-rc1 next-20231115]
> > [If your patch is applied to the wrong git tree, kindly drop us a note.
> > And when submitting patch, we suggest to use '--base' as documented in
> > https://git-scm.com/docs/git-format-patch#_base_tree_information]
> > 
> > url:    
> > https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/kexec_file-add-kexec_file-flag-to-control-debug-printing/20231114-234003
> > base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git 
> > for-next/core
> > patch link:    
> > https://lore.kernel.org/r/20231114153253.241262-3-bhe%40redhat.com
> > patch subject: [PATCH 2/7] kexec_file: print out debugging message if 
> > required
> > config: hexagon-comet_defconfig 
> > (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/config)
> > compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git 
> > ae42196bc493ffe877a7e3dff8be32035dea4d07)
> > reproduce (this is a W=1 build): 
> > (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/reproduce)
> > 
> 
> Thanks for reporting.
> 
> I met below failure when following the steps of provided reproducer.
> Could anyone help check what's wrong with that?

Sorry this seems to be a bug in the reproducer. Could you please change
the compiler parameter to "COMPILER=clang-16" and rerun the command? We
will fix the issue ASAP.

Thanks,
Yujie

> [root@~ linux]# COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang 
> ~/bin/make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
> Compiler will be installed in /root/0day
> lftpget -c https://cdn.kernel.org/pub/tools/llvm/files/
> get1: /pub/tools/llvm/files/: files/: Is a directory
> Failed to download https://cdn.kernel.org/pub/tools/llvm/files/
> clang crosstool install failed
> Install clang compiler failed
> setup_crosstool failed




Re: [PATCH 2/7] kexec_file: print out debugging message if required

2023-11-17 Thread Baoquan He
Hi,

On 11/16/23 at 05:04am, kernel test robot wrote:
> Hi Baoquan,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on arm64/for-next/core]
> [also build test ERROR on tip/x86/core powerpc/next powerpc/fixes 
> linus/master v6.7-rc1 next-20231115]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:
> https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/kexec_file-add-kexec_file-flag-to-control-debug-printing/20231114-234003
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git 
> for-next/core
> patch link:
> https://lore.kernel.org/r/20231114153253.241262-3-bhe%40redhat.com
> patch subject: [PATCH 2/7] kexec_file: print out debugging message if required
> config: hexagon-comet_defconfig 
> (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/config)
> compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git 
> ae42196bc493ffe877a7e3dff8be32035dea4d07)
> reproduce (this is a W=1 build): 
> (https://download.01.org/0day-ci/archive/20231116/202311160431.bxpc7no9-...@intel.com/reproduce)
> 

Thanks for reporting.

I met below failure when following the steps of provided reproducer.
Could anyone help check what's wrong with that?

[root@~ linux]# COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang 
~/bin/make.cross W=1 O=build_dir ARCH=hexagon olddefconfig
Compiler will be installed in /root/0day
lftpget -c https://cdn.kernel.org/pub/tools/llvm/files/
get1: /pub/tools/llvm/files/: files/: Is a directory
Failed to download https://cdn.kernel.org/pub/tools/llvm/files/
clang crosstool install failed
Install clang compiler failed
setup_crosstool failed

Thanks
Baoquan



Re: [PATCH] powerpc: Fix data corruption on IPI

2023-11-17 Thread Timothy Pearson



- Original Message -
> From: "Timothy Pearson" 
> To: "Timothy Pearson" 
> Cc: "npiggin" , "linuxppc-dev" 
> 
> Sent: Friday, November 17, 2023 2:26:37 AM
> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI

> - Original Message -
>> From: "Timothy Pearson" 
>> To: "npiggin" 
>> Cc: "linuxppc-dev" 
>> Sent: Friday, November 17, 2023 2:20:29 AM
>> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
> 
>> - Original Message -
>>> From: "npiggin" 
>>> To: "Timothy Pearson" , "Michael Ellerman"
>>> 
>>> Cc: "linuxppc-dev" 
>>> Sent: Friday, November 17, 2023 2:01:12 AM
>>> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
>> 
>>> On Fri Nov 17, 2023 at 5:39 PM AEST, Timothy Pearson wrote:


 - Original Message -
 > From: "Michael Ellerman" 
 > To: "Timothy Pearson" , "linuxppc-dev"
 > 
 > Cc: "Jens Axboe" 
 > Sent: Tuesday, November 14, 2023 6:14:37 AM
 > Subject: Re: [PATCH] powerpc: Fix data corruption on IPI

 > Hi Timothy,
 > 
 > Thanks for debugging this, but I'm unclear why this is helping because
 > we should already have a full barrier (hwsync) on both the sending and
 > receiving side.
 > 
 > More below.

 I've spent another few days poking at this, and think I might finally have
 something more solid in terms of what exactly is happening, but would like 
 some
 feedback on the concept / how best to fix the potential problem.

 As background, there are several worker threads both in userspace and in 
 kernel
 mode.  Crucially, the main MariaDB data processing thread (the one that 
 handles
 tasks like flushing dirty pages to disk) always runs on the same core as 
 the
 io_uring kernel thread that picks up I/O worker creation requests and 
 handles
 them via create_worker_cb().

 Changes in the ~5.12 era switched away from a delayed worker setup.  
 io_uring
 currently sets up the new process with create_io_thread(), and immediately 
 uses
 an IPI to forcibly schedule the new process.  Because of the way the two
 threads interact, the new process ends up grabbing the CPU from the running
 MariaDB user thread; I've never seen it schedule on a different core.  If 
 the
 timing is right in this process, things get trampled on in userspace and 
 the
 database server either crashes or throws a corruption fault.

 Through extensive debugging, I've narrowed this down to invalid state in 
 the VSX
 registers on return to the MariaDB user thread from the new kernel thread. 
  For
 some reason, it seems we don't restore FP state on return from the 
 PF_IO_WORKER
 thread, and something in the kernel was busy writing new data to them.

 A direct example I was able to observe is as follows:

 xxspltd vs0,vs0,0  <-- vs0 now zeroed out
 xorir9,r9,1<-- Presumably we switch to the new kernel thread 
 here
 due to the IPI
 slwir9,r9,7<-- On userspace thread resume, vs0 now contains the
 value 0x8200400082004000
 xxswapd vs8,vs0<-- vs8 now has the wrong value
 stxvd2x vs8,r3,r12 <-- userspace is now getting stepped on
 stw r9,116(r3)
 stxvd2x vs8,r3,r0
 ...
 CRASH
>>> 
>>> Nice find, that looks pretty conclusive.
>>> 
 This is a very difficult race to hit, but MariaDB naturally does repetitive
 operations with VSX registers so it does eventually fail.  I ended up with 
 a
 tight loop around glibc operations that use VSX to trigger the failure 
 reliably
 enough to even understand what was going on.

 As I am not as familiar with this part of the Linux kernel as with most 
 other
 areas, what is the expected save/restore path for the FP/VSX registers 
 around
 an IPI and associated forced thread switch?  If restore_math() is in the 
 return
 path, note that MSR_FP is set in regs->msr.
>>> 
>>> Context switching these FP/vec registers should be pretty robust in
>>> general because it's not just io-uring that uses them. io-uring could
>>> be using some uncommon kernel code that uses the registers incorrectly
>>> though I guess.
>>> 

 Second question: should we even be using the VSX registers at all in kernel
 space?  Is this a side effect of io_uring interacting so closely with 
 userspace
 threads, or something else entirely?

 If I can get pointed somewhat in the right direction I'm ready to develop 
 the
 rest of the fix for this issue...just trying to avoid another several days 
 of
 slogging through the source to see what it's supposed to be doing in the 
 first
 place. :)
>>> 
>>> Kernel can use FP/VEC/VSX registers but it has to enable and disable
>>> explicitly. Such kernel code also should not be preemptible.
>>> 
>>> enable|disable_kernel_f

Re: [PATCH] powerpc: Restrict ARCH_HIBERNATION_POSSIBLE to supported configurations

2023-11-17 Thread Vishal Chourasia


On 17/11/23 4:52 am, Michael Ellerman wrote:
> Vishal Chourasia  writes:
>> On 15/11/23 5:46 pm, Aneesh Kumar K V wrote:
>>> On 11/15/23 5:23 PM, Vishal Chourasia wrote:
 On 15/11/23 1:39 pm, Aneesh Kumar K.V wrote:
> Vishal Chourasia  writes:
>
>> This patch modifies the ARCH_HIBERNATION_POSSIBLE option to ensure that 
>> it
>> correctly depends on these PowerPC configurations being enabled. As a 
>> result,
>> it prevents the HOTPLUG_CPU from being selected when the required 
>> dependencies
>> are not satisfied.
>>
>> This change aligns the dependency tree with the expected hardware 
>> support for
>> CPU hot-plugging under PowerPC architectures, ensuring that the kernel
>> configuration steps do not lead to inconsistent states.
>>
>> Signed-off-by: Vishal Chourasia 
>> ---
>> During the configuration process with 'make randconfig' followed by
>> 'make olddefconfig', we observed a warning indicating an unmet direct
>> dependency for the HOTPLUG_CPU option. The dependency in question 
>> relates to
>> various PowerPC configurations (PPC_PSERIES, PPC_PMAC, PPC_POWERNV,
>> FSL_SOC_BOOKE) which were not enabled, yet the HOTPLUG_CPU was being
>> erroneously selected due to an implicit assumption by the PM_SLEEP_SMP 
>> option.
>> This misalignment in dependencies could potentially lead to inconsistent 
>> kernel
>> configuration states, especially when considering the necessary hardware
>> support for CPU hot-plugging on PowerPC platforms. The patch aims to 
>> correct
>> this by ensuring that ARCH_HIBERNATION_POSSIBLE is contingent upon the
>> appropriate PowerPC configurations being active.
>>
>> steps to reproduce (before applying the patch):
>>
>> Run 'make pseries_le_defconfig'
>> Run 'make menuconfig'
>> Enable hibernation [ Kernel options -> Hibernation (aka 'suspend to 
>> disk') ] 
>> Disable [ Platform support -> IBM PowerNV (Non-Virtualized) platform 
>> support ]
>> Disable [ Platform support -> IBM pSeries & new (POWER5-based) iSeries ]
>> Enable SMP [ Processor support -> Symmetric multi-processing support ]
>> Save the config
>> Run 'make olddefconfig'
>>
>>  arch/powerpc/Kconfig | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
>> index 6f105ee4f3cf..bf99ff9869f6 100644
>> --- a/arch/powerpc/Kconfig
>> +++ b/arch/powerpc/Kconfig
>> @@ -380,8 +380,9 @@ config DEFAULT_UIMAGE
>>Used to allow a board to specify it wants a uImage built by 
>> default
>>  
>>  config ARCH_HIBERNATION_POSSIBLE
>> -bool
>> -default y
>> +def_bool y
>> +depends on PPC_PSERIES || \
>> +PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE
>>  
>>  config ARCH_SUSPEND_POSSIBLE
>>  def_bool y
>>
> I am wondering whether it should be switched to using select from
> config PPC? 
>
> selecting ARCH_HIBERNATION_POSSIBLE based on value of config PPC
> will not guarantee config PPC_PSERIES being set
>
> PPC_PSERIES can be set to N, even when config PPC is set.
>> I understand what you meant before. Having ARCH_HIBERNATION_POSSIBLE under 
>> config PPC makes more sense.
> grep -A 5 -i "config ppc_pseries" arch/powerpc/platforms/pseries/Kconfig
> config PPC_PSERIES
>     depends on PPC64 && PPC_BOOK3S
>     bool "IBM pSeries & new (POWER5-based) iSeries"
>     select HAVE_PCSPKR_PLATFORM
>     select MPIC
>     select OF_DYNAMIC
>
>>> modified   arch/powerpc/Kconfig
>>> @@ -156,6 +156,7 @@ config PPC
>>> select ARCH_HAS_UACCESS_FLUSHCACHE
>>> select ARCH_HAS_UBSAN_SANITIZE_ALL
>>> select ARCH_HAVE_NMI_SAFE_CMPXCHG
>>> +   select ARCH_HIBERNATION_POSSIBLEif (PPC_PSERIES || PPC_PMAC || 
>>> PPC_POWERNV || FSL_SOC_BOOKE)
>>> select ARCH_KEEP_MEMBLOCK
>>> select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
>>> select ARCH_MIGHT_HAVE_PC_PARPORT
>> Though, even with these changes I was able to reproduce same warnings. 
>> (using steps from above)
>> It's because one can enable HIBERNATION manually.
> But how? You shouldn't be able to enable it manually, it depends on
> ARCH_HIBERNATION_POSSIBLE which shouldn't be enabled.
>
> For the above to work you also need to make it default n, eg:
>
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 6f105ee4f3cf..dd2a9b938188 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -380,8 +380,7 @@ config DEFAULT_UIMAGE
>   Used to allow a board to specify it wants a uImage built by default
>
>  config ARCH_HIBERNATION_POSSIBLE
> -   bool
> -   default y
> +   def_bool n
>
>  config ARCH_SUSPEND_POSSIBLE
> def_bool y
Verified.


Re: [PATCH] powerpc: Fix data corruption on IPI

2023-11-17 Thread Timothy Pearson



- Original Message -
> From: "Timothy Pearson" 
> To: "npiggin" 
> Cc: "linuxppc-dev" 
> Sent: Friday, November 17, 2023 2:20:29 AM
> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI

> - Original Message -
>> From: "npiggin" 
>> To: "Timothy Pearson" , "Michael Ellerman"
>> 
>> Cc: "linuxppc-dev" 
>> Sent: Friday, November 17, 2023 2:01:12 AM
>> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
> 
>> On Fri Nov 17, 2023 at 5:39 PM AEST, Timothy Pearson wrote:
>>>
>>>
>>> - Original Message -
>>> > From: "Michael Ellerman" 
>>> > To: "Timothy Pearson" , "linuxppc-dev"
>>> > 
>>> > Cc: "Jens Axboe" 
>>> > Sent: Tuesday, November 14, 2023 6:14:37 AM
>>> > Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
>>>
>>> > Hi Timothy,
>>> > 
>>> > Thanks for debugging this, but I'm unclear why this is helping because
>>> > we should already have a full barrier (hwsync) on both the sending and
>>> > receiving side.
>>> > 
>>> > More below.
>>>
>>> I've spent another few days poking at this, and think I might finally have
>>> something more solid in terms of what exactly is happening, but would like 
>>> some
>>> feedback on the concept / how best to fix the potential problem.
>>>
>>> As background, there are several worker threads both in userspace and in 
>>> kernel
>>> mode.  Crucially, the main MariaDB data processing thread (the one that 
>>> handles
>>> tasks like flushing dirty pages to disk) always runs on the same core as the
>>> io_uring kernel thread that picks up I/O worker creation requests and 
>>> handles
>>> them via create_worker_cb().
>>>
>>> Changes in the ~5.12 era switched away from a delayed worker setup.  
>>> io_uring
>>> currently sets up the new process with create_io_thread(), and immediately 
>>> uses
>>> an IPI to forcibly schedule the new process.  Because of the way the two
>>> threads interact, the new process ends up grabbing the CPU from the running
>>> MariaDB user thread; I've never seen it schedule on a different core.  If 
>>> the
>>> timing is right in this process, things get trampled on in userspace and the
>>> database server either crashes or throws a corruption fault.
>>>
>>> Through extensive debugging, I've narrowed this down to invalid state in 
>>> the VSX
>>> registers on return to the MariaDB user thread from the new kernel thread.  
>>> For
>>> some reason, it seems we don't restore FP state on return from the 
>>> PF_IO_WORKER
>>> thread, and something in the kernel was busy writing new data to them.
>>>
>>> A direct example I was able to observe is as follows:
>>>
>>> xxspltd vs0,vs0,0  <-- vs0 now zeroed out
>>> xorir9,r9,1<-- Presumably we switch to the new kernel thread 
>>> here
>>> due to the IPI
>>> slwir9,r9,7<-- On userspace thread resume, vs0 now contains the
>>> value 0x8200400082004000
>>> xxswapd vs8,vs0<-- vs8 now has the wrong value
>>> stxvd2x vs8,r3,r12 <-- userspace is now getting stepped on
>>> stw r9,116(r3)
>>> stxvd2x vs8,r3,r0
>>> ...
>>> CRASH
>> 
>> Nice find, that looks pretty conclusive.
>> 
>>> This is a very difficult race to hit, but MariaDB naturally does repetitive
>>> operations with VSX registers so it does eventually fail.  I ended up with a
>>> tight loop around glibc operations that use VSX to trigger the failure 
>>> reliably
>>> enough to even understand what was going on.
>>>
>>> As I am not as familiar with this part of the Linux kernel as with most 
>>> other
>>> areas, what is the expected save/restore path for the FP/VSX registers 
>>> around
>>> an IPI and associated forced thread switch?  If restore_math() is in the 
>>> return
>>> path, note that MSR_FP is set in regs->msr.
>> 
>> Context switching these FP/vec registers should be pretty robust in
>> general because it's not just io-uring that uses them. io-uring could
>> be using some uncommon kernel code that uses the registers incorrectly
>> though I guess.
>> 
>>>
>>> Second question: should we even be using the VSX registers at all in kernel
>>> space?  Is this a side effect of io_uring interacting so closely with 
>>> userspace
>>> threads, or something else entirely?
>>>
>>> If I can get pointed somewhat in the right direction I'm ready to develop 
>>> the
>>> rest of the fix for this issue...just trying to avoid another several days 
>>> of
>>> slogging through the source to see what it's supposed to be doing in the 
>>> first
>>> place. :)
>> 
>> Kernel can use FP/VEC/VSX registers but it has to enable and disable
>> explicitly. Such kernel code also should not be preemptible.
>> 
>> enable|disable_kernel_fp|altivec|vsx().
>> 
>> Maybe try run the test with ppc_strict_facility_enable boot option to
>> start with.
>> 
>> If no luck with that, maybe put WARN_ON(preemptible()); checks also in
>> the disable_kernel_* functions.
>> 
>> You could also add an enable/disable counter for each, and make sure it
>> is balanced on context switch or kernel->userspace exit.
>> 

Re: [PATCH 2/7] kexec_file: print out debugging message if required

2023-11-17 Thread Baoquan He
On 11/16/23 at 05:57am, kernel test robot wrote:
> Hi Baoquan,
> 
> kernel test robot noticed the following build errors:
> 
> [auto build test ERROR on arm64/for-next/core]
> [also build test ERROR on tip/x86/core powerpc/next powerpc/fixes 
> linus/master v6.7-rc1 next-20231115]
> [If your patch is applied to the wrong git tree, kindly drop us a note.
> And when submitting patch, we suggest to use '--base' as documented in
> https://git-scm.com/docs/git-format-patch#_base_tree_information]
> 
> url:
> https://github.com/intel-lab-lkp/linux/commits/Baoquan-He/kexec_file-add-kexec_file-flag-to-control-debug-printing/20231114-234003
> base:   https://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git 
> for-next/core
> patch link:
> https://lore.kernel.org/r/20231114153253.241262-3-bhe%40redhat.com
> patch subject: [PATCH 2/7] kexec_file: print out debugging message if required
> config: x86_64-randconfig-002-20231115 
> (https://download.01.org/0day-ci/archive/20231116/202311160502.jnu7b8kf-...@intel.com/config)
> compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
> reproduce (this is a W=1 build): 
> (https://download.01.org/0day-ci/archive/20231116/202311160502.jnu7b8kf-...@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version 
> of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot 
> | Closes: 
> https://lore.kernel.org/oe-kbuild-all/202311160502.jnu7b8kf-...@intel.com/
> 
> All errors (new ones prefixed by >>):
> 
>kernel/crash_core.c: In function 'crash_prepare_elf64_headers':
> >> kernel/crash_core.c:412:17: error: implicit declaration of function 
> >> 'kexec_dprintk'; did you mean '_dev_printk'? 
> >> [-Werror=implicit-function-declaration]
>  412 | kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p 
> vaddr=0x%llx, paddr=0x%llx, "
>  | ^
>  | _dev_printk
>cc1: some warnings being treated as errors
> 
> 
> vim +412 kernel/crash_core.c

Thanks for reporting this.

Below code can fix it.

crash_prepare_elf64_headers() could be used by kexec_load interface for
cpu/memory hotplug updating elfcoreheader, or by kexec_file_load
interface for elfcoreheader composing or updating. Only print out the
debugging message about ELF core header when kexec_file_load interface
is taken.

diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 41001ffbaa99..1485fd7bb67f 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -551,10 +551,12 @@ int crash_prepare_elf64_headers(struct crash_mem *mem, 
int need_kernel_map,
phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
phdr->p_align = 0;
ehdr->e_phnum++;
+#ifdef CONFIG_KEXEC_FILE
kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, 
paddr=0x%llx, "
"sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
ehdr->e_phnum, phdr->p_offset);
+#endif
phdr++;
}



Re: [PATCH] powerpc: Fix data corruption on IPI

2023-11-17 Thread Timothy Pearson



- Original Message -
> From: "npiggin" 
> To: "Timothy Pearson" , "Michael Ellerman" 
> 
> Cc: "linuxppc-dev" 
> Sent: Friday, November 17, 2023 2:01:12 AM
> Subject: Re: [PATCH] powerpc: Fix data corruption on IPI

> On Fri Nov 17, 2023 at 5:39 PM AEST, Timothy Pearson wrote:
>>
>>
>> - Original Message -
>> > From: "Michael Ellerman" 
>> > To: "Timothy Pearson" , "linuxppc-dev"
>> > 
>> > Cc: "Jens Axboe" 
>> > Sent: Tuesday, November 14, 2023 6:14:37 AM
>> > Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
>>
>> > Hi Timothy,
>> > 
>> > Thanks for debugging this, but I'm unclear why this is helping because
>> > we should already have a full barrier (hwsync) on both the sending and
>> > receiving side.
>> > 
>> > More below.
>>
>> I've spent another few days poking at this, and think I might finally have
>> something more solid in terms of what exactly is happening, but would like 
>> some
>> feedback on the concept / how best to fix the potential problem.
>>
>> As background, there are several worker threads both in userspace and in 
>> kernel
>> mode.  Crucially, the main MariaDB data processing thread (the one that 
>> handles
>> tasks like flushing dirty pages to disk) always runs on the same core as the
>> io_uring kernel thread that picks up I/O worker creation requests and handles
>> them via create_worker_cb().
>>
>> Changes in the ~5.12 era switched away from a delayed worker setup.  io_uring
>> currently sets up the new process with create_io_thread(), and immediately 
>> uses
>> an IPI to forcibly schedule the new process.  Because of the way the two
>> threads interact, the new process ends up grabbing the CPU from the running
>> MariaDB user thread; I've never seen it schedule on a different core.  If the
>> timing is right in this process, things get trampled on in userspace and the
>> database server either crashes or throws a corruption fault.
>>
>> Through extensive debugging, I've narrowed this down to invalid state in the 
>> VSX
>> registers on return to the MariaDB user thread from the new kernel thread.  
>> For
>> some reason, it seems we don't restore FP state on return from the 
>> PF_IO_WORKER
>> thread, and something in the kernel was busy writing new data to them.
>>
>> A direct example I was able to observe is as follows:
>>
>> xxspltd vs0,vs0,0  <-- vs0 now zeroed out
>> xorir9,r9,1<-- Presumably we switch to the new kernel thread here
>> due to the IPI
>> slwir9,r9,7<-- On userspace thread resume, vs0 now contains the
>> value 0x8200400082004000
>> xxswapd vs8,vs0<-- vs8 now has the wrong value
>> stxvd2x vs8,r3,r12 <-- userspace is now getting stepped on
>> stw r9,116(r3)
>> stxvd2x vs8,r3,r0
>> ...
>> CRASH
> 
> Nice find, that looks pretty conclusive.
> 
>> This is a very difficult race to hit, but MariaDB naturally does repetitive
>> operations with VSX registers so it does eventually fail.  I ended up with a
>> tight loop around glibc operations that use VSX to trigger the failure 
>> reliably
>> enough to even understand what was going on.
>>
>> As I am not as familiar with this part of the Linux kernel as with most other
>> areas, what is the expected save/restore path for the FP/VSX registers around
>> an IPI and associated forced thread switch?  If restore_math() is in the 
>> return
>> path, note that MSR_FP is set in regs->msr.
> 
> Context switching these FP/vec registers should be pretty robust in
> general because it's not just io-uring that uses them. io-uring could
> be using some uncommon kernel code that uses the registers incorrectly
> though I guess.
> 
>>
>> Second question: should we even be using the VSX registers at all in kernel
>> space?  Is this a side effect of io_uring interacting so closely with 
>> userspace
>> threads, or something else entirely?
>>
>> If I can get pointed somewhat in the right direction I'm ready to develop the
>> rest of the fix for this issue...just trying to avoid another several days of
>> slogging through the source to see what it's supposed to be doing in the 
>> first
>> place. :)
> 
> Kernel can use FP/VEC/VSX registers but it has to enable and disable
> explicitly. Such kernel code also should not be preemptible.
> 
> enable|disable_kernel_fp|altivec|vsx().
> 
> Maybe try run the test with ppc_strict_facility_enable boot option to
> start with.
> 
> If no luck with that, maybe put WARN_ON(preemptible()); checks also in
> the disable_kernel_* functions.
> 
> You could also add an enable/disable counter for each, and make sure it
> is balanced on context switch or kernel->userspace exit.
> 
> Thanks,
> Nick

Will do, thanks for the hints!

I had a debug idea just as I sent the earlier message, and was able to confirm 
the kernel is purposefully restoring the bad data in at least one spot in the 
thread's history, though curiously *not* right before everything goes off the 
rails.  I also dumped the entire kernel binary and confirmed it isn'

Re: [PATCH] powerpc: Fix data corruption on IPI

2023-11-17 Thread Nicholas Piggin
On Fri Nov 17, 2023 at 5:39 PM AEST, Timothy Pearson wrote:
>
>
> - Original Message -
> > From: "Michael Ellerman" 
> > To: "Timothy Pearson" , "linuxppc-dev" 
> > 
> > Cc: "Jens Axboe" 
> > Sent: Tuesday, November 14, 2023 6:14:37 AM
> > Subject: Re: [PATCH] powerpc: Fix data corruption on IPI
>
> > Hi Timothy,
> > 
> > Thanks for debugging this, but I'm unclear why this is helping because
> > we should already have a full barrier (hwsync) on both the sending and
> > receiving side.
> > 
> > More below.
>
> I've spent another few days poking at this, and think I might finally have 
> something more solid in terms of what exactly is happening, but would like 
> some feedback on the concept / how best to fix the potential problem.
>
> As background, there are several worker threads both in userspace and in 
> kernel mode.  Crucially, the main MariaDB data processing thread (the one 
> that handles tasks like flushing dirty pages to disk) always runs on the same 
> core as the io_uring kernel thread that picks up I/O worker creation requests 
> and handles them via create_worker_cb().
>
> Changes in the ~5.12 era switched away from a delayed worker setup.  io_uring 
> currently sets up the new process with create_io_thread(), and immediately 
> uses an IPI to forcibly schedule the new process.  Because of the way the two 
> threads interact, the new process ends up grabbing the CPU from the running 
> MariaDB user thread; I've never seen it schedule on a different core.  If the 
> timing is right in this process, things get trampled on in userspace and the 
> database server either crashes or throws a corruption fault.
>
> Through extensive debugging, I've narrowed this down to invalid state in the 
> VSX registers on return to the MariaDB user thread from the new kernel 
> thread.  For some reason, it seems we don't restore FP state on return from 
> the PF_IO_WORKER thread, and something in the kernel was busy writing new 
> data to them.
>
> A direct example I was able to observe is as follows:
>
> xxspltd vs0,vs0,0  <-- vs0 now zeroed out
> xorir9,r9,1<-- Presumably we switch to the new kernel thread here 
> due to the IPI
> slwir9,r9,7<-- On userspace thread resume, vs0 now contains the 
> value 0x8200400082004000
> xxswapd vs8,vs0<-- vs8 now has the wrong value
> stxvd2x vs8,r3,r12 <-- userspace is now getting stepped on
> stw r9,116(r3)
> stxvd2x vs8,r3,r0
> ...
> CRASH

Nice find, that looks pretty conclusive.

> This is a very difficult race to hit, but MariaDB naturally does repetitive 
> operations with VSX registers so it does eventually fail.  I ended up with a 
> tight loop around glibc operations that use VSX to trigger the failure 
> reliably enough to even understand what was going on.
>
> As I am not as familiar with this part of the Linux kernel as with most other 
> areas, what is the expected save/restore path for the FP/VSX registers around 
> an IPI and associated forced thread switch?  If restore_math() is in the 
> return path, note that MSR_FP is set in regs->msr.

Context switching these FP/vec registers should be pretty robust in
general because it's not just io-uring that uses them. io-uring could
be using some uncommon kernel code that uses the registers incorrectly
though I guess.

>
> Second question: should we even be using the VSX registers at all in kernel 
> space?  Is this a side effect of io_uring interacting so closely with 
> userspace threads, or something else entirely?
>
> If I can get pointed somewhat in the right direction I'm ready to develop the 
> rest of the fix for this issue...just trying to avoid another several days of 
> slogging through the source to see what it's supposed to be doing in the 
> first place. :)

Kernel can use FP/VEC/VSX registers but it has to enable and disable
explicitly. Such kernel code also should not be preemptible.

enable|disable_kernel_fp|altivec|vsx().

Maybe try run the test with ppc_strict_facility_enable boot option to
start with.

If no luck with that, maybe put WARN_ON(preemptible()); checks also in
the disable_kernel_* functions.

You could also add an enable/disable counter for each, and make sure it
is balanced on context switch or kernel->userspace exit.

Thanks,
Nick


[PATCH] powerpc: Add PVN support for HeXin C2000 processor

2023-11-17 Thread Zhao Ke
HeXin Tech Co. has applied for a new PVN from the OpenPower Community
for its new processor C2000. The OpenPower has assigned a new PVN
and this newly assigned PVN is 0x0066, add pvr register related
support for this PVN.

Signed-off-by: Zhao Ke 
Link: 
https://discuss.openpower.foundation/t/how-to-get-a-new-pvr-for-processors-follow-power-isa/477/10
---
 arch/powerpc/include/asm/reg.h|  1 +
 arch/powerpc/kernel/cpu_specs_book3s_64.h | 15 +++
 arch/powerpc/kvm/book3s_pr.c  |  1 +
 arch/powerpc/mm/book3s64/pkeys.c  |  3 ++-
 arch/powerpc/platforms/powernv/subcore.c  |  3 ++-
 drivers/misc/cxl/cxl.h|  3 ++-
 6 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 4ae4ab9090a2..7fd09f25452d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1361,6 +1361,7 @@
 #define PVR_POWER8E0x004B
 #define PVR_POWER8NVL  0x004C
 #define PVR_POWER8 0x004D
+#define PVR_HX_C2000   0x0066
 #define PVR_POWER9 0x004E
 #define PVR_POWER100x0080
 #define PVR_BE 0x0070
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h 
b/arch/powerpc/kernel/cpu_specs_book3s_64.h
index c370c1b804a9..4f604934da7c 100644
--- a/arch/powerpc/kernel/cpu_specs_book3s_64.h
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -238,6 +238,21 @@ static struct cpu_spec cpu_specs[] __initdata = {
.machine_check_early= __machine_check_early_realmode_p8,
.platform   = "power8",
},
+   {   /* 2.07-compliant processor, HeXin C2000 processor */
+   .pvr_mask   = 0x,
+   .pvr_value  = 0x0066,
+   .cpu_name   = "POWER8 (architected)",
+   .cpu_features   = CPU_FTRS_POWER8,
+   .cpu_user_features  = COMMON_USER_POWER8,
+   .cpu_user_features2 = COMMON_USER2_POWER8,
+   .mmu_features   = MMU_FTRS_POWER8,
+   .icache_bsize   = 128,
+   .dcache_bsize   = 128,
+   .cpu_setup  = __setup_cpu_power8,
+   .cpu_restore= __restore_cpu_power8,
+   .machine_check_early= __machine_check_early_realmode_p8,
+   .platform   = "power8",
+   },
{   /* 3.00-compliant processor, i.e. Power9 "architected" mode */
.pvr_mask   = 0x,
.pvr_value  = 0x0f05,
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 9118242063fb..5b92619a05fd 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -604,6 +604,7 @@ static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 
pvr)
case PVR_POWER8:
case PVR_POWER8E:
case PVR_POWER8NVL:
+   case PVR_HX_C2000:
case PVR_POWER9:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 125733962033..c38f378e1942 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -89,7 +89,8 @@ static int __init scan_pkey_feature(void)
unsigned long pvr = mfspr(SPRN_PVR);
 
if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == 
PVR_POWER8E ||
-   PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == 
PVR_POWER9)
+   PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == 
PVR_POWER9 ||
+   PVR_VER(pvr) == PVR_HX_C2000)
pkeys_total = 32;
}
}
diff --git a/arch/powerpc/platforms/powernv/subcore.c 
b/arch/powerpc/platforms/powernv/subcore.c
index 191424468f10..58e7331e1e7e 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -425,7 +425,8 @@ static int subcore_init(void)
 
if (pvr_ver != PVR_POWER8 &&
pvr_ver != PVR_POWER8E &&
-   pvr_ver != PVR_POWER8NVL)
+   pvr_ver != PVR_POWER8NVL &&
+   pvr_ver != PVR_HX_C2000)
return 0;
 
/*
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 0562071cdd4a..9ac2991b29c7 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -836,7 +836,8 @@ static inline bool cxl_is_power8(void)
 {
if ((pvr_version_is(PVR_POWER8E)) ||
(pvr_version_is(PVR_POWER8NVL)) ||
-   (pvr_version_is(PVR_POWER8)))
+   (pvr_version_is(PVR_POWER8)) ||
+   (pvr_version_is(PVR_HX_C2000)))
return true;
return false;
 }
-- 
2.34.1