Re: [PATCH v3 05/10] VAS: Define helpers to init window context

2017-03-24 Thread Michael Neuling

> > > + val = 0ULL;
> > > + if (user_win) {
> > > + val = SET_FIELD(VAS_XLATE_MSR_DR, val, true);
> > > + val = SET_FIELD(VAS_XLATE_MSR_TA, val, false);
> > > + val = SET_FIELD(VAS_XLATE_MSR_PR, val, true);
> > > + val = SET_FIELD(VAS_XLATE_MSR_US, val, false);
> > > + val = SET_FIELD(VAS_XLATE_MSR_HV, val, true);
> > > + val = SET_FIELD(VAS_XLATE_MSR_SF, val, true);
> > > + val = SET_FIELD(VAS_XLATE_MSR_UV, val, false);
> > > + } else {
> > > + val = SET_FIELD(VAS_XLATE_MSR_DR, val, false);
> > 
> > kernel contexts don't go through the nestmmu?
> 
> I think so, but will check with Alistair/Ben.

Well it's kinda up to you.  Do you want to use the kernel mapping or not?  I
probably would unless there are issues.

Mikey


Re: Build failure -- powerpc/boot: Add OPAL console to epapr wrappers

2017-03-24 Thread Oliver O'Halloran
On Sat, Mar 25, 2017 at 4:00 AM, Daniel Walker  wrote:
> I get this build failure,
>
>
> In file included from arch/powerpc/boot/fdt.c:51:
> ../arch/powerpc/boot/libfdt_env.h:9: error: redefinition of typedef
> 'uint32_t'
> ../arch/powerpc/boot/types.h:20: note: previous declaration of 'uint32_t'
> was here
> ../arch/powerpc/boot/libfdt_env.h:10: error: redefinition of typedef
> 'uint64_t'
> ../arch/powerpc/boot/types.h:21: note: previous declaration of 'uint64_t'
> was here
> make[2]: *** [arch/powerpc/boot/fdt.o] Error 1
> make[1]: *** [uImage] Error 2
> make[1]: Leaving directory `/nobackup/danielwa/linux/t1040'
> make: *** [sub-make] Error 2
>
>
> and it bisects to ,
>
>
> commit 656ad58ef19e2a763fa5c938b20ae0f6b8d67242
> Author: Oliver O'Halloran 
> Date:   Fri Jul 1 00:34:37 2016 +1000
>
> powerpc/boot: Add OPAL console to epapr wrappers
>
> This patch adds an OPAL console backend to the powerpc boot wrapper so
> that decompression failures inside the wrapper can be reported to the
> user. This is important since it typically indicates data corruption in
> the firmware and other nasty things.
>
> Currently this only works when building a little endian kernel. When
> compiling a 64 bit BE kernel the wrapper is always build 32 bit to be
> compatible with some 32 bit firmwares. BE support will be added at a
> later date. Another limitation of this is that only the "raw" type of
> OPAL console is supported, however machines that provide a hvsi console
> also provide a raw console so this is not an issue in practice.
>
> Actually-written-by: Benjamin Herrenschmidt 
> Signed-off-by: Oliver O'Halloran 
> [mpe: Move #ifdef __powerpc64__ to avoid warnings on 32-bit]
> Signed-off-by: Michael Ellerman 
>
>
> I can provide a config file if needed. My apologies if this was already
> reported.

Thanks for the report, I don't think this is a known bug. mpe's build
testing is pretty thorough so I'm surprised this wasn't caught sooner.

A config file and the version of gcc that you're using would be useful.

Oliver


Re: AW: problem with cuImage.mpc834x_mds image

2017-03-24 Thread Scott Wood
On Fri, 2017-03-24 at 22:27 +0100, Giuseppe Lippolis wrote:
> > 
> Therefore the code crash during the call in:
> bl  setup_common_caches
> 
> 
> I'm using the iomega_150d based on the MPC8347.
> 
> Do you have some tips about the setup_common_caches?

Once caching is enabled[1] you won't be able to do I/O until the MMU is set up
for an uncached I/O mapping.

-Scott

[1] Or at some similar point during early init.  It's been a while since I
worked on chips like this, so I don't recall the details of which caches are
enabled on kernel entry and whether there's some magic to exempt I/O, but I do
remember there being a stretch of time during init where doing I/O was a
problem.


Re: [PATCH v3 06/10] VAS: Define helpers to alloc/free windows

2017-03-24 Thread Sukadev Bhattiprolu
Michael Neuling [michael.neul...@au1.ibm.com] wrote:
> On Thu, 2017-03-16 at 20:33 -0700, Sukadev Bhattiprolu wrote:
> > Define helpers to allocate/free VAS window objects. These will
> > be used in follow-on patches when opening/closing windows.
> > 
> > Signed-off-by: Sukadev Bhattiprolu 
> > ---
> >  drivers/misc/vas/vas-window.c | 74 
> > +-
> > -
> >  1 file changed, 72 insertions(+), 2 deletions(-)
> > 
> > diff --git a/drivers/misc/vas/vas-window.c b/drivers/misc/vas/vas-window.c
> > index edf5c9f..9233bf5 100644
> > --- a/drivers/misc/vas/vas-window.c
> > +++ b/drivers/misc/vas/vas-window.c
> > @@ -119,7 +119,7 @@ static void unmap_wc_mmio_bars(struct vas_window 
> > *window)
> >   * OS/User Window Context (UWC) MMIO Base Address Region for the given
> > window.
> >   * Map these bus addresses and save the mapped kernel addresses in @window.
> >   */
> > -int map_wc_mmio_bars(struct vas_window *window)
> > +static int map_wc_mmio_bars(struct vas_window *window)
> >  {
> >     int len;
> >     uint64_t start;
> > @@ -472,8 +472,78 @@ int init_winctx_regs(struct vas_window *window, struct
> > vas_winctx *winctx)
> >     return 0;
> >  }
> >  
> > -/* stub for now */
> > +DEFINE_SPINLOCK(vas_ida_lock);
> > +
> > +void vas_release_window_id(struct ida *ida, int winid)
> > +{
> > > +   spin_lock(&vas_ida_lock);
> > > +   ida_remove(ida, winid);
> > > +   spin_unlock(&vas_ida_lock);
> > +}
> > +
> > +int vas_assign_window_id(struct ida *ida)
> > +{
> > +   int rc, winid;
> > +
> > +   rc = ida_pre_get(ida, GFP_KERNEL);
> > +   if (!rc)
> > +   return -EAGAIN;
> > +
> > > +   spin_lock(&vas_ida_lock);
> > > +   rc = ida_get_new_above(ida, 0, &winid);
> > > +   spin_unlock(&vas_ida_lock);
> > +
> > +   if (rc)
> > +   return rc;
> > +
> > +   if (winid > VAS_MAX_WINDOWS_PER_CHIP) {
> > +   pr_err("VAS: Too many (%d) open windows\n", winid);
> > +   vas_release_window_id(ida, winid);
> > +   return -EAGAIN;
> > +   }
> > +
> > +   return winid;
> > +}
> > +
> > +static void vas_window_free(struct vas_window *window)
> > +{
> > +   unmap_wc_mmio_bars(window);
> > +   kfree(window->paste_addr_name);
> > +   kfree(window);
> > +}
> > +
> > +static struct vas_window *vas_window_alloc(struct vas_instance *vinst, int
> > id)
> > +{
> > +   struct vas_window *window;
> > +
> > +   window = kzalloc(sizeof(*window), GFP_KERNEL);
> > +   if (!window)
> > +   return NULL;
> > +
> > +   window->vinst = vinst;
> > +   window->winid = id;
> > +
> > +   if (map_wc_mmio_bars(window))
> > +   goto out_free;
> > +
> > +   return window;
> > +
> > +out_free:
> > +   kfree(window);
> > +   return NULL;
> > +}
> > +
> >  int vas_window_reset(struct vas_instance *vinst, int winid)
> > 
> 
> This interface seems a little weird to me. Needing an alloc in a hardware 
> reset
> path seems a bit strange.

Yeah, the name alloc in this interface is awkward.

I can probably drop this interface. It's used only during start up to clear
the window contexts. But since we must and do clear each window context
before using it, we don't need to do this during start up.

> 
> Maybe the data structures are the issue.  A window is a hardware construct. 
> Something that uses it should probably be called something else like a 
> context. 
> Something that references a window should just be the vas_instance + winid. 
> 
> You should be able to reset this hardware window by referencing structures
> already allocated.  Something associated with the struct vas_instance.
> 

'struct vas_winctx' is the window context (register fields associated
with the window) 'struct vas_window' is a container for the kernel state
associated with a window.

> Mikey

Thanks for the review.

Sukadev



Re: [PATCH v3 05/10] VAS: Define helpers to init window context

2017-03-24 Thread Sukadev Bhattiprolu
Michael Neuling [mi...@neuling.org] wrote:
> On Thu, 2017-03-16 at 20:33 -0700, Sukadev Bhattiprolu wrote:
> >  #ifdef CONFIG_PPC_4K_PAGES
> > @@ -336,9 +337,6 @@ struct vas_window {
> >     /* Feilds applicable only to receive windows */
> >     enum vas_cop_type cop;
> >     atomic_t num_txwins;
> > -
> > -   int32_t hwirq;
> > -   uint64_t irq_port;
> 
> We are removing things already? :-)

:-) They are needed when we add support for user windows. Will
remove them in the earlier patch.

> 
> >  };
> >  
> >  /*
> > @@ -392,4 +390,59 @@ struct vas_winctx {
> >  extern int vas_initialized;
> >  extern int vas_window_reset(struct vas_instance *vinst, int winid);
> >  extern struct vas_instance *find_vas_instance(int vasid);
> > +
> > +/*
> > + * VREG(x):
> > + * Expand a register's short name (eg: LPID) into two parameters:
> > + * - the register's short name in string form ("LPID"), and
> > + * - the name of the macro (eg: VAS_LPID_OFFSET), defining the
> > + *   register's offset in the window context
> > + */
> > +#define VREG_SFX(n, s) __stringify(n), VAS_##n##s
> > +#define VREG(r)VREG_SFX(r, _OFFSET)
> > +
> > +#ifndef vas_debug
> > +static inline void vas_log_write(struct vas_window *win, char *name,
> > +   void *regptr, uint64_t val)
> > +{
> > +   if (val)
> > +   pr_err("%swin #%d: %s reg %p, val 0x%llx\n",
> > +   win->tx_win ? "Tx" : "Rx", win->winid, name,
> > +   regptr, val);
> > +}
> > +
> > +#else  /* vas_debug */
> > +
> > +#define vas_log_write(win, name, reg, val)
> > +
> > +#endif /* vas_debug */
> > +
> > +static inline void write_uwc_reg(struct vas_window *win, char *name,
> > +   int32_t reg, uint64_t val)
> > +{
> > +   void *regptr;
> > +
> > +   regptr = win->uwc_map + reg;
> > +   vas_log_write(win, name, regptr, val);
> > +
> > +   out_be64(regptr, val);
> > +}
> > +
> > +static inline void write_hvwc_reg(struct vas_window *win, char *name,
> > +   int32_t reg, uint64_t val)
> > +{
> > +   void *regptr;
> > +
> > +   regptr = win->hvwc_map + reg;
> > +   vas_log_write(win, name, regptr, val);
> > +
> > +   out_be64(regptr, val);
> > +}
> > +
> > +static inline uint64_t read_hvwc_reg(struct vas_window *win,
> > +   char *name __maybe_unused, int32_t reg)
> > +{
> > +   return in_be64(win->hvwc_map+reg);
> > +}
> > +
> >  #endif
> > diff --git a/drivers/misc/vas/vas-window.c b/drivers/misc/vas/vas-window.c
> > index 32dd1d0..edf5c9f 100644
> > --- a/drivers/misc/vas/vas-window.c
> > +++ b/drivers/misc/vas/vas-window.c
> > @@ -14,6 +14,8 @@
> >  #include 
> >  #include "vas-internal.h"
> >  
> > +static int fault_winid;
> > +
> >  /*
> >   * Compute the paste address region for the window @window using the
> >   * ->win_base_addr and ->win_id_shift we got from device tree.
> > @@ -138,6 +140,338 @@ int map_wc_mmio_bars(struct vas_window *window)
> >     return 0;
> >  }
> >  
> > +/*
> > + * Reset all valid registers in the HV and OS/User Window Contexts for
> > + * the window identified by @window.
> > + *
> > + * NOTE: We cannot really use a for loop to reset window context. Not all
> > + *  offsets in a window context are valid registers and the valid
> > + *  registers are not sequential. And, we can only write to offsets
> > + *  with valid registers (or is that only in Simics?).
> > + */
> > +void reset_window_regs(struct vas_window *window)
> > +{
> > +   write_hvwc_reg(window, VREG(LPID), 0ULL);
> > +   write_hvwc_reg(window, VREG(PID), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_MSR), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_LPCR), 0ULL);
> > +   write_hvwc_reg(window, VREG(XLATE_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(AMR), 0ULL);
> > +   write_hvwc_reg(window, VREG(SEIDR), 0ULL);
> > +   write_hvwc_reg(window, VREG(FAULT_TX_WIN), 0ULL);
> > +   write_hvwc_reg(window, VREG(OSU_INTR_SRC_RA), 0ULL);
> > +   write_hvwc_reg(window, VREG(HV_INTR_SRC_RA), 0ULL);
> > +   write_hvwc_reg(window, VREG(PSWID), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE1), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE2), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE3), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE4), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE5), 0ULL);
> > +   write_hvwc_reg(window, VREG(SPARE6), 0ULL);
> > +   write_hvwc_reg(window, VREG(LFIFO_BAR), 0ULL);
> > +   write_hvwc_reg(window, VREG(LDATA_STAMP_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(LDMA_CACHE_CTL), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRFIFO_PUSH), 0ULL);
> > +   write_hvwc_reg(window, VREG(CURR_MSG_COUNT), 0ULL);
> > +   write_hvwc_reg(window, VREG(LNOTIFY_AFTER_COUNT), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRX_WCRED), 0ULL);
> > +   write_hvwc_reg(window, VREG(LRX_WCRED_ADDER), 0ULL);
> > +   write_hvwc_reg(window, VREG(TX_WCRED), 0ULL);
> > +   write_hvwc_reg(window, VREG(TX_WCRED_ADDER), 0ULL);
> > +   

Re: [PATCH v3 01/10] VAS: Define macros, register fields and structures

2017-03-24 Thread Sukadev Bhattiprolu
Michael Neuling [mi...@neuling.org] wrote:
> On Thu, 2017-03-16 at 20:33 -0700, Sukadev Bhattiprolu wrote:
> > Define macros for the VAS hardware registers and bit-fields as well
> > as couple of data structures needed by the VAS driver.
> > 
> > > Signed-off-by: Sukadev Bhattiprolu 
> > ---
> > Changelog[v3]
> > - Rename winctx->pid to winctx->pidr to reflect that its a value
> >   from the PID register (SPRN_PID), not the linux process id.
> > - Make it easier to split header into kernel/user parts
> > - To keep user interface simple, use macros rather than enum for
> >   the threshold-control modes.
> > - Add a pid field to struct vas_window - needed for user space
> >   send windows.
> > 
> > Changelog[v2]
> > - Add an overview of VAS in vas-internal.h
> > - Get window context parameters from device tree and drop
> >   unnecessary macros.
> > ---
> >  MAINTAINERS |   6 +
> >  arch/powerpc/include/asm/vas.h  |  43 +
> >  drivers/misc/vas/vas-internal.h | 392 
> > 
> 
> This is going to have to go through gregkh/lkml if it's drivers/misc.  you'll 
> at
> least need gregkh's ack/ok before mpe will take them (which is what we did for
> CAPI).
> 
> We might want to keep this in arch/powerpc but I'm not sure.
> 

We will have device nodes accessible to user space, so I put it here and can
copy Gregkh next time. But let me know if we should move to arch/powerpc.

> >  3 files changed, 441 insertions(+)
> >  create mode 100644 arch/powerpc/include/asm/vas.h
> >  create mode 100644 drivers/misc/vas/vas-internal.h
> > 
> 
> > +
> > +/*
> > + * Overview of Virtual Accelerator Switchboard (VAS).
> > + *
> > + * VAS is a hardware "switchboard" that allows senders and receivers to
> > + * exchange messages with _minimal_ kernel involvment. The receivers are
> > + * typically NX coprocessor engines that perform compression or encryption
> > + * in hardware, but receivers can also be other software threads.
> > + *
> > + * Senders are user/kernel threads that submit compression/encryption or
> > + * other requests to the receivers. Senders must format their messages as
> > + * Coprocessor Request Blocks (CRB)s and submit them using the instructions
> > + * "copy" and "paste" which were introduced in Power9.
> > + *
> > + * A Power node can have (upto?) 8 Power chips. There is one instance of
> > + * VAS in each Power9 chip. Each instance of VAS has 64K windows or ports,
> > + * Senders and receivers must each connect to a separate window before they
> > + * can exchange messages through the switchboard.
> > + *
> > + * Each window is described by two types of window contexts:
> > + *
> > > + *   Hypervisor Window Context (HVWC) of size VAS_HVWC_SIZE bytes
> > + *
> > > + *   OS/User Window Context (UWC) of size VAS_UWC_SIZE bytes.
> > + *
> > + * A window context can be viewed as a set of 64-bit registers. The 
> > settings
> > + * in these registers configure/control/determine the behavior of the VAS
> > + * hardware when messages are sent/received through the window. The 
> > registers
> > + * in the HVWC are configured by the kernel while the registers in the UWC 
> > can
> > + * be configured by the kernel or by the user space application that is 
> > using
> > + * the window.
> > + *
> > + * The HVWCs for all windows on a specific instance of VAS are in a 
> > contiguous
> > + * range of hardware addresses or Base address region (BAR) referred to as 
> > the
> > + * HVWC BAR for the instance. Similarly the UWCs for all windows on an 
> > instance
> > + * are referred to as the UWC BAR for the instance. The two BARs for each
> > + * instance are defined Power9 MMIO Ranges spreadsheet and available to the
> > + * kernel the device tree as follows:
> > + *
> > > + *   /proc/device-tree/xscom@.../vas@.../hvwc-bar-start
> > > + *   /proc/device-tree/xscom@.../vas@.../hvwc-bar-size
> > > + *   /proc/device-tree/xscom@.../vas@.../uwc-bar-start
> > + * /proc/device-tree/xscom@.../vas@.../uwc-bar-size
> 
> should these just be reg properties?

I guess they could. Will try that
> 
> > + *
> > + * The kernel maps these two hardware address regions into the kernel 
> > address
> > + * space (hvwc_map and uwc_map) and accesses the window contexts of a 
> > specific
> > + * window using:
> > + *
> > > + *    hvwc = hvwc_map + winid * VAS_HVWC_SIZE.
> > > + *    uwc = uwc_map + winid * VAS_UWC_SIZE.
> > + *
> > + * where winid is the window index (0..64K).
> > + *
> > + * Note that the window contexts are used to "configure" the windows. In
> > + * addition to this configuration address, each _send_ window also has a
> > + * unique hardware address, referred to as the "paste-address" to which the
> > + * sender must "paste" the message (CRB) they wish to submit. This hardware
> > + * paste address for window can be computed from the following nodes in the
> > + * device 

AW: problem with cuImage.mpc834x_mds image

2017-03-24 Thread Giuseppe Lippolis
> On Fri, 2016-05-27 at 23:12 +0200, Giuseppe Lippolis wrote:
> > Dear All,
> > I'm trying with buildroot to build the linux-4.4.3 for an iomega 150d
> > machine mounting the mpc8347E sys.

[...]

> > Finalizing device tree... flat tree at 0x94d120
> >
> >
> > But at this point the process crash and the system is reset.
> > It should be something near the handover between the first kenel boot
> > stage and the vmlinux start, but I do not have a jtag to proceed the
> > investigation.

[..]

> > Any suggestion?
> 
> Check (with serial output) that flow control reaches the very end of the
> bootwrapper before it jumps to Linux.  Use the same code that you use in
> head.S (with loop added in both cases).  Print the entry address, and some
> bytes loaded from that address to confirm the image is there.  Check that the
> BATs are set up correctly, etc.
> 
> -Scott

Hi Scott, thanks for your tips.
I continued my investigation, now using Linux-4.10.4, and I found two issues:

1) U-Boot 1.1.4 makes some mistake when decompressing the gzip -9 image. Therefore,
as a workaround, I currently modify the wrapper to use the uncompressed image.

Nevertheless the image is crashing during the boot.

I modified the  cpu_setup_6xx.S in this way (in order to print debug char on 
the console) :

.equ cnsladdr, 0xe0004500

_GLOBAL(__setup_cpu_603)
mflrr5
BEGIN_MMU_FTR_SECTION
li  r10,0
mtspr   SPRN_SPRG_603_LRU,r10   /* init SW LRU tracking */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)

lis r23, cnsladdr@ha
addir23, r23, cnsladdr@l
li  r25, 'i'
stb r25, 0(r23)

BEGIN_FTR_SECTION
bl  __init_fpu_registers

lis r23, cnsladdr@ha
addir23, r23, cnsladdr@l
li  r25, 'l'
stb r25, 0(r23)

END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
bl  setup_common_caches

lis r23, cnsladdr@ha
addir23, r23, cnsladdr@l
li  r25, 'm'
stb r25, 0(r23)

mtlrr5

lis r23, cnsladdr@ha
addir23, r23, cnsladdr@l
li  r25, 'm'
stb r25, 0(r23)
trap

blr

and now I get:

Finalizing device tree... flat tree at 0xbdb960
Il

Therefore the code crashes during the call in:
bl  setup_common_caches


I'm using the iomega_150d based on the MPC8347.

Do you have some tips about the setup_common_caches?

Thanks.
Bye.



Re: [PATCH v3 03/10] VAS: Define vas_init() and vas_exit()

2017-03-24 Thread Sukadev Bhattiprolu
Michael Neuling [michael.neul...@au1.ibm.com] wrote:
> On Thu, 2017-03-16 at 20:33 -0700, Sukadev Bhattiprolu wrote:
> > Implement vas_init() and vas_exit() functions for a new VAS module.
> > This VAS module is essentially a library for other device drivers
> > and kernel users of the NX coprocessors like NX-842 and NX-GZIP.
> > 
> > Signed-off-by: Sukadev Bhattiprolu 
> > ---
> > Changelog[v3]:
> > - Zero vas_instances memory on allocation
> > - [Haren Myneni] Fix description in Kconfig
> > Changelog[v2]:
> > - Get HVWC, UWC and window address parameters from device tree.
> > ---
> >  MAINTAINERS |   8 ++-
> >  arch/powerpc/include/asm/reg.h  |   1 +
> >  drivers/misc/Kconfig|   1 +
> >  drivers/misc/Makefile   |   1 +
> >  drivers/misc/vas/Kconfig|  21 ++
> >  drivers/misc/vas/Makefile   |   3 +
> >  drivers/misc/vas/vas-internal.h |   3 +
> >  drivers/misc/vas/vas-window.c   |  19 +
> >  drivers/misc/vas/vas.c  | 155
> > 
> >  9 files changed, 210 insertions(+), 2 deletions(-)
> >  create mode 100644 drivers/misc/vas/Kconfig
> >  create mode 100644 drivers/misc/vas/Makefile
> >  create mode 100644 drivers/misc/vas/vas-window.c
> >  create mode 100644 drivers/misc/vas/vas.c
> > 
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index 2a910c9..4037252 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3673,8 +3673,6 @@ F:arch/powerpc/platforms/powernv/pci-cxl.c
> >  F: drivers/misc/cxl/
> >  F: include/misc/cxl*
> >  F: include/uapi/misc/cxl.h
> > -F: Documentation/powerpc/cxl.txt
> > -F: Documentation/ABI/testing/sysfs-class-cxl
> 
> err?

Yeah, something got messed up here and

> 
> >  CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI
> > DRIVER
> >  M: Manoj N. Kumar 
> > @@ -3686,6 +3684,12 @@ F:   drivers/scsi/cxlflash/
> >  F: include/uapi/scsi/cxlflash_ioctls.h
> >  F: Documentation/powerpc/cxlflash.txt
> >  
> > +VAS (IBM Virtual Accelerator Switch) DRIVER
> > +M: Sukadev Bhattiprolu 
> > +L: linuxppc-dev@lists.ozlabs.org
> > +S: Supported
> > +F: drivers/misc/vas/
> > +
> 
> This was already added in patch 1.

here. Will fix

> 
> >  STMMAC ETHERNET DRIVER
> >  M: Giuseppe Cavallaro 
> >  M: Alexandre Torgue 
> > diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> > index fc879fd..7a45ff7 100644
> > --- a/arch/powerpc/include/asm/reg.h
> > +++ b/arch/powerpc/include/asm/reg.h
> > @@ -1225,6 +1225,7 @@
> >  #define PVR_POWER8E0x004B
> >  #define PVR_POWER8NVL  0x004C
> >  #define PVR_POWER8 0x004D
> > +#define PVR_POWER9 0x004E
> 
> Can you send this up separately?  

Sure.

> 
> >  #define PVR_BE 0x0070
> >  #define PVR_PA6T   0x0090
> >  
> > diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> > index c290990..97d652e 100644
> > --- a/drivers/misc/Kconfig
> > +++ b/drivers/misc/Kconfig
> > @@ -783,4 +783,5 @@ source "drivers/misc/mic/Kconfig"
> >  source "drivers/misc/genwqe/Kconfig"
> >  source "drivers/misc/echo/Kconfig"
> >  source "drivers/misc/cxl/Kconfig"
> > +source "drivers/misc/vas/Kconfig"
> >  endmenu
> > diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> > index 7a3ea89..5201ffd 100644
> > --- a/drivers/misc/Makefile
> > +++ b/drivers/misc/Makefile
> > @@ -53,6 +53,7 @@ obj-$(CONFIG_GENWQE)  += genwqe/
> >  obj-$(CONFIG_ECHO) += echo/
> >  obj-$(CONFIG_VEXPRESS_SYSCFG)  += vexpress-syscfg.o
> >  obj-$(CONFIG_CXL_BASE) += cxl/
> > +obj-$(CONFIG_VAS)  += vas/
> >  obj-$(CONFIG_PANEL) += panel.o
> >  
> >  lkdtm-$(CONFIG_LKDTM)  += lkdtm_core.o
> > diff --git a/drivers/misc/vas/Kconfig b/drivers/misc/vas/Kconfig
> > new file mode 100644
> > index 000..43cedda
> > --- /dev/null
> > +++ b/drivers/misc/vas/Kconfig
> > @@ -0,0 +1,21 @@
> > +#
> > +# IBM Virtual Accelarator Switchboard (VAS) compatible devices
> > +#depends on PPC_POWERNV && PCI_MSI && EEH
> > +#
> > +
> > +config VAS
> > +   tristate "Support for IBM Virtual Accelerator Switchboard (VAS)"
> > +   depends on PPC_POWERNV
> > +   default n
> > +   help
> > +     Select this option to enable driver support for IBM Virtual
> > +     Accelerator Switchboard (VAS).
> > +
> > +     VAS allows accelerators in co processors like NX-842 to be
> > +     directly available to a user process. This driver enables
> > +     userspace programs to access these accelerators via device
> > +     nodes like /dev/crypto/nx-gzip.
> 
> I though this was kernel only users for now?

Yes, it's only kernel for now. Will drop the last sentence.
> 
> > +
> > +     VAS adapters are found in POWER9 based systems.
> > +
> > +     If unsure, say N.
> > diff --git a/drivers/misc/vas/Makefile b/drivers/misc/vas/Makefile
> > new file mode 100644

Re: [PATCH v3 04/10] VAS: Define helpers for access MMIO regions

2017-03-24 Thread Sukadev Bhattiprolu
Michael Neuling [michael.neul...@au1.ibm.com] wrote:
> > +static inline void get_uwc_mmio_bar(struct vas_window *window,
> > +   uint64_t *start, int *len)
> > +{
> > +   uint64_t pbaddr;
> > +
> > +   pbaddr = window->vinst->uwc_bar_start;
> > +   *start = pbaddr + window->winid * VAS_UWC_SIZE;
> > +   *len = VAS_UWC_SIZE;
> 
> I'm not sure this works for 4K pages since VAS_UWC_SIZE = PAGE_SIZE but in
> reality I think it's always 64K.  Right?

I believe the idea is that each user process will have its OS/User Window
context on a separate page - to provide proper protection of the window
context between processes. So, the OS/User Window context size _is_ 
dependent on page size.

The page size is set to 64K in init_north_ctl() in skiboot but can be
set to 4K as well. The amount of memory allocated for the OS/User Window
Context MMIO BAR (init_uwcm() in skiboot) also depends on the page size:
256MB for 4K or 4GB for 64K.

> 
> Seem like we are mixing pages sizes and hardware sizes here.
> 
> (I realise 4K isn't supported yet, but)

Yes, only 64K page-size for now.

Thanks,

Sukadev



Re: [PATCH guest kernel] vfio/powerpc/spapr_tce: Enforce IOMMU type compatibility check

2017-03-24 Thread Alex Williamson
On Fri, 24 Mar 2017 17:44:06 +1100
Alexey Kardashevskiy  wrote:

> The existing SPAPR TCE driver advertises both VFIO_SPAPR_TCE_IOMMU and
> VFIO_SPAPR_TCE_v2_IOMMU types to the userspace and the userspace usually
> picks the v2.
> 
> Normally the userspace would create a container, attach an IOMMU group
> to it and only then set the IOMMU type (which would normally be v2).
> 
> However a specific IOMMU group may not support v2, in other words
> it may not implement set_window/unset_window/take_ownership/
> release_ownership and such a group should not be attached to
> a v2 container.
> 
> This adds extra checks that a new group can do what the selected IOMMU
> type suggests. The userspace can then test the return value from
> ioctl(VFIO_SET_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU) and try
> VFIO_SPAPR_TCE_IOMMU.
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> 
> This is one of the patches needed to do nested VFIO - for either
> second level guest or DPDK running in a guest.
> ---
>  drivers/vfio/vfio_iommu_spapr_tce.c | 8 
>  1 file changed, 8 insertions(+)

I'm not sure I understand why you're labeling this "guest kernel", is a
VM the only case where we can have combinations that only a subset of
the groups might support v2?  What terrible things happen when such a
combination is created?  The fix itself seems sane, but I'm trying to
figure out whether it should be marked for stable, should go in for
v4.11, or be queued for v4.12.  Thanks,

Alex

> diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
> b/drivers/vfio/vfio_iommu_spapr_tce.c
> index cf3de91fbfe7..a7d811524092 100644
> --- a/drivers/vfio/vfio_iommu_spapr_tce.c
> +++ b/drivers/vfio/vfio_iommu_spapr_tce.c
> @@ -1335,8 +1335,16 @@ static int tce_iommu_attach_group(void *iommu_data,
>  
>   if (!table_group->ops || !table_group->ops->take_ownership ||
>   !table_group->ops->release_ownership) {
> + if (container->v2) {
> + ret = -EPERM;
> + goto unlock_exit;
> + }
>   ret = tce_iommu_take_ownership(container, table_group);
>   } else {
> + if (!container->v2) {
> + ret = -EPERM;
> + goto unlock_exit;
> + }
>   ret = tce_iommu_take_ownership_ddw(container, table_group);
>   if (!tce_groups_attached(container) && !container->tables[0])
>   container->def_window_pending = true;



Re: [PATCH kernel v11 10/10] KVM: PPC: VFIO: Add in-kernel acceleration for VFIO

2017-03-24 Thread Alex Williamson
On Wed, 22 Mar 2017 15:21:56 +1100
Alexey Kardashevskiy  wrote:

> This allows the host kernel to handle H_PUT_TCE, H_PUT_TCE_INDIRECT
> and H_STUFF_TCE requests targeted an IOMMU TCE table used for VFIO
> without passing them to user space which saves time on switching
> to user space and back.
> 
> This adds H_PUT_TCE/H_PUT_TCE_INDIRECT/H_STUFF_TCE handlers to KVM.
> KVM tries to handle a TCE request in the real mode, if failed
> it passes the request to the virtual mode to complete the operation.
> If it a virtual mode handler fails, the request is passed to
> the user space; this is not expected to happen though.
> 
> To avoid dealing with page use counters (which is tricky in real mode),
> this only accelerates SPAPR TCE IOMMU v2 clients which are required
> to pre-register the userspace memory. The very first TCE request will
> be handled in the VFIO SPAPR TCE driver anyway as the userspace view
> of the TCE table (iommu_table::it_userspace) is not allocated till
> the very first mapping happens and we cannot call vmalloc in real mode.
> 
> If we fail to update a hardware IOMMU table unexpected reason, we just
> clear it and move on as there is nothing really we can do about it -
> for example, if we hot plug a VFIO device to a guest, existing TCE tables
> will be mirrored automatically to the hardware and there is no interface
> to report to the guest about possible failures.
> 
> This adds new attribute - KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE - to
> the VFIO KVM device. It takes a VFIO group fd and SPAPR TCE table fd
> and associates a physical IOMMU table with the SPAPR TCE table (which
> is a guest view of the hardware IOMMU table). The iommu_table object
> is cached and referenced so we do not have to look up for it in real mode.
> 
> This does not implement the UNSET counterpart as there is no use for it -
> once the acceleration is enabled, the existing userspace won't
> disable it unless a VFIO container is destroyed; this adds necessary
> cleanup to the KVM_DEV_VFIO_GROUP_DEL handler.
> 
> This advertises the new KVM_CAP_SPAPR_TCE_VFIO capability to the user
> space.
> 
> This adds real mode version of WARN_ON_ONCE() as the generic version
> causes problems with rcu_sched. Since we testing what vmalloc_to_phys()
> returns in the code, this also adds a check for already existing
> vmalloc_to_phys() call in kvmppc_rm_h_put_tce_indirect().
> 
> This finally makes use of vfio_external_user_iommu_id() which was
> introduced quite some time ago and was considered for removal.
> 
> Tests show that this patch increases transmission speed from 220MB/s
> to 750..1020MB/s on 10Gb network (Chelsea CXGB3 10Gb ethernet card).
> 
> Signed-off-by: Alexey Kardashevskiy 
> ---
> Changes:
> v11:
> * fixed vfio_group reference leak in KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE
> handler
> 
> v10:
> * fixed leaking references in virt/kvm/vfio.c
> * moved code to helpers - kvm_vfio_group_get_iommu_group, 
> kvm_spapr_tce_release_vfio_group
> * fixed possible race between referencing table and destroying it via
> VFIO add/remove window ioctls()
> 
> v9:
> * removed referencing a group in KVM, only referencing iommu_table's now
> * fixed a reference leak in KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE handler
> * fixed typo in vfio.txt
> * removed @argsz and @flags from struct kvm_vfio_spapr_tce
> 
> v8:
> * changed all (!pua) checks to return H_TOO_HARD as ioctl() is supposed
> to handle them
> * changed vmalloc_to_phys() callers to return H_HARDWARE
> * changed real mode iommu_tce_xchg_rm() callers to return H_TOO_HARD
> and added a comment about this in the code
> * changed virtual mode iommu_tce_xchg() callers to return H_HARDWARE
> and do WARN_ON
> * added WARN_ON_ONCE_RM(!rmap) in kvmppc_rm_h_put_tce_indirect() to
> have all vmalloc_to_phys() callsites covered
> 
> v7:
> * added realmode-friendly WARN_ON_ONCE_RM
> 
> v6:
> * changed handling of errors returned by kvmppc_(rm_)tce_iommu_(un)map()
> * moved kvmppc_gpa_to_ua() to TCE validation
> 
> v5:
> * changed error codes in multiple places
> * added bunch of WARN_ON() in places which should not really happen
> * added a check that an iommu table is not attached already to LIOBN
> * dropped explicit calls to iommu_tce_clear_param_check/
> iommu_tce_put_param_check as kvmppc_tce_validate/kvmppc_ioba_validate
> call them anyway (since the previous patch)
> * if we fail to update a hardware IOMMU table for unexpected reason,
> this just clears the entry
> 
> v4:
> * added note to the commit log about allowing multiple updates of
> the same IOMMU table;
> * instead of checking for if any memory was preregistered, this
> returns H_TOO_HARD if a specific page was not;
> * fixed comments from v3 about error handling in many places;
> * simplified TCE handlers and merged IOMMU parts inline - for example,
> there used to be kvmppc_h_put_tce_iommu(), now it is merged into
> kvmppc_h_put_tce(); this allows to check IOBA boundaries against
> the first attached 

Re: [PATCH kernel v11 04/10] powerpc/vfio_spapr_tce: Add reference counting to iommu_table

2017-03-24 Thread Alex Williamson
On Wed, 22 Mar 2017 15:21:50 +1100
Alexey Kardashevskiy  wrote:

> So far iommu_table objects were only used in virtual mode and had
> a single owner. We are going to change this by implementing in-kernel
> acceleration of DMA mapping requests. The proposed acceleration
> will handle requests in real mode and KVM will keep references to tables.
> 
> This adds a kref to iommu_table and defines new helpers to update it.
> This replaces iommu_free_table() with iommu_tce_table_put() and makes
> iommu_free_table() static. iommu_tce_table_get() is not used in this patch
> but it will be in the following patch.
> 
> Since this touches prototypes, this also removes @node_name parameter as
> it has never been really useful on powernv and carrying it for
> the pseries platform code to iommu_free_table() seems to be quite
> useless as well.
> 
> This should cause no behavioral change.
> 
> Signed-off-by: Alexey Kardashevskiy 
> Reviewed-by: David Gibson 
> ---
> Changes:
> v10:
> * iommu_tce_table_get() can fail now if a table is being destroyed, will be
> used in 10/10
> * iommu_tce_table_put() returns what kref_put() returned
> * iommu_tce_table_put() got WARN_ON(!tbl) as the callers already check
> for it and do not call _put() when tbl==NULL
> 
> v9:
> * s/iommu_table_get/iommu_tce_table_get/ and
> s/iommu_table_put/iommu_tce_table_put/ -- so I removed r-b/a-b
> ---
>  arch/powerpc/include/asm/iommu.h  |  5 +++--
>  arch/powerpc/kernel/iommu.c   | 27 ++-
>  arch/powerpc/platforms/powernv/pci-ioda.c | 14 +++---
>  arch/powerpc/platforms/powernv/pci.c  |  1 +
>  arch/powerpc/platforms/pseries/iommu.c|  3 ++-
>  arch/powerpc/platforms/pseries/vio.c  |  2 +-
>  drivers/vfio/vfio_iommu_spapr_tce.c   |  2 +-
>  7 files changed, 37 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/iommu.h 
> b/arch/powerpc/include/asm/iommu.h
> index 4554699aec02..d96142572e6d 100644
> --- a/arch/powerpc/include/asm/iommu.h
> +++ b/arch/powerpc/include/asm/iommu.h
> @@ -119,6 +119,7 @@ struct iommu_table {
>   struct list_head it_group_list;/* List of iommu_table_group_link */
>   unsigned long *it_userspace; /* userspace view of the table */
>   struct iommu_table_ops *it_ops;
> + struct kref it_kref;
>  };
>  
>  #define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
> @@ -151,8 +152,8 @@ static inline void *get_iommu_table_base(struct device 
> *dev)
>  
>  extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
>  
> -/* Frees table for an individual device node */
> -extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
> +extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
> +extern int iommu_tce_table_put(struct iommu_table *tbl);
>  
>  /* Initializes an iommu_table based in values set in the passed-in
>   * structure
> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index bc142d87130f..af915da5e03a 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -711,13 +711,13 @@ struct iommu_table *iommu_init_table(struct iommu_table 
> *tbl, int nid)
>   return tbl;
>  }
>  
> -void iommu_free_table(struct iommu_table *tbl, const char *node_name)
> +static void iommu_table_free(struct kref *kref)
>  {
>   unsigned long bitmap_sz;
>   unsigned int order;
> + struct iommu_table *tbl;
>  
> - if (!tbl)
> - return;
> + tbl = container_of(kref, struct iommu_table, it_kref);
>  
>   if (tbl->it_ops->free)
>   tbl->it_ops->free(tbl);
> @@ -736,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char 
> *node_name)
>  
>   /* verify that table contains no entries */
>   if (!bitmap_empty(tbl->it_map, tbl->it_size))
> - pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
> + pr_warn("%s: Unexpected TCEs\n", __func__);
>  
>   /* calculate bitmap size in bytes */
>   bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
> @@ -748,7 +748,24 @@ void iommu_free_table(struct iommu_table *tbl, const 
> char *node_name)
>   /* free table */
>   kfree(tbl);
>  }
> -EXPORT_SYMBOL_GPL(iommu_free_table);
> +
> +struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
> +{
> + if (kref_get_unless_zero(&tbl->it_kref))
> + return tbl;
> +
> + return NULL;
> +}
> +EXPORT_SYMBOL_GPL(iommu_tce_table_get);
> +
> +int iommu_tce_table_put(struct iommu_table *tbl)
> +{
> + if (WARN_ON(!tbl))
> + return 0;
> +
> + return kref_put(&tbl->it_kref, iommu_table_free);
> +}
> +EXPORT_SYMBOL_GPL(iommu_tce_table_put);
>  
>  /* Creates TCEs for a user provided buffer.  The user buffer must be
>   * contiguous real kernel storage (not vmalloc).  The address passed here
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
> 

[v2 1/5] sparc64: simplify vmemmap_populate

2017-03-24 Thread Pavel Tatashin
Remove duplicated code by using the common vmemmap_pud_populate and
vmemmap_pgd_populate functions.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Shannon Nelson 
---
 arch/sparc/mm/init_64.c |   23 ++-
 1 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 2c0cb2a..01eccab 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2526,30 +2526,19 @@ int __meminit vmemmap_populate(unsigned long vstart, 
unsigned long vend,
vstart = vstart & PMD_MASK;
vend = ALIGN(vend, PMD_SIZE);
for (; vstart < vend; vstart += PMD_SIZE) {
-   pgd_t *pgd = pgd_offset_k(vstart);
+   pgd_t *pgd = vmemmap_pgd_populate(vstart, node);
unsigned long pte;
pud_t *pud;
pmd_t *pmd;
 
-   if (pgd_none(*pgd)) {
-   pud_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
+   if (!pgd)
+   return -ENOMEM;
 
-   if (!new)
-   return -ENOMEM;
-   pgd_populate(&init_mm, pgd, new);
-   }
-
-   pud = pud_offset(pgd, vstart);
-   if (pud_none(*pud)) {
-   pmd_t *new = vmemmap_alloc_block(PAGE_SIZE, node);
-
-   if (!new)
-   return -ENOMEM;
-   pud_populate(&init_mm, pud, new);
-   }
+   pud = vmemmap_pud_populate(pgd, vstart, node);
+   if (!pud)
+   return -ENOMEM;
 
pmd = pmd_offset(pud, vstart);
-
pte = pmd_val(*pmd);
if (!(pte & _PAGE_VALID)) {
void *block = vmemmap_alloc_block(PMD_SIZE, node);
-- 
1.7.1



[v2 0/5] parallelized "struct page" zeroing

2017-03-24 Thread Pavel Tatashin
Changelog:
v1 - v2
- Per request, added s390 to deferred "struct page" zeroing
- Collected performance data on x86 which proofs the importance to
  keep memset() as prefetch (see below).

When deferred struct page initialization feature is enabled, we get a
performance gain of initializing vmemmap in parallel after other CPUs are
started. However, we still zero the memory for vmemmap using one boot CPU.
This patch-set fixes the memset-zeroing limitation by deferring it as well.

Performance gain on SPARC with 32T:
base:   https://hastebin.com/ozanelatat.go
fix:https://hastebin.com/utonawukof.go

As you can see without the fix it takes: 97.89s to boot
With the fix it takes: 46.91s to boot.

Performance gain on x86 with 1T:
base:   https://hastebin.com/uvifasohon.pas
fix:https://hastebin.com/anodiqaguj.pas

On Intel we save 10.66s/T while on SPARC we save 1.59s/T. Intel has
twice as many pages, and also fewer nodes than SPARC (sparc 32 nodes, vs.
intel 8 nodes).

It takes one thread 11.25s to zero vmemmap on Intel for 1T, so it should
take additional 11.25 / 8 = 1.4s  (this machine has 8 nodes) per node to
initialize the memory, but it takes only additional 0.456s per node, which
means on Intel we also benefit from having memset() and initializing all
other fields in one place.

Pavel Tatashin (5):
  sparc64: simplify vmemmap_populate
  mm: defining memblock_virt_alloc_try_nid_raw
  mm: add "zero" argument to vmemmap allocators
  mm: zero struct pages during initialization
  mm: teach platforms not to zero struct pages memory

 arch/powerpc/mm/init_64.c |4 +-
 arch/s390/mm/vmem.c   |5 ++-
 arch/sparc/mm/init_64.c   |   26 +++
 arch/x86/mm/init_64.c |3 +-
 include/linux/bootmem.h   |3 ++
 include/linux/mm.h|   15 +++--
 mm/memblock.c |   46 --
 mm/page_alloc.c   |3 ++
 mm/sparse-vmemmap.c   |   48 +---
 9 files changed, 103 insertions(+), 50 deletions(-)



[v2 3/5] mm: add "zero" argument to vmemmap allocators

2017-03-24 Thread Pavel Tatashin
Allow clients to request non-zeroed memory from vmemmap allocator.
The following two public functions have a new boolean argument called zero:

__vmemmap_alloc_block_buf()
vmemmap_alloc_block()

When zero is true, memory that is allocated by memblock allocator is zeroed
(the current behavior), when argument is false, the memory is not zeroed.

This change allows for optimizations where the client knows when it is better
to zero memory: maybe later when other CPUs are started, or maybe the client
is going to set every byte in the allocated memory, so there is no need to zero
memory beforehand.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Shannon Nelson 
---
 arch/powerpc/mm/init_64.c |4 +-
 arch/s390/mm/vmem.c   |5 ++-
 arch/sparc/mm/init_64.c   |3 +-
 arch/x86/mm/init_64.c |3 +-
 include/linux/mm.h|6 ++--
 mm/sparse-vmemmap.c   |   48 +---
 6 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 9be9920..eb4c270 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -133,7 +133,7 @@ static int __meminit vmemmap_populated(unsigned long start, 
int page_size)
 
/* allocate a page when required and hand out chunks */
if (!num_left) {
-   next = vmemmap_alloc_block(PAGE_SIZE, node);
+   next = vmemmap_alloc_block(PAGE_SIZE, node, true);
if (unlikely(!next)) {
WARN_ON(1);
return NULL;
@@ -181,7 +181,7 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
if (vmemmap_populated(start, page_size))
continue;
 
-   p = vmemmap_alloc_block(page_size, node);
+   p = vmemmap_alloc_block(page_size, node, true);
if (!p)
return -ENOMEM;
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 60d3899..9c75214 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -251,7 +251,8 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
if (MACHINE_HAS_EDAT1) {
void *new_page;
 
-   new_page = vmemmap_alloc_block(PMD_SIZE, node);
+   new_page = vmemmap_alloc_block(PMD_SIZE, node,
+  true);
if (!new_page)
goto out;
pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
@@ -271,7 +272,7 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
if (pte_none(*pt_dir)) {
void *new_page;
 
-   new_page = vmemmap_alloc_block(PAGE_SIZE, node);
+   new_page = vmemmap_alloc_block(PAGE_SIZE, node, true);
if (!new_page)
goto out;
pte_val(*pt_dir) = __pa(new_page) | pgt_prot;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 01eccab..d91e462 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2541,7 +2541,8 @@ int __meminit vmemmap_populate(unsigned long vstart, 
unsigned long vend,
pmd = pmd_offset(pud, vstart);
pte = pmd_val(*pmd);
if (!(pte & _PAGE_VALID)) {
-   void *block = vmemmap_alloc_block(PMD_SIZE, node);
+   void *block = vmemmap_alloc_block(PMD_SIZE, node,
+ true);
 
if (!block)
return -ENOMEM;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 15173d3..46101b6 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1176,7 +1176,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
if (pmd_none(*pmd)) {
void *p;
 
-   p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+   p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap,
+ true);
if (p) {
pte_t entry;
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5f01c88..54df194 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2410,13 +2410,13 @@ void sparse_mem_maps_populate_node(struct page 
**map_map,
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
 pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
-void *vmemmap_alloc_block(unsigned long size, int node);

[v2 5/5] mm: teach platforms not to zero struct pages memory

2017-03-24 Thread Pavel Tatashin
If we are using deferred struct page initialization feature, most of
"struct page"es are getting initialized after other CPUs are started, and
hence we are benefiting from doing this job in parallel. However, we are
still zeroing all the memory that is allocated for "struct pages" using the
boot CPU.  This patch solves this problem, by deferring zeroing "struct
pages" to only when they are initialized.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Shannon Nelson 
---
 arch/powerpc/mm/init_64.c |2 +-
 arch/s390/mm/vmem.c   |2 +-
 arch/sparc/mm/init_64.c   |2 +-
 arch/x86/mm/init_64.c |2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index eb4c270..24faf2d 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -181,7 +181,7 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
if (vmemmap_populated(start, page_size))
continue;
 
-   p = vmemmap_alloc_block(page_size, node, true);
+   p = vmemmap_alloc_block(page_size, node, VMEMMAP_ZERO);
if (!p)
return -ENOMEM;
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 9c75214..ffe9ba1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -252,7 +252,7 @@ int __meminit vmemmap_populate(unsigned long start, 
unsigned long end, int node)
void *new_page;
 
new_page = vmemmap_alloc_block(PMD_SIZE, node,
-  true);
+  VMEMMAP_ZERO);
if (!new_page)
goto out;
pmd_val(*pm_dir) = __pa(new_page) | sgt_prot;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d91e462..280834e 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2542,7 +2542,7 @@ int __meminit vmemmap_populate(unsigned long vstart, 
unsigned long vend,
pte = pmd_val(*pmd);
if (!(pte & _PAGE_VALID)) {
void *block = vmemmap_alloc_block(PMD_SIZE, node,
- true);
+ VMEMMAP_ZERO);
 
if (!block)
return -ENOMEM;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 46101b6..9d8c72c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1177,7 +1177,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
void *p;
 
p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap,
- true);
+ VMEMMAP_ZERO);
if (p) {
pte_t entry;
 
-- 
1.7.1



[v2 4/5] mm: zero struct pages during initialization

2017-03-24 Thread Pavel Tatashin
When deferred struct page initialization is enabled, do not expect that
the memory that was allocated for struct pages was zeroed by the
allocator. Zero it when "struct pages" are initialized.

Also, a defined boolean VMEMMAP_ZERO is provided to tell platforms whether
they should zero memory or can defer it.

Signed-off-by: Pavel Tatashin 
Reviewed-by: Shannon Nelson 
---
 include/linux/mm.h |9 +
 mm/page_alloc.c|3 +++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 54df194..eb052f6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2427,6 +2427,15 @@ int vmemmap_populate_basepages(unsigned long start, 
unsigned long end,
 #ifdef CONFIG_MEMORY_HOTPLUG
 void vmemmap_free(unsigned long start, unsigned long end);
 #endif
+/*
+ * Don't zero "struct page"es during early boot, and zero only when they are
+ * initialized in parallel.
+ */
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+#define VMEMMAP_ZERO   false
+#else
+#define VMEMMAP_ZERO   true
+#endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
  unsigned long size);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f202f8b..02945e4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1168,6 +1168,9 @@ static void free_one_page(struct zone *zone,
 static void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid)
 {
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+   memset(page, 0, sizeof(struct page));
+#endif
set_page_links(page, zone, nid, pfn);
init_page_count(page);
page_mapcount_reset(page);
-- 
1.7.1



[v2 2/5] mm: defining memblock_virt_alloc_try_nid_raw

2017-03-24 Thread Pavel Tatashin
A new version of memblock_virt_alloc_* allocations:
- Does not zero the allocated memory
- Does not panic if request cannot be satisfied

Signed-off-by: Pavel Tatashin 
Reviewed-by: Shannon Nelson 
---
 include/linux/bootmem.h |3 +++
 mm/memblock.c   |   46 +++---
 2 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index dbaf312..b61ea10 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -160,6 +160,9 @@ extern int reserve_bootmem_node(pg_data_t *pgdat,
 #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0)
 
 /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
+void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
+ phys_addr_t min_addr,
+ phys_addr_t max_addr, int nid);
 void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
phys_addr_t align, phys_addr_t min_addr,
phys_addr_t max_addr, int nid);
diff --git a/mm/memblock.c b/mm/memblock.c
index 696f06d..7fdc555 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1271,7 +1271,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, phys_addr_t align, i
 static void * __init memblock_virt_alloc_internal(
phys_addr_t size, phys_addr_t align,
phys_addr_t min_addr, phys_addr_t max_addr,
-   int nid)
+   int nid, bool zero)
 {
phys_addr_t alloc;
void *ptr;
@@ -1322,7 +1322,8 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, phys_addr_t align, i
return NULL;
 done:
ptr = phys_to_virt(alloc);
-   memset(ptr, 0, size);
+   if (zero)
+   memset(ptr, 0, size);
 
/*
 * The min_count is set to 0 so that bootmem allocated blocks
@@ -1336,6 +1337,37 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, phys_addr_t align, i
 }
 
 /**
+ * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing
+ * memory and without panicking
+ * @size: size of memory block to be allocated in bytes
+ * @align: alignment of the region and block's size
+ * @min_addr: the lower bound of the memory region from where the allocation
+ *   is preferred (phys address)
+ * @max_addr: the upper bound of the memory region from where the allocation
+ *   is preferred (phys address), or %BOOTMEM_ALLOC_ACCESSIBLE to
+ *   allocate only from memory limited by memblock.current_limit value
+ * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
+ *
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. Does not zero allocated memory, does not panic if request
+ * cannot be satisfied.
+ *
+ * RETURNS:
+ * Virtual address of allocated memory block on success, NULL on failure.
+ */
+void * __init memblock_virt_alloc_try_nid_raw(
+   phys_addr_t size, phys_addr_t align,
+   phys_addr_t min_addr, phys_addr_t max_addr,
+   int nid)
+{
+   memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=0x%llx 
max_addr=0x%llx %pF\n",
+__func__, (u64)size, (u64)align, nid, (u64)min_addr,
+(u64)max_addr, (void *)_RET_IP_);
+   return memblock_virt_alloc_internal(size, align,
+  min_addr, max_addr, nid, false);
+}
+
+/**
  * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
@@ -1346,8 +1378,8 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, phys_addr_t align, i
  *   allocate only from memory limited by memblock.current_limit value
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
- * Public version of _memblock_virt_alloc_try_nid_nopanic() which provides
- * additional debug information (including caller info), if enabled.
+ * Public function, provides additional debug information (including caller
+ * info), if enabled. This function zeroes the allocated memory.
  *
  * RETURNS:
  * Virtual address of allocated memory block on success, NULL on failure.
@@ -1361,7 +1393,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, phys_addr_t align, i
 __func__, (u64)size, (u64)align, nid, (u64)min_addr,
 (u64)max_addr, (void *)_RET_IP_);
return memblock_virt_alloc_internal(size, align, min_addr,
-max_addr, nid);
+max_addr, nid, true);
 }
 
 /**
@@ -1375,7 +1407,7 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t 
size, 

Re: [PATCH v2] ASoC: imx-wm8960: Let codec driver enable/disable its MCLK

2017-03-24 Thread Nicolin Chen
On Fri, Mar 24, 2017 at 11:14:48AM +0200, Daniel Baluta wrote:
> From: Daniel Baluta 
> 
> WM8962 needs its MCLK when powerup in wm8962_resume(). Thus it's better
> to control the MCLK in codec driver. Thus remove the clock enable in
> machine driver accordingly.
> 
> While at it, get rid of imx_wm8962_remove function since it is now
> empty.
> 
> Signed-off-by: Daniel Baluta 
> ---
> Changes since v1:
>   * s/wm8960/imx-wm890/ in subject prefix

imx-wm8962 vs imx-wm8960 :)

>   * s/dirver/driver in commit message
>   * took ownership over the patch from Nicolin Chen
>   as per his agreement.
> 
>  sound/soc/fsl/imx-wm8962.c | 40 
>  1 file changed, 8 insertions(+), 32 deletions(-)
> 
> diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c
> index 1b60958..3d894d9 100644
> --- a/sound/soc/fsl/imx-wm8962.c
> +++ b/sound/soc/fsl/imx-wm8962.c
> @@ -33,7 +33,6 @@ struct imx_wm8962_data {
>   struct snd_soc_card card;
>   char codec_dai_name[DAI_NAME_SIZE];
>   char platform_name[DAI_NAME_SIZE];
> - struct clk *codec_clk;
>   unsigned int clk_frequency;
>  };
>  
> @@ -163,6 +162,7 @@ static int imx_wm8962_probe(struct platform_device *pdev)
>   struct imx_priv *priv = _priv;
>   struct i2c_client *codec_dev;
>   struct imx_wm8962_data *data;
> + struct clk *codec_clk;
>   int int_port, ext_port;
>   int ret;
>  
> @@ -231,19 +231,14 @@ static int imx_wm8962_probe(struct platform_device 
> *pdev)
>   goto fail;
>   }
>  
> - data->codec_clk = devm_clk_get(&codec_dev->dev, NULL);
> - if (IS_ERR(data->codec_clk)) {
> - ret = PTR_ERR(data->codec_clk);
> + codec_clk = devm_clk_get(&codec_dev->dev, NULL);

I actually just noticed a problem here -- not from your change but
it existed in the first place. IIRC, devm_clk_get() would only be
properly clk_put() for this driver if we passed pdev->dev while we
are using codec_dev->dev. So here we probably should use clk_get()
instead and call a clk_put() right after fetching the clock rate.

Would you please fix this issue along with this change since you
are touching the exact same line of the code?

Thanks
Nicolin

> + if (IS_ERR(codec_clk)) {
> + ret = PTR_ERR(codec_clk);
>   dev_err(&codec_dev->dev, "failed to get codec clk: %d\n", ret);
>   goto fail;
>   }
>  
> - data->clk_frequency = clk_get_rate(data->codec_clk);
> - ret = clk_prepare_enable(data->codec_clk);
> - if (ret) {
> - dev_err(_dev->dev, "failed to enable codec clk: %d\n", 
> ret);
> - goto fail;
> - }
> + data->clk_frequency = clk_get_rate(codec_clk);
>  
>   data->dai.name = "HiFi";
>   data->dai.stream_name = "HiFi";
> @@ -258,10 +253,10 @@ static int imx_wm8962_probe(struct platform_device 
> *pdev)
>   data->card.dev = &pdev->dev;
>   ret = snd_soc_of_parse_card_name(&data->card, "model");
>   if (ret)
> - goto clk_fail;
> + goto fail;
>   ret = snd_soc_of_parse_audio_routing(&data->card, "audio-routing");
>   if (ret)
> - goto clk_fail;
> + goto fail;
>   data->card.num_links = 1;
>   data->card.owner = THIS_MODULE;
>   data->card.dai_link = &data->dai;
> @@ -277,16 +272,9 @@ static int imx_wm8962_probe(struct platform_device *pdev)
>   ret = devm_snd_soc_register_card(&pdev->dev, &data->card);
>   if (ret) {
>   dev_err(&pdev->dev, "snd_soc_register_card failed (%d)\n", ret);
> - goto clk_fail;
> + goto fail;
>   }
>  
> - of_node_put(ssi_np);
> - of_node_put(codec_np);
> -
> - return 0;
> -
> -clk_fail:
> - clk_disable_unprepare(data->codec_clk);
>  fail:
>   of_node_put(ssi_np);
>   of_node_put(codec_np);
> @@ -294,17 +282,6 @@ static int imx_wm8962_probe(struct platform_device *pdev)
>   return ret;
>  }



Build failure -- powerpc/boot: Add OPAL console to epapr wrappers

2017-03-24 Thread Daniel Walker

I get this build failure,


In file included from arch/powerpc/boot/fdt.c:51:
../arch/powerpc/boot/libfdt_env.h:9: error: redefinition of typedef 
'uint32_t'
../arch/powerpc/boot/types.h:20: note: previous declaration of 
'uint32_t' was here
../arch/powerpc/boot/libfdt_env.h:10: error: redefinition of typedef 
'uint64_t'
../arch/powerpc/boot/types.h:21: note: previous declaration of 
'uint64_t' was here

make[2]: *** [arch/powerpc/boot/fdt.o] Error 1
make[1]: *** [uImage] Error 2
make[1]: Leaving directory `/nobackup/danielwa/linux/t1040'
make: *** [sub-make] Error 2


and it bisects to ,


commit 656ad58ef19e2a763fa5c938b20ae0f6b8d67242
Author: Oliver O'Halloran 
Date:   Fri Jul 1 00:34:37 2016 +1000

powerpc/boot: Add OPAL console to epapr wrappers

This patch adds an OPAL console backend to the powerpc boot wrapper so
that decompression failures inside the wrapper can be reported to the
user. This is important since it typically indicates data corruption in
the firmware and other nasty things.

Currently this only works when building a little endian kernel. When
compiling a 64 bit BE kernel the wrapper is always build 32 bit to be
compatible with some 32 bit firmwares. BE support will be added at a
later date. Another limitation of this is that only the "raw" type of
OPAL console is supported, however machines that provide a hvsi console
also provide a raw console so this is not an issue in practice.

Actually-written-by: Benjamin Herrenschmidt 
Signed-off-by: Oliver O'Halloran 
[mpe: Move #ifdef __powerpc64__ to avoid warnings on 32-bit]
Signed-off-by: Michael Ellerman 


I can provide a config file if needed. My apologies if this was already 
reported.



Daniel



[PATCH v3 1/7] Add multibyte memset functions

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

memset16(), memset32() and memset64() are like memset(), but allow the
caller to fill the destination with a multibyte pattern.  memset_l()
and memset_p() allow the caller to use unsigned long and pointer
values respectively.  memset64() is currently only available on 64-bit
architectures.

Signed-off-by: Matthew Wilcox 
---
 include/linux/string.h | 30 ++
 lib/string.c   | 68 ++
 2 files changed, 98 insertions(+)

diff --git a/include/linux/string.h b/include/linux/string.h
index 26b6f6a66f83..b376875b650c 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -99,6 +99,36 @@ extern __kernel_size_t strcspn(const char *,const char *);
 #ifndef __HAVE_ARCH_MEMSET
 extern void * memset(void *,int,__kernel_size_t);
 #endif
+
+#ifndef __HAVE_ARCH_MEMSET16
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
+#endif
+
+#ifndef __HAVE_ARCH_MEMSET32
+extern void *memset32(uint32_t *, uint32_t, __kernel_size_t);
+#endif
+
+#ifndef __HAVE_ARCH_MEMSET64
+extern void *memset64(uint64_t *, uint64_t, __kernel_size_t);
+#endif
+
+static inline void *memset_l(unsigned long *p, unsigned long v,
+   __kernel_size_t n)
+{
+   if (BITS_PER_LONG == 32)
+   return memset32((uint32_t *)p, v, n);
+   else
+   return memset64((uint64_t *)p, v, n);
+}
+
+static inline void *memset_p(void **p, void *v, __kernel_size_t n)
+{
+   if (BITS_PER_LONG == 32)
+   return memset32((uint32_t *)p, (uintptr_t)v, n);
+   else
+   return memset64((uint64_t *)p, (uintptr_t)v, n);
+}
+
 #ifndef __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *,const void *,__kernel_size_t);
 #endif
diff --git a/lib/string.c b/lib/string.c
index ed83562a53ae..f18ba402e503 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -697,6 +697,74 @@ void memzero_explicit(void *s, size_t count)
 }
 EXPORT_SYMBOL(memzero_explicit);
 
+#ifndef __HAVE_ARCH_MEMSET16
+/**
+ * memset16() - Fill a memory area with a uint16_t
+ * @s: Pointer to the start of the area.
+ * @v: The value to fill the area with
+ * @count: The number of values to store
+ *
+ * Differs from memset() in that it fills with a uint16_t instead
+ * of a byte.  Remember that @count is the number of uint16_ts to
+ * store, not the number of bytes.
+ */
+void *memset16(uint16_t *s, uint16_t v, size_t count)
+{
+   uint16_t *xs = s;
+
+   while (count--)
+   *xs++ = v;
+   return s;
+}
+EXPORT_SYMBOL(memset16);
+#endif
+
+#ifndef __HAVE_ARCH_MEMSET32
+/**
+ * memset32() - Fill a memory area with a uint32_t
+ * @s: Pointer to the start of the area.
+ * @v: The value to fill the area with
+ * @count: The number of values to store
+ *
+ * Differs from memset() in that it fills with a uint32_t instead
+ * of a byte.  Remember that @count is the number of uint32_ts to
+ * store, not the number of bytes.
+ */
+void *memset32(uint32_t *s, uint32_t v, size_t count)
+{
+   uint32_t *xs = s;
+
+   while (count--)
+   *xs++ = v;
+   return s;
+}
+EXPORT_SYMBOL(memset32);
+#endif
+
+#ifndef __HAVE_ARCH_MEMSET64
+#if BITS_PER_LONG > 32
+/**
+ * memset64() - Fill a memory area with a uint64_t
+ * @s: Pointer to the start of the area.
+ * @v: The value to fill the area with
+ * @count: The number of values to store
+ *
+ * Differs from memset() in that it fills with a uint64_t instead
+ * of a byte.  Remember that @count is the number of uint64_ts to
+ * store, not the number of bytes.
+ */
+void *memset64(uint64_t *s, uint64_t v, size_t count)
+{
+   uint64_t *xs = s;
+
+   while (count--)
+   *xs++ = v;
+   return s;
+}
+EXPORT_SYMBOL(memset64);
+#endif
+#endif
+
 #ifndef __HAVE_ARCH_MEMCPY
 /**
  * memcpy - Copy one area of memory to another
-- 
2.11.0



[PATCH v3 5/7] zram: Convert to using memset_l

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

zram was the motivation for creating memset_l().  Minchan Kim sees a 7%
performance improvement on x86 with 100MB of non-zero deduplicatable
data:

perf stat -r 10 dd if=/dev/zram0 of=/dev/null

vanilla:0.232050465 seconds time elapsed ( +-  0.51% )
memset_l:   0.217219387 seconds time elapsed ( +-  0.07% )

Signed-off-by: Matthew Wilcox 
Tested-by: Minchan Kim 
---
 drivers/block/zram/zram_drv.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index e27d89a36c34..25dcad309695 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -157,20 +157,11 @@ static inline void update_used_max(struct zram *zram,
} while (old_max != cur_max);
 }
 
-static inline void zram_fill_page(char *ptr, unsigned long len,
+static inline void zram_fill_page(void *ptr, unsigned long len,
unsigned long value)
 {
-   int i;
-   unsigned long *page = (unsigned long *)ptr;
-
WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
-
-   if (likely(value == 0)) {
-   memset(ptr, 0, len);
-   } else {
-   for (i = 0; i < len / sizeof(*page); i++)
-   page[i] = value;
-   }
+   memset_l(ptr, value, len / sizeof(unsigned long));
 }
 
 static bool page_same_filled(void *ptr, unsigned long *element)
@@ -193,7 +184,7 @@ static bool page_same_filled(void *ptr, unsigned long 
*element)
 static void handle_same_page(struct bio_vec *bvec, unsigned long element)
 {
struct page *page = bvec->bv_page;
-   void *user_mem;
+   char *user_mem;
 
user_mem = kmap_atomic(page);
zram_fill_page(user_mem + bvec->bv_offset, bvec->bv_len, element);
-- 
2.11.0



[PATCH v3 2/7] ARM: Implement memset16, memset32 & memset64

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

ARM is only 32-bit, so it doesn't really need a memset64, but it was
essentially free to add it to the existing implementation.

Signed-off-by: Matthew Wilcox 
Reviewed-by: Russell King 
---
 arch/arm/include/asm/string.h | 21 +
 arch/arm/kernel/armksyms.c|  3 +++
 arch/arm/lib/memset.S | 44 ++-
 3 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index cf4f3aad0fc1..bc7a1be7a76a 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -24,6 +24,27 @@ extern void * memchr(const void *, int, __kernel_size_t);
 #define __HAVE_ARCH_MEMSET
 extern void * memset(void *, int, __kernel_size_t);
 
+#define __HAVE_ARCH_MEMSET16
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+   return __memset16(p, v, n * 2);
+}
+
+#define __HAVE_ARCH_MEMSET32
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+   return __memset32(p, v, n * 4);
+}
+
+#define __HAVE_ARCH_MEMSET64
+extern void *__memset64(uint64_t *, uint32_t low, __kernel_size_t, uint32_t 
hi);
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+   return __memset64(p, v, n * 8, v >> 32);
+}
+
 extern void __memzero(void *ptr, __kernel_size_t n);
 
 #define memset(p,v,n)  \
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index 8e8d20cdbce7..633341ed0713 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -87,6 +87,9 @@ EXPORT_SYMBOL(__raw_writesl);
 EXPORT_SYMBOL(strchr);
 EXPORT_SYMBOL(strrchr);
 EXPORT_SYMBOL(memset);
+EXPORT_SYMBOL(__memset16);
+EXPORT_SYMBOL(__memset32);
+EXPORT_SYMBOL(__memset64);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
 EXPORT_SYMBOL(memchr);
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 3c65e3bd790f..9adc9bdf3ffb 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -21,14 +21,14 @@ ENTRY(memset)
 UNWIND( .fnstart )
ands r3, r0, #3  @ 1 unaligned?
mov ip, r0  @ preserve r0 as return value
+   orr r1, r1, r1, lsl #8
bne 6f  @ 1
 /*
  * we know that the pointer in ip is aligned to a word boundary.
  */
-1: orr r1, r1, r1, lsl #8
-   orr r1, r1, r1, lsl #16
+1: orr r1, r1, r1, lsl #16
mov r3, r1
-   cmp r2, #16
+7: cmp r2, #16
blt 4f
 
 #if ! CALGN(1)+0
@@ -41,7 +41,7 @@ UNWIND( .fnend  )
 UNWIND( .fnstart)
 UNWIND( .save {r8, lr}  )
mov r8, r1
-   mov lr, r1
+   mov lr, r3
 
 2: subs r2, r2, #64
stmgeia ip!, {r1, r3, r8, lr}   @ 64 bytes at a time.
@@ -73,11 +73,11 @@ UNWIND( .fnend )
 UNWIND( .fnstart   )
 UNWIND( .save {r4-r8, lr}  )
mov r4, r1
-   mov r5, r1
+   mov r5, r3
mov r6, r1
-   mov r7, r1
+   mov r7, r3
mov r8, r1
-   mov lr, r1
+   mov lr, r3
 
cmp r2, #96
tstgt   ip, #31
@@ -114,12 +114,13 @@ UNWIND( .fnstart)
tst r2, #4
strne   r1, [ip], #4
 /*
- * When we get here, we've got less than 4 bytes to zero.  We
+ * When we get here, we've got less than 4 bytes to set.  We
  * may have an unaligned pointer as well.
  */
 5: tst r2, #2
+   movne   r3, r1, lsr #8  @ the top half of a 16-bit pattern
strneb  r1, [ip], #1
-   strneb  r1, [ip], #1
+   strneb  r3, [ip], #1
tst r2, #1
strneb  r1, [ip], #1
ret lr
@@ -135,3 +136,28 @@ UNWIND( .fnstart)
 UNWIND( .fnend   )
 ENDPROC(memset)
 ENDPROC(mmioset)
+
+ENTRY(__memset16)
+UNWIND( .fnstart )
+   tst r0, #2  @ pointer unaligned?
+   mov ip, r0  @ preserve r0 as return value
+   beq 1b  @ jump into the middle of memset
+   subs r2, r2, #2  @ cope with n == 0
+   movge   r3, r1, lsr #8  @ r3 = r1 >> 8
+   strgeb  r1, [ip], #1@ *ip = r1
+   strgeb  r3, [ip], #1@ *ip = r3
+   bgt 1b  @ back into memset if n > 0
+   ret lr  @ otherwise return
+UNWIND( .fnend   )
+ENDPROC(__memset16)
+ENTRY(__memset32)
+UNWIND( .fnstart )
+   mov r3, r1  @ copy r1 to r3 and fall into memset64
+UNWIND( .fnend   )
+ENDPROC(__memset32)
+ENTRY(__memset64)
+UNWIND( .fnstart )
+   

[PATCH v3 6/7] sym53c8xx_2: Convert to use memset32

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

memset32() can be used to initialise these three arrays.  Minor code
footprint reduction.

Signed-off-by: Matthew Wilcox 
---
 drivers/scsi/sym53c8xx_2/sym_hipd.c | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c 
b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 6b349e301869..b886b10e3499 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -4985,13 +4985,10 @@ struct sym_lcb *sym_alloc_lcb (struct sym_hcb *np, 
u_char tn, u_char ln)
 *  Compute the bus address of this table.
 */
if (ln && !tp->luntbl) {
-   int i;
-
tp->luntbl = sym_calloc_dma(256, "LUNTBL");
if (!tp->luntbl)
goto fail;
-   for (i = 0 ; i < 64 ; i++)
-   tp->luntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa));
+   memset32(tp->luntbl, cpu_to_scr(vtobus(&np->badlun_sa)), 64);
tp->head.luntbl_sa = cpu_to_scr(vtobus(tp->luntbl));
}
 
@@ -5077,8 +5074,7 @@ static void sym_alloc_lcb_tags (struct sym_hcb *np, 
u_char tn, u_char ln)
/*
 *  Initialize the task table with invalid entries.
 */
-   for (i = 0 ; i < SYM_CONF_MAX_TASK ; i++)
-   lp->itlq_tbl[i] = cpu_to_scr(np->notask_ba);
+   memset32(lp->itlq_tbl, cpu_to_scr(np->notask_ba), SYM_CONF_MAX_TASK);
 
/*
 *  Fill up the tag buffer with tag numbers.
@@ -5764,8 +5760,7 @@ int sym_hcb_attach(struct Scsi_Host *shost, struct sym_fw 
*fw, struct sym_nvram
goto attach_failed;
 
np->badlun_sa = cpu_to_scr(SCRIPTB_BA(np, resel_bad_lun));
-   for (i = 0 ; i < 64 ; i++)  /* 64 luns/target, no less */
-   np->badluntbl[i] = cpu_to_scr(vtobus(&np->badlun_sa));
+   memset32(np->badluntbl, cpu_to_scr(vtobus(&np->badlun_sa)), 64);
 
/*
 *  Prepare the bus address array that contains the bus 
-- 
2.11.0



[PATCH v3 7/7] vga: Optimise console scrolling

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

Where possible, call memset16(), memmove() or memcpy() instead of using
open-coded loops.  If an architecture doesn't define VT_BUF_HAVE_RW,
we can do that from the generic code.  For the architectures which do
have special RW routines, usually we can do the special thing (pointer
test or byteswap) once (and then use a mem* call) instead of each time
around a loop.  Alpha is the only architecture missing a scr_memmovew()
definition (because it's non-trivial to write).

I don't like the calling convention that uses a byte count instead of
a count of u16s, but it's a little late to change that.  Reduces code
size of fbcon.o by almost 400 bytes on my laptop build.

Signed-off-by: Matthew Wilcox 
---
 arch/mips/include/asm/vga.h|  6 ++
 arch/powerpc/include/asm/vga.h |  8 
 arch/sparc/include/asm/vga.h   | 24 
 include/linux/vt_buffer.h  | 12 
 4 files changed, 50 insertions(+)

diff --git a/arch/mips/include/asm/vga.h b/arch/mips/include/asm/vga.h
index f82c83749a08..7510f406e1e1 100644
--- a/arch/mips/include/asm/vga.h
+++ b/arch/mips/include/asm/vga.h
@@ -40,9 +40,15 @@ static inline u16 scr_readw(volatile const u16 *addr)
return le16_to_cpu(*addr);
 }
 
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int count)
+{
+   memset16(s, cpu_to_le16(v), count / 2);
+}
+
 #define scr_memcpyw(d, s, c) memcpy(d, s, c)
 #define scr_memmovew(d, s, c) memmove(d, s, c)
 #define VT_BUF_HAVE_MEMCPYW
 #define VT_BUF_HAVE_MEMMOVEW
+#define VT_BUF_HAVE_MEMSETW
 
 #endif /* _ASM_VGA_H */
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
index ab3acd2f2786..7a7b541b7493 100644
--- a/arch/powerpc/include/asm/vga.h
+++ b/arch/powerpc/include/asm/vga.h
@@ -33,8 +33,16 @@ static inline u16 scr_readw(volatile const u16 *addr)
return le16_to_cpu(*addr);
 }
 
+#define VT_BUF_HAVE_MEMSETW
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int n)
+{
+   memset16(s, cpu_to_le16(v), n / 2);
+}
+
 #define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
 #define scr_memcpyw   memcpy
+#define scr_memmovew   memmove
 
 #endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */
 
diff --git a/arch/sparc/include/asm/vga.h b/arch/sparc/include/asm/vga.h
index ec0e9967d93d..1fab92b110d9 100644
--- a/arch/sparc/include/asm/vga.h
+++ b/arch/sparc/include/asm/vga.h
@@ -11,6 +11,9 @@
 #include 
 
 #define VT_BUF_HAVE_RW
+#define VT_BUF_HAVE_MEMSETW
+#define VT_BUF_HAVE_MEMCPYW
+#define VT_BUF_HAVE_MEMMOVEW
 
 #undef scr_writew
 #undef scr_readw
@@ -29,6 +32,27 @@ static inline u16 scr_readw(const u16 *addr)
return *addr;
 }
 
+static inline void scr_memsetw(u16 *p, u16 v, unsigned int n)
+{
+   BUG_ON((long) p >= 0);
+
+   memset16(p, cpu_to_le16(v), n / 2);
+}
+
+static inline void scr_memcpyw(u16 *d, u16 *s, unsigned int n)
+{
+   BUG_ON((long) d >= 0);
+
+   memcpy(d, s, n);
+}
+
+static inline void scr_memmovew(u16 *d, u16 *s, unsigned int n)
+{
+   BUG_ON((long) d >= 0);
+
+   memmove(d, s, n);
+}
+
 #define VGA_MAP_MEM(x,s) (x)
 
 #endif
diff --git a/include/linux/vt_buffer.h b/include/linux/vt_buffer.h
index f38c10ba3ff5..31b92fcd8f03 100644
--- a/include/linux/vt_buffer.h
+++ b/include/linux/vt_buffer.h
@@ -26,24 +26,33 @@
 #ifndef VT_BUF_HAVE_MEMSETW
 static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
 {
+#ifdef VT_BUF_HAVE_RW
count /= 2;
while (count--)
scr_writew(c, s++);
+#else
+   memset16(s, c, count / 2);
+#endif
 }
 #endif
 
 #ifndef VT_BUF_HAVE_MEMCPYW
 static inline void scr_memcpyw(u16 *d, const u16 *s, unsigned int count)
 {
+#ifdef VT_BUF_HAVE_RW
count /= 2;
while (count--)
scr_writew(scr_readw(s++), d++);
+#else
+   memcpy(d, s, count);
+#endif
 }
 #endif
 
 #ifndef VT_BUF_HAVE_MEMMOVEW
 static inline void scr_memmovew(u16 *d, const u16 *s, unsigned int count)
 {
+#ifdef VT_BUF_HAVE_RW
if (d < s)
scr_memcpyw(d, s, count);
else {
@@ -53,6 +62,9 @@ static inline void scr_memmovew(u16 *d, const u16 *s, 
unsigned int count)
while (count--)
scr_writew(scr_readw(--s), --d);
}
+#else
+   memmove(d, s, count);
+#endif
 }
 #endif
 
-- 
2.11.0



[PATCH v3 0/7] Add memsetN functions

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

zram was recently enhanced to support compressing pages with a repeating
pattern up to the size of an unsigned long.  As part of the discussion,
we noted it would be nice if architectures had optimised routines
to fill regions of memory with patterns larger than those contained
in a single byte.  Our suspicions were right; the x86 version offers
approximately a 7% performance improvement over the C implementation.

The generic memfill() function is part of Lars Wirzenius' publib,
but it doesn't offer the most convenient interface.  I chose to add
five more-specific functions as part of this patchset -- memset16(),
memset32(), memset64(), memset_l() (long) and memset_p() (pointer).

It would be nice to have some more architectures implement optimised
memsetN calls.  It would also be nice to find more places in the kernel
which could benefit from calling these functions.  Maybe a coccinelle
script could be written to find such places?  We're looking for loops
over an array where the value being stored into the array does not depend
on the iteration variable.

Since v1 of the patchset, I stumbled on Alpha's memsetw() which
caused me to add memset16() to complete the set.  I removed the
'__HAVE_ARCH_MEMSET_PLUS' preprocessor symbol in favour of separate
MEMSET16 MEMSET32 and MEMSET64 symbols.  I also reviewed the scr_mem*w()
usages across the different architectures and implemented some obvious
missing optimisations.  Alpha is still missing scr_memmovew() as it
would be non-trivial to write.

Russell's review on patch 2 only applies to the memset32/memset64
implementation.  The memset16 is unreviewed (and, indeed, untested)
to date.

Matthew Wilcox (7):
  Add multibyte memset functions
  ARM: Implement memset16, memset32 & memset64
  x86: Implement memset16, memset32 & memset64
  alpha: Add support for memset16
  zram: Convert to using memset_l
  sym53c8xx_2: Convert to use memset32
  vga: Optimise console scrolling

 arch/alpha/include/asm/string.h | 15 
 arch/alpha/include/asm/vga.h|  2 +-
 arch/alpha/lib/memset.S | 10 +++---
 arch/arm/include/asm/string.h   | 21 
 arch/arm/kernel/armksyms.c  |  3 ++
 arch/arm/lib/memset.S   | 44 +++-
 arch/mips/include/asm/vga.h |  6 
 arch/powerpc/include/asm/vga.h  |  8 +
 arch/sparc/include/asm/vga.h| 24 +
 arch/x86/include/asm/string_32.h| 24 +
 arch/x86/include/asm/string_64.h| 36 
 drivers/block/zram/zram_drv.c   | 15 ++--
 drivers/scsi/sym53c8xx_2/sym_hipd.c | 11 ++
 include/linux/string.h  | 30 
 include/linux/vt_buffer.h   | 12 +++
 lib/string.c| 68 +
 16 files changed, 287 insertions(+), 42 deletions(-)

-- 
2.11.0


[PATCH v3 3/7] x86: Implement memset16, memset32 & memset64

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

These are single instructions on x86.  There's no 64-bit instruction
for x86-32, but we don't yet have any user for memset64() on 32-bit
architectures, so don't bother to implement it.

Signed-off-by: Matthew Wilcox 
---
 arch/x86/include/asm/string_32.h | 24 
 arch/x86/include/asm/string_64.h | 36 
 2 files changed, 60 insertions(+)

diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index 3d3e8353ee5c..84da91fe13ac 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -331,6 +331,30 @@ void *__constant_c_and_count_memset(void *s, unsigned long 
pattern,
 : __memset((s), (c), (count)))
 #endif
 
+#define __HAVE_ARCH_MEMSET16
+static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
+{
+   int d0, d1;
+   asm volatile("rep\n\t"
+"stosw"
+: "=&c" (d0), "=&D" (d1)
+: "a" (v), "1" (s), "0" (n)
+: "memory");
+   return s;
+}
+
+#define __HAVE_ARCH_MEMSET32
+static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
+{
+   int d0, d1;
+   asm volatile("rep\n\t"
+"stosl"
+: "=&c" (d0), "=&D" (d1)
+: "a" (v), "1" (s), "0" (n)
+: "memory");
+   return s;
+}
+
 /*
  * find the first occurrence of byte 'c', or 1 past the area if none
  */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index a164862d77e3..71c5e860c7da 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -56,6 +56,42 @@ extern void *__memcpy(void *to, const void *from, size_t 
len);
 void *memset(void *s, int c, size_t n);
 void *__memset(void *s, int c, size_t n);
 
+#define __HAVE_ARCH_MEMSET16
+static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
+{
+   long d0, d1;
+   asm volatile("rep\n\t"
+"stosw"
+: "=&c" (d0), "=&D" (d1)
+: "a" (v), "1" (s), "0" (n)
+: "memory");
+   return s;
+}
+
+#define __HAVE_ARCH_MEMSET32
+static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
+{
+   long d0, d1;
+   asm volatile("rep\n\t"
+"stosl"
+: "=&c" (d0), "=&D" (d1)
+: "a" (v), "1" (s), "0" (n)
+: "memory");
+   return s;
+}
+
+#define __HAVE_ARCH_MEMSET64
+static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
+{
+   long d0, d1;
+   asm volatile("rep\n\t"
+"stosq"
+: "=&c" (d0), "=&D" (d1)
+: "a" (v), "1" (s), "0" (n)
+: "memory");
+   return s;
+}
+
 #define __HAVE_ARCH_MEMMOVE
 void *memmove(void *dest, const void *src, size_t count);
 void *__memmove(void *dest, const void *src, size_t count);
-- 
2.11.0



[PATCH v3 4/7] alpha: Add support for memset16

2017-03-24 Thread Matthew Wilcox
From: Matthew Wilcox 

Alpha already had an optimised memset-16-bit-quantity assembler routine
called memsetw().  It has a slightly different calling convention
from memset16() in that it takes a byte count, not a count of words.
That's the same convention used by ARM's __memset16(), so rename Alpha's
routine to match and add a memset16() wrapper around it.  Then convert
Alpha's scr_memsetw() to call memset16() instead of memsetw().

Signed-off-by: Matthew Wilcox 
---
 arch/alpha/include/asm/string.h | 15 ---
 arch/alpha/include/asm/vga.h|  2 +-
 arch/alpha/lib/memset.S | 10 +-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/arch/alpha/include/asm/string.h b/arch/alpha/include/asm/string.h
index c2911f591704..74c0a693b76b 100644
--- a/arch/alpha/include/asm/string.h
+++ b/arch/alpha/include/asm/string.h
@@ -65,13 +65,14 @@ extern void * memchr(const void *, int, size_t);
aligned values.  The DEST and COUNT parameters must be even for 
correct operation.  */
 
-#define __HAVE_ARCH_MEMSETW
-extern void * __memsetw(void *dest, unsigned short, size_t count);
-
-#define memsetw(s, c, n)\
-(__builtin_constant_p(c)\
- ? __constant_c_memset((s),0x0001000100010001UL*(unsigned short)(c),(n)) \
- : __memsetw((s),(c),(n)))
+#define __HAVE_ARCH_MEMSET16
+extern void * __memset16(void *dest, unsigned short, size_t count);
+static inline void *memset16(uint16_t *p, uint16_t v, size_t n)
+{
+   if (__builtin_constant_p(v))
+   return __constant_c_memset(p, 0x0001000100010001UL * v, n * 2);
+   return __memset16(p, v, n * 2);
+}
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/alpha/include/asm/vga.h b/arch/alpha/include/asm/vga.h
index c00106bac521..3c1c2b6128e7 100644
--- a/arch/alpha/include/asm/vga.h
+++ b/arch/alpha/include/asm/vga.h
@@ -34,7 +34,7 @@ static inline void scr_memsetw(u16 *s, u16 c, unsigned int 
count)
if (__is_ioaddr(s))
memsetw_io((u16 __iomem *) s, c, count);
else
-   memsetw(s, c, count);
+   memset16(s, c, count / 2);
 }
 
 /* Do not trust that the usage will be correct; analyze the arguments.  */
diff --git a/arch/alpha/lib/memset.S b/arch/alpha/lib/memset.S
index 89a26f5e89de..f824969e9e77 100644
--- a/arch/alpha/lib/memset.S
+++ b/arch/alpha/lib/memset.S
@@ -20,7 +20,7 @@
.globl memset
.globl __memset
.globl ___memset
-   .globl __memsetw
+   .globl __memset16
.globl __constant_c_memset
 
.ent ___memset
@@ -110,8 +110,8 @@ EXPORT_SYMBOL(___memset)
 EXPORT_SYMBOL(__constant_c_memset)
 
.align 5
-   .ent __memsetw
-__memsetw:
+   .ent __memset16
+__memset16:
.prologue 0
 
inswl $17,0,$1  /* E0 */
@@ -123,8 +123,8 @@ __memsetw:
or $1,$4,$17/* E0 */
br __constant_c_memset  /* .. E1 */
 
-   .end __memsetw
-EXPORT_SYMBOL(__memsetw)
+   .end __memset16
+EXPORT_SYMBOL(__memset16)
 
 memset = ___memset
 __memset = ___memset
-- 
2.11.0



[PATCH v2] cxl: Enable PCI device IDs for future IBM CXL adapters

2017-03-24 Thread Matthew R. Ochs
Add support for future IBM Coherent Accelerator (CXL) devices
with IDs of 0x0623 and 0x0628.

Signed-off-by: Matthew R. Ochs 
Signed-off-by: Uma Krishnan 
Acked-by: Frederic Barrat 
---
Changes in v2:
 - Add device ID 0x0628

 drivers/misc/cxl/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 80a87ab..dd606a2 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -123,6 +123,8 @@ static const struct pci_device_id cxl_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x044b), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x04cf), },
{ PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0601), },
+   { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0623), },
+   { PCI_DEVICE(PCI_VENDOR_ID_IBM, 0x0628), },
{ PCI_DEVICE_CLASS(0x12, ~0), },
 
{ }
-- 
2.1.0



Re: [PATCH 1/4] crypto: powerpc - Factor out the core CRC vpmsum algorithm

2017-03-24 Thread Herbert Xu
Daniel Axtens  wrote:
> The core nuts and bolts of the crc32c vpmsum algorithm will
> also work for a number of other CRC algorithms with different
> polynomials. Factor out the function into a new asm file.
> 
> To handle multiple users of the function, a user simply
> provides constants, defines the name of their CRC function,
> and then #includes the core algorithm file.
> 
> Cc: Anton Blanchard 
> Signed-off-by: Daniel Axtens 

All patches applied.  Thanks.
-- 
Email: Herbert Xu 
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt


Re: [PATCH 3/3] powerpc/powernv: Introduce address translation services for Nvlink2

2017-03-24 Thread kbuild test robot
Hi Alistair,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.11-rc3 next-20170324]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Alistair-Popple/drivers-of-base-c-Add-of_property_read_u64_index/20170324-070416
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-xes_mpc85xx_defconfig (attached as .config)
compiler: powerpc-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

   In file included from arch/powerpc/mm/mem.c:49:0:
   arch/powerpc/include/asm/tlb.h: In function 'mm_is_core_local':
>> arch/powerpc/include/asm/tlb.h:74:21: error: 'mm_context_t {aka struct 
>> <anonymous>}' has no member named 'npu_context'
 return !mm->context.npu_context && cpumask_subset(mm_cpumask(mm),
^
   arch/powerpc/include/asm/tlb.h: In function 'mm_is_thread_local':
   arch/powerpc/include/asm/tlb.h:80:21: error: 'mm_context_t {aka struct 
<anonymous>}' has no member named 'npu_context'
 return !mm->context.npu_context && cpumask_equal(mm_cpumask(mm),
^
--
   In file included from arch/powerpc/mm/tlb_nohash.c:43:0:
   arch/powerpc/include/asm/tlb.h: In function 'mm_is_core_local':
>> arch/powerpc/include/asm/tlb.h:74:21: error: 'mm_context_t {aka struct 
>> <anonymous>}' has no member named 'npu_context'
 return !mm->context.npu_context && cpumask_subset(mm_cpumask(mm),
^
   arch/powerpc/include/asm/tlb.h: In function 'mm_is_thread_local':
   arch/powerpc/include/asm/tlb.h:80:21: error: 'mm_context_t {aka struct 
<anonymous>}' has no member named 'npu_context'
 return !mm->context.npu_context && cpumask_equal(mm_cpumask(mm),
^
   arch/powerpc/include/asm/tlb.h: In function 'mm_is_core_local':
   arch/powerpc/include/asm/tlb.h:76:1: error: control reaches end of non-void 
function [-Werror=return-type]
}
^
   cc1: all warnings being treated as errors

vim +74 arch/powerpc/include/asm/tlb.h

68   * nest mmu. In this case we need to do a broadcast tlb to invalidate
69   * any caches on the nest mmu. Invalidations on the GPU are handled
70   * via mmu notfiers.
71   */
72  static inline int mm_is_core_local(struct mm_struct *mm)
73  {
  > 74  return !mm->context.npu_context && 
cpumask_subset(mm_cpumask(mm),
75
topology_sibling_cpumask(smp_processor_id()));
76  }
77  

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [v7] powerpc/powernv: add hdat attribute to sysfs

2017-03-24 Thread kbuild test robot
Hi Matt,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.11-rc3 next-20170324]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Matt-Brown/powerpc-powernv-add-hdat-attribute-to-sysfs/20170324-191306
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-defconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

Note: it may well be a FALSE warning. FWIW you are at least aware of it now.
http://gcc.gnu.org/wiki/Better_Uninitialized_Warnings

All errors (new ones prefixed by >>):

   arch/powerpc/platforms/powernv/opal.c: In function 
'__machine_initcall_powernv_opal_init':
>> arch/powerpc/platforms/powernv/opal.c:651:12: error: 'attr_name' may be used 
>> uninitialized in this function [-Werror=maybe-uninitialized]
  attr_name[n] = kstrdup(prop->name, GFP_KERNEL);
   ^
   arch/powerpc/platforms/powernv/opal.c:618:9: note: 'attr_name' was declared 
here
 char **attr_name;
^
>> arch/powerpc/platforms/powernv/opal.c:661:12: error: 'exported_attrs' may be 
>> used uninitialized in this function [-Werror=maybe-uninitialized]
  attr_tmp = &exported_attrs[n];
  ~^~~~
   arch/powerpc/platforms/powernv/opal.c:617:24: note: 'exported_attrs' was 
declared here
 struct bin_attribute *exported_attrs;
   ^~
   cc1: all warnings being treated as errors

vim +/attr_name +651 arch/powerpc/platforms/powernv/opal.c

   645  GFP_KERNEL);
   646  attr_name = kzalloc(sizeof(char *)*(attr_count-2), 
GFP_KERNEL);
   647  }
   648  
   649  for_each_property_of_node(fw, prop) {
   650  
 > 651  attr_name[n] = kstrdup(prop->name, GFP_KERNEL);
   652  syms = of_get_property(fw, attr_name[n], &size);
   653  
   654  if (!strcmp(attr_name[n], "name") ||
   655  !strcmp(attr_name[n], "phandle"))
   656  continue;
   657  
   658  if (!syms || size != 2 * sizeof(__be64))
   659  continue;
   660  
 > 661  attr_tmp = &exported_attrs[n];
   662  attr_tmp->attr.name = attr_name[n];
   663  attr_tmp->attr.mode = 0400;
   664  attr_tmp->read = export_attr_read;

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


Re: [PATCH 3/3] powerpc/powernv: Introduce address translation services for Nvlink2

2017-03-24 Thread Michael Ellerman
Alistair Popple  writes:

> diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
> index 6095575..fc61fca 100644
> --- a/arch/powerpc/include/asm/tlb.h
> +++ b/arch/powerpc/include/asm/tlb.h
> @@ -63,15 +63,21 @@ static inline void 
> tlb_remove_check_page_size_change(struct mmu_gather *tlb,
>  }
>  
>  #ifdef CONFIG_SMP
> +/* If there is an NPU context associated with this thread it may have
> + * been active on a GPU which has issued translation requests via the
> + * nest mmu. In this case we need to do a broadcast tlb to invalidate
> + * any caches on the nest mmu. Invalidations on the GPU are handled
> + * via mmu notifiers.
> + */
>  static inline int mm_is_core_local(struct mm_struct *mm)
>  {
> - return cpumask_subset(mm_cpumask(mm),
> + return !mm->context.npu_context && cpumask_subset(mm_cpumask(mm),
> topology_sibling_cpumask(smp_processor_id()));
>  }

This breaks the BookE build (corenet64_smp_defconfig):

23:22:58 In file included from arch/powerpc/mm/pgtable-book3e.c:15:0:
23:22:58 ./arch/powerpc/include/asm/tlb.h: In function 'mm_is_core_local':
23:22:58 ./arch/powerpc/include/asm/tlb.h:75:21: error: 'mm_context_t {aka 
struct <anonymous>}' has no member named 'npu_context'
23:22:58   return !mm->context.npu_context && cpumask_subset(mm_cpumask(mm),
23:22:58  ^

cheers


Re: [PATCH 2/5] KVM: PPC: Book3S: Add MMIO emulation for FP and VSX instructions

2017-03-24 Thread kbuild test robot
Hi Bin,

[auto build test ERROR on powerpc/next]
[also build test ERROR on v4.11-rc3 next-20170324]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Paul-Mackerras/KVM-PPC-Improve-MMIO-emulation/20170323-180125
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-allyesconfig (attached as .config)
compiler: powerpc64-linux-gnu-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
wget 
https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
make.cross ARCH=powerpc 

All errors (new ones prefixed by >>):

>> arch/powerpc/kvm/fpu.o:(.opd+0x0): multiple definition of `fps_fres'
   arch/powerpc/kvm/fpu.o:(.opd+0x0): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fres':
   (.text+0x0): multiple definition of `.fps_fres'
   arch/powerpc/kvm/fpu.o:(.text+0x0): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x18): multiple definition of `fps_frsqrte'
   arch/powerpc/kvm/fpu.o:(.opd+0x18): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_frsqrte':
   (.text+0x20): multiple definition of `.fps_frsqrte'
   arch/powerpc/kvm/fpu.o:(.text+0x20): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x30): multiple definition of `fps_fsqrts'
   arch/powerpc/kvm/fpu.o:(.opd+0x30): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fsqrts':
   (.text+0x40): multiple definition of `.fps_fsqrts'
   arch/powerpc/kvm/fpu.o:(.text+0x40): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x48): multiple definition of `fps_fadds'
   arch/powerpc/kvm/fpu.o:(.opd+0x48): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fadds':
   (.text+0x60): multiple definition of `.fps_fadds'
   arch/powerpc/kvm/fpu.o:(.text+0x60): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x60): multiple definition of `fps_fdivs'
   arch/powerpc/kvm/fpu.o:(.opd+0x60): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fdivs':
   (.text+0x84): multiple definition of `.fps_fdivs'
   arch/powerpc/kvm/fpu.o:(.text+0x84): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x78): multiple definition of `fps_fmuls'
   arch/powerpc/kvm/fpu.o:(.opd+0x78): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fmuls':
   (.text+0xa8): multiple definition of `.fps_fmuls'
   arch/powerpc/kvm/fpu.o:(.text+0xa8): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x90): multiple definition of `fps_fsubs'
   arch/powerpc/kvm/fpu.o:(.opd+0x90): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fsubs':
   (.text+0xcc): multiple definition of `.fps_fsubs'
   arch/powerpc/kvm/fpu.o:(.text+0xcc): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0xa8): multiple definition of `fps_fmadds'
   arch/powerpc/kvm/fpu.o:(.opd+0xa8): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fmadds':
   (.text+0xf0): multiple definition of `.fps_fmadds'
   arch/powerpc/kvm/fpu.o:(.text+0xf0): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0xc0): multiple definition of `fps_fmsubs'
   arch/powerpc/kvm/fpu.o:(.opd+0xc0): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fmsubs':
   (.text+0x118): multiple definition of `.fps_fmsubs'
   arch/powerpc/kvm/fpu.o:(.text+0x118): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0xd8): multiple definition of `fps_fnmadds'
   arch/powerpc/kvm/fpu.o:(.opd+0xd8): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fnmadds':
   (.text+0x140): multiple definition of `.fps_fnmadds'
   arch/powerpc/kvm/fpu.o:(.text+0x140): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0xf0): multiple definition of `fps_fnmsubs'
   arch/powerpc/kvm/fpu.o:(.opd+0xf0): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fnmsubs':
   (.text+0x168): multiple definition of `.fps_fnmsubs'
   arch/powerpc/kvm/fpu.o:(.text+0x168): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x108): multiple definition of `fps_fsel'
   arch/powerpc/kvm/fpu.o:(.opd+0x108): first defined here
   arch/powerpc/kvm/fpu.o: In function `fps_fsel':
   (.text+0x190): multiple definition of `.fps_fsel'
   arch/powerpc/kvm/fpu.o:(.text+0x190): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x120): multiple definition of `fpd_fsqrts'
   arch/powerpc/kvm/fpu.o:(.opd+0x120): first defined here
   arch/powerpc/kvm/fpu.o: In function `fpd_fsqrts':
   (.text+0x1f0): multiple definition of `.fpd_fsqrts'
   arch/powerpc/kvm/fpu.o:(.text+0x1f0): first defined here
>> arch/powerpc/kvm/fpu.o:(.opd+0x138): multiple definition of `fpd_frsqrtes'
   arch/powerpc/kvm/fpu.o:(.opd+0x138): first defined here
   arch/powerpc/kvm/fpu.o: In function `fpd_frsqrtes':
   (.text+0x204): multiple definition of `

[RESEND PATCH v4 5/5] powerpc/fadump: update documentation about crashkernel parameter reuse

2017-03-24 Thread Hari Bathini
As we are reusing crashkernel parameter instead of fadump_reserve_mem
parameter to specify the memory to reserve for fadump's crash kernel,
update the documentation accordingly.

Signed-off-by: Hari Bathini 
---
 Documentation/powerpc/firmware-assisted-dump.txt |   23 ++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/Documentation/powerpc/firmware-assisted-dump.txt 
b/Documentation/powerpc/firmware-assisted-dump.txt
index 3007bc9..8394bc8 100644
--- a/Documentation/powerpc/firmware-assisted-dump.txt
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -55,10 +55,14 @@ as follows:
  booted with restricted memory. By default, the boot memory
  size will be the larger of 5% of system RAM or 256MB.
  Alternatively, user can also specify boot memory size
- through boot parameter 'fadump_reserve_mem=' which will
- override the default calculated size. Use this option
- if default boot memory size is not sufficient for second
- kernel to boot successfully.
+ through boot parameter 'crashkernel=' which will override
+ the default calculated size. Use this option if default
+ boot memory size is not sufficient for second kernel to
+ boot successfully. For syntax of crashkernel= parameter,
+ refer to Documentation/kdump/kdump.txt. If any offset is
+ provided in crashkernel= parameter, it will be ignored
+ as fadump reserves memory at end of RAM for boot memory
+ dump preservation in case of a crash.
 
 -- After the low memory (boot memory) area has been saved, the
firmware will reset PCI and other hardware state.  It will
@@ -158,13 +162,16 @@ How to enable firmware-assisted dump (fadump):
 
 1. Set config option CONFIG_FA_DUMP=y and build kernel.
 2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
-3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
to specify size of the memory to reserve for boot memory dump
preservation.
 
-NOTE: If firmware-assisted dump fails to reserve memory then it will
-   fallback to existing kdump mechanism if 'crashkernel=' option
-   is set at kernel cmdline.
+NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ use 'crashkernel=' to specify size of the memory to reserve
+ for boot memory dump preservation.
+  2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
 
 Sysfs/debugfs files:
 



[RESEND PATCH v4 3/5] powerpc/fadump: remove dependency with CONFIG_KEXEC

2017-03-24 Thread Hari Bathini
Now that crashkernel parameter parsing and vmcoreinfo related code is
moved under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE, remove
dependency with CONFIG_KEXEC for CONFIG_FA_DUMP. While here, get rid
of definitions of fadump_append_elf_note() & fadump_final_note()
functions to reuse similar functions compiled under CONFIG_CRASH_CORE.

Signed-off-by: Hari Bathini 
Reviewed-by: Mahesh Salgaonkar 
---
 arch/powerpc/Kconfig   |   10 ++
 arch/powerpc/include/asm/fadump.h  |2 ++
 arch/powerpc/kernel/crash.c|2 --
 arch/powerpc/kernel/fadump.c   |   34 +++---
 arch/powerpc/kernel/setup-common.c |5 +
 5 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 97a8bc8..6bc1fa2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -522,21 +522,23 @@ config RELOCATABLE_TEST
  relocation code.
 
 config CRASH_DUMP
-   bool "Build a kdump crash kernel"
+   bool "Build a dump capture kernel"
depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE
help
- Build a kernel suitable for use as a kdump capture kernel.
+ Build a kernel suitable for use as a dump capture kernel.
  The same kernel binary can be used as production kernel and dump
  capture kernel.
 
 config FA_DUMP
bool "Firmware-assisted dump"
-   depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC_CORE
+   depends on PPC64 && PPC_RTAS
+   select CRASH_CORE
+   select CRASH_DUMP
help
  A robust mechanism to get reliable kernel crash dump with
  assistance from firmware. This approach does not use kexec,
- instead firmware assists in booting the kdump kernel
+ instead firmware assists in booting the capture kernel
  while preserving memory contents. Firmware-assisted dump
  is meant to be a kdump replacement offering robustness and
  speed not possible without system firmware assistance.
diff --git a/arch/powerpc/include/asm/fadump.h 
b/arch/powerpc/include/asm/fadump.h
index 0031806..60b9108 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -73,6 +73,8 @@
reg_entry++;\
 })
 
+extern int crashing_cpu;
+
 /* Kernel Dump section info */
 struct fadump_section {
__be32  request_flag;
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
index 47b63de..cbabb5a 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kernel/crash.c
@@ -43,8 +43,6 @@
 #define IPI_TIMEOUT1
 #define REAL_MODE_TIMEOUT  1
 
-/* This keeps a track of which one is the crashing cpu. */
-int crashing_cpu = -1;
 static int time_to_dump;
 
 #define CRASH_HANDLER_MAX 3
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 8ff0dd4..31c0abe 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -509,34 +509,6 @@ fadump_read_registers(struct fadump_reg_entry *reg_entry, 
struct pt_regs *regs)
return reg_entry;
 }
 
-static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
-   void *data, size_t data_len)
-{
-   struct elf_note note;
-
-   note.n_namesz = strlen(name) + 1;
-   note.n_descsz = data_len;
-   note.n_type   = type;
-   memcpy(buf, , sizeof(note));
-   buf += (sizeof(note) + 3)/4;
-   memcpy(buf, name, note.n_namesz);
-   buf += (note.n_namesz + 3)/4;
-   memcpy(buf, data, note.n_descsz);
-   buf += (note.n_descsz + 3)/4;
-
-   return buf;
-}
-
-static void fadump_final_note(u32 *buf)
-{
-   struct elf_note note;
-
-   note.n_namesz = 0;
-   note.n_descsz = 0;
-   note.n_type   = 0;
-   memcpy(buf, , sizeof(note));
-}
-
 static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
 {
struct elf_prstatus prstatus;
@@ -547,8 +519,8 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct 
pt_regs *regs)
 * prstatus.pr_pid = 
 */
elf_core_copy_kernel_regs(_reg, regs);
-   buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
-   , sizeof(prstatus));
+   buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS,
+ , sizeof(prstatus));
return buf;
 }
 
@@ -689,7 +661,7 @@ static int __init fadump_build_cpu_notes(const struct 
fadump_mem_struct *fdm)
note_buf = fadump_regs_to_elf_notes(note_buf, );
}
}
-   fadump_final_note(note_buf);
+   final_note(note_buf);
 
if (fdh) {
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
diff 

[RESEND PATCH v4 4/5] powerpc/fadump: reuse crashkernel parameter for fadump memory reservation

2017-03-24 Thread Hari Bathini
fadump supports specifying memory to reserve for fadump's crash kernel
with fadump_reserve_mem kernel parameter. This parameter currently
supports passing a fixed memory size, like fadump_reserve_mem=<size>
only. This patch aims to add support for other syntaxes like range-based
memory size <range1>:<size1>[,<range2>:<size2>,<range3>:<size3>,...]
which allows using the same parameter to boot the kernel with different
system RAM sizes.

As crashkernel parameter already supports the above mentioned syntaxes,
this patch deprecates fadump_reserve_mem parameter and reuses crashkernel
parameter instead, to specify memory for fadump's crash kernel memory
reservation as well. If any offset is provided in crashkernel parameter,
it will be ignored in case of fadump, as fadump reserves memory at end
of RAM.

Advantages using crashkernel parameter instead of fadump_reserve_mem
parameter are one less kernel parameter overall, code reuse and support
for multiple syntaxes to specify memory.

Suggested-by: Dave Young 
Signed-off-by: Hari Bathini 
Reviewed-by: Mahesh Salgaonkar 
---
 arch/powerpc/kernel/fadump.c |   23 ++-
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 31c0abe..e013f8f 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -210,14 +210,20 @@ static unsigned long init_fadump_mem_struct(struct 
fadump_mem_struct *fdm,
  */
 static inline unsigned long fadump_calculate_reserve_size(void)
 {
-   unsigned long size;
+   int ret;
+   unsigned long long base, size;
 
/*
-* Check if the size is specified through fadump_reserve_mem= cmdline
-* option. If yes, then use that.
+* Check if the size is specified through crashkernel= cmdline
+* option. If yes, then use that but ignore base as fadump
+* reserves memory at end of RAM.
 */
-   if (fw_dump.reserve_bootvar)
+   ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+   , );
+   if (ret == 0 && size > 0) {
+   fw_dump.reserve_bootvar = (unsigned long)size;
return fw_dump.reserve_bootvar;
+   }
 
/* divide by 20 to get 5% of value */
size = memblock_end_of_DRAM() / 20;
@@ -353,15 +359,6 @@ static int __init early_fadump_param(char *p)
 }
 early_param("fadump", early_fadump_param);
 
-/* Look for fadump_reserve_mem= cmdline option */
-static int __init early_fadump_reserve_mem(char *p)
-{
-   if (p)
-   fw_dump.reserve_bootvar = memparse(p, );
-   return 0;
-}
-early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-
 static void register_fw_dump(struct fadump_mem_struct *fdm)
 {
int rc;



[RESEND PATCH v4 1/5] crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE

2017-03-24 Thread Hari Bathini
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

But currently, code related to vmcoreinfo and parsing of crashkernel
parameter is built under CONFIG_KEXEC_CORE. This patch introduces
CONFIG_CRASH_CORE and moves the above mentioned code under this config,
allowing code reuse without dependency on CONFIG_KEXEC. There is no
functional change with this patch.

Signed-off-by: Hari Bathini 
Acked-by: Dave Young 
---

Changes from v3:
* Renamed log_buf_kexec_setup() to log_buf_vmcoreinfo_setup() instead of
  log_buf_crash_setup().

Changes from v2:
* Used CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE at
  appropriate places in printk and ksysfs.


 arch/Kconfig   |4 
 include/linux/crash_core.h |   65 ++
 include/linux/kexec.h  |   57 --
 include/linux/printk.h |4 
 kernel/Makefile|1 
 kernel/crash_core.c|  445 
 kernel/kexec_core.c|  403 
 kernel/ksysfs.c|8 +
 kernel/printk/printk.c |6 -
 9 files changed, 531 insertions(+), 462 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c

diff --git a/arch/Kconfig b/arch/Kconfig
index cd211a1..ffdf5e3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -2,7 +2,11 @@
 # General architecture dependent options
 #
 
+config CRASH_CORE
+   bool
+
 config KEXEC_CORE
+   select CRASH_CORE
bool
 
 config HAVE_IMA_KEXEC
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
new file mode 100644
index 000..18d0f94
--- /dev/null
+++ b/include/linux/crash_core.h
@@ -0,0 +1,65 @@
+#ifndef LINUX_CRASH_CORE_H
+#define LINUX_CRASH_CORE_H
+
+#include 
+#include 
+#include 
+
+#define CRASH_CORE_NOTE_NAME  "CORE"
+#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
+#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4)
+#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4)
+
+#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+CRASH_CORE_NOTE_NAME_BYTES +   \
+CRASH_CORE_NOTE_DESC_BYTES)
+
+#define VMCOREINFO_BYTES  (4096)
+#define VMCOREINFO_NOTE_NAME  "VMCOREINFO"
+#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
+#define VMCOREINFO_NOTE_SIZE  ((CRASH_CORE_NOTE_HEAD_BYTES * 2) +  \
+VMCOREINFO_NOTE_NAME_BYTES +   \
+VMCOREINFO_BYTES)
+
+typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4];
+
+void crash_save_vmcoreinfo(void);
+void arch_crash_save_vmcoreinfo(void);
+__printf(1, 2)
+void vmcoreinfo_append_str(const char *fmt, ...);
+phys_addr_t paddr_vmcoreinfo_note(void);
+
+#define VMCOREINFO_OSRELEASE(value) \
+   vmcoreinfo_append_str("OSRELEASE=%s\n", value)
+#define VMCOREINFO_PAGESIZE(value) \
+   vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+   vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long))
+#define VMCOREINFO_SIZE(name) \
+   vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(name))
+#define VMCOREINFO_STRUCT_SIZE(name) \
+   vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \
+ (unsigned long)sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+   vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_LENGTH(name, value) \
+   vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value)
+#define VMCOREINFO_NUMBER(name) \
+   vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name)
+#define VMCOREINFO_CONFIG(name) \
+   vmcoreinfo_append_str("CONFIG_%s=y\n", #name)
+
+extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+extern size_t vmcoreinfo_size;
+extern size_t vmcoreinfo_max_size;
+
+int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
+   unsigned long long *crash_size, unsigned long long *crash_base);
+int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
+   unsigned long long *crash_size, unsigned long long *crash_base);
+int parse_crashkernel_low(char *cmdline, unsigned long long system_ram,
+   unsigned long long *crash_size, unsigned long long *crash_base);
+
+#endif /* LINUX_CRASH_CORE_H */
diff --git 

[RESEND PATCH v4 2/5] ia64: reuse append_elf_note() and final_note() functions

2017-03-24 Thread Hari Bathini
Get rid of multiple definitions of append_elf_note() & final_note()
functions. Reuse these functions compiled under CONFIG_CRASH_CORE.
Also, define Elf_Word and use it instead of generic u32 or the more
specific Elf64_Word.

Signed-off-by: Hari Bathini 
Acked-by: Dave Young 
Acked-by: Tony Luck 
---

Changes from v3:
* Dropped hard-coded values and used DIV_ROUND_UP().

Changes from v2:
* Added a definition for Elf_Word.
* Used IA64 version of append_elf_note() and final_note() functions.


 arch/ia64/kernel/crash.c   |   22 --
 include/linux/crash_core.h |4 
 include/linux/elf.h|2 ++
 kernel/crash_core.c|   34 ++
 kernel/kexec_core.c|   28 
 5 files changed, 20 insertions(+), 70 deletions(-)

diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 2955f35..75859a0 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -27,28 +27,6 @@ static int kdump_freeze_monarch;
 static int kdump_on_init = 1;
 static int kdump_on_fatal_mca = 1;
 
-static inline Elf64_Word
-*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
-   size_t data_len)
-{
-   struct elf_note *note = (struct elf_note *)buf;
-   note->n_namesz = strlen(name) + 1;
-   note->n_descsz = data_len;
-   note->n_type   = type;
-   buf += (sizeof(*note) + 3)/4;
-   memcpy(buf, name, note->n_namesz);
-   buf += (note->n_namesz + 3)/4;
-   memcpy(buf, data, data_len);
-   buf += (data_len + 3)/4;
-   return buf;
-}
-
-static void
-final_note(void *buf)
-{
-   memset(buf, 0, sizeof(struct elf_note));
-}
-
 extern void ia64_dump_cpu_regs(void *);
 
 static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h
index 18d0f94..541a197 100644
--- a/include/linux/crash_core.h
+++ b/include/linux/crash_core.h
@@ -55,6 +55,10 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 extern size_t vmcoreinfo_size;
 extern size_t vmcoreinfo_max_size;
 
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+ void *data, size_t data_len);
+void final_note(Elf_Word *buf);
+
 int __init parse_crashkernel(char *cmdline, unsigned long long system_ram,
unsigned long long *crash_size, unsigned long long *crash_base);
 int parse_crashkernel_high(char *cmdline, unsigned long long system_ram,
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 20fa8d8..ba069e8 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC [];
 #define elf_note   elf32_note
 #define elf_addr_t Elf32_Off
 #define Elf_Half   Elf32_Half
+#define Elf_Word   Elf32_Word
 
 #else
 
@@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC [];
 #define elf_note   elf64_note
 #define elf_addr_t Elf64_Off
 #define Elf_Half   Elf64_Half
+#define Elf_Word   Elf64_Word
 
 #endif
 
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index 4261587..fcbd568 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -291,32 +291,26 @@ int __init parse_crashkernel_low(char *cmdline,
"crashkernel=", suffix_tbl[SUFFIX_LOW]);
 }
 
-static u32 *append_elf_note(u32 *buf, char *name, unsigned int type,
-   void *data, size_t data_len)
+Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
+ void *data, size_t data_len)
 {
-   struct elf_note note;
-
-   note.n_namesz = strlen(name) + 1;
-   note.n_descsz = data_len;
-   note.n_type   = type;
-   memcpy(buf, , sizeof(note));
-   buf += (sizeof(note) + 3)/4;
-   memcpy(buf, name, note.n_namesz);
-   buf += (note.n_namesz + 3)/4;
-   memcpy(buf, data, note.n_descsz);
-   buf += (note.n_descsz + 3)/4;
+   struct elf_note *note = (struct elf_note *)buf;
+
+   note->n_namesz = strlen(name) + 1;
+   note->n_descsz = data_len;
+   note->n_type   = type;
+   buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
+   memcpy(buf, name, note->n_namesz);
+   buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
+   memcpy(buf, data, data_len);
+   buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));
 
return buf;
 }
 
-static void final_note(u32 *buf)
+void final_note(Elf_Word *buf)
 {
-   struct elf_note note;
-
-   note.n_namesz = 0;
-   note.n_descsz = 0;
-   note.n_type   = 0;
-   memcpy(buf, , sizeof(note));
+   memset(buf, 0, sizeof(struct elf_note));
 }
 
 static void update_vmcoreinfo_note(void)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 9dd7229..ae1a3ba 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -990,34 +990,6 @@ int crash_shrink_memory(unsigned long 

[RESEND PATCH v4 0/5] kexec/fadump: remove dependency with CONFIG_KEXEC and reuse crashkernel parameter for fadump

2017-03-24 Thread Hari Bathini
Traditionally, kdump is used to save vmcore in case of a crash. Some
architectures like powerpc can save vmcore using architecture specific
support instead of kexec/kdump mechanism. Such architecture specific
support also needs to reserve memory, to be used by dump capture kernel.
crashkernel parameter can be reused, for memory reservation, by such
architecture specific infrastructure.

This patchset removes dependency with CONFIG_KEXEC for crashkernel parameter
and vmcoreinfo related code as it can be reused without kexec support. Also,
crashkernel parameter is reused instead of fadump_reserve_mem to reserve
memory for fadump.

The first patch moves crashkernel parameter parsing and vmcoreinfo related
code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The second patch
reuses the definitions of append_elf_note() & final_note() functions under
CONFIG_CRASH_CORE in IA64 arch code. The third patch removes dependency on
CONFIG_KEXEC for firmware-assisted dump (fadump) in powerpc. The next patch
reuses crashkernel parameter for reserving memory for fadump, instead of the
fadump_reserve_mem parameter. This has the advantage of using all syntaxes
crashkernel parameter supports, for fadump as well. The last patch updates
fadump kernel documentation about use of crashkernel parameter.

---

Hari Bathini (5):
  crash: move crashkernel parsing and vmcore related code under 
CONFIG_CRASH_CORE
  ia64: reuse append_elf_note() and final_note() functions
  powerpc/fadump: remove dependency with CONFIG_KEXEC
  powerpc/fadump: reuse crashkernel parameter for fadump memory reservation
  powerpc/fadump: update documentation about crashkernel parameter reuse


 Documentation/powerpc/firmware-assisted-dump.txt |   23 +
 arch/Kconfig |4 
 arch/ia64/kernel/crash.c |   22 -
 arch/powerpc/Kconfig |   10 -
 arch/powerpc/include/asm/fadump.h|2 
 arch/powerpc/kernel/crash.c  |2 
 arch/powerpc/kernel/fadump.c |   57 +--
 arch/powerpc/kernel/setup-common.c   |5 
 include/linux/crash_core.h   |   69 +++
 include/linux/elf.h  |2 
 include/linux/kexec.h|   57 ---
 include/linux/printk.h   |4 
 kernel/Makefile  |1 
 kernel/crash_core.c  |  439 ++
 kernel/kexec_core.c  |  431 --
 kernel/ksysfs.c  |8 
 kernel/printk/printk.c   |6 
 17 files changed, 572 insertions(+), 570 deletions(-)
 create mode 100644 include/linux/crash_core.h
 create mode 100644 kernel/crash_core.c



Re: [v1 0/5] parallelized "struct page" zeroing

2017-03-24 Thread Heiko Carstens
On Fri, Mar 24, 2017 at 09:51:09AM +0100, Christian Borntraeger wrote:
> On 03/24/2017 12:01 AM, Pavel Tatashin wrote:
> > When deferred struct page initialization feature is enabled, we get a
> > performance gain of initializing vmemmap in parallel after other CPUs are
> > started. However, we still zero the memory for vmemmap using one boot CPU.
> > This patch-set fixes the memset-zeroing limitation by deferring it as well.
> > 
> > Here is example performance gain on SPARC with 32T:
> > base
> > https://hastebin.com/ozanelatat.go
> > 
> > fix
> > https://hastebin.com/utonawukof.go
> > 
> > As you can see without the fix it takes: 97.89s to boot
> > > With the fix it takes: 46.91s to boot.
> > 
> > On x86 time saving is going to be even greater (proportionally to memory 
> > size)
> > because there are twice as many "struct page"es for the same amount of 
> > memory,
> > as base pages are twice smaller.
> 
> Fixing the linux-s390 mailing list email.
> This might be useful for s390 as well.

Unfortunately only for the fake NUMA case, since as far as I understand it,
parallelization happens only at node granularity. And since we usually
have only one node...

But anyway, it won't hurt to set ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT on
s390 also. I'll do some testing and then we'll see.

Pavel, could you please change your patch 5 so it also converts the s390
call sites of vmemmap_alloc_block() so they use VMEMMAP_ZERO instead of
'true' as argument?



[PATCH v2] ASoC: imx-wm8960: Let codec driver enable/disable its MCLK

2017-03-24 Thread Daniel Baluta
From: Daniel Baluta 

WM8962 needs its MCLK when powering up in wm8962_resume(), so it is better
to control the MCLK in the codec driver. Remove the clock enable from the
machine driver accordingly.

While at it, get rid of imx_wm8962_remove function since it is now
empty.

Signed-off-by: Daniel Baluta 
---
Changes since v1:
* s/wm8960/imx-wm8960/ in subject prefix
* s/dirver/driver in commit message
* took ownership over the patch from Nicolin Chen
as per his agreement.

 sound/soc/fsl/imx-wm8962.c | 40 
 1 file changed, 8 insertions(+), 32 deletions(-)

diff --git a/sound/soc/fsl/imx-wm8962.c b/sound/soc/fsl/imx-wm8962.c
index 1b60958..3d894d9 100644
--- a/sound/soc/fsl/imx-wm8962.c
+++ b/sound/soc/fsl/imx-wm8962.c
@@ -33,7 +33,6 @@ struct imx_wm8962_data {
struct snd_soc_card card;
char codec_dai_name[DAI_NAME_SIZE];
char platform_name[DAI_NAME_SIZE];
-   struct clk *codec_clk;
unsigned int clk_frequency;
 };
 
@@ -163,6 +162,7 @@ static int imx_wm8962_probe(struct platform_device *pdev)
struct imx_priv *priv = _priv;
struct i2c_client *codec_dev;
struct imx_wm8962_data *data;
+   struct clk *codec_clk;
int int_port, ext_port;
int ret;
 
@@ -231,19 +231,14 @@ static int imx_wm8962_probe(struct platform_device *pdev)
goto fail;
}
 
-   data->codec_clk = devm_clk_get(_dev->dev, NULL);
-   if (IS_ERR(data->codec_clk)) {
-   ret = PTR_ERR(data->codec_clk);
+   codec_clk = devm_clk_get(_dev->dev, NULL);
+   if (IS_ERR(codec_clk)) {
+   ret = PTR_ERR(codec_clk);
dev_err(_dev->dev, "failed to get codec clk: %d\n", ret);
goto fail;
}
 
-   data->clk_frequency = clk_get_rate(data->codec_clk);
-   ret = clk_prepare_enable(data->codec_clk);
-   if (ret) {
-   dev_err(_dev->dev, "failed to enable codec clk: %d\n", 
ret);
-   goto fail;
-   }
+   data->clk_frequency = clk_get_rate(codec_clk);
 
data->dai.name = "HiFi";
data->dai.stream_name = "HiFi";
@@ -258,10 +253,10 @@ static int imx_wm8962_probe(struct platform_device *pdev)
data->card.dev = >dev;
ret = snd_soc_of_parse_card_name(>card, "model");
if (ret)
-   goto clk_fail;
+   goto fail;
ret = snd_soc_of_parse_audio_routing(>card, "audio-routing");
if (ret)
-   goto clk_fail;
+   goto fail;
data->card.num_links = 1;
data->card.owner = THIS_MODULE;
data->card.dai_link = >dai;
@@ -277,16 +272,9 @@ static int imx_wm8962_probe(struct platform_device *pdev)
ret = devm_snd_soc_register_card(>dev, >card);
if (ret) {
dev_err(>dev, "snd_soc_register_card failed (%d)\n", ret);
-   goto clk_fail;
+   goto fail;
}
 
-   of_node_put(ssi_np);
-   of_node_put(codec_np);
-
-   return 0;
-
-clk_fail:
-   clk_disable_unprepare(data->codec_clk);
 fail:
of_node_put(ssi_np);
of_node_put(codec_np);
@@ -294,17 +282,6 @@ static int imx_wm8962_probe(struct platform_device *pdev)
return ret;
 }
 
-static int imx_wm8962_remove(struct platform_device *pdev)
-{
-   struct snd_soc_card *card = platform_get_drvdata(pdev);
-   struct imx_wm8962_data *data = snd_soc_card_get_drvdata(card);
-
-   if (!IS_ERR(data->codec_clk))
-   clk_disable_unprepare(data->codec_clk);
-
-   return 0;
-}
-
 static const struct of_device_id imx_wm8962_dt_ids[] = {
{ .compatible = "fsl,imx-audio-wm8962", },
{ /* sentinel */ }
@@ -318,7 +295,6 @@ static struct platform_driver imx_wm8962_driver = {
.of_match_table = imx_wm8962_dt_ids,
},
.probe = imx_wm8962_probe,
-   .remove = imx_wm8962_remove,
 };
 module_platform_driver(imx_wm8962_driver);
 
-- 
2.7.4



Re: [PATCH v3 06/10] VAS: Define helpers to alloc/free windows

2017-03-24 Thread Michael Neuling
On Thu, 2017-03-16 at 20:33 -0700, Sukadev Bhattiprolu wrote:
> Define helpers to allocate/free VAS window objects. These will
> be used in follow-on patches when opening/closing windows.
> 
> Signed-off-by: Sukadev Bhattiprolu 
> ---
>  drivers/misc/vas/vas-window.c | 74 +-
> -
>  1 file changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/misc/vas/vas-window.c b/drivers/misc/vas/vas-window.c
> index edf5c9f..9233bf5 100644
> --- a/drivers/misc/vas/vas-window.c
> +++ b/drivers/misc/vas/vas-window.c
> @@ -119,7 +119,7 @@ static void unmap_wc_mmio_bars(struct vas_window *window)
>   * OS/User Window Context (UWC) MMIO Base Address Region for the given
> window.
>   * Map these bus addresses and save the mapped kernel addresses in @window.
>   */
> -int map_wc_mmio_bars(struct vas_window *window)
> +static int map_wc_mmio_bars(struct vas_window *window)
>  {
>   int len;
>   uint64_t start;
> @@ -472,8 +472,78 @@ int init_winctx_regs(struct vas_window *window, struct
> vas_winctx *winctx)
>   return 0;
>  }
>  
> -/* stub for now */
> +DEFINE_SPINLOCK(vas_ida_lock);
> +
> +void vas_release_window_id(struct ida *ida, int winid)
> +{
> + spin_lock(_ida_lock);
> + ida_remove(ida, winid);
> + spin_unlock(_ida_lock);
> +}
> +
> +int vas_assign_window_id(struct ida *ida)
> +{
> + int rc, winid;
> +
> + rc = ida_pre_get(ida, GFP_KERNEL);
> + if (!rc)
> + return -EAGAIN;
> +
> + spin_lock(_ida_lock);
> + rc = ida_get_new_above(ida, 0, );
> + spin_unlock(_ida_lock);
> +
> + if (rc)
> + return rc;
> +
> + if (winid > VAS_MAX_WINDOWS_PER_CHIP) {
> + pr_err("VAS: Too many (%d) open windows\n", winid);
> + vas_release_window_id(ida, winid);
> + return -EAGAIN;
> + }
> +
> + return winid;
> +}
> +
> +static void vas_window_free(struct vas_window *window)
> +{
> + unmap_wc_mmio_bars(window);
> + kfree(window->paste_addr_name);
> + kfree(window);
> +}
> +
> +static struct vas_window *vas_window_alloc(struct vas_instance *vinst, int
> id)
> +{
> + struct vas_window *window;
> +
> + window = kzalloc(sizeof(*window), GFP_KERNEL);
> + if (!window)
> + return NULL;
> +
> + window->vinst = vinst;
> + window->winid = id;
> +
> + if (map_wc_mmio_bars(window))
> + goto out_free;
> +
> + return window;
> +
> +out_free:
> + kfree(window);
> + return NULL;
> +}
> +
>  int vas_window_reset(struct vas_instance *vinst, int winid)
> 

This interface seems a little weird to me. Needing an alloc in a hardware reset
path seems a bit strange.

Maybe the data structures are the issue.  A window is a hardware construct. 
Something that uses it should probably be called something else like a context. 
Something that references a window should just be the vas_instance + winid. 

You should be able to reset this hardware window by referencing structures
already allocated.  Something associated with the struct vas_instance.

Mikey

>  {
> + struct vas_window *window;
> +
> + window = vas_window_alloc(vinst, winid);
> + if (!window)
> + return -ENOMEM;
> +
> + reset_window_regs(window);
> +
> + vas_window_free(window);
> +
>   return 0;
>  }


Re: [v1 0/5] parallelized "struct page" zeroing

2017-03-24 Thread Christian Borntraeger
On 03/24/2017 12:01 AM, Pavel Tatashin wrote:
> When deferred struct page initialization feature is enabled, we get a
> performance gain of initializing vmemmap in parallel after other CPUs are
> started. However, we still zero the memory for vmemmap using one boot CPU.
> This patch-set fixes the memset-zeroing limitation by deferring it as well.
> 
> Here is example performance gain on SPARC with 32T:
> base
> https://hastebin.com/ozanelatat.go
> 
> fix
> https://hastebin.com/utonawukof.go
> 
> As you can see without the fix it takes: 97.89s to boot
> With the fix it takes: 46.91s to boot.
> 
> On x86 time saving is going to be even greater (proportionally to memory size)
> because there are twice as many "struct page"es for the same amount of memory,
> as base pages are twice smaller.

Fixing the linux-s390 mailing list email.
This might be useful for s390 as well.

> 
> 
> Pavel Tatashin (5):
>   sparc64: simplify vmemmap_populate
>   mm: defining memblock_virt_alloc_try_nid_raw
>   mm: add "zero" argument to vmemmap allocators
>   mm: zero struct pages during initialization
>   mm: teach platforms not to zero struct pages memory
> 
>  arch/powerpc/mm/init_64.c |4 +-
>  arch/s390/mm/vmem.c   |5 ++-
>  arch/sparc/mm/init_64.c   |   26 +++
>  arch/x86/mm/init_64.c |3 +-
>  include/linux/bootmem.h   |3 ++
>  include/linux/mm.h|   15 +++--
>  mm/memblock.c |   46 --
>  mm/page_alloc.c   |3 ++
>  mm/sparse-vmemmap.c   |   48 +---
>  9 files changed, 103 insertions(+), 50 deletions(-)
> 




Re: [v2 PATCH] powernv-cpuidle: Validate DT property array size

2017-03-24 Thread Shilpasri G Bhat


On 03/15/2017 01:45 PM, Gautham R. Shenoy wrote:
> From: "Gautham R. Shenoy" 
> 
> The various properties associated with powernv idle states such as
> names, flags, residency-ns, latencies-ns, psscr, psscr-mask are
> exposed in the device-tree as property arrays such the pointwise
> entries in each of these arrays correspond to the properties of the
> same idle state.
> 
> This patch validates that the lengths of the property arrays are the
> same. If there is a mismatch, the patch will ensure that we bail out
> and not expose the platform idle states via cpuidle.
> 
> Signed-off-by: Gautham R. Shenoy 
> ---
> v1: https://lkml.org/lkml/2017/2/23/349
> Changes from v1: Print the full property array name in warning message.

Reviewed-by: Shilpasri G Bhat 

> 
>  drivers/cpuidle/cpuidle-powernv.c | 64 
> +--
>  1 file changed, 61 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle-powernv.c 
> b/drivers/cpuidle/cpuidle-powernv.c
> index 3705930..a06df51 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -197,11 +197,25 @@ static inline void add_powernv_state(int index, const 
> char *name,
>   stop_psscr_table[index].mask = psscr_mask;
>  }
> 
> +/*
> + * Returns 0 if prop1_len == prop2_len. Else returns -1
> + */
> +static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
> +  const char *prop2, int prop2_len)
> +{
> + if (prop1_len == prop2_len)
> + return 0;
> +
> + pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
> + prop1, prop2);
> + return -1;
> +}
> +
>  static int powernv_add_idle_states(void)
>  {
>   struct device_node *power_mgt;
>   int nr_idle_states = 1; /* Snooze */
> - int dt_idle_states;
> + int dt_idle_states, count;
>   u32 latency_ns[CPUIDLE_STATE_MAX];
>   u32 residency_ns[CPUIDLE_STATE_MAX];
>   u32 flags[CPUIDLE_STATE_MAX];
> @@ -226,6 +240,21 @@ static int powernv_add_idle_states(void)
>   goto out;
>   }
> 
> + count = of_property_count_u32_elems(power_mgt,
> + "ibm,cpu-idle-state-latencies-ns");
> +
> + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
> +"ibm,cpu-idle-state-latencies-ns",
> +count) != 0)
> + goto out;
> +
> + count = of_property_count_strings(power_mgt,
> +   "ibm,cpu-idle-state-names");
> + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags", dt_idle_states,
> +"ibm,cpu-idle-state-names",
> +count) != 0)
> + goto out;
> +
>   /*
>* Since snooze is used as first idle state, max idle states allowed is
>* CPUIDLE_STATE_MAX -1
> @@ -260,6 +289,22 @@ static int powernv_add_idle_states(void)
>   has_stop_states = (flags[0] &
>  (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
>   if (has_stop_states) {
> + count = of_property_count_u64_elems(power_mgt,
> + "ibm,cpu-idle-state-psscr");
> + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
> +dt_idle_states,
> +"ibm,cpu-idle-state-psscr",
> +count) != 0)
> + goto out;
> +
> + count = of_property_count_u64_elems(power_mgt,
> + 
> "ibm,cpu-idle-state-psscr-mask");
> + if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
> +dt_idle_states,
> +"ibm,cpu-idle-state-psscr-mask",
> +count) != 0)
> + goto out;
> +
>   if (of_property_read_u64_array(power_mgt,
>   "ibm,cpu-idle-state-psscr", psscr_val, dt_idle_states)) {
>   pr_warn("cpuidle-powernv: missing 
> ibm,cpu-idle-state-psscr in DT\n");
> @@ -274,8 +319,21 @@ static int powernv_add_idle_states(void)
>   }
>   }
> 
> - rc = of_property_read_u32_array(power_mgt,
> - "ibm,cpu-idle-state-residency-ns", residency_ns, 
> dt_idle_states);
> + count = of_property_count_u32_elems(power_mgt,
> + "ibm,cpu-idle-state-residency-ns");
> +
> + if (count < 0) {
> + rc = count;
> + } else if (validate_dt_prop_sizes("ibm,cpu-idle-state-flags",
> +   dt_idle_states,
> +   

Re: [PATCH v2 1/2] powerpc/powernv/cpuidle: Pass correct drv->cpumask for registration

2017-03-24 Thread Gautham R Shenoy
On Thu, Mar 23, 2017 at 8:52 PM, Vaidyanathan Srinivasan
 wrote:
> drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init().
> On PowerNV platform cpu_present could be less than cpu_possible in cases
> where firmware detects the cpu, but it is not available to the OS.  When
> CONFIG_HOTPLUG_CPU=n, such cpus are not hotplugable at runtime and hence
> we skip creating cpu_device.
>
> This breaks cpuidle on powernv where register_cpu() is not called for
> cpus in cpu_possible_mask that cannot be hot-added at runtime.
>
> Trying cpuidle_register_device() on cpu without cpu_device will cause
> crash like this:
>
> cpu 0xf: Vector: 380 (Data SLB Access) at [c00ff1503490]
> pc: c022c8bc: string+0x34/0x60
> lr: c022ed78: vsnprintf+0x284/0x42c
> sp: c00ff1503710
>msr: 90009033
>dar: 60006000
>   current = 0xc00ff148
>   paca= 0xcfe82d00   softe: 0irq_happened: 0x01
> pid   = 1, comm = swapper/8
> Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4
> (Buildroot 2017.02-4-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017
> enter ? for help
> [link register   ] c022ed78 vsnprintf+0x284/0x42c
> [c00ff1503710] c022ebb8 vsnprintf+0xc4/0x42c (unreliable)
> [c00ff1503800] c022ef40 vscnprintf+0x20/0x44
> [c00ff1503830] c00ab61c vprintk_emit+0x94/0x2cc
> [c00ff15038a0] c00acc9c vprintk_func+0x60/0x74
> [c00ff15038c0] c0619694 printk+0x38/0x4c
> [c00ff15038e0] c0224950 kobject_get+0x40/0x60
> [c00ff1503950] c022507c kobject_add_internal+0x60/0x2c4
> [c00ff15039e0] c0225350 kobject_init_and_add+0x70/0x78
> [c00ff1503a60] c053c288 cpuidle_add_sysfs+0x9c/0xe0
> [c00ff1503ae0] c053aeac cpuidle_register_device+0xd4/0x12c
> [c00ff1503b30] c053b108 cpuidle_register+0x98/0xcc
> [c00ff1503bc0] c085eaf0 powernv_processor_idle_init+0x140/0x1e0
> [c00ff1503c60] c000cd60 do_one_initcall+0xc0/0x15c
> [c00ff1503d20] c0833e84 kernel_init_freeable+0x1a0/0x25c
> [c00ff1503dc0] c000d478 kernel_init+0x24/0x12c
> [c00ff1503e30] c000b564 ret_from_kernel_thread+0x5c/0x78
>
> This patch fixes the bug by passing correct cpumask from
> powernv-cpuidle driver.
>
> Signed-off-by: Vaidyanathan Srinivasan 


Reviewed-by: Gautham R. Shenoy 

> ---
>  drivers/cpuidle/cpuidle-powernv.c | 18 ++
>  1 file changed, 18 insertions(+)
>
> diff --git a/drivers/cpuidle/cpuidle-powernv.c 
> b/drivers/cpuidle/cpuidle-powernv.c
> index a06df51..82f7b33 100644
> --- a/drivers/cpuidle/cpuidle-powernv.c
> +++ b/drivers/cpuidle/cpuidle-powernv.c
> @@ -175,6 +175,24 @@ static int powernv_cpuidle_driver_init(void)
> drv->state_count += 1;
> }
>
> +   /*
> +* On PowerNV platform cpu_present may be less than cpu_possible in
> +* cases where firmware detects the cpu, but it is not available to 
> the
> +* OS.  If CONFIG_HOTPLUG_CPU=n then such CPUs are not hotplugable at
> +* runtime and hence cpu_devices are not created for those cpus by
> +* generic topology_init().
> +*
> +* drv->cpumask defaults to cpu_possible_mask in
> +* __cpuidle_driver_init().  This breaks cpuidle on powernv where
> +* cpu_devices are not created for cpus in cpu_possible_mask that
> +* cannot be hot-added later at runtime.
> +*
> +* Trying cpuidle_register_device() on a cpu without cpu_devices is
> +* incorrect. Hence pass correct cpu mask to generic cpuidle driver.
> +*/
> +
> +   drv->cpumask = (struct cpumask *)cpu_present_mask;
> +
> return 0;
>  }
>
> --
> 2.9.3
>



-- 
Thanks and Regards
gautham.


Re: [PATCH] ASoC: WM8962: Let codec driver enable/disable its MCLK

2017-03-24 Thread Nicolin Chen
On Fri, Mar 24, 2017 at 09:37:00AM +0200, Daniel Baluta wrote:
> On Fri, Mar 24, 2017 at 6:58 AM, Nicolin Chen  wrote:
> > On Thu, Mar 23, 2017 at 02:01:50PM +0200, Daniel Baluta wrote:
> >> From: Nicolin Chen 
> >>
> >> WM8962 needs its MCLK when powerup in wm8962_resume(). Thus it's better
> >> to control the MCLK in codec driver. Thus remove the clock enable in
> >> machine driver accordingly.
> >>
> >> While at it, get rid of imx_wm8962_remove function since it is now
> >> empty.
> >>
> >> Signed-off-by: Nicolin Chen 
> >
> > Hmm...it'd probably be better to let yourself be the author and remove
> > > my signed-off here. Just got an email delivery failure since that email
> > address is apparently not available any more.
> 
> Hi Nic,
> 
> I will fix the prefix subject as suggested. Is it OK to use
> your current email address for the signed-off-by tag?

The email address in the "From" would probably still remain that one,
which causes the problem.

I personally prefer that you resend it using your own and I could give
an Ack to it. It isn't really necessary to let me take the credit :)

Thanks
Nicolin


Re: [PATCH] ASoC: WM8962: Let codec driver enable/disable its MCLK

2017-03-24 Thread Daniel Baluta
On Fri, Mar 24, 2017 at 6:58 AM, Nicolin Chen  wrote:
> On Thu, Mar 23, 2017 at 02:01:50PM +0200, Daniel Baluta wrote:
>> From: Nicolin Chen 
>>
>> WM8962 needs its MCLK when powerup in wm8962_resume(). Thus it's better
>> to control the MCLK in codec driver. Thus remove the clock enable in
>> machine driver accordingly.
>>
>> While at it, get rid of imx_wm8962_remove function since it is now
>> empty.
>>
>> Signed-off-by: Nicolin Chen 
>
> Hmm...it'd probably be better to let yourself be the author and remove
> my signed-off here. Just got an email delivery failure since that email
> address is apparently not available any more.

Hi Nic,

I will fix the prefix subject as suggested. Is it OK to use
your current email address for the signed-off-by tag?

Daniel.


[PATCH kernel] KVM: PPC: Preserve storage control bits

2017-03-24 Thread Alexey Kardashevskiy
PR KVM page fault handler performs eaddr to pte translation for a guest,
however kvmppc_mmu_book3s_64_xlate() does not preserve WIMG bits
(storage control) in the kvmppc_pte struct. If PR KVM is running as
a second level guest under HV KVM, and PR KVM tries inserting HPT entry,
this fails in HV KVM if it already has this mapping.

This preserves WIMG bits between kvmppc_mmu_book3s_64_xlate() and
kvmppc_mmu_map_page().

Signed-off-by: Alexey Kardashevskiy 
---

This allows MMIO BAR mapping for nested guest with VFIO.

This is the check in HV KVM which failed:

arch/powerpc/kvm/book3s_hv_rm_mmu.c
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
[...]

/*If we had host pte mapping then  Check WIMG */
if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
if (is_ci)
return H_PARAMETER;
/*
 * Allow guest to map emulated device memory as
 * uncacheable, but actually make it cacheable.
 */
ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
ptel |= HPTE_R_M;
}
---
 arch/powerpc/include/asm/kvm_host.h   | 1 +
 arch/powerpc/kvm/book3s_64_mmu.c  | 1 +
 arch/powerpc/kvm/book3s_64_mmu_host.c | 2 ++
 arch/powerpc/kvm/book3s_pr.c  | 2 +-
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 7bba8f415627..bf6822cd4f86 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -345,6 +345,7 @@ struct kvmppc_pte {
bool may_read   : 1;
bool may_write  : 1;
bool may_execute: 1;
+   unsigned long wimg;
u8 page_size;   /* MMU_PAGE_xxx */
 };
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 70153578131a..29ebe2fd5867 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -319,6 +319,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu 
*vcpu, gva_t eaddr,
gpte->may_execute = true;
gpte->may_read = false;
gpte->may_write = false;
+   gpte->wimg = r & HPTE_R_WIMG;
 
switch (pp) {
case 0:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 4b4e927c4822..145a61892c48 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *orig_pte,
else
kvmppc_mmu_flush_icache(pfn);
 
+   rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
/*
 * Use 64K pages if possible; otherwise, on 64K page kernels,
 * we need to transfer 4 more bits from guest real to host real addr.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index ce437b98477e..f026b062c0ed 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -537,7 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
int r = RESUME_GUEST;
int relocated;
int page_found = 0;
-   struct kvmppc_pte pte;
+   struct kvmppc_pte pte = { 0 };
bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
u64 vsid;
-- 
2.11.0



[PATCH kernel] KVM: PPC: Exit KVM on failed mapping

2017-03-24 Thread Alexey Kardashevskiy
At the moment kvmppc_mmu_map_page() returns -1 if
mmu_hash_ops.hpte_insert() fails for any reason so the page fault handler
resumes the guest and it faults on the same address again.

This adds distinction to kvmppc_mmu_map_page() to return -EIO if
mmu_hash_ops.hpte_insert() failed for a reason other than full pteg.
At the moment only pSeries_lpar_hpte_insert() returns -2 if
plpar_pte_enter() failed with a code other than H_PTEG_FULL.
Other mmu_hash_ops.hpte_insert() instances can only fail with
-1 "full pteg".

With this change, if PR KVM fails to update HPT, it can signal
the userspace about this instead of returning to guest and having
the very same page fault over and over again.

Signed-off-by: Alexey Kardashevskiy 
---

This was found with nested KVM+VFIO when PR KVM was trying to map MMIO BAR
of a VFIO PCI device but since it would not preserve WIMG bits, HV KVM
would fail, mmu_hash_ops.hpte_insert() would return error and PR KVM
would just continue and trap again on the same memory access.

With this patch but without "KVM: PPC: Preserve storage control bits"
nested QEMU will abort with informative screen instead of endlessly
trying to proceed further in booting.
---
 arch/powerpc/kvm/book3s_64_mmu_host.c | 5 -
 arch/powerpc/kvm/book3s_pr.c  | 6 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f4fd26..4b4e927c4822 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -177,12 +177,15 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
kvmppc_pte *orig_pte,
ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
   hpsize, hpsize, MMU_SEGSIZE_256M);
 
-   if (ret < 0) {
+   if (ret == -1) {
/* If we couldn't map a primary PTE, try a secondary */
hash = ~hash;
vflags ^= HPTE_V_SECONDARY;
attempt++;
goto map_again;
+   } else if (ret < 0) {
+   r = -EIO;
+   goto out_unlock;
} else {
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 633502f52bbb..ce437b98477e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -625,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
kvmppc_mmu_unmap_page(vcpu, &pte);
}
/* The guest's PTE is not mapped yet. Map on the host */
-   kvmppc_mmu_map_page(vcpu, &pte, iswrite);
+   if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
+   /* Exit KVM if mapping failed */
+   run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+   return RESUME_HOST;
+   }
if (data)
vcpu->stat.sp_storage++;
else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
-- 
2.11.0



[PATCH kernel] KVM: PPC: Get rid of unused local variable

2017-03-24 Thread Alexey Kardashevskiy
@is_mmio has never been used since introduction in
commit 2f4cf5e42d13 ("Add book3s.c") from 2009.

Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/kvm/book3s_pr.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d4dfc0ca2a44..633502f52bbb 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -538,7 +538,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
int relocated;
int page_found = 0;
struct kvmppc_pte pte;
-   bool is_mmio = false;
bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
u64 vsid;
@@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
/* Page not found in guest SLB */
kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
-   } else if (!is_mmio &&
-  kvmppc_visible_gpa(vcpu, pte.raddr)) {
+   } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
/*
 * There is already a host HPTE there, presumably
-- 
2.11.0



[PATCH guest kernel] vfio/powerpc/spapr_tce: Enforce IOMMU type compatibility check

2017-03-24 Thread Alexey Kardashevskiy
The existing SPAPR TCE driver advertises both VFIO_SPAPR_TCE_IOMMU and
VFIO_SPAPR_TCE_v2_IOMMU types to the userspace and the userspace usually
picks the v2.

Normally the userspace would create a container, attach an IOMMU group
to it and only then set the IOMMU type (which would normally be v2).

However a specific IOMMU group may not support v2, in other words
it may not implement set_window/unset_window/take_ownership/
release_ownership and such a group should not be attached to
a v2 container.

This adds extra checks that a new group can do what the selected IOMMU
type suggests. The userspace can then test the return value from
ioctl(VFIO_SET_IOMMU, VFIO_SPAPR_TCE_v2_IOMMU) and try
VFIO_SPAPR_TCE_IOMMU.

Signed-off-by: Alexey Kardashevskiy 
---

This is one of the patches needed to do nested VFIO - for either
second level guest or DPDK running in a guest.
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c 
b/drivers/vfio/vfio_iommu_spapr_tce.c
index cf3de91fbfe7..a7d811524092 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -1335,8 +1335,16 @@ static int tce_iommu_attach_group(void *iommu_data,
 
if (!table_group->ops || !table_group->ops->take_ownership ||
!table_group->ops->release_ownership) {
+   if (container->v2) {
+   ret = -EPERM;
+   goto unlock_exit;
+   }
ret = tce_iommu_take_ownership(container, table_group);
} else {
+   if (!container->v2) {
+   ret = -EPERM;
+   goto unlock_exit;
+   }
ret = tce_iommu_take_ownership_ddw(container, table_group);
if (!tce_groups_attached(container) && !container->tables[0])
container->def_window_pending = true;
-- 
2.11.0



[PATCH guest kernel] powerpc/pseries: Enable VFIO

2017-03-24 Thread Alexey Kardashevskiy
This enables VFIO on pseries host in order to allow VFIO in nested guest
under PR KVM or DPDK in a HV guest. This adds support of
the VFIO_SPAPR_TCE_IOMMU type.

This adds exchange() callback to allow TCE updates by the SPAPR TCE IOMMU
driver in VFIO.

This initializes DMA32 window parameters in iommu_table_group as
this does not implement VFIO_SPAPR_TCE_v2_IOMMU and
VFIO_SPAPR_TCE_IOMMU just reuses the existing DMA32 window.

Signed-off-by: Alexey Kardashevskiy 
---
 arch/powerpc/platforms/pseries/iommu.c | 40 --
 1 file changed, 38 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index 4d757eaa46bf..7c8ed68d727e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -550,6 +550,7 @@ static void iommu_table_setparms(struct pci_controller *phb,
 static void iommu_table_setparms_lpar(struct pci_controller *phb,
  struct device_node *dn,
  struct iommu_table *tbl,
+ struct iommu_table_group *table_group,
  const __be32 *dma_window)
 {
unsigned long offset, size;
@@ -563,6 +564,9 @@ static void iommu_table_setparms_lpar(struct pci_controller 
*phb,
tbl->it_type = TCE_PCI;
tbl->it_offset = offset >> tbl->it_page_shift;
tbl->it_size = size >> tbl->it_page_shift;
+
+   table_group->tce32_start = offset;
+   table_group->tce32_size = size;
 }
 
 struct iommu_table_ops iommu_table_pseries_ops = {
@@ -651,8 +655,38 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
pr_debug("ISA/IDE, window size is 0x%llx\n", pci->phb->dma_window_size);
 }
 
+#ifdef CONFIG_IOMMU_API
+static int tce_exchange_pSeries(struct iommu_table *tbl, long index,
+   unsigned long *tce, enum dma_data_direction *direction)
+{
+   long rc;
+   unsigned long ioba = (unsigned long) index << tbl->it_page_shift;
+   unsigned long flags, oldtce = 0;
+   u64 proto_tce = iommu_direction_to_tce_perm(*direction);
+   unsigned long newtce = *tce | proto_tce;
+
+   spin_lock_irqsave(&tbl->large_pool.lock, flags);
+
+   rc = plpar_tce_get((u64)tbl->it_index, ioba, &oldtce);
+   if (!rc)
+   rc = plpar_tce_put((u64)tbl->it_index, ioba, newtce);
+
+   if (!rc) {
+   *direction = iommu_tce_direction(oldtce);
+   *tce = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+   }
+
+   spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
+
+   return rc;
+}
+#endif
+
 struct iommu_table_ops iommu_table_lpar_multi_ops = {
.set = tce_buildmulti_pSeriesLP,
+#ifdef CONFIG_IOMMU_API
+   .exchange = tce_exchange_pSeries,
+#endif
.clear = tce_freemulti_pSeriesLP,
.get = tce_get_pSeriesLP
 };
@@ -689,7 +723,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
if (!ppci->table_group) {
ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node);
tbl = ppci->table_group->tables[0];
-   iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
+   iommu_table_setparms_lpar(ppci->phb, pdn, tbl,
+   ppci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
iommu_init_table(tbl, ppci->phb->node);
iommu_register_group(ppci->table_group,
@@ -1143,7 +1178,8 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev 
*dev)
if (!pci->table_group) {
pci->table_group = iommu_pseries_alloc_group(pci->phb->node);
tbl = pci->table_group->tables[0];
-   iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
+   iommu_table_setparms_lpar(pci->phb, pdn, tbl,
+   pci->table_group, dma_window);
tbl->it_ops = &iommu_table_lpar_multi_ops;
iommu_init_table(tbl, pci->phb->node);
iommu_register_group(pci->table_group,
-- 
2.11.0