On 12/27/19 10:03 AM, Eugeniy Paltsev wrote:
> To be able to run DSP-enabled userspace applications we need to
> save and restore following DSP-related registers:
> At IRQ/exception entry/exit:
>  * ACC0_GLO, ACC0_GHI, DSP_CTRL
>  * ACC0_LO, ACC0_HI (we already save them as r58, r59 pair)
> At context switch:
>  * DSP_BFLY0, DSP_FFT_CTRL

Good summary: but the question is this more than needed.
For kernel proper, you've disabled guard bits, saturation etc already. AFAIKS 
gcc
won't generate complex/fractional math for kernel so your bullet #1 can likely 
be
considered user space only hence can go to bullet #3. Do you have reasons to
believe otherwise ?

> Signed-off-by: Eugeniy Paltsev <eugeniy.palt...@synopsys.com>
> ---
>  arch/arc/Kconfig                   |  7 +++
>  arch/arc/include/asm/arcregs.h     |  2 +
>  arch/arc/include/asm/dsp-impl.h    | 75 +++++++++++++++++++++++++++++-
>  arch/arc/include/asm/dsp.h         | 42 +++++++++++++++++
>  arch/arc/include/asm/entry-arcv2.h |  3 ++
>  arch/arc/include/asm/processor.h   |  4 ++
>  arch/arc/include/asm/ptrace.h      |  4 ++
>  arch/arc/include/asm/switch_to.h   |  2 +
>  arch/arc/kernel/asm-offsets.c      |  7 +++
>  arch/arc/kernel/setup.c            |  2 +-
>  10 files changed, 146 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arc/include/asm/dsp.h
>
> diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
> index b9cd7ce3f878..c3210754a3d2 100644
> --- a/arch/arc/Kconfig
> +++ b/arch/arc/Kconfig
> @@ -432,6 +432,13 @@ config ARC_DSP_KERNEL
>         DSP extension presence in HW, no support for DSP-enabled userspace
>         applications. We don't save / restore DSP registers and only do
>         some minimal preparations so userspace won't be able to break kernel
> +
> +config ARC_DSP_USERSPACE
> +     bool "Support DSP for userspace apps"
> +     select ARC_HAS_ACCL_REGS
> +     help
> +       DSP extension presence in HW, support save / restore DSP registers to
> +       run DSP-enabled userspace applications
>  endchoice
>  
>  config ARC_IRQ_NO_AUTOSAVE
> diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
> index 0004b1e9b325..a713819cab3c 100644
> --- a/arch/arc/include/asm/arcregs.h
> +++ b/arch/arc/include/asm/arcregs.h
> @@ -118,6 +118,8 @@
>  
>  /*
>   * DSP-related registers
> + * Registers names must correspond to dsp_callee_regs structure fields names
> + * for automatic offset calculation in DSP_AUX_SAVE_RESTORE macros.

good idea for preventing carbon errors.

>   */
>  #define ARC_AUX_DSP_BUILD    0x7A
>  #define ARC_AUX_ACC0_LO              0x580
> diff --git a/arch/arc/include/asm/dsp-impl.h b/arch/arc/include/asm/dsp-impl.h
> index 788093cbe689..7b640a680dfc 100644
> --- a/arch/arc/include/asm/dsp-impl.h
> +++ b/arch/arc/include/asm/dsp-impl.h
> @@ -7,6 +7,8 @@
>  #ifndef __ASM_ARC_DSP_IMPL_H
>  #define __ASM_ARC_DSP_IMPL_H
>  
> +#include <asm/dsp.h>
> +
>  #define DSP_CTRL_DISABLED_ALL                0
>  
>  #ifdef __ASSEMBLY__
> @@ -28,11 +30,82 @@
>        * able to break kernel */
>       mov     r58, DSP_CTRL_DISABLED_ALL
>       sr      r58, [ARC_AUX_DSP_CTRL]
> -#endif /* ARC_DSP_KERNEL */
> +
> +#elif defined(ARC_DSP_SAVE_RESTORE_REGS)
> +     lr      r58, [ARC_AUX_ACC0_GLO]
> +     lr      r59, [ARC_AUX_ACC0_GHI]
> +     ST2     r58, r59, PT_ACC0_GLO
> +
> +     lr      r58, [ARC_AUX_DSP_CTRL]
> +     st      r58, [sp, PT_DSP_CTRL]
> +
> +#endif
> +.endm
> +
> +/* clobbers r58, r59 registers pair */
> +.macro DSP_RESTORE_REGFILE_IRQ
> +#if defined(ARC_DSP_SAVE_RESTORE_REGS)
> +     LD2     r58, r59, PT_ACC0_GLO
> +     sr      r58, [ARC_AUX_ACC0_GLO]
> +     sr      r59, [ARC_AUX_ACC0_GHI]
> +
> +     ld      r58, [sp, PT_DSP_CTRL]
> +     sr      r58, [ARC_AUX_DSP_CTRL]
> +
> +#endif

Assuming you still need this, consider using different scratch registers if
possible. I understand it gets tricky in restore path but there even more
registers are available to clobber as they will be restored after this code.

> +#ifdef ARC_DSP_SAVE_RESTORE_REGS
> +
> +/*
> + * As we save new and restore old AUX register value in the same place we
> + * can optimize a bit and use AEX instruction (swap contents of an auxiliary
> + * register with a core register) instead of LR + SR pair.
> + */
> +#define AUX_SAVE_RESTORE(_saveto, _readfrom, _offt, _aux, _scratch)  \
> +do {                                                                 \
> +     __asm__ __volatile__(                                           \
> +             "ld     %0, [%2, %4]                    \n"             \
> +             "aex    %0, [%3]                        \n"             \
> +             "st     %0, [%1, %4]                    \n"             \
> +             :                                                       \
> +               "=&r" (_scratch)      /* must be early clobber */     \

Define the scratch variable locally for clarity and better liveness tracking - 
for
both code reader and compiler :-)
Also avoids having to initialize it.

> +             :                                                       \
> +                "r" (_saveto),                                       \
> +                "r" (_readfrom),                                     \
> +                "I" (_aux),                                          \
> +                "I" (_offt)                                          \
> +             :                                                       \

AEX with "I" constraint will likely be an 8 byte instructions always. Best to 
give
compiler wiggle room with "Ir"

> +               "memory"                                              \
> +     );                                                              \
> +} while (0)
> +
> +#define DSP_AUX_SAVE_RESTORE(_saveto, _readfrom, _aux, _scratch)     \
> +     AUX_SAVE_RESTORE(_saveto, _readfrom,                            \
> +             offsetof(struct dsp_callee_regs, _aux),                 \
> +             ARC_AUX_##_aux, _scratch)
> +
> +static inline void arc_dsp_save_restore(struct task_struct *prev,
> +                                     struct task_struct *next)
> +{
> +     long unsigned int *saveto = &prev->thread.dsp.DSP_BFLY0;
> +     long unsigned int *readfrom = &next->thread.dsp.DSP_BFLY0;
> +
> +     long unsigned int zero = 0;

See comment about pushing scratch to the relevant code block.

> +
> +     DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_BFLY0, zero);
> +     DSP_AUX_SAVE_RESTORE(saveto, readfrom, DSP_FFT_CTRL, zero);
> +}
> +

[snip]

> +
> +#if defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_SAVE_RESTORE_REGS    1
> +#endif

I can see u added this for consistency with below which is a really bad idea: 
see
below.

> +
> +#ifndef __ASSEMBLY__
> +
> +/* some defines to simplify config sanitize in kernel/setup.c */
> +#if defined(CONFIG_ARC_DSP_KERNEL)   || \
> +    defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_HANDLED                      1
> +#else
> +#define ARC_DSP_HANDLED                      0
> +#endif

This is a really bad idea - u r introducing explicit include dependencies which
can change even outside of arch changes !
We've dealt with enough of these problems with current.h, so best to avoid, even
if there is some code clutter.

> +
> +#if defined(CONFIG_ARC_DSP_USERSPACE)
> +#define ARC_DSP_OPT_NAME             "CONFIG_ARC_DSP_USERSPACE"
> +#else
> +#define ARC_DSP_OPT_NAME             "CONFIG_ARC_DSP_KERNEL"
> +#endif
> +
> +/*
> + * DSP-related saved registers - need to be saved only when you are
> + * scheduled out.
> + * structure fields name must correspond to aux register defenitions for
> + * automatic offset calculation in DSP_AUX_SAVE_RESTORE macros
> + */
> +struct dsp_callee_regs {
> +     unsigned long DSP_BFLY0, DSP_FFT_CTRL;
> +};
> +
> +#endif /* !__ASSEMBLY__ */
> +
> +#endif /* __ASM_ARC_DSP_H */
> diff --git a/arch/arc/include/asm/entry-arcv2.h 
> b/arch/arc/include/asm/entry-arcv2.h
> index e3f8bd3e2eba..5d079f0b6243 100644
> --- a/arch/arc/include/asm/entry-arcv2.h
> +++ b/arch/arc/include/asm/entry-arcv2.h
> @@ -192,6 +192,9 @@
>       ld      r25, [sp, PT_user_r25]
>  #endif
>  
> +     /* clobbers r58, r59 registers pair, so must be before r58, r59 restore 
> */
> +     DSP_RESTORE_REGFILE_IRQ
> +
>  #ifdef CONFIG_ARC_HAS_ACCL_REGS
>       LD2     r58, r59, PT_r58
>  #endif
> diff --git a/arch/arc/include/asm/processor.h 
> b/arch/arc/include/asm/processor.h
> index 706edeaa5583..1716df0f4472 100644
> --- a/arch/arc/include/asm/processor.h
> +++ b/arch/arc/include/asm/processor.h
> @@ -14,6 +14,7 @@
>  #ifndef __ASSEMBLY__
>  
>  #include <asm/ptrace.h>
> +#include <asm/dsp.h>
>  
>  #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
>  /* These DPFP regs need to be saved/restored across ctx-sw */
> @@ -39,6 +40,9 @@ struct thread_struct {
>       unsigned long ksp;      /* kernel mode stack pointer */
>       unsigned long callee_reg;       /* pointer to callee regs */
>       unsigned long fault_address;    /* dbls as brkpt holder as well */
> +#ifdef ARC_DSP_SAVE_RESTORE_REGS
> +     struct dsp_callee_regs dsp;
> +#endif
>  #ifdef CONFIG_ARC_FPU_SAVE_RESTORE
>       struct arc_fpu fpu;
>  #endif
> diff --git a/arch/arc/include/asm/ptrace.h b/arch/arc/include/asm/ptrace.h
> index ba9854ef39e8..a5851ee141de 100644
> --- a/arch/arc/include/asm/ptrace.h
> +++ b/arch/arc/include/asm/ptrace.h
> @@ -8,6 +8,7 @@
>  #define __ASM_ARC_PTRACE_H
>  
>  #include <uapi/asm/ptrace.h>
> +#include <asm/dsp.h>
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -91,6 +92,9 @@ struct pt_regs {
>  #ifdef CONFIG_ARC_HAS_ACCL_REGS
>       unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
>  #endif
> +#ifdef ARC_DSP_SAVE_RESTORE_REGS

Use the Kconfig option name directly or !CONFIG_NO_DSP etc

> +     unsigned long ACC0_GLO, ACC0_GHI, DSP_CTRL;
> +#endif

see comments at top.

>  
>       /*------- Below list auto saved by h/w -----------*/
>       unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;

[snip]

>  }
> diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
> index b3995dd673d9..30d31579c51d 100644
> --- a/arch/arc/kernel/setup.c
> +++ b/arch/arc/kernel/setup.c
> @@ -447,7 +447,7 @@ static void arc_chk_core_config(void)
>               CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
>  
>               present = dsp_exist();
> -             CHK_OPT_STRICT(CONFIG_ARC_DSP_KERNEL, present);
> +             chk_opt_strict(ARC_DSP_OPT_NAME, present, ARC_DSP_HANDLED);

This needs to be reworked given the header fudge is not a good idea.

_______________________________________________
linux-snps-arc mailing list
linux-snps-arc@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-snps-arc

Reply via email to