Am Dienstag, dem 02.03.2021 um 09:37 +0100 schrieb Ahmad Fatoum:
> Hello Jules, Yann,
>
> On 01.03.21 16:58, Jules Maselbas wrote:
> > From: Yann Sionneau <[email protected]>
>
> Some comments inline. I am not a cache coherency expert, so take
> it with a grain of salt.
>
> >
> > Signed-off-by: Yann Sionneau <[email protected]>
> > Signed-off-by: Jules Maselbas <[email protected]>
> > ---
>
> > --- /dev/null
> > +++ b/arch/kvx/include/asm/dma.h
> > @@ -0,0 +1,35 @@
> > +/* SPDX-License-Identifier: GPL-2.0-only */
> > +/* SPDX-FileCopyrightText: 2021 Yann Sionneau <[email protected]>,
> > Kalray Inc. */
> > +
> > +#ifndef __ASM_DMA_H
> > +#define __ASM_DMA_H
> > +
> > +#include <common.h>
> > +
> > +#define KVX_DDR_32BIT_RAM_WINDOW_BA (0x80000000ULL)
> > +#define KVX_DDR_64BIT_RAM_WINDOW_BA (0x100000000ULL)
> > +#define MAX_32BIT_ADDR (0xffffffffULL)
> > +
> > +#define dma_alloc dma_alloc
> > +static inline void *dma_alloc(size_t size)
> > +{
> > + return xmemalign(64, ALIGN(size, 64));
> > +}
> > +
> > +static inline void *dma_alloc_coherent(size_t size, dma_addr_t *dma_handle)
> > +{
> > + void *ret = xmemalign(PAGE_SIZE, size);
> > +
> > + if (dma_handle)
> > + *dma_handle = (dma_addr_t)(uintptr_t)ret;
> > +
> > + return ret;
> > +}
>
> This would imply that the CPU barebox is booting is coherent with all
>
> devices that barebox needs to access. Is that the case?
>
> (See below)
>
> > +
> > +static inline void dma_free_coherent(void *mem, dma_addr_t dma_handle,
> > + size_t size)
> > +{
> > + free(mem);
> > +}
> > +
> > +#endif /* __ASM_DMA_H */
> > diff --git a/arch/kvx/include/asm/sys_arch.h
> > b/arch/kvx/include/asm/sys_arch.h
> > index 9df32c4e7..ce07a5598 100644
> > --- a/arch/kvx/include/asm/sys_arch.h
> > +++ b/arch/kvx/include/asm/sys_arch.h
> > @@ -11,6 +11,9 @@
> > #define EXCEPTION_STRIDE 0x40
> > #define EXCEPTION_ALIGNMENT 0x100
> >
> >
> >
> >
> >
> >
> >
> >
> > +#define kvx_cluster_id() ((int) \
> > + ((kvx_sfr_get(PCR) & KVX_SFR_PCR_CID_MASK) \
> > + >> KVX_SFR_PCR_CID_SHIFT))
> > #define KVX_SFR_START(__sfr_reg) \
> > (KVX_SFR_## __sfr_reg ## _SHIFT)
> >
> >
> >
> >
> >
> >
> >
> >
> > diff --git a/arch/kvx/lib/Makefile b/arch/kvx/lib/Makefile
> > index d271ebccf..c730e1c23 100644
> > --- a/arch/kvx/lib/Makefile
> > +++ b/arch/kvx/lib/Makefile
> > @@ -3,4 +3,4 @@
> > # Copyright (C) 2019 Kalray Inc.
> > #
> >
> >
> >
> >
> >
> >
> >
> >
> > -obj-y += cpuinfo.o board.o dtb.o poweroff.o bootm.o setjmp.o cache.o
> > +obj-y += cpuinfo.o board.o dtb.o poweroff.o bootm.o setjmp.o cache.o
> > dma-default.o
> > diff --git a/arch/kvx/lib/dma-default.c b/arch/kvx/lib/dma-default.c
> > new file mode 100644
> > index 000000000..755a8c66f
> > --- /dev/null
> > +++ b/arch/kvx/lib/dma-default.c
> > @@ -0,0 +1,91 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +// SPDX-FileCopyrightText: 2021 Yann Sionneau <[email protected]>,
> > Kalray Inc.
> > +
> > +#include <dma.h>
> > +#include <asm/barrier.h>
> > +#include <asm/io.h>
> > +#include <asm/cache.h>
> > +#include <asm/sfr.h>
> > +#include <asm/sys_arch.h>
> > +
> > +/*
> > + * The implementation of arch should follow the following rules:
> > + * map for_cpu for_device unmap
> > + * TO_DEV writeback none writeback none
> > + * FROM_DEV invalidate invalidate(*) invalidate
> > invalidate(*)
> > + * BIDIR writeback invalidate writeback invalidate
> > + *
> > + * (*) - only necessary if the CPU speculatively prefetches.
> > + *
> > + * (see https://lkml.org/lkml/2018/5/18/979)
> > + */
> > +
> > +void dma_sync_single_for_device(dma_addr_t addr, size_t size,
> > + enum dma_data_direction dir)
> > +{
> > + switch (dir) {
> > + case DMA_FROM_DEVICE:
> > + kvx_dcache_invalidate_mem_area(addr, size);
Why do you need to explicitly invalidate here, but not flush? Even if the
CPU speculatively prefetches, shouldn't the coherency protocol itself
invalidate any speculatively loaded lines?
> > + break;
> > + case DMA_TO_DEVICE:
> > + case DMA_BIDIRECTIONAL:
> > + /* allow device to read buffer written by CPU */
> > + wmb();
>
> If the interconnect was indeed coherent, like dma_alloc_coherent
> above hints, you wouldn't need any barriers here..?
Coherency does not imply strict ordering, so the barriers are in fact
correct, as the CPU write buffers and/or the interconnect can still
change the ordering of the writes as seen by a remote observer.
> > + break;
> > + default:
> > + BUG();
> > + }
> > +}
> > +
> > +void dma_sync_single_for_cpu(dma_addr_t addr, size_t size,
> > + enum dma_data_direction dir)
> > +{
> > + switch (dir) {
> > + case DMA_FROM_DEVICE:
> > + case DMA_TO_DEVICE:
> > + break;
> > + case DMA_BIDIRECTIONAL:
> > + kvx_dcache_invalidate_mem_area(addr, size);
> > + break;
> > + default:
> > + BUG();
> > + }
> > +}
> > +
> > +#define KVX_DDR_ALIAS_OFFSET \
> > + (KVX_DDR_64BIT_RAM_WINDOW_BA - KVX_DDR_32BIT_RAM_WINDOW_BA)
> > +#define KVX_DDR_ALIAS_WINDOW \
> > + (KVX_DDR_64BIT_RAM_WINDOW_BA + KVX_DDR_ALIAS_OFFSET)
> > +
> > +/* Local smem is aliased between 0 and 16MB */
> > +#define KVX_SMEM_LOCAL_ALIAS 0x1000000ULL
> > +
> > +dma_addr_t dma_map_single(struct device_d *dev, void *ptr, size_t size,
> > + enum dma_data_direction dir)
> > +{
> > + uintptr_t addr = (uintptr_t) ptr;
> > +
> > + dma_sync_single_for_device(addr, size, dir);
> > +
> > + /* Local smem alias should never be used for dma */
> > + if (addr < KVX_SMEM_LOCAL_ALIAS)
> > + return addr + (1 + kvx_cluster_id()) * KVX_SMEM_LOCAL_ALIAS;
> > +
> > + if (dev->dma_mask && addr <= dev->dma_mask)
> > + return addr;
> > +
> > + if (addr >= KVX_DDR_ALIAS_WINDOW)
> > + return DMA_ERROR_CODE;
> > +
> > + addr -= KVX_DDR_ALIAS_OFFSET;
> > + if (dev->dma_mask && addr > dev->dma_mask)
> > + return DMA_ERROR_CODE;
> > +
> > + return addr;
> > +}
> > +
> > +void dma_unmap_single(struct device_d *dev, dma_addr_t addr, size_t size,
> > + enum dma_data_direction dir)
> > +{
> > + dma_sync_single_for_cpu(addr, size, dir);
> > +}
> >
>
_______________________________________________
barebox mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/barebox