> Quoting Roland Dreier <[EMAIL PROTECTED]>:
> Subject: Re: libmlx4 wc flash
> 
> ...and this for libmlx4?
> 
> diff --git a/src/mlx4.h b/src/mlx4.h
> index c4d389f..1e92b88 100644
> --- a/src/mlx4.h
> +++ b/src/mlx4.h
> @@ -65,6 +65,20 @@
>  #  define wmb() mb()
>  #endif
>  
> +#ifndef wc_wmb
> +
> +#if defined(__i386__)
> +#define wc_wmb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
> +#elif defined(__x86_64__)
> +#define wc_wmb() asm volatile("sfence" ::: "memory")
> +#elif defined(__ia64__)
> +#define wc_wmb() asm volatile("fwb" ::: "memory")
> +#else
> +#define wc_wmb() wmb()
> +#endif
> +
> +#endif
> +
>  #define HIDDEN               __attribute__((visibility ("hidden")))
>  
>  #define PFX          "mlx4: "
> diff --git a/src/qp.c b/src/qp.c
> index a70e5f2..a4384f9 100644
> --- a/src/qp.c
> +++ b/src/qp.c
> @@ -282,9 +282,12 @@ out:
>               ++qp->sq.head;
>  
>               pthread_spin_lock(&ctx->bf_lock);
> +
>               memcpy(ctx->bf_page + ctx->bf_offset, ctrl, align(size * 16, 64));
> -             /* FIXME flush wc buffers */
> +             wc_wmb();
> +
>               ctx->bf_offset ^= ctx->bf_buf_size;
> +
>               pthread_spin_unlock(&ctx->bf_lock);
>       } else if (nreq) {
>               qp->sq.head += nreq;

Since both the need for fencing and the size being copied are
architecture-dependent, a better API might be a memcpy_wc() that
does the size alignment tricks and the flush in one go.


-- 
MST
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to