Re: [Qemu-devel] [PATCH v7 38/42] memory: Single byte swap along the I/O path

2019-08-18 Thread Richard Henderson
On 8/16/19 8:38 AM, tony.ngu...@bt.com wrote:
> +static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
>  {
> +if ((op & MO_BSWAP) != mr->ops->endianness) {
> +switch (op & MO_SIZE) {

You'll want to use devend_memop() here, as previously discussed.

> @@ -2331,7 +2322,7 @@ void memory_region_add_eventfd(MemoryRegion *mr,
>  }
> 
>  if (size) {
> -adjust_endianness(mr, &mrfd.data, size);
> +adjust_endianness(mr, &mrfd.data, size_memop(size));
>  }
>  memory_region_transaction_begin();
>  for (i = 0; i < mr->ioeventfd_nb; ++i) {
> @@ -2366,7 +2357,7 @@ void memory_region_del_eventfd(MemoryRegion *mr,
>  unsigned i;
> 
>  if (size) {
> -adjust_endianness(mr, &mrfd.data, size);
> +adjust_endianness(mr, &mrfd.data, size_memop(size));
>  }
>  memory_region_transaction_begin();
>  for (i = 0; i < mr->ioeventfd_nb; ++i) {

To preserve behaviour it would appear that these need MO_TE.


r~



[Qemu-devel] [PATCH v7 38/42] memory: Single byte swap along the I/O path

2019-08-16 Thread tony.nguyen
Now that MemOp has been pushed down into the memory API, and
callers are encoding endianness, we can collapse byte swaps
along the I/O path into the accelerator- and target-independent
adjust_endianness().

Collapsing byte swaps along the I/O path enables additional endian
inversion logic, e.g. SPARC64 Invert Endian TTE bit, with redundant
byte swaps cancelling out.

Suggested-by: Richard Henderson 
Signed-off-by: Tony Nguyen 
---
 accel/tcg/cputlb.c | 42 +++--
 hw/virtio/virtio-pci.c | 10 
 memory.c   | 33 ++
 memory_ldst.inc.c  | 63 --
 4 files changed, 19 insertions(+), 129 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 8022c81..bb2f55d 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1200,38 +1200,6 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 cpu_loop_exit_atomic(env_cpu(env), retaddr);
 }

-#ifdef TARGET_WORDS_BIGENDIAN
-#define NEED_BE_BSWAP 0
-#define NEED_LE_BSWAP 1
-#else
-#define NEED_BE_BSWAP 1
-#define NEED_LE_BSWAP 0
-#endif
-
-/*
- * Byte Swap Helper
- *
- * This should all dead code away depending on the build host and
- * access type.
- */
-
-static inline uint64_t handle_bswap(uint64_t val, MemOp op)
-{
-if ((memop_big_endian(op) && NEED_BE_BSWAP) ||
-(!memop_big_endian(op) && NEED_LE_BSWAP)) {
-switch (op & MO_SIZE) {
-case MO_8: return val;
-case MO_16: return bswap16(val);
-case MO_32: return bswap32(val);
-case MO_64: return bswap64(val);
-default:
-g_assert_not_reached();
-}
-} else {
-return val;
-}
-}
-
 /*
  * Load Helpers
  *
@@ -1306,10 +1274,8 @@ load_helper(CPUArchState *env, target_ulong addr, 
TCGMemOpIdx oi,
 }
 }

-/* FIXME: io_readx ignores MO_BSWAP.  */
-res = io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
-   mmu_idx, addr, retaddr, access_type, op);
-return handle_bswap(res, op);
+return io_readx(env, &env_tlb(env)->d[mmu_idx].iotlb[index],
+mmu_idx, addr, retaddr, access_type, op);
 }

 /* Handle slow unaligned access (it spans two pages or IO).  */
@@ -1552,10 +1518,8 @@ store_helper(CPUArchState *env, target_ulong addr, 
uint64_t val,
 }
 }

-/* FIXME: io_writex ignores MO_BSWAP.  */
 io_writex(env, &env_tlb(env)->d[mmu_idx].iotlb[index], mmu_idx,
-  handle_bswap(val, op),
-  addr, retaddr, op);
+  val, addr, retaddr, op);
 return;
 }

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index ad06c12..84f820d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -542,16 +542,15 @@ void virtio_address_space_write(VirtIOPCIProxy *proxy, 
hwaddr addr,
 val = pci_get_byte(buf);
 break;
 case 2:
-val = cpu_to_le16(pci_get_word(buf));
+val = pci_get_word(buf);
 break;
 case 4:
-val = cpu_to_le32(pci_get_long(buf));
+val = pci_get_long(buf);
 break;
 default:
 /* As length is under guest control, handle illegal values. */
 return;
 }
-/* FIXME: memory_region_dispatch_write ignores MO_BSWAP.  */
 memory_region_dispatch_write(mr, addr, val, size_memop(len) | MO_LE,
  MEMTXATTRS_UNSPECIFIED);
 }
@@ -576,7 +575,6 @@ virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr 
addr,
 /* Make sure caller aligned buf properly */
 assert(!(((uintptr_t)buf) & (len - 1)));

-/* FIXME: memory_region_dispatch_read ignores MO_BSWAP.  */
 memory_region_dispatch_read(mr, addr, &val, size_memop(len) | MO_LE,
 MEMTXATTRS_UNSPECIFIED);
 switch (len) {
@@ -584,10 +582,10 @@ virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr 
addr,
 pci_set_byte(buf, val);
 break;
 case 2:
-pci_set_word(buf, le16_to_cpu(val));
+pci_set_word(buf, val);
 break;
 case 4:
-pci_set_long(buf, le32_to_cpu(val));
+pci_set_long(buf, val);
 break;
 default:
 /* As length is under guest control, handle illegal values. */
diff --git a/memory.c b/memory.c
index 01fd29d..ebe0066 100644
--- a/memory.c
+++ b/memory.c
@@ -343,32 +343,23 @@ static void flatview_simplify(FlatView *view)
 }
 }

-static bool memory_region_wrong_endianness(MemoryRegion *mr)
+static void adjust_endianness(MemoryRegion *mr, uint64_t *data, MemOp op)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-return mr->ops->endianness == MO_LE;
-#else
-return mr->ops->endianness == MO_BE;
-#endif
-}
-
-static void adjust_endianness(MemoryRegion *mr, uint64_t *data, unsigned size)
-{
-if (memory_region_wrong_endianness(mr)) {
-switch (size) {
-case 1:
+if ((op & MO_BSWAP) !=