Re: [Qemu-devel] [V17 3/4] hw/i386: Introduce AMD IOMMU

2016-09-17 Thread David Kiarie
On Sat, Sep 17, 2016 at 7:59 AM, David Kiarie 
wrote:

>
>
> On 16/09/16 21:58, Michael S. Tsirkin wrote:
>
>> On Wed, Aug 31, 2016 at 07:17:42PM +0300, David Kiarie wrote:
>>
> Hi Michael,
>
> +
>> +/* issue a PCIe completion packet for devid */
>> +typedef struct QEMU_PACKED {
>> +uint32_t reserved_1:16;
>> +uint32_t devid:16;
>> +
>> +#ifdef HOST_WORDS_BIGENDIAN
>> +uint32_t type:4;  /* command type   */
>> +uint32_t reserved_2:8;
>> +uint32_t pasid_19_0:20
>> +#else
>> +uint32_t pasid_19_0:20;
>> +uint32_t reserved_2:8;
>> +uint32_t type:4;
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +
>> +#ifdef HOST_WORDS_BIGENDIAN
>> +uint32_t reserved_3:29;
>> +uint32_t guest:1;
>> +uint32_t reserved_4:2;
>> +#else
>> +uint32_t reserved_3:2;
>> +uint32_t guest:1;
>> +uint32_t reserved_4:29;
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +
>> +uint32_t completion_tag:16;  /* PCIe PRI information */
>> +uint32_t reserved_5:16;
>> +} CMDCompletePPR;
>> So as Peter points, out, we don't do this kind of
>> thing. Pls drop this ifdefery and rework using
>> masks and cpu_to_le. E.g.
>>
>>  > +#ifdef HOST_WORDS_BIGENDIAN
>>  > +uint32_t reserved_3:29;
>>  > +uint32_t guest:1;
>>  > +uint32_t reserved_4:2;
>>  > +#else
>>  > +uint32_t reserved_3:2;
>>  > +uint32_t guest:1;
>>  > +uint32_t reserved_4:29;
>>  > +#endif /* __BIG_ENDIAN_BITFIELD */
>>
>>  guest = 1;
>>
>> ->
>>
>> #define GUEST cpu_to_le32(0x1 << 2)
>> uint32_t guest;
>>
>> guest = GUEST;
>>
>
> This might not solve the problem we are encountering here. I don't intent
> to write any values to these fields. I only intend to read.
>
> I read into these structs using 'dma_memory_read' which gives me a memory
> order dependent on the host. Byte swapping the read buffer will lead into
> some of the fields being permanently corrupted since they span across byte
> boundaries. I actually didn't have any bitfields(and was not taking care of
> BE) but then there were some complains that the code was barely readable
> hence the bitfields and later the ifdefinery but I've never checked that
> the BE code complies.


'extract64' seems to solve everything though so I can get rid of all the
host dependent defines: didn't know we had this before.


>


Re: [Qemu-devel] [V17 3/4] hw/i386: Introduce AMD IOMMU

2016-09-16 Thread David Kiarie



On 16/09/16 21:58, Michael S. Tsirkin wrote:

On Wed, Aug 31, 2016 at 07:17:42PM +0300, David Kiarie wrote:

Hi Michael,

+
+/* issue a PCIe completion packet for devid */
+typedef struct QEMU_PACKED {
+uint32_t reserved_1:16;
+uint32_t devid:16;
+
+#ifdef HOST_WORDS_BIGENDIAN
+uint32_t type:4;  /* command type   */
+uint32_t reserved_2:8;
+uint32_t pasid_19_0:20
+#else
+uint32_t pasid_19_0:20;
+uint32_t reserved_2:8;
+uint32_t type:4;
+#endif /* __BIG_ENDIAN_BITFIELD */
+
+#ifdef HOST_WORDS_BIGENDIAN
+uint32_t reserved_3:29;
+uint32_t guest:1;
+uint32_t reserved_4:2;
+#else
+uint32_t reserved_3:2;
+uint32_t guest:1;
+uint32_t reserved_4:29;
+#endif /* __BIG_ENDIAN_BITFIELD */
+
+uint32_t completion_tag:16;  /* PCIe PRI information */
+uint32_t reserved_5:16;
+} CMDCompletePPR;
So as Peter points, out, we don't do this kind of
thing. Pls drop this ifdefery and rework using
masks and cpu_to_le. E.g.

 > +#ifdef HOST_WORDS_BIGENDIAN
 > +uint32_t reserved_3:29;
 > +uint32_t guest:1;
 > +uint32_t reserved_4:2;
 > +#else
 > +uint32_t reserved_3:2;
 > +uint32_t guest:1;
 > +uint32_t reserved_4:29;
 > +#endif /* __BIG_ENDIAN_BITFIELD */

 guest = 1;

->

#define GUEST cpu_to_le32(0x1 << 2)
uint32_t guest;

guest = GUEST;


This might not solve the problem we are encountering here. I don't 
intent to write any values to these fields. I only intend to read.


I read into these structs using 'dma_memory_read' which gives me a 
memory order dependent on the host. Byte swapping the read buffer will 
lead into some of the fields being permanently corrupted since they span 
across byte boundaries. I actually didn't have any bitfields(and was not 
taking care of BE) but then there were some complains that the code was 
barely readable hence the bitfields and later the ifdefinery but I've 
never checked that the BE code complies.










Re: [Qemu-devel] [V17 3/4] hw/i386: Introduce AMD IOMMU

2016-09-16 Thread Michael S. Tsirkin
On Wed, Aug 31, 2016 at 07:17:42PM +0300, David Kiarie wrote:
> +/* serialize IOMMU command processing */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t type:4;   /* command type   */
> +uint64_t reserved:8;
> +uint64_t store_addr:49;/* addr to write  */
> +uint64_t completion_flush:1;   /* allow more executions  */
> +uint64_t completion_int:1; /* set MMIOWAITINT*/
> +uint64_t completion_store:1;   /* write data to address  */
> +#else
> +uint64_t completion_store:1;
> +uint64_t completion_int:1;
> +uint64_t completion_flush:1;
> +uint64_t store_addr:49;
> +uint64_t reserved:8;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +uint64_t store_data;   /* data to write  */
> +} CMDCompletionWait;
> +
> +/* invalidate internal caches for devid */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t devid:16; /* device to invalidate   */
> +uint64_t reserved_1:44;
> +uint64_t type:4;   /* command type   */
> +#else
> +uint64_t devid:16;
> +uint64_t reserved_1:44;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +uint64_t reserved_2;
> +} CMDInvalDevEntry;
> +
> +/* invalidate a range of entries in IOMMU translation cache for devid */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t type:4;   /* command type   */
> +uint64_t reserved_2:12
> +uint64_t domid:16; /* domain to inval for*/
> +uint64_t reserved_1:12;
> +uint64_t pasid:20;
> +#else
> +uint64_t pasid:20;
> +uint64_t reserved_1:12;
> +uint64_t domid:16;
> +uint64_t reserved_2:12;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t address:51;  /* address to invalidate   */
> +uint64_t reserved_3:10;
> +uint64_t guest:1; /* G/N invalidation*/
> +uint64_t pde:1;   /* invalidate cached ptes  */
> +uint64_t size:1   /* size of invalidation*/
> +#else
> +uint64_t size:1;
> +uint64_t pde:1;
> +uint64_t guest:1;
> +uint64_t reserved_3:10;
> +uint64_t address:51;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +} CMDInvalIommuPages;
> +
> +/* inval specified address for devid from remote IOTLB */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t type:4;/* command type*/
> +uint64_t pasid_19_6:4;
> +uint64_t pasid_7_0:8;
> +uint64_t queuid:16;
> +uint64_t maxpend:8;
> +uint64_t pasid_15_8;
> +uint64_t devid:16; /* related devid*/
> +#else
> +uint64_t devid:16;
> +uint64_t pasid_15_8:8;
> +uint64_t maxpend:8;
> +uint64_t queuid:16;
> +uint64_t pasid_7_0:8;
> +uint64_t pasid_19_6:4;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t address:52;   /* invalidate addr  */
> +uint64_t reserved_2:9;
> +uint64_t guest:1;  /* G/N invalidate   */
> +uint64_t reserved_1:1;
> +uint64_t size:1;   /* size of invalidation */
> +#else
> +uint64_t size:1;
> +uint64_t reserved_1:1;
> +uint64_t guest:1;
> +uint64_t reserved_2:9;
> +uint64_t address:52;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +} CMDInvalIOTLBPages;
> +
> +/* invalidate all cached interrupt info for devid */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t type:4;  /* command type*/
> +uint64_t reserved_1:44;
> +uint64_t devid:16;/* related devid   */
> +#else
> +uint64_t devid:16;
> +uint64_t reserved_1:44;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +uint64_t reserved_2;
> +} CMDInvalIntrTable;
> +
> +/* load adddress translation info for devid into translation cache */
> +typedef struct QEMU_PACKED {
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t type:4;  /* command type   */
> +uint64_t reserved_2:8;
> +uint64_t pasid_19_0:20;
> +uint64_t pfcount_7_0:8;
> +uint64_t reserved_1:8;
> +uint64_t devid:16;/* related devid  */
> +#else
> +uint64_t devid:16;
> +uint64_t reserved_1:8;
> +uint64_t pfcount_7_0:8;
> +uint64_t pasid_19_0:20;
> +uint64_t reserved_2:8;
> +uint64_t type:4;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> +#ifdef HOST_WORDS_BIGENDIAN
> +uint64_t address:52; /* invalidate address   */
> +uint64_t reserved_5:7;
> +uint64_t inval:1;/* inval matching entries   */
> +uint64_t reserved_4:1;
> +uint64_t guest:1;/* G/N invalidate   */
> +uint64_t reserved_3:1;
> +uint64_t size:1; /* prefetched page size */
> +#else
> +uint64_t size:1;
> +uint64_t 

[Qemu-devel] [V17 3/4] hw/i386: Introduce AMD IOMMU

2016-08-31 Thread David Kiarie
Add AMD IOMMU emulaton to Qemu in addition to Intel IOMMU.
The IOMMU does basic translation, error checking and has a
minimal IOTLB implementation. This IOMMU bypassed the need
for target aborts by responding with IOMMU_NONE access rights
and exempts the region 0xfee0-0xfeef from translation
as it is the q35 interrupt region.

We advertise features that are not yet implemented to please
the Linux IOMMU driver.

IOTLB aims at implementing commands on real IOMMUs which is
essential for debugging and may not offer any performance
benefits

Signed-off-by: David Kiarie 
---
 hw/i386/Makefile.objs |1 +
 hw/i386/amd_iommu.c   | 1381 +
 hw/i386/amd_iommu.h   |  289 +++
 3 files changed, 1671 insertions(+)
 create mode 100644 hw/i386/amd_iommu.c
 create mode 100644 hw/i386/amd_iommu.h

diff --git a/hw/i386/Makefile.objs b/hw/i386/Makefile.objs
index 90e94ff..909ead6 100644
--- a/hw/i386/Makefile.objs
+++ b/hw/i386/Makefile.objs
@@ -3,6 +3,7 @@ obj-y += multiboot.o
 obj-y += pc.o pc_piix.o pc_q35.o
 obj-y += pc_sysfw.o
 obj-y += x86-iommu.o intel_iommu.o
+obj-y += amd_iommu.o
 obj-$(CONFIG_XEN) += ../xenpv/ xen/
 
 obj-y += kvmvapic.o
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
new file mode 100644
index 000..e6a4c58
--- /dev/null
+++ b/hw/i386/amd_iommu.c
@@ -0,0 +1,1381 @@
+/*
+ * QEMU emulation of AMD IOMMU (AMD-Vi)
+ *
+ * Copyright (C) 2011 Eduard - Gabriel Munteanu
+ * Copyright (C) 2015 David Kiarie, 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see .
+ *
+ * Cache implementation inspired by hw/i386/intel_iommu.c
+ */
+#include "qemu/osdep.h"
+#include "hw/i386/amd_iommu.h"
+#include "trace.h"
+
+/* used AMD-Vi MMIO registers */
+const char *amdvi_mmio_low[] = {
+"AMDVI_MMIO_DEVTAB_BASE",
+"AMDVI_MMIO_CMDBUF_BASE",
+"AMDVI_MMIO_EVTLOG_BASE",
+"AMDVI_MMIO_CONTROL",
+"AMDVI_MMIO_EXCL_BASE",
+"AMDVI_MMIO_EXCL_LIMIT",
+"AMDVI_MMIO_EXT_FEATURES",
+"AMDVI_MMIO_PPR_BASE",
+"UNHANDLED"
+};
+const char *amdvi_mmio_high[] = {
+"AMDVI_MMIO_COMMAND_HEAD",
+"AMDVI_MMIO_COMMAND_TAIL",
+"AMDVI_MMIO_EVTLOG_HEAD",
+"AMDVI_MMIO_EVTLOG_TAIL",
+"AMDVI_MMIO_STATUS",
+"AMDVI_MMIO_PPR_HEAD",
+"AMDVI_MMIO_PPR_TAIL",
+"UNHANDLED"
+};
+typedef struct AMDVIAddressSpace {
+uint8_t bus_num;/* bus number   */
+uint8_t devfn;  /* device function  */
+AMDVIState *iommu_state;/* AMDVI - one per machine  */
+MemoryRegion iommu; /* Device's address translation region  */
+MemoryRegion iommu_ir;  /* Device's interrupt remapping region  */
+AddressSpace as;/* device's corresponding address space */
+} AMDVIAddressSpace;
+
+/* AMDVI cache entry */
+typedef struct AMDVIIOTLBEntry {
+uint16_t domid; /* assigned domain id  */
+uint16_t devid; /* device owning entry */
+uint64_t perms; /* access permissions  */
+uint64_t translated_addr;   /* translated address  */
+uint64_t page_mask; /* physical page size  */
+} AMDVIIOTLBEntry;
+
+/* serialize IOMMU command processing */
+typedef struct QEMU_PACKED {
+#ifdef HOST_WORDS_BIGENDIAN
+uint64_t type:4;   /* command type   */
+uint64_t reserved:8;
+uint64_t store_addr:49;/* addr to write  */
+uint64_t completion_flush:1;   /* allow more executions  */
+uint64_t completion_int:1; /* set MMIOWAITINT*/
+uint64_t completion_store:1;   /* write data to address  */
+#else
+uint64_t completion_store:1;
+uint64_t completion_int:1;
+uint64_t completion_flush:1;
+uint64_t store_addr:49;
+uint64_t reserved:8;
+uint64_t type:4;
+#endif /* __BIG_ENDIAN_BITFIELD */
+uint64_t store_data;   /* data to write  */
+} CMDCompletionWait;
+
+/* invalidate internal caches for devid */
+typedef struct QEMU_PACKED {
+#ifdef HOST_WORDS_BIGENDIAN
+uint64_t devid:16; /* device to invalidate   */
+uint64_t reserved_1:44;
+uint64_t type:4;   /* command type   */
+#else
+uint64_t devid:16;
+uint64_t reserved_1:44;
+uint64_t type:4;
+#endif /* __BIG_ENDIAN_BITFIELD */
+uint64_t