date:20220113

Re: ppc pbr403 vmstate

2022-01-13 Thread Cédric Le Goater


On 1/14/22 00:41, David Gibson wrote:

On Thu, Jan 13, 2022 at 06:51:56PM +0000, Dr. David Alan Gilbert wrote:

Hi,
   Is there any easy way of getting a machine where the pbr403 vmstate
would be generated?


The condition in pbr403_needed is...

 return (pvr & 0xffff0000) == 0x00200000;

.. which looks to be the PVR for ppc403 models.  That makes sense with
the section name... but not so much with the fact that it's under
cpu/tlb6xx.  The 6xx MMU is basically unrelated to the 40x MMU.  But
it looks like the vmstate_tlbemb might be shared between them, because
of bad ideas of the past.

But in any case, we already dropped what little 403 support we ever
had - there's nothing with that PVR even listed in
target/ppc/cpu-models.h.

So I think we should just drop it.


Yes. But we cannot remove env.pb, since that would break migration
compatibility, correct?


I will send a patch.

Thanks,

C.

[PULL 13/20] hw/mips/jazz: Inline vga_mmio_init() and remove it

2022-01-13 Thread Gerd Hoffmann

From: Philippe Mathieu-Daudé 

vga_mmio_init() is used only once and is not very helpful;
inline it and remove it.

Reviewed-by: BALATON Zoltan 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Thomas Huth 
Message-Id: <20211206224528.563588-5-f4...@amsat.org>
Signed-off-by: Gerd Hoffmann 
---
 include/hw/display/vga.h |  5 -
 hw/display/vga-mmio.c| 19 ---
 hw/mips/jazz.c   |  9 -
 3 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h
index 98b2e560f9b3..a79aa2909b25 100644
--- a/include/hw/display/vga.h
+++ b/include/hw/display/vga.h
@@ -9,8 +9,6 @@
 #ifndef QEMU_HW_DISPLAY_VGA_H
 #define QEMU_HW_DISPLAY_VGA_H
 
-#include "exec/hwaddr.h"
-
 /*
  * modules can reference this symbol to avoid being loaded
  * into system emulators without vga support
@@ -26,7 +24,4 @@ extern enum vga_retrace_method vga_retrace_method;
 
 #define TYPE_VGA_MMIO "vga-mmio"
 
-int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
-  int it_shift, MemoryRegion *address_space);
-
 #endif
diff --git a/hw/display/vga-mmio.c b/hw/display/vga-mmio.c
index 10bde32af5ca..496936808137 100644
--- a/hw/display/vga-mmio.c
+++ b/hw/display/vga-mmio.c
@@ -24,7 +24,6 @@
 
 #include "qemu/osdep.h"
 #include "qapi/error.h"
-#include "hw/display/vga.h"
 #include "hw/sysbus.h"
 #include "hw/display/vga.h"
 #include "hw/qdev-properties.h"
@@ -85,24 +84,6 @@ static void vga_mmio_reset(DeviceState *dev)
 vga_common_reset(>vga);
 }
 
-int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
-  int it_shift, MemoryRegion *address_space)
-{
-DeviceState *dev;
-SysBusDevice *s;
-
-dev = qdev_new(TYPE_VGA_MMIO);
-qdev_prop_set_uint8(dev, "it_shift", it_shift);
-s = SYS_BUS_DEVICE(dev);
-sysbus_realize_and_unref(s, _fatal);
-
-sysbus_mmio_map(s, 0, ctrl_base);
-sysbus_mmio_map(s, 1, vram_base + 0x000a);
-sysbus_mmio_map(s, 2, VBE_DISPI_LFB_PHYSICAL_ADDRESS);
-
-return 0;
-}
-
 static void vga_mmio_realizefn(DeviceState *dev, Error **errp)
 {
 VGAMmioState *s = VGA_MMIO(dev);
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index 8f345afd137a..44f0d48bfd75 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -43,6 +43,7 @@
 #include "hw/rtc/mc146818rtc.h"
 #include "hw/timer/i8254.h"
 #include "hw/display/vga.h"
+#include "hw/display/bochs-vbe.h"
 #include "hw/audio/pcspk.h"
 #include "hw/input/i8042.h"
 #include "hw/sysbus.h"
@@ -274,7 +275,13 @@ static void mips_jazz_init(MachineState *machine,
 }
 break;
 case JAZZ_PICA61:
-vga_mmio_init(0x4000, 0x6000, 0, get_system_memory());
+dev = qdev_new(TYPE_VGA_MMIO);
+qdev_prop_set_uint8(dev, "it_shift", 0);
+sysbus = SYS_BUS_DEVICE(dev);
+sysbus_realize_and_unref(sysbus, _fatal);
+sysbus_mmio_map(sysbus, 0, 0x6000);
+sysbus_mmio_map(sysbus, 1, 0x400a);
+sysbus_mmio_map(sysbus, 2, VBE_DISPI_LFB_PHYSICAL_ADDRESS);
 break;
 default:
 break;
-- 
2.34.1

[PULL 15/20] edid: Added support for 4k@60 Hz monitor

2022-01-13 Thread Gerd Hoffmann

From: Satyeshwar Singh 

Previously, the large modes (>1080p) that were generated by Qemu in its EDID
were all 50 Hz. If we provide them to a Guest OS and the user selects
one of these modes, then the OS by default only gets 50 FPS. This is
especially true for Windows OS. With this patch, we are now exposing a
3840x2160@60 Hz which will allow the guest OS to get 60 FPS.

Cc: Gerd Hoffmann 
Signed-off-by: Satyeshwar Singh 
Message-Id: <2026221103.27128-1-dongwon@intel.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/display/edid-generate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/display/edid-generate.c b/hw/display/edid-generate.c
index 6f5ac6a38ad8..bccf32af69ce 100644
--- a/hw/display/edid-generate.c
+++ b/hw/display/edid-generate.c
@@ -24,6 +24,9 @@ static const struct edid_mode {
 { .xres = 2048,   .yres = 1152 },
 { .xres = 1920,   .yres = 1080,   .dta =  31 },
 
+/* dea/dta extension timings (all @ 60 Hz) */
+{ .xres = 3840,   .yres = 2160,   .dta =  97 },
+
 /* additional standard timings 3 (all @ 60Hz) */
 { .xres = 1920,   .yres = 1200,   .xtra3 = 10,   .bit = 0 },
 { .xres = 1600,   .yres = 1200,   .xtra3 =  9,   .bit = 2 },
-- 
2.34.1

[PULL 07/20] ui/dbus: fix buffer-overflow detected by ASAN

2022-01-13 Thread Gerd Hoffmann

From: Marc-André Lureau 

On the last added dbus patch, I left a tiny BO:

==441487==ERROR: AddressSanitizer: heap-buffer-overflow on address 
0x61125a70 at pc 0x7f0817bb764c bp 0x7ffde672ae60 sp 0x7ffde672ae58
WRITE of size 8 at 0x61125a70 thread T0
#0 0x7f0817bb764b in dbus_vc_class_init ../ui/dbus.c:401

A cookie for ASAN! not you C :)

Signed-off-by: Marc-André Lureau 
Fixes: 7f767ca35e5 ("ui/dbus: register D-Bus VC handler")
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211222144032.443424-1-marcandre.lur...@redhat.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/dbus.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ui/dbus.c b/ui/dbus.c
index b2c1c9fb522c..0074424c1fed 100644
--- a/ui/dbus.c
+++ b/ui/dbus.c
@@ -405,6 +405,7 @@ dbus_vc_class_init(ObjectClass *oc, void *data)
 static const TypeInfo dbus_vc_type_info = {
 .name = TYPE_CHARDEV_VC,
 .parent = TYPE_CHARDEV_DBUS,
+.class_size = sizeof(DBusVCClass),
 .class_init = dbus_vc_class_init,
 };
 
-- 
2.34.1

[PULL 20/20] ui/input-legacy: pass horizontal scroll information

2022-01-13 Thread Gerd Hoffmann

From: Dmitry Petrov 

This code seems to be used by the vmport hack; passing these values makes it
possible to implement horizontal scroll support even when using vmport.
If it's not supported, horizontal scroll will act as a vertical one.

Signed-off-by: Dmitry Petrov 
Message-Id: <20220108153947.171861-6-dpetr...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/input-legacy.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/ui/input-legacy.c b/ui/input-legacy.c
index 9fc78a639bd4..46ea74e44d6d 100644
--- a/ui/input-legacy.c
+++ b/ui/input-legacy.c
@@ -23,6 +23,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/log.h"
 #include "qapi/qapi-commands-ui.h"
 #include "ui/console.h"
 #include "keymaps.h"
@@ -179,6 +180,20 @@ static void legacy_mouse_event(DeviceState *dev, 
QemuConsole *src,
 1,
 s->buttons);
 }
+if (btn->down && btn->button == INPUT_BUTTON_WHEEL_RIGHT) {
+s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque,
+s->axis[INPUT_AXIS_X],
+s->axis[INPUT_AXIS_Y],
+-2,
+s->buttons);
+}
+if (btn->down && btn->button == INPUT_BUTTON_WHEEL_LEFT) {
+s->qemu_put_mouse_event(s->qemu_put_mouse_event_opaque,
+s->axis[INPUT_AXIS_X],
+s->axis[INPUT_AXIS_Y],
+2,
+s->buttons);
+}
 break;
 case INPUT_EVENT_KIND_ABS:
 move = evt->u.abs.data;
-- 
2.34.1

[PULL 16/20] ps2: Initial horizontal scroll support

2022-01-13 Thread Gerd Hoffmann

From: Dmitry Petrov 

This change adds support for horizontal scroll to ps/2 mouse device
code. The code is implemented to match the logic of linux kernel
which is used as a reference.

Signed-off-by: Dmitry Petrov 
Message-Id: <20220108153947.171861-2-dpetr...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/input/ps2.c | 57 +++---
 qapi/ui.json   |  2 +-
 2 files changed, 50 insertions(+), 9 deletions(-)

diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index 9376a8f4ce53..6236711e1b8f 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -123,6 +123,7 @@ typedef struct {
 int mouse_dx; /* current values, needed for 'poll' mode */
 int mouse_dy;
 int mouse_dz;
+int mouse_dw;
 uint8_t mouse_buttons;
 } PS2MouseState;
 
@@ -715,7 +716,7 @@ static int ps2_mouse_send_packet(PS2MouseState *s)
 /* IMPS/2 and IMEX send 4 bytes, PS2 sends 3 bytes */
 const int needed = s->mouse_type ? 4 : 3;
 unsigned int b;
-int dx1, dy1, dz1;
+int dx1, dy1, dz1, dw1;
 
 if (PS2_QUEUE_SIZE - s->common.queue.count < needed) {
 return 0;
@@ -724,6 +725,7 @@ static int ps2_mouse_send_packet(PS2MouseState *s)
 dx1 = s->mouse_dx;
 dy1 = s->mouse_dy;
 dz1 = s->mouse_dz;
+dw1 = s->mouse_dw;
 /* XXX: increase range to 8 bits ? */
 if (dx1 > 127)
 dx1 = 127;
@@ -740,6 +742,9 @@ static int ps2_mouse_send_packet(PS2MouseState *s)
 /* extra byte for IMPS/2 or IMEX */
 switch(s->mouse_type) {
 default:
+/* Just ignore the wheels if not supported */
+s->mouse_dz = 0;
+s->mouse_dw = 0;
 break;
 case 3:
 if (dz1 > 127)
@@ -747,13 +752,41 @@ static int ps2_mouse_send_packet(PS2MouseState *s)
 else if (dz1 < -127)
 dz1 = -127;
 ps2_queue_noirq(>common, dz1 & 0xff);
+s->mouse_dz -= dz1;
+s->mouse_dw = 0;
 break;
 case 4:
-if (dz1 > 7)
-dz1 = 7;
-else if (dz1 < -7)
-dz1 = -7;
-b = (dz1 & 0x0f) | ((s->mouse_buttons & 0x18) << 1);
+/*
+ * This matches what the Linux kernel expects for exps/2 in
+ * drivers/input/mouse/psmouse-base.c. Note, if you happen to
+ * press/release the 4th or 5th buttons at the same moment as a
+ * horizontal wheel scroll, those button presses will get lost. I'm not
+ * sure what to do about that, since by this point we don't know
+ * whether those buttons actually changed state.
+ */
+if (dw1 != 0) {
+if (dw1 > 31) {
+dw1 = 31;
+} else if (dw1 < -31) {
+dw1 = -31;
+}
+
+/*
+ * linux kernel expects first 6 bits to represent the value
+ * for horizontal scroll
+ */
+b = (dw1 & 0x3f) | 0x40;
+s->mouse_dw -= dw1;
+} else {
+if (dz1 > 7) {
+dz1 = 7;
+} else if (dz1 < -7) {
+dz1 = -7;
+}
+
+b = (dz1 & 0x0f) | ((s->mouse_buttons & 0x18) << 1);
+s->mouse_dz -= dz1;
+}
 ps2_queue_noirq(>common, b);
 break;
 }
@@ -764,7 +797,6 @@ static int ps2_mouse_send_packet(PS2MouseState *s)
 /* update deltas */
 s->mouse_dx -= dx1;
 s->mouse_dy -= dy1;
-s->mouse_dz -= dz1;
 
 return 1;
 }
@@ -806,6 +838,12 @@ static void ps2_mouse_event(DeviceState *dev, QemuConsole 
*src,
 } else if (btn->button == INPUT_BUTTON_WHEEL_DOWN) {
 s->mouse_dz++;
 }
+
+if (btn->button == INPUT_BUTTON_WHEEL_RIGHT) {
+s->mouse_dw--;
+} else if (btn->button == INPUT_BUTTON_WHEEL_LEFT) {
+s->mouse_dw++;
+}
 } else {
 s->mouse_buttons &= ~bmap[btn->button];
 }
@@ -833,8 +871,10 @@ static void ps2_mouse_sync(DeviceState *dev)
 /* if not remote, send event. Multiple events are sent if
too big deltas */
 while (ps2_mouse_send_packet(s)) {
-if (s->mouse_dx == 0 && s->mouse_dy == 0 && s->mouse_dz == 0)
+if (s->mouse_dx == 0 && s->mouse_dy == 0
+&& s->mouse_dz == 0 && s->mouse_dw == 0) {
 break;
+}
 }
 }
 }
@@ -1036,6 +1076,7 @@ static void ps2_mouse_reset(void *opaque)
 s->mouse_dx = 0;
 s->mouse_dy = 0;
 s->mouse_dz = 0;
+s->mouse_dw = 0;
 s->mouse_buttons = 0;
 }
 
diff --git a/qapi/ui.json b/qapi/ui.json
index 2b4371da3777..9354f4c46716 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -905,7 +905,7 @@
 ##
 { 'enum'  : 'InputButton',
   'data'  : [ 'left', 'middle', 'right', 'wheel-up', 'wheel-down', 'side',
-  'extra' ] }
+  'extra', 'wheel-left', 'wheel-right' ] }
 
 ##
 # @InputAxis:
-- 
2.34.1

[PULL 04/20] jackaudio: use ifdefs to hide unavailable functions

2022-01-13 Thread Gerd Hoffmann

From: Volker Rümelin 

On Windows the jack_set_thread_creator() function, and on macOS the
pthread_setname_np() function with a thread pointer parameter, are
not available. Use #ifdefs to remove the jack_set_thread_creator()
function call and the qjack_thread_creator() function in both
cases.

The qjack_thread_creator() function just sets the name of the
created thread for debugging purposes and isn't really necessary.

From the jack_set_thread_creator() documentation:
(...)

No normal application/client should consider calling this. (...)

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/785
Signed-off-by: Volker Rümelin 
Reviewed-by: Christian Schoenebeck 
Message-Id: <20211226154017.6067-1-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/jackaudio.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/audio/jackaudio.c b/audio/jackaudio.c
index e7de6d5433e9..317009e93660 100644
--- a/audio/jackaudio.c
+++ b/audio/jackaudio.c
@@ -622,6 +622,7 @@ static void qjack_enable_in(HWVoiceIn *hw, bool enable)
 ji->c.enabled = enable;
 }
 
+#if !defined(WIN32) && defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
 static int qjack_thread_creator(jack_native_thread_t *thread,
 const pthread_attr_t *attr, void *(*function)(void *), void *arg)
 {
@@ -635,6 +636,7 @@ static int qjack_thread_creator(jack_native_thread_t 
*thread,
 
 return ret;
 }
+#endif
 
 static void *qjack_init(Audiodev *dev)
 {
@@ -687,7 +689,9 @@ static void register_audio_jack(void)
 {
 qemu_mutex_init(_shutdown_lock);
 audio_driver_register(_driver);
+#if !defined(WIN32) && defined(CONFIG_PTHREAD_SETNAME_NP_W_TID)
 jack_set_thread_creator(qjack_thread_creator);
+#endif
 jack_set_error_function(qjack_error);
 jack_set_info_function(qjack_info);
 }
-- 
2.34.1

[PULL 09/20] uas: add missing return

2022-01-13 Thread Gerd Hoffmann

Otherwise we run the error handling code even for successful requests.

Fixes: 13b250b12ad3 ("uas: add stream number sanity checks.")
Reported-by: Guenter Roeck 
Signed-off-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Message-Id: <20211210080659.2537084-1-kra...@redhat.com>
---
 hw/usb/dev-uas.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 599d6b52a012..c9f295e7e449 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -908,6 +908,7 @@ static void usb_uas_handle_data(USBDevice *dev, USBPacket 
*p)
 p->status = USB_RET_STALL;
 break;
 }
+return;
 
 err_stream:
 error_report("%s: invalid stream %d", __func__, p->stream);
-- 
2.34.1

[PULL 18/20] ui/gtk: pass horizontal scroll information to the device code

2022-01-13 Thread Gerd Hoffmann

From: Dmitry Petrov 

Signed-off-by: Dmitry Petrov 
Message-Id: <20220108153947.171861-4-dpetr...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk.c | 54 ++
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/ui/gtk.c b/ui/gtk.c
index 6a1f65d51894..a8567b9ddc8f 100644
--- a/ui/gtk.c
+++ b/ui/gtk.c
@@ -968,33 +968,63 @@ static gboolean gd_scroll_event(GtkWidget *widget, 
GdkEventScroll *scroll,
 void *opaque)
 {
 VirtualConsole *vc = opaque;
-InputButton btn;
+InputButton btn_vertical;
+InputButton btn_horizontal;
+bool has_vertical = false;
+bool has_horizontal = false;
 
 if (scroll->direction == GDK_SCROLL_UP) {
-btn = INPUT_BUTTON_WHEEL_UP;
+btn_vertical = INPUT_BUTTON_WHEEL_UP;
+has_vertical = true;
 } else if (scroll->direction == GDK_SCROLL_DOWN) {
-btn = INPUT_BUTTON_WHEEL_DOWN;
+btn_vertical = INPUT_BUTTON_WHEEL_DOWN;
+has_vertical = true;
+} else if (scroll->direction == GDK_SCROLL_LEFT) {
+btn_horizontal = INPUT_BUTTON_WHEEL_LEFT;
+has_horizontal = true;
+} else if (scroll->direction == GDK_SCROLL_RIGHT) {
+btn_horizontal = INPUT_BUTTON_WHEEL_RIGHT;
+has_horizontal = true;
 } else if (scroll->direction == GDK_SCROLL_SMOOTH) {
 gdouble delta_x, delta_y;
 if (!gdk_event_get_scroll_deltas((GdkEvent *)scroll,
  _x, _y)) {
 return TRUE;
 }
-if (delta_y == 0) {
-return TRUE;
-} else if (delta_y > 0) {
-btn = INPUT_BUTTON_WHEEL_DOWN;
+
+if (delta_y > 0) {
+btn_vertical = INPUT_BUTTON_WHEEL_DOWN;
+has_vertical = true;
+} else if (delta_y < 0) {
+btn_vertical = INPUT_BUTTON_WHEEL_UP;
+has_vertical = true;
+} else if (delta_x > 0) {
+btn_horizontal = INPUT_BUTTON_WHEEL_RIGHT;
+has_horizontal = true;
+} else if (delta_x < 0) {
+btn_horizontal = INPUT_BUTTON_WHEEL_LEFT;
+has_horizontal = true;
 } else {
-btn = INPUT_BUTTON_WHEEL_UP;
+return TRUE;
 }
 } else {
 return TRUE;
 }
 
-qemu_input_queue_btn(vc->gfx.dcl.con, btn, true);
-qemu_input_event_sync();
-qemu_input_queue_btn(vc->gfx.dcl.con, btn, false);
-qemu_input_event_sync();
+if (has_vertical) {
+qemu_input_queue_btn(vc->gfx.dcl.con, btn_vertical, true);
+qemu_input_event_sync();
+qemu_input_queue_btn(vc->gfx.dcl.con, btn_vertical, false);
+qemu_input_event_sync();
+}
+
+if (has_horizontal) {
+qemu_input_queue_btn(vc->gfx.dcl.con, btn_horizontal, true);
+qemu_input_event_sync();
+qemu_input_queue_btn(vc->gfx.dcl.con, btn_horizontal, false);
+qemu_input_event_sync();
+}
+
 return TRUE;
 }
 
-- 
2.34.1

Re: [PATCH 3/3] intel-iommu: PASID support

2022-01-13 Thread Peter Xu

On Fri, Jan 14, 2022 at 01:58:07PM +0800, Jason Wang wrote:
> > > Right, but I think you meant to do this only when scalable mode is
> > > disabled.
> >
> > Yes IMHO it will definitely suit the !scalable case since that's exactly
> > what we did before.  What I'm also wondering is even if scalable is
> > enabled but no "real" pasid is used, so if all the translations go
> > through the default pasid that is stored in the device context entry,
> > then maybe we can ignore checking it.
> > The latter is the "hacky" part mentioned above.
> 
> The problem I see is that we can't know what PASID is used as default
> without reading the context entry?

Can the default NO_PASID be used in a mixture with !NO_PASID use cases on the
same device?  If that's possible, then I agree..

My previous idea was based on the assumption that if NO_PASID is used on one
device, then all translations will be based on NO_PASID, but now I'm not sure
of it.

> 
> >
> > The other thing to mention is, if we postpone the iotlb lookup to be after
> > context entry, then logically we can have per-device iotlb, that means we
> > can replace IntelIOMMUState.iotlb with VTDAddressSpace.iotlb in the
> > future, too, which can also be more efficient.
> 
> Right but we still need to limit the total slots and ATS is a better
> way to deal with the IOTLB bottleneck actually.

I think it depends on how the iotlb ghash is implemented.  Logically I think if
we can split the cache to per-device it'll be slightly better because we don't
need to iterate over iotlbs of other devices when lookup anymore; meanwhile
each iotlb takes less space too (no devfn needed anymore).

Thanks,

-- 
Peter Xu

[PULL 05/20] dsoundaudio: fix crackling audio recordings

2022-01-13 Thread Gerd Hoffmann

From: Volker Rümelin 

Audio recordings with the DirectSound backend don't sound right.
A look at the Microsoft online documentation tells us why.

From the DirectSound Programming Guide, Capture Buffer Information:
'You can safely copy data from the buffer only up to the read
cursor.'

Change the code to read up to the read cursor instead of the
capture cursor.

Signed-off-by: Volker Rümelin 
Message-Id: <20211226154017.6067-2-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 audio/dsoundaudio.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/audio/dsoundaudio.c b/audio/dsoundaudio.c
index cfc79c129eee..3dd2c4d4a60b 100644
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@@ -536,13 +536,12 @@ static void *dsound_get_buffer_in(HWVoiceIn *hw, size_t 
*size)
 DSoundVoiceIn *ds = (DSoundVoiceIn *) hw;
 LPDIRECTSOUNDCAPTUREBUFFER dscb = ds->dsound_capture_buffer;
 HRESULT hr;
-DWORD cpos, rpos, act_size;
+DWORD rpos, act_size;
 size_t req_size;
 int err;
 void *ret;
 
-hr = IDirectSoundCaptureBuffer_GetCurrentPosition(
-dscb, , ds->first_time ?  : NULL);
+hr = IDirectSoundCaptureBuffer_GetCurrentPosition(dscb, NULL, );
 if (FAILED(hr)) {
 dsound_logerr(hr, "Could not get capture buffer position\n");
 *size = 0;
@@ -554,7 +553,7 @@ static void *dsound_get_buffer_in(HWVoiceIn *hw, size_t 
*size)
 ds->first_time = false;
 }
 
-req_size = audio_ring_dist(cpos, hw->pos_emul, hw->size_emul);
+req_size = audio_ring_dist(rpos, hw->pos_emul, hw->size_emul);
 req_size = MIN(*size, MIN(req_size, hw->size_emul - hw->pos_emul));
 
 if (req_size == 0) {
-- 
2.34.1

[PULL 17/20] ui/cocoa: pass horizontal scroll information to the device code

2022-01-13 Thread Gerd Hoffmann

From: Dmitry Petrov 

Signed-off-by: Dmitry Petrov 
Message-Id: <20220108153947.171861-3-dpetr...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/cocoa.m | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/ui/cocoa.m b/ui/cocoa.m
index 69745c483b45..ac18e14ce01c 100644
--- a/ui/cocoa.m
+++ b/ui/cocoa.m
@@ -970,21 +970,27 @@ QemuCocoaView *cocoaView;
  */
 
 /*
- * When deltaY is zero, it means that this scrolling event was
- * either horizontal, or so fine that it only appears in
- * scrollingDeltaY. So we drop the event.
+ * We shouldn't have got a scroll event when deltaY and delta Y
+ * are zero, hence no harm in dropping the event
  */
-if ([event deltaY] != 0) {
+if ([event deltaY] != 0 || [event deltaX] != 0) {
 /* Determine if this is a scroll up or scroll down event */
-buttons = ([event deltaY] > 0) ?
+if ([event deltaY] != 0) {
+  buttons = ([event deltaY] > 0) ?
 INPUT_BUTTON_WHEEL_UP : INPUT_BUTTON_WHEEL_DOWN;
+} else if ([event deltaX] != 0) {
+  buttons = ([event deltaX] > 0) ?
+INPUT_BUTTON_WHEEL_LEFT : INPUT_BUTTON_WHEEL_RIGHT;
+}
+
 qemu_input_queue_btn(dcl.con, buttons, true);
 qemu_input_event_sync();
 qemu_input_queue_btn(dcl.con, buttons, false);
 qemu_input_event_sync();
 }
+
 /*
- * Since deltaY also reports scroll wheel events we prevent mouse
+ * Since deltaX/deltaY also report scroll wheel events we prevent 
mouse
  * movement code from executing.
  */
 mouse_event = false;
-- 
2.34.1

[PULL 06/20] hw/audio/intel-hda: fix stream reset

2022-01-13 Thread Gerd Hoffmann

From: Volker Rümelin 

Quote from:
High Definition Audio Specification 1.0a, section 3.3.35

Offset 80: {IOB}SDnCTL Stream Reset (SRST): Writing a 1 causes
the corresponding stream to be reset. The Stream Descriptor
registers (except the SRST bit itself) ... are reset.

Change the code to reset the Stream Descriptor Control and Status
registers except the SRST bit.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/757
Signed-off-by: Volker Rümelin 
Message-Id: <20211226154017.6067-3-vr_q...@t-online.de>
Signed-off-by: Gerd Hoffmann 
---
 hw/audio/intel-hda.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/audio/intel-hda.c b/hw/audio/intel-hda.c
index 2b55d521503f..5f8a878f20c1 100644
--- a/hw/audio/intel-hda.c
+++ b/hw/audio/intel-hda.c
@@ -581,7 +581,7 @@ static void intel_hda_set_st_ctl(IntelHDAState *d, const 
IntelHDAReg *reg, uint3
 if (st->ctl & 0x01) {
 /* reset */
 dprint(d, 1, "st #%d: reset\n", reg->stream);
-st->ctl = SD_STS_FIFO_READY << 24;
+st->ctl = SD_STS_FIFO_READY << 24 | SD_CTL_STREAM_RESET;
 }
 if ((st->ctl & 0x02) != (old & 0x02)) {
 uint32_t stnr = (st->ctl >> 20) & 0x0f;
-- 
2.34.1

[PULL 19/20] ui/sdl2: pass horizontal scroll information to the device code

2022-01-13 Thread Gerd Hoffmann

From: Dmitry Petrov 

Signed-off-by: Dmitry Petrov 
Message-Id: <20220108153947.171861-5-dpetr...@gmail.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/sdl2.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/ui/sdl2.c b/ui/sdl2.c
index 0bd30504cfcc..46a252d7d9d7 100644
--- a/ui/sdl2.c
+++ b/ui/sdl2.c
@@ -33,6 +33,7 @@
 #include "sysemu/runstate-action.h"
 #include "sysemu/sysemu.h"
 #include "ui/win32-kbd-hook.h"
+#include "qemu/log.h"
 
 static int sdl2_num_outputs;
 static struct sdl2_console *sdl2_console;
@@ -535,6 +536,10 @@ static void handle_mousewheel(SDL_Event *ev)
 btn = INPUT_BUTTON_WHEEL_UP;
 } else if (wev->y < 0) {
 btn = INPUT_BUTTON_WHEEL_DOWN;
+} else if (wev->x < 0) {
+btn = INPUT_BUTTON_WHEEL_RIGHT;
+} else if (wev->x > 0) {
+btn = INPUT_BUTTON_WHEEL_LEFT;
 } else {
 return;
 }
-- 
2.34.1

[PULL 02/20] usb: allow max 8192 bytes for desc

2022-01-13 Thread Gerd Hoffmann

From: zhenwei pi 

A device of the USB video class usually uses a larger descriptor structure,
so use a larger buffer to avoid failure. (dev-video.c is ready)

This is an unlikely code path:
1. during guest startup, the guest tries to probe the device;
2. the 'lsusb' command (or another similar command) is run in the guest.

Reviewed-by: Daniel P. Berrangé 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: zhenwei pi 
Message-Id: <20220112015835.900619-1-pizhen...@bytedance.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/desc.h |  1 +
 hw/usb/desc.c | 15 ---
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/hw/usb/desc.h b/hw/usb/desc.h
index 3ac604ecfa17..35babdeff624 100644
--- a/hw/usb/desc.h
+++ b/hw/usb/desc.h
@@ -199,6 +199,7 @@ struct USBDesc {
 const USBDescMSOS *msos;
 };
 
+#define USB_DESC_MAX_LEN8192
 #define USB_DESC_FLAG_SUPER (1 << 1)
 
 /* little helpers */
diff --git a/hw/usb/desc.c b/hw/usb/desc.c
index 8b6eaea4079e..7f6cc2f99bd4 100644
--- a/hw/usb/desc.c
+++ b/hw/usb/desc.c
@@ -632,7 +632,8 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p,
 bool msos = (dev->flags & (1 << USB_DEV_FLAG_MSOS_DESC_IN_USE));
 const USBDesc *desc = usb_device_get_usb_desc(dev);
 const USBDescDevice *other_dev;
-uint8_t buf[256];
+size_t buflen = USB_DESC_MAX_LEN;
+g_autofree uint8_t *buf = g_malloc(buflen);
 uint8_t type = value >> 8;
 uint8_t index = value & 0xff;
 int flags, ret = -1;
@@ -650,36 +651,36 @@ int usb_desc_get_descriptor(USBDevice *dev, USBPacket *p,
 
 switch(type) {
 case USB_DT_DEVICE:
-ret = usb_desc_device(>id, dev->device, msos, buf, sizeof(buf));
+ret = usb_desc_device(>id, dev->device, msos, buf, buflen);
 trace_usb_desc_device(dev->addr, len, ret);
 break;
 case USB_DT_CONFIG:
 if (index < dev->device->bNumConfigurations) {
 ret = usb_desc_config(dev->device->confs + index, flags,
-  buf, sizeof(buf));
+  buf, buflen);
 }
 trace_usb_desc_config(dev->addr, index, len, ret);
 break;
 case USB_DT_STRING:
-ret = usb_desc_string(dev, index, buf, sizeof(buf));
+ret = usb_desc_string(dev, index, buf, buflen);
 trace_usb_desc_string(dev->addr, index, len, ret);
 break;
 case USB_DT_DEVICE_QUALIFIER:
 if (other_dev != NULL) {
-ret = usb_desc_device_qualifier(other_dev, buf, sizeof(buf));
+ret = usb_desc_device_qualifier(other_dev, buf, buflen);
 }
 trace_usb_desc_device_qualifier(dev->addr, len, ret);
 break;
 case USB_DT_OTHER_SPEED_CONFIG:
 if (other_dev != NULL && index < other_dev->bNumConfigurations) {
 ret = usb_desc_config(other_dev->confs + index, flags,
-  buf, sizeof(buf));
+  buf, buflen);
 buf[0x01] = USB_DT_OTHER_SPEED_CONFIG;
 }
 trace_usb_desc_other_speed_config(dev->addr, index, len, ret);
 break;
 case USB_DT_BOS:
-ret = usb_desc_bos(desc, buf, sizeof(buf));
+ret = usb_desc_bos(desc, buf, buflen);
 trace_usb_desc_bos(dev->addr, len, ret);
 break;
 
-- 
2.34.1

Re: [RFC PATCH] MAINTAINERS: Add myself to s390 I/O areas

2022-01-13 Thread Christian Borntraeger





Am 12.01.22 um 17:40 schrieb Eric Farman:

After the recent restructuring, I'd like to volunteer to help
in some of the s390 I/O areas.

Built on "[PATCH RFC v2] MAINTAINERS: split out s390x sections"

Signed-off-by: Eric Farman 


Acked-by: Christian Borntraeger 

Thanks a lot Eric

---
  MAINTAINERS | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d37b0eec5..343f43e83d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1521,6 +1521,7 @@ S390 Machines
  S390 Virtio-ccw
  M: Halil Pasic 
  M: Christian Borntraeger 
+M: Eric Farman 
  S: Supported
  F: hw/s390x/
  F: include/hw/s390x/
@@ -1551,6 +1552,7 @@ L: qemu-s3...@nongnu.org
  S390 channel subsystem
  M: Halil Pasic 
  M: Christian Borntraeger 
+M: Eric Farman 
  S: Supported
  F: hw/s390x/ccw-device.[ch]
  F: hw/s390x/css.c
@@ -1975,6 +1977,7 @@ T: git https://github.com/stefanha/qemu.git block
  virtio-ccw
  M: Cornelia Huck 
  M: Halil Pasic 
+M: Eric Farman 
  S: Supported
  F: hw/s390x/virtio-ccw*.[hc]
  F: hw/s390x/vhost-vsock-ccw.c

[PULL 12/20] hw/display/vga-mmio: QOM'ify vga_mmio_init() as TYPE_VGA_MMIO

2022-01-13 Thread Gerd Hoffmann

From: Philippe Mathieu-Daudé 

Introduce TYPE_VGA_MMIO, a sysbus device.

While there is no change in the vga_mmio_init()
interface, this is a migration compatibility break
of the MIPS Acer Pica 61 Jazz machine (pica61).

Suggested-by: Thomas Huth 
Reviewed-by: BALATON Zoltan 
Signed-off-by: Philippe Mathieu-Daudé 
Reviewed-by: Thomas Huth 
Message-Id: <20211206224528.563588-4-f4...@amsat.org>
Signed-off-by: Gerd Hoffmann 
---
 include/hw/display/vga.h |   2 +
 hw/display/vga-mmio.c| 120 ---
 2 files changed, 88 insertions(+), 34 deletions(-)

diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h
index c16a5c26dae9..98b2e560f9b3 100644
--- a/include/hw/display/vga.h
+++ b/include/hw/display/vga.h
@@ -24,6 +24,8 @@ enum vga_retrace_method {
 
 extern enum vga_retrace_method vga_retrace_method;
 
+#define TYPE_VGA_MMIO "vga-mmio"
+
 int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
   int it_shift, MemoryRegion *address_space);
 
diff --git a/hw/display/vga-mmio.c b/hw/display/vga-mmio.c
index 5671fdb920f7..10bde32af5ca 100644
--- a/hw/display/vga-mmio.c
+++ b/hw/display/vga-mmio.c
@@ -23,21 +23,34 @@
  */
 
 #include "qemu/osdep.h"
-#include "qemu/bitops.h"
-#include "qemu/units.h"
-#include "migration/vmstate.h"
+#include "qapi/error.h"
 #include "hw/display/vga.h"
+#include "hw/sysbus.h"
+#include "hw/display/vga.h"
+#include "hw/qdev-properties.h"
 #include "vga_int.h"
-#include "ui/pixel_ops.h"
 
-#define VGA_RAM_SIZE (8 * MiB)
+/*
+ * QEMU interface:
+ *  + sysbus MMIO region 0: VGA I/O registers
+ *  + sysbus MMIO region 1: VGA MMIO registers
+ *  + sysbus MMIO region 2: VGA memory
+ */
 
-typedef struct VGAMmioState {
+OBJECT_DECLARE_SIMPLE_TYPE(VGAMmioState, VGA_MMIO)
+
+struct VGAMmioState {
+/*< private >*/
+SysBusDevice parent_obj;
+
+/*< public >*/
 VGACommonState vga;
-int it_shift;
-} VGAMmioState;
+MemoryRegion iomem;
+MemoryRegion lowmem;
+
+uint8_t it_shift;
+};
 
-/* Memory mapped interface */
 static uint64_t vga_mm_read(void *opaque, hwaddr addr, unsigned size)
 {
 VGAMmioState *s = opaque;
@@ -65,42 +78,81 @@ static const MemoryRegionOps vga_mm_ctrl_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
+static void vga_mmio_reset(DeviceState *dev)
+{
+VGAMmioState *s = VGA_MMIO(dev);
+
+vga_common_reset(>vga);
+}
+
 int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
   int it_shift, MemoryRegion *address_space)
 {
-VGAMmioState *s;
-MemoryRegion *s_ioport_ctrl, *vga_io_memory;
+DeviceState *dev;
+SysBusDevice *s;
 
-s = g_malloc0(sizeof(*s));
+dev = qdev_new(TYPE_VGA_MMIO);
+qdev_prop_set_uint8(dev, "it_shift", it_shift);
+s = SYS_BUS_DEVICE(dev);
+sysbus_realize_and_unref(s, _fatal);
 
-s->vga.vram_size_mb = VGA_RAM_SIZE / MiB;
-s->vga.global_vmstate = true;
-vga_common_init(>vga, NULL);
+sysbus_mmio_map(s, 0, ctrl_base);
+sysbus_mmio_map(s, 1, vram_base + 0x000a);
+sysbus_mmio_map(s, 2, VBE_DISPI_LFB_PHYSICAL_ADDRESS);
 
-s->it_shift = it_shift;
-s_ioport_ctrl = g_malloc(sizeof(*s_ioport_ctrl));
-memory_region_init_io(s_ioport_ctrl, NULL, _mm_ctrl_ops, s,
-  "vga-mm-ctrl", 0x10);
-memory_region_set_flush_coalesced(s_ioport_ctrl);
+return 0;
+}
+
+static void vga_mmio_realizefn(DeviceState *dev, Error **errp)
+{
+VGAMmioState *s = VGA_MMIO(dev);
+SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+memory_region_init_io(>iomem, OBJECT(dev), _mm_ctrl_ops, s,
+  "vga-mmio", 0x10);
+memory_region_set_flush_coalesced(>iomem);
+sysbus_init_mmio(sbd, >iomem);
 
-vga_io_memory = g_malloc(sizeof(*vga_io_memory));
 /* XXX: endianness? */
-memory_region_init_io(vga_io_memory, NULL, _mem_ops, >vga,
-  "vga-mem", 0x2);
+memory_region_init_io(>lowmem, OBJECT(dev), _mem_ops, >vga,
+  "vga-lowmem", 0x2);
+memory_region_set_coalescing(>lowmem);
+sysbus_init_mmio(sbd, >lowmem);
 
-vmstate_register(NULL, 0, _vga_common, s);
-
-memory_region_add_subregion(address_space, ctrl_base, s_ioport_ctrl);
 s->vga.bank_offset = 0;
-memory_region_add_subregion(address_space,
-vram_base + 0x000a, vga_io_memory);
-memory_region_set_coalescing(vga_io_memory);
+s->vga.global_vmstate = true;
+vga_common_init(>vga, OBJECT(dev));
+sysbus_init_mmio(sbd, >vga.vram);
+s->vga.con = graphic_console_init(dev, 0, s->vga.hw_ops, >vga);
+}
 
-s->vga.con = graphic_console_init(NULL, 0, s->vga.hw_ops, s);
+static Property vga_mmio_properties[] = {
+DEFINE_PROP_UINT8("it_shift", VGAMmioState, it_shift, 0),
+DEFINE_PROP_UINT32("vgamem_mb", VGAMmioState, vga.vram_size_mb, 8),
+DEFINE_PROP_END_OF_LIST(),
+};
 
-memory_region_add_subregion(address_space,
-

[PULL 08/20] ui: fix gtk clipboard clear assertion

2022-01-13 Thread Gerd Hoffmann

From: Marc-André Lureau 

When closing the QEMU Gtk display window, it can occasionally warn:
qemu-system-x86_64: Gtk: gtk_clipboard_set_with_data: assertion 'targets != 
NULL' failed

#3  0x74f02f22 in gtk_clipboard_set_with_data (clipboard=, targets=, n_targets=, get_func=, clear_func=, user_data=) at 
/usr/src/debug/gtk3-3.24.30-4.fc35.x86_64/gtk/gtkclipboard.c:672
#4  0x7552cd75 in gd_clipboard_update_info (gd=0x579a9e00, 
info=0x57ba4b50) at ../ui/gtk-clipboard.c:98
#5  0x7552ce00 in gd_clipboard_notify (notifier=0x579aaba8, 
data=0x7fffd720) at ../ui/gtk-clipboard.c:128
#6  0x5603e0ff in notifier_list_notify (list=0x56657470 
, data=0x7fffd720) at ../util/notify.c:39
#7  0x5594e8e0 in qemu_clipboard_update (info=0x57ba4b50) at 
../ui/clipboard.c:54
#8  0x5594e840 in qemu_clipboard_peer_release (peer=0x5684a5b0, 
selection=QEMU_CLIPBOARD_SELECTION_PRIMARY) at ../ui/clipboard.c:40
#9  0x5594e786 in qemu_clipboard_peer_unregister (peer=0x5684a5b0) 
at ../ui/clipboard.c:19
#10 0x5595f044 in vdagent_disconnect (vd=0x5684a400) at 
../ui/vdagent.c:852
#11 0x5595f262 in vdagent_chr_fini (obj=0x5684a400) at 
../ui/vdagent.c:908

Signed-off-by: Marc-André Lureau 
Message-Id: <20211216083233.1166504-1-marcandre.lur...@redhat.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/gtk-clipboard.c | 19 ++-
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/ui/gtk-clipboard.c b/ui/gtk-clipboard.c
index e0b8b283fef8..d58fd761abdf 100644
--- a/ui/gtk-clipboard.c
+++ b/ui/gtk-clipboard.c
@@ -83,7 +83,7 @@ static void gd_clipboard_update_info(GtkDisplayState *gd,
 if (info != qemu_clipboard_info(s)) {
 gd->cbpending[s] = 0;
 if (!self_update) {
-GtkTargetList *list;
+g_autoptr(GtkTargetList) list = NULL;
 GtkTargetEntry *targets;
 gint n_targets;
 
@@ -94,15 +94,16 @@ static void gd_clipboard_update_info(GtkDisplayState *gd,
 targets = gtk_target_table_new_from_list(list, _targets);
 
 gtk_clipboard_clear(gd->gtkcb[s]);
-gd->cbowner[s] = true;
-gtk_clipboard_set_with_data(gd->gtkcb[s],
-targets, n_targets,
-gd_clipboard_get_data,
-gd_clipboard_clear,
-gd);
+if (targets) {
+gd->cbowner[s] = true;
+gtk_clipboard_set_with_data(gd->gtkcb[s],
+targets, n_targets,
+gd_clipboard_get_data,
+gd_clipboard_clear,
+gd);
 
-gtk_target_table_free(targets, n_targets);
-gtk_target_list_unref(list);
+gtk_target_table_free(targets, n_targets);
+}
 }
 return;
 }
-- 
2.34.1

[PULL 11/20] hw/display/vga-mmio: Inline vga_mm_init()

2022-01-13 Thread Gerd Hoffmann

From: Philippe Mathieu-Daudé 

Inline vga_mm_init() in vga_mmio_init() to simplify the
next patch review. Kind of.

Reviewed-by: BALATON Zoltan 
Reviewed-by: Thomas Huth 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20211206224528.563588-3-f4...@amsat.org>
Signed-off-by: Gerd Hoffmann 
---
 hw/display/vga-mmio.c | 25 +
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/hw/display/vga-mmio.c b/hw/display/vga-mmio.c
index 4ffe3afe32db..5671fdb920f7 100644
--- a/hw/display/vga-mmio.c
+++ b/hw/display/vga-mmio.c
@@ -65,12 +65,18 @@ static const MemoryRegionOps vga_mm_ctrl_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void vga_mm_init(VGAMmioState *s, hwaddr vram_base,
-hwaddr ctrl_base, int it_shift,
-MemoryRegion *address_space)
+int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
+  int it_shift, MemoryRegion *address_space)
 {
+VGAMmioState *s;
 MemoryRegion *s_ioport_ctrl, *vga_io_memory;
 
+s = g_malloc0(sizeof(*s));
+
+s->vga.vram_size_mb = VGA_RAM_SIZE / MiB;
+s->vga.global_vmstate = true;
+vga_common_init(>vga, NULL);
+
 s->it_shift = it_shift;
 s_ioport_ctrl = g_malloc(sizeof(*s_ioport_ctrl));
 memory_region_init_io(s_ioport_ctrl, NULL, _mm_ctrl_ops, s,
@@ -89,19 +95,6 @@ static void vga_mm_init(VGAMmioState *s, hwaddr vram_base,
 memory_region_add_subregion(address_space,
 vram_base + 0x000a, vga_io_memory);
 memory_region_set_coalescing(vga_io_memory);
-}
-
-int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
-  int it_shift, MemoryRegion *address_space)
-{
-VGAMmioState *s;
-
-s = g_malloc0(sizeof(*s));
-
-s->vga.vram_size_mb = VGA_RAM_SIZE / MiB;
-s->vga.global_vmstate = true;
-vga_common_init(>vga, NULL);
-vga_mm_init(s, vram_base, ctrl_base, it_shift, address_space);
 
 s->vga.con = graphic_console_init(NULL, 0, s->vga.hw_ops, s);
 
-- 
2.34.1

Re: [PATCH 06/30] bsd-user/arm/target_arch_cpu.h: Correct code pointer

2022-01-13 Thread Warner Losh

On Thu, Jan 13, 2022 at 10:15 AM Peter Maydell 
wrote:

> On Sun, 9 Jan 2022 at 16:26, Warner Losh  wrote:
> >
> > The code has moved in FreeBSD since the emulator was started, update the
> > comment to reflect that change. Remove now-redundant comment saying the
> > same thing (but incorrectly).
> >
> > Signed-off-by: Warner Losh 
> > ---
> >  bsd-user/arm/target_arch_cpu.h | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/bsd-user/arm/target_arch_cpu.h
> b/bsd-user/arm/target_arch_cpu.h
> > index 05b19ce6119..905f13aa1b9 100644
> > --- a/bsd-user/arm/target_arch_cpu.h
> > +++ b/bsd-user/arm/target_arch_cpu.h
> > @@ -73,7 +73,7 @@ static inline void target_cpu_loop(CPUARMState *env)
> >  int32_t syscall_nr = n;
> >  int32_t arg1, arg2, arg3, arg4, arg5, arg6, arg7,
> arg8;
> >
> > -/* See arm/arm/trap.c cpu_fetch_syscall_args() */
> > +/* See arm/arm/syscall.c cpu_fetch_syscall_args() */
> >  if (syscall_nr == TARGET_FREEBSD_NR_syscall) {
> >  syscall_nr = env->regs[0];
> >  arg1 = env->regs[1];
>
> Commit message says we're updating one comment and deleting a
> second one; code only does an update, no delete ?
>

Commit is right, commit message is wrong. I'll fix the commit message. I got
this confused with part 8 where I kinda sorta did something similar (but not
that similar).

Warner

[PULL 01/20] hw/usb/dev-wacom: add missing HID descriptor

2022-01-13 Thread Gerd Hoffmann

From: Dario Binacchi 

Linux needs to fill up the HID descriptor in order to let the driver be
emulated. The descriptor was downloaded from [1]. The patch was tested
with evtest tool on top of qemu 5.2.0 with linux kernel 4.19.208.

[1] 
https://github.com/linuxwacom/wacom-hid-descriptors/tree/master/Wacom%20PenPartner

Signed-off-by: Michael Trimarchi 
Co-developed-by: Michael Trimarchi 
Signed-off-by: Dario Binacchi 
Message-Id: <20220112090125.381364-1-dario.binac...@amarulasolutions.com>
Signed-off-by: Gerd Hoffmann 
---
 hw/usb/dev-wacom.c | 72 +-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/hw/usb/dev-wacom.c b/hw/usb/dev-wacom.c
index ed687bc9f1eb..8323650c6a4d 100644
--- a/hw/usb/dev-wacom.c
+++ b/hw/usb/dev-wacom.c
@@ -69,6 +69,65 @@ static const USBDescStrings desc_strings = {
 [STR_SERIALNUMBER] = "1",
 };
 
+static const uint8_t qemu_wacom_hid_report_descriptor[] = {
+0x05, 0x01,  /* Usage Page (Desktop) */
+0x09, 0x02,  /* Usage (Mouse) */
+0xa1, 0x01,  /* Collection (Application) */
+0x85, 0x01,  /*Report ID (1) */
+0x09, 0x01,  /*Usage (Pointer) */
+0xa1, 0x00,  /*Collection (Physical) */
+0x05, 0x09,  /*   Usage Page (Button) */
+0x19, 0x01,  /*   Usage Minimum (01h) */
+0x29, 0x03,  /*   Usage Maximum (03h) */
+0x15, 0x00,  /*   Logical Minimum (0) */
+0x25, 0x01,  /*   Logical Maximum (1) */
+0x95, 0x03,  /*   Report Count (3) */
+0x75, 0x01,  /*   Report Size (1) */
+0x81, 0x02,  /*   Input (Data, Variable, Absolute) */
+0x95, 0x01,  /*   Report Count (1) */
+0x75, 0x05,  /*   Report Size (5) */
+0x81, 0x01,  /*   Input (Constant) */
+0x05, 0x01,  /*   Usage Page (Desktop) */
+0x09, 0x30,  /*   Usage (X) */
+0x09, 0x31,  /*   Usage (Y) */
+0x09, 0x38,  /*   Usage (Wheel) */
+0x15, 0x81,  /*   Logical Minimum (-127) */
+0x25, 0x7f,  /*   Logical Maximum (127) */
+0x75, 0x08,  /*   Report Size (8) */
+0x95, 0x03,  /*   Report Count (3) */
+0x81, 0x06,  /*   Input (Data, Variable, Relative) */
+0x95, 0x03,  /*   Report Count (3) */
+0x81, 0x01,  /*   Input (Constant) */
+0xc0,/*End Collection */
+0xc0,/* End Collection */
+0x05, 0x0d,  /* Usage Page (Digitizer) */
+0x09, 0x01,  /* Usage (Digitizer) */
+0xa1, 0x01,  /* Collection (Application) */
+0x85, 0x02,  /*Report ID (2) */
+0xa1, 0x00,  /*Collection (Physical) */
+0x06, 0x00, 0xff,/*   Usage Page (ff00h), vendor-defined */
+0x09, 0x01,  /*   Usage (01h) */
+0x15, 0x00,  /*   Logical Minimum (0) */
+0x26, 0xff, 0x00,/*   Logical Maximum (255) */
+0x75, 0x08,  /*   Report Size (8) */
+0x95, 0x07,  /*   Report Count (7) */
+0x81, 0x02,  /*   Input (Data, Variable, Absolute) */
+0xc0,/*End Collection */
+0x09, 0x01,  /*Usage (01h) */
+0x85, 0x63,  /*Report ID (99) */
+0x95, 0x07,  /*Report Count (7) */
+0x81, 0x02,  /*Input (Data, Variable, Absolute) */
+0x09, 0x01,  /*Usage (01h) */
+0x85, 0x02,  /*Report ID (2) */
+0x95, 0x01,  /*Report Count (1) */
+0xb1, 0x02,  /*Feature (Variable) */
+0x09, 0x01,  /*Usage (01h) */
+0x85, 0x03,  /*Report ID (3) */
+0x95, 0x01,  /*Report Count (1) */
+0xb1, 0x02,  /*Feature (Variable) */
+0xc0 /* End Collection */
+};
+
 static const USBDescIface desc_iface_wacom = {
 .bInterfaceNumber  = 0,
 .bNumEndpoints = 1,
@@ -86,7 +145,7 @@ static const USBDescIface desc_iface_wacom = {
 0x00,  /*  u8  country_code */
 0x01,  /*  u8  num_descriptors */
 USB_DT_REPORT, /*  u8  type: Report */
-0x6e, 0,   /*  u16 len */
+sizeof(qemu_wacom_hid_report_descriptor), 0, /*  u16 len */
 },
 },
 },
@@ -266,6 +325,17 @@ static void usb_wacom_handle_control(USBDevice *dev, 
USBPacket *p,
 }
 
 switch (request) {
+case InterfaceRequest | USB_REQ_GET_DESCRIPTOR:
+switch (value >> 8) {
+case 0x22:
+memcpy(data, qemu_wacom_hid_report_descriptor,
+   sizeof(qemu_wacom_hid_report_descriptor));
+p->actual_length = sizeof(qemu_wacom_hid_report_descriptor);
+break;
+default:
+return;
+}
+break;
 case WACOM_SET_REPORT:
 if (s->mouse_grabbed) {
 qemu_remove_mouse_event_handler(s->eh_entry);
-- 
2.34.1

[PULL 10/20] hw/display: Rename VGA_ISA_MM -> VGA_MMIO

2022-01-13 Thread Gerd Hoffmann

From: Philippe Mathieu-Daudé 

There is no ISA bus part in the MMIO VGA device, so rename:

 *  hw/display/vga-isa-mm.c -> hw/display/vga-mmio.c
 *  CONFIG_VGA_ISA_MM -> CONFIG_VGA_MMIO
 *  ISAVGAMMState -> VGAMmioState
 *  isa_vga_mm_init() -> vga_mmio_init()

Reviewed-by: BALATON Zoltan 
Reviewed-by: Thomas Huth 
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20211206224528.563588-2-f4...@amsat.org>
Signed-off-by: Gerd Hoffmann 
---
 include/hw/display/vga.h|  5 ++---
 hw/display/{vga-isa-mm.c => vga-mmio.c} | 19 +--
 hw/mips/jazz.c  |  2 +-
 configs/devices/mips-softmmu/common.mak |  2 +-
 hw/display/Kconfig  |  2 +-
 hw/display/meson.build  |  2 +-
 hw/mips/Kconfig |  2 +-
 7 files changed, 16 insertions(+), 18 deletions(-)
 rename hw/display/{vga-isa-mm.c => vga-mmio.c} (90%)

diff --git a/include/hw/display/vga.h b/include/hw/display/vga.h
index 5f7825e0e368..c16a5c26dae9 100644
--- a/include/hw/display/vga.h
+++ b/include/hw/display/vga.h
@@ -24,8 +24,7 @@ enum vga_retrace_method {
 
 extern enum vga_retrace_method vga_retrace_method;
 
-int isa_vga_mm_init(hwaddr vram_base,
-hwaddr ctrl_base, int it_shift,
-MemoryRegion *address_space);
+int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
+  int it_shift, MemoryRegion *address_space);
 
 #endif
diff --git a/hw/display/vga-isa-mm.c b/hw/display/vga-mmio.c
similarity index 90%
rename from hw/display/vga-isa-mm.c
rename to hw/display/vga-mmio.c
index 7321b7a06d59..4ffe3afe32db 100644
--- a/hw/display/vga-isa-mm.c
+++ b/hw/display/vga-mmio.c
@@ -1,5 +1,5 @@
 /*
- * QEMU ISA MM VGA Emulator.
+ * QEMU MMIO VGA Emulator.
  *
  * Copyright (c) 2003 Fabrice Bellard
  *
@@ -32,15 +32,15 @@
 
 #define VGA_RAM_SIZE (8 * MiB)
 
-typedef struct ISAVGAMMState {
+typedef struct VGAMmioState {
 VGACommonState vga;
 int it_shift;
-} ISAVGAMMState;
+} VGAMmioState;
 
 /* Memory mapped interface */
 static uint64_t vga_mm_read(void *opaque, hwaddr addr, unsigned size)
 {
-ISAVGAMMState *s = opaque;
+VGAMmioState *s = opaque;
 
 return vga_ioport_read(>vga, addr >> s->it_shift) &
 MAKE_64BIT_MASK(0, size * 8);
@@ -49,7 +49,7 @@ static uint64_t vga_mm_read(void *opaque, hwaddr addr, 
unsigned size)
 static void vga_mm_write(void *opaque, hwaddr addr, uint64_t value,
  unsigned size)
 {
-ISAVGAMMState *s = opaque;
+VGAMmioState *s = opaque;
 
 vga_ioport_write(>vga, addr >> s->it_shift,
  value & MAKE_64BIT_MASK(0, size * 8));
@@ -65,7 +65,7 @@ static const MemoryRegionOps vga_mm_ctrl_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static void vga_mm_init(ISAVGAMMState *s, hwaddr vram_base,
+static void vga_mm_init(VGAMmioState *s, hwaddr vram_base,
 hwaddr ctrl_base, int it_shift,
 MemoryRegion *address_space)
 {
@@ -91,11 +91,10 @@ static void vga_mm_init(ISAVGAMMState *s, hwaddr vram_base,
 memory_region_set_coalescing(vga_io_memory);
 }
 
-int isa_vga_mm_init(hwaddr vram_base,
-hwaddr ctrl_base, int it_shift,
-MemoryRegion *address_space)
+int vga_mmio_init(hwaddr vram_base, hwaddr ctrl_base,
+  int it_shift, MemoryRegion *address_space)
 {
-ISAVGAMMState *s;
+VGAMmioState *s;
 
 s = g_malloc0(sizeof(*s));
 
diff --git a/hw/mips/jazz.c b/hw/mips/jazz.c
index f5a26e174d58..8f345afd137a 100644
--- a/hw/mips/jazz.c
+++ b/hw/mips/jazz.c
@@ -274,7 +274,7 @@ static void mips_jazz_init(MachineState *machine,
 }
 break;
 case JAZZ_PICA61:
-isa_vga_mm_init(0x4000, 0x6000, 0, get_system_memory());
+vga_mmio_init(0x4000, 0x6000, 0, get_system_memory());
 break;
 default:
 break;
diff --git a/configs/devices/mips-softmmu/common.mak 
b/configs/devices/mips-softmmu/common.mak
index 752b62b1e636..d2202c839e03 100644
--- a/configs/devices/mips-softmmu/common.mak
+++ b/configs/devices/mips-softmmu/common.mak
@@ -7,7 +7,7 @@ CONFIG_ISA_BUS=y
 CONFIG_PCI=y
 CONFIG_PCI_DEVICES=y
 CONFIG_VGA_ISA=y
-CONFIG_VGA_ISA_MM=y
+CONFIG_VGA_MMIO=y
 CONFIG_VGA_CIRRUS=y
 CONFIG_VMWARE_VGA=y
 CONFIG_SERIAL=y
diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index a2306b67d87c..a1b159becd76 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -49,7 +49,7 @@ config VGA_ISA
 depends on ISA_BUS
 select VGA
 
-config VGA_ISA_MM
+config VGA_MMIO
 bool
 select VGA
 
diff --git a/hw/display/meson.build b/hw/display/meson.build
index 861c43ff9847..adc53dd8b6cc 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
 
 softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
 softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true:

[PULL 03/20] ui/vnc.c: Fixed a deadlock bug.

2022-01-13 Thread Gerd Hoffmann

From: Rao Lei 

The GDB stack is as follows:
(gdb) bt
0  __lll_lock_wait (futex=futex@entry=0x56211df20360, private=0) at 
lowlevellock.c:52
1  0x7f263caf20a3 in __GI___pthread_mutex_lock (mutex=0x56211df20360) at 
../nptl/pthread_mutex_lock.c:80
2  0x56211a757364 in qemu_mutex_lock_impl (mutex=0x56211df20360, 
file=0x56211a804857 "../ui/vnc-jobs.h", line=60)
at ../util/qemu-thread-posix.c:80
3  0x56211a0ef8c7 in vnc_lock_output (vs=0x56211df14200) at 
../ui/vnc-jobs.h:60
4  0x56211a0efcb7 in vnc_clipboard_send (vs=0x56211df14200, count=1, 
dwords=0x7ffdf1701338) at ../ui/vnc-clipboard.c:138
5  0x56211a0f0129 in vnc_clipboard_notify (notifier=0x56211df244c8, 
data=0x56211dd1bbf0) at ../ui/vnc-clipboard.c:209
6  0x56211a75dde8 in notifier_list_notify (list=0x56211afa17d0 
, data=0x56211dd1bbf0) at ../util/notify.c:39
7  0x56211a0bf0e6 in qemu_clipboard_update (info=0x56211dd1bbf0) at 
../ui/clipboard.c:50
8  0x56211a0bf05d in qemu_clipboard_peer_release (peer=0x56211df244c0, 
selection=QEMU_CLIPBOARD_SELECTION_CLIPBOARD)
at ../ui/clipboard.c:41
9  0x56211a0bef9b in qemu_clipboard_peer_unregister (peer=0x56211df244c0) 
at ../ui/clipboard.c:19
10 0x56211a0d45f3 in vnc_disconnect_finish (vs=0x56211df14200) at 
../ui/vnc.c:1358
11 0x56211a0d4c9d in vnc_client_read (vs=0x56211df14200) at ../ui/vnc.c:1611
12 0x56211a0d4df8 in vnc_client_io (ioc=0x56211ce70690, condition=G_IO_IN, 
opaque=0x56211df14200) at ../ui/vnc.c:1649
13 0x56211a5b976c in qio_channel_fd_source_dispatch
(source=0x56211ce50a00, callback=0x56211a0d4d71 , 
user_data=0x56211df14200) at ../io/channel-watch.c:84
14 0x7f263ccede8e in g_main_context_dispatch () at 
/lib/x86_64-linux-gnu/libglib-2.0.so.0
15 0x56211a77d4a1 in glib_pollfds_poll () at ../util/main-loop.c:232
16 0x56211a77d51f in os_host_main_loop_wait (timeout=958545) at 
../util/main-loop.c:255
17 0x56211a77d630 in main_loop_wait (nonblocking=0) at 
../util/main-loop.c:531
18 0x56211a45bc8e in qemu_main_loop () at ../softmmu/runstate.c:726
19 0x56211a0b45fa in main (argc=69, argv=0x7ffdf1701778, 
envp=0x7ffdf17019a8) at ../softmmu/main.c:50

From the call trace, we can see it is a deadlock bug.
vnc_disconnect_finish will acquire the output_mutex.
But, the output_mutex will be acquired again in vnc_clipboard_send.
Repeated locking will cause deadlock. So, I move
qemu_clipboard_peer_unregister() behind vnc_unlock_output();

Fixes: 0bf41cab93e ("ui/vnc: clipboard support")
Signed-off-by: Lei Rao 
Reviewed-by: Marc-André Lureau 
Message-Id: <20220105020808.597325-1-lei@intel.com>
Signed-off-by: Gerd Hoffmann 
---
 ui/vnc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ui/vnc.c b/ui/vnc.c
index 1ed1c7efc688..3ccd33dedcc8 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs)
 /* last client gone */
 vnc_update_server_surface(vs->vd);
 }
+vnc_unlock_output(vs);
+
 if (vs->cbpeer.notifier.notify) {
 qemu_clipboard_peer_unregister(>cbpeer);
 }
 
-vnc_unlock_output(vs);
-
 qemu_mutex_destroy(>output_mutex);
 if (vs->bh != NULL) {
 qemu_bh_delete(vs->bh);
-- 
2.34.1

[PULL 00/20] Kraxel 20220114 patches

2022-01-13 Thread Gerd Hoffmann

The following changes since commit 91f5f7a5df1fda8c34677a7c49ee8a4bb5b56a36:

  Merge remote-tracking branch 
'remotes/lvivier-gitlab/tags/linux-user-for-7.0-pull-request' into staging 
(2022-01-12 11:51:47 +0000)

are available in the Git repository at:

  git://git.kraxel.org/qemu tags/kraxel-20220114-pull-request

for you to fetch changes up to 17f6315ef883a142b6a41a491b63a6554e784a5c:

  ui/input-legacy: pass horizontal scroll information (2022-01-13 15:33:18 
+0100)


- bugfixes for ui, usb, audio, display
- change default display resolution
- add horizontal scrolling support



Daniel P. Berrangé (1):
  edid: set default resolution to 1280x800 (WXGA)

Dario Binacchi (1):
  hw/usb/dev-wacom: add missing HID descriptor

Dmitry Petrov (5):
  ps2: Initial horizontal scroll support
  ui/cocoa: pass horizontal scroll information to the device code
  ui/gtk: pass horizontal scroll information to the device code
  ui/sdl2: pass horizontal scroll information to the device code
  ui/input-legacy: pass horizontal scroll information

Gerd Hoffmann (1):
  uas: add missing return

Marc-André Lureau (2):
  ui/dbus: fix buffer-overflow detected by ASAN
  ui: fix gtk clipboard clear assertion

Philippe Mathieu-Daudé (4):
  hw/display: Rename VGA_ISA_MM -> VGA_MMIO
  hw/display/vga-mmio: Inline vga_mm_init()
  hw/display/vga-mmio: QOM'ify vga_mmio_init() as TYPE_VGA_MMIO
  hw/mips/jazz: Inline vga_mmio_init() and remove it

Rao Lei (1):
  ui/vnc.c: Fixed a deadlock bug.

Satyeshwar Singh (1):
  edid: Added support for 4k@60 Hz monitor

Volker Rümelin (3):
  jackaudio: use ifdefs to hide unavailable functions
  dsoundaudio: fix crackling audio recordings
  hw/audio/intel-hda: fix stream reset

Zhenwei Pi (1):
  usb: allow max 8192 bytes for desc

 hw/usb/desc.h   |   1 +
 include/hw/display/vga.h|   6 +-
 include/hw/virtio/virtio-gpu.h  |   4 +-
 audio/dsoundaudio.c |   7 +-
 audio/jackaudio.c   |   4 +
 hw/audio/intel-hda.c|   2 +-
 hw/display/edid-generate.c  |   7 +-
 hw/display/vga-isa-mm.c | 114 ---
 hw/display/vga-mmio.c   | 139 
 hw/input/ps2.c  |  57 --
 hw/mips/jazz.c  |   9 +-
 hw/usb/desc.c   |  15 +--
 hw/usb/dev-uas.c|   1 +
 hw/usb/dev-wacom.c  |  72 +++-
 qemu-edid.c |   4 +-
 ui/dbus.c   |   1 +
 ui/gtk-clipboard.c  |  19 ++--
 ui/gtk.c|  54 +++--
 ui/input-legacy.c   |  15 +++
 ui/sdl2.c   |   5 +
 ui/vnc.c|   4 +-
 configs/devices/mips-softmmu/common.mak |   2 +-
 hw/display/Kconfig  |   2 +-
 hw/display/meson.build  |   2 +-
 hw/mips/Kconfig |   2 +-
 qapi/ui.json|   2 +-
 ui/cocoa.m  |  18 ++-
 27 files changed, 387 insertions(+), 181 deletions(-)
 delete mode 100644 hw/display/vga-isa-mm.c
 create mode 100644 hw/display/vga-mmio.c

-- 
2.34.1

Re: [PATCH v6 03/23] target/riscv: Implement hgeie and hgeip CSRs

2022-01-13 Thread Frank Chang

Anup Patel  於 2021年12月30日 週四 下午8:45寫道：

> From: Anup Patel 
>
> The hgeie and hgeip CSRs are required for emulating an external
> interrupt controller capable of injecting virtual external interrupt
> to Guest/VM running at VS-level.
>
> Signed-off-by: Anup Patel 
> Signed-off-by: Anup Patel 
> Reviewed-by: Alistair Francis 
> ---
>  target/riscv/cpu.c| 61 ---
>  target/riscv/cpu.h|  5 
>  target/riscv/cpu_bits.h   |  1 +
>  target/riscv/cpu_helper.c | 37 ++--
>  target/riscv/csr.c| 43 ++-
>  target/riscv/machine.c|  6 ++--
>  6 files changed, 118 insertions(+), 35 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 7d92ce7555..f4dbc766c2 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -582,23 +582,49 @@ static void riscv_cpu_realize(DeviceState *dev,
> Error **errp)
>  static void riscv_cpu_set_irq(void *opaque, int irq, int level)
>  {
>  RISCVCPU *cpu = RISCV_CPU(opaque);
> +CPURISCVState *env = >env;
>
> -switch (irq) {
> -case IRQ_U_SOFT:
> -case IRQ_S_SOFT:
> -case IRQ_VS_SOFT:
> -case IRQ_M_SOFT:
> -case IRQ_U_TIMER:
> -case IRQ_S_TIMER:
> -case IRQ_VS_TIMER:
> -case IRQ_M_TIMER:
> -case IRQ_U_EXT:
> -case IRQ_S_EXT:
> -case IRQ_VS_EXT:
> -case IRQ_M_EXT:
> -riscv_cpu_update_mip(cpu, 1 << irq, BOOL_TO_MASK(level));
> -break;
> -default:
> +if (irq < IRQ_LOCAL_MAX) {
> +switch (irq) {
> +case IRQ_U_SOFT:
> +case IRQ_S_SOFT:
> +case IRQ_VS_SOFT:
> +case IRQ_M_SOFT:
> +case IRQ_U_TIMER:
> +case IRQ_S_TIMER:
> +case IRQ_VS_TIMER:
> +case IRQ_M_TIMER:
> +case IRQ_U_EXT:
> +case IRQ_S_EXT:
> +case IRQ_VS_EXT:
> +case IRQ_M_EXT:
> +riscv_cpu_update_mip(cpu, 1 << irq, BOOL_TO_MASK(level));
> +break;
> +default:
> +g_assert_not_reached();
> +}
> +} else if (irq < (IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX)) {
> +/* Require H-extension for handling guest local interrupts */
> +if (!riscv_has_ext(env, RVH)) {
> +g_assert_not_reached();
> +}
> +
> +/* Compute bit position in HGEIP CSR */
> +irq = irq - IRQ_LOCAL_MAX + 1;
> +if (env->geilen < irq) {
> +g_assert_not_reached();
> +}
> +
> +/* Update HGEIP CSR */
> +env->hgeip &= ~((target_ulong)1 << irq);
> +if (level) {
> +env->hgeip |= (target_ulong)1 << irq;
> +}
> +
> +/* Update mip.SGEIP bit */
> +riscv_cpu_update_mip(cpu, MIP_SGEIP,
> + BOOL_TO_MASK(!!(env->hgeie & env->hgeip)));
> +} else {
>  g_assert_not_reached();
>  }
>  }
> @@ -611,7 +637,8 @@ static void riscv_cpu_init(Object *obj)
>  cpu_set_cpustate_pointers(cpu);
>
>  #ifndef CONFIG_USER_ONLY
> -qdev_init_gpio_in(DEVICE(cpu), riscv_cpu_set_irq, IRQ_LOCAL_MAX);
> +qdev_init_gpio_in(DEVICE(cpu), riscv_cpu_set_irq,
> +  IRQ_LOCAL_MAX + IRQ_LOCAL_GUEST_MAX);
>  #endif /* CONFIG_USER_ONLY */
>  }
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index dc10f27093..6895ac138c 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -151,6 +151,7 @@ struct CPURISCVState {
>  target_ulong priv;
>  /* This contains QEMU specific information about the virt state. */
>  target_ulong virt;
> +target_ulong geilen;
>  target_ulong resetvec;
>
>  target_ulong mhartid;
> @@ -188,6 +189,8 @@ struct CPURISCVState {
>  target_ulong htval;
>  target_ulong htinst;
>  target_ulong hgatp;
> +target_ulong hgeie;
> +target_ulong hgeip;
>  uint64_t htimedelta;
>
>  /* Virtual CSRs */
> @@ -355,6 +358,8 @@ int riscv_cpu_write_elf32_note(WriteCoreDumpFunction
> f, CPUState *cs,
>  int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg);
>  int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg);
>  bool riscv_cpu_fp_enabled(CPURISCVState *env);
> +target_ulong riscv_cpu_get_geilen(CPURISCVState *env);
> +void riscv_cpu_set_geilen(CPURISCVState *env, target_ulong geilen);
>  bool riscv_cpu_vector_enabled(CPURISCVState *env);
>  bool riscv_cpu_virt_enabled(CPURISCVState *env);
>  void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable);
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index fe276d4b34..f32159a19d 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -536,6 +536,7 @@ typedef enum RISCVException {
>  #define IRQ_M_EXT  11
>  #define IRQ_S_GEXT 12
>  #define IRQ_LOCAL_MAX  16
> +#define IRQ_LOCAL_GUEST_MAX(TARGET_LONG_BITS - 1)
>
>  /* mip masks */
>  #define MIP_USIP

Re: [PATCH 05/30] bsd-user/arm/arget_arch_cpu.h: Move EXCP_DEBUG and EXCP_BKPT together

2022-01-13 Thread Warner Losh

On Thu, Jan 13, 2022 at 10:13 AM Peter Maydell 
wrote:

> On Sun, 9 Jan 2022 at 16:26, Warner Losh  wrote:
> >
> > Implement EXCP_DEBUG and EXCP_BKPT the same, as is done in
> > linux-user. The prior adjustment of register 15 isn't needed, so remove
> > that. Remove a redunant comment (that code in FreeBSD never handled
> > break points).
> >
> > Signed-off-by: Warner Losh 
> > ---
> >  bsd-user/arm/target_arch_cpu.h | 23 +++
> >  1 file changed, 3 insertions(+), 20 deletions(-)
> >
> > diff --git a/bsd-user/arm/target_arch_cpu.h
> b/bsd-user/arm/target_arch_cpu.h
> > index c526fc73502..05b19ce6119 100644
> > --- a/bsd-user/arm/target_arch_cpu.h
> > +++ b/bsd-user/arm/target_arch_cpu.h
> > @@ -21,6 +21,7 @@
> >  #define _TARGET_ARCH_CPU_H_
> >
> >  #include "target_arch.h"
> > +#include "signal-common.h"
> >
> >  #define TARGET_DEFAULT_CPU_MODEL "any"
> >
> > @@ -64,19 +65,7 @@ static inline void target_cpu_loop(CPUARMState *env)
> >  }
> >  break;
> >  case EXCP_SWI:
> > -case EXCP_BKPT:
> >  {
> > -/*
> > - * system call
> > - * See arm/arm/trap.c cpu_fetch_syscall_args()
> > - */
> > -if (trapnr == EXCP_BKPT) {
> > -if (env->thumb) {
> > -env->regs[15] += 2;
> > -} else {
> > -env->regs[15] += 4;
> > -}
> > -}
>
> So the previous code was implementing BKPT as a way to do
> a syscall (added in commit 8d450c9a30). Was that just a mistake ?
>

I did some digging and I'm at a loss for why this code was ever here.


> >  n = env->regs[7];
> >  if (bsd_type == target_freebsd) {
> >  int ret;
> > @@ -171,14 +160,8 @@ static inline void target_cpu_loop(CPUARMState *env)
> >  queue_signal(env, info.si_signo, );
> >  break;
> >  case EXCP_DEBUG:
> > -{
> > -
> > -info.si_signo = TARGET_SIGTRAP;
> > -info.si_errno = 0;
> > -info.si_code = TARGET_TRAP_BRKPT;
> > -info.si_addr = env->exception.vaddress;
> > -queue_signal(env, info.si_signo, );
> > -}
> > +case EXCP_BKPT:
> > +force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT,
> env->regs[15]);
> >  break;
> >  case EXCP_YIELD:
> >  /* nothing to do here for user-mode, just resume guest code
> */
>
> Looks like it now matches the freebsd kernel behaviour, anyway.
>

Yea. That's why I went ahead and made the change rather than slavishly
carry it
over for something weird I couldn't find out about... I think it's an old
mistake...
I'll update the commit message to specifically note it.


> Reviewed-by: Peter Maydell 
>
> thanks
> -- PMM
>

Re: [PATCH v3 kvm/queue 14/16] KVM: Handle page fault for private memory

2022-01-13 Thread Yan Zhao

hi Sean,
Sorry for the late reply. I just saw this mail in my mailbox.

On Wed, Jan 05, 2022 at 08:52:39PM +0000, Sean Christopherson wrote:
> On Wed, Jan 05, 2022, Yan Zhao wrote:
> > Sorry, maybe I didn't express it clearly.
> > 
> > As in the kvm_faultin_pfn_private(), 
> > static bool kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
> > struct kvm_page_fault *fault,
> > bool *is_private_pfn, int *r)
> > {
> > int order;
> > int mem_convert_type;
> > struct kvm_memory_slot *slot = fault->slot;
> > long pfn = kvm_memfd_get_pfn(slot, fault->gfn, );
> > ...
> > }
> > Currently, kvm_memfd_get_pfn() is called unconditionally.
> > However, if the backend of a private memslot is not memfd, and is device
> > fd for example, a different xxx_get_pfn() is required here.
> 
> Ya, I've complained about this in a different thread[*].  This should really 
> be
> something like kvm_private_fd_get_pfn(), where the underlying ops struct can 
> point
> at any compatible backing store.
> 
> https://lore.kernel.org/all/ycumuemybxfyy...@google.com/
>
ok. 

> > Further, though mapped to a private gfn, it might be ok for QEMU to
> > access the device fd in hva-based way (or call it MMU access way, e.g.
> > read/write/mmap), it's desired that it could use the traditional to get
> > pfn without convert the range to a shared one.
> 
> No, this is expressly forbidden.  The backing store for a private gfn must not
> be accessible by userspace.  It's possible a backing store could support 
> both, but
> not concurrently, and any conversion must be done without KVM being involved.
> In other words, resolving a private gfn must either succeed or fail (exit to
> userspace), KVM cannot initiate any conversions.
>
When it comes to a device passthrough via VFIO, there might be more work
related to the device fd as a backend.

First, unlike memfd which can allocate one private fd for a set of PFNs,
and one shared fd for another set of PFNs, for device fd, it needs to open
the same physical device twice, one for shared fd, and one for private fd.

Then, for private device fd, now its ramblock has to use 
qemu_ram_alloc_from_fd()
instead of current qemu_ram_alloc_from_ptr().
And as in VFIO, this private fd is shared by several ramblocks (each locating 
from
a different base offset), the base offsets also need to be kept somewhere 
in order to call get_pfn successfully. (this info is kept in
vma through mmap() previously, so without mmap(), a new interface might
be required). 

Also, for shared device fd,  mmap() is required in order to allocate the
ramblock with qemu_ram_alloc_from_ptr(), and more importantly to make
the future gfn_to_hva, and hva_to_pfn possible.
But as the shared and private fds are based on the same physical device,
the vfio driver needs to record which vma ranges are allowed for the actual
mmap_fault, which vma area are not.

With the above changes, it only prevents the host user space from accessing
the device mapped to private GFNs.
For memory backends, host kernel space accessing is prevented via MKTME.
And for device, the device needs to do the work to disallow host kernel
space access.
However, unlike memory side, the device side would not cause any MCE. 
Thereby, host user space access to the device also would not cause MCEs, 
either. 

So, I'm not sure if the above work is worthwhile to the device fd.

> > pfn = __gfn_to_pfn_memslot(slot, fault->gfn, ...)
> > |->addr = __gfn_to_hva_many (slot, gfn,...)
> > |  pfn = hva_to_pfn (addr,...)
> > 
> > 
> > So, is it possible to recognize such kind of backends in KVM, and to get
> > the pfn in traditional way without converting them to shared?
> > e.g.
> > - specify KVM_MEM_PRIVATE_NONPROTECT to memory regions with such kind
> > of backends, or
> > - detect the fd type and check if get_pfn is provided. if no, go the
> >   traditional way.
> 
> No, because the whole point of this is to make guest private memory 
> inaccessible
> to host userspace.  Or did I misinterpret your questions?
I think the host unmap series is based on the assumption that host user
space access to the memory mapped to private guest GFNs would cause fatal
MCEs.
So, I hope that backends which will not cause this fatal error can keep
using the traditional way to get the pfn and be mapped to private GFNs at
the same time.

Thanks
Yan

Re: [PATCH 3/3] intel-iommu: PASID support

2022-01-13 Thread Jason Wang

On Fri, Jan 14, 2022 at 11:31 AM Peter Xu  wrote:
>
> On Fri, Jan 14, 2022 at 10:47:44AM +0800, Jason Wang wrote:
> >
> > 在 2022/1/13 下午1:06, Peter Xu 写道:
> > > On Wed, Jan 05, 2022 at 12:19:45PM +0800, Jason Wang wrote:
> > > > @@ -1725,11 +1780,16 @@ static bool 
> > > > vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> > > >   cc_entry->context_cache_gen = s->context_cache_gen;
> > > >   }
> > > > +/* Try to fetch slpte form IOTLB */
> > > > +if ((pasid == PCI_NO_PASID) && s->root_scalable) {
> > > > +pasid = VTD_CE_GET_RID2PASID();
> > > > +}
> > > > +
> > > >   /*
> > > >* We don't need to translate for pass-through context entries.
> > > >* Also, let's ignore IOTLB caching as well for PT devices.
> > > >*/
> > > > -if (vtd_dev_pt_enabled(s, )) {
> > > > +if (vtd_dev_pt_enabled(s, , pasid)) {
> > > >   entry->iova = addr & VTD_PAGE_MASK_4K;
> > > >   entry->translated_addr = entry->iova;
> > > >   entry->addr_mask = ~VTD_PAGE_MASK_4K;
> > > > @@ -1750,14 +1810,24 @@ static bool 
> > > > vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> > > >   return true;
> > > >   }
> > > > +iotlb_entry = vtd_lookup_iotlb(s, source_id, addr, pasid);
> > > > +if (iotlb_entry) {
> > > > +trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
> > > > + iotlb_entry->domain_id);
> > > > +slpte = iotlb_entry->slpte;
> > > > +access_flags = iotlb_entry->access_flags;
> > > > +page_mask = iotlb_entry->mask;
> > > > +goto out;
> > > > +}
> > > IIUC the iotlb lookup moved down just because the pasid==NO_PASID case 
> > > then
> > > we'll need to fetch the default pasid from the context entry.  That looks
> > > reasonable.
> > >
> > > It's just a bit of pity because logically it'll slow down iotlb hits due 
> > > to
> > > context entry operations.  When NO_PASID we could have looked up iotlb 
> > > without
> > > checking pasid at all, assuming that "default pasid" will always match.  
> > > But
> > > that is a little bit hacky.
> >
> >
> > Right, but I think you meant to do this only when scalable mode is disabled.
>
> Yes IMHO it will definitely suite for !scalable case since that's exactly what
> we did before.  What I'm also wondering is even if scalable is enabled but no
> "real" pasid is used, so if all the translations go through the default pasid
> that stored in the device context entry, then maybe we can ignore checking it.
> The latter is the "hacky" part mentioned above.

The problem I see is that we can't know what PASID is used as default
without reading the context entry?

>
> The other thing to mention is, if we postpone the iotlb lookup to be after
> context entry, then logically we can have per-device iotlb, that means we can
> replace IntelIOMMUState.iotlb with VTDAddressSpace.iotlb in the future, too,
> which can also be more efficient.

Right but we still need to limit the total slots and ATS is a better
way to deal with the IOTLB bottleneck actually.

>
> Not sure whether Michael will have a preference, for me I think either way can
> be done on top.
>
> >
> >
> > >
> > > vIOMMU seems to be mostly used for assigned devices and dpdk in 
> > > production in
> > > the future due to its slowness otherwise.. so maybe not a big deal at all.
> > >
> > > [...]
> > >
> > > > @@ -2011,7 +2083,52 @@ static void 
> > > > vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> > > >   vtd_iommu_lock(s);
> > > >   g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, 
> > > > );
> > > >   vtd_iommu_unlock(s);
> > > > -vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
> > > > +vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, 
> > > > PCI_NO_PASID);
> > > > +}
> > > > +
> > > > +static void vtd_iotlb_page_pasid_invalidate(IntelIOMMUState *s,
> > > > +uint16_t domain_id,
> > > > +hwaddr addr, uint8_t am,
> > > > +uint32_t pasid)
> > > > +{
> > > > +VTDIOTLBPageInvInfo info;
> > > > +
> > > > +trace_vtd_inv_desc_iotlb_pasid_pages(domain_id, addr, am, pasid);
> > > > +
> > > > +assert(am <= VTD_MAMV);
> > > > +info.domain_id = domain_id;
> > > > +info.addr = addr;
> > > > +info.mask = ~((1 << am) - 1);
> > > > +info.pasid = pasid;
> > > > +vtd_iommu_lock(s);
> > > > +g_hash_table_foreach_remove(s->iotlb, 
> > > > vtd_hash_remove_by_page_pasid, );
> > > > +vtd_iommu_unlock(s);
> > > > +vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
> > > Hmm, I think indeed we need a notification, but it'll be unnecessary for
> > > e.g. vfio map notifiers, because this is 1st level invalidation and at 
> > > least so
> > > far vfio map notifiers are rewalking only the 2nd level

[PULL V3 11/13] net/vmnet: implement bridged mode (vmnet-bridged)

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 net/vmnet-bridged.m | 105 +---
 1 file changed, 100 insertions(+), 5 deletions(-)

diff --git a/net/vmnet-bridged.m b/net/vmnet-bridged.m
index 4e42a90..bc499c6 100644
--- a/net/vmnet-bridged.m
+++ b/net/vmnet-bridged.m
@@ -10,16 +10,111 @@
 
 #include "qemu/osdep.h"
 #include "qapi/qapi-types-net.h"
-#include "vmnet_int.h"
-#include "clients.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "clients.h"
+#include "vmnet_int.h"
 
 #include 
 
+typedef struct VmnetBridgedState {
+  VmnetCommonState cs;
+} VmnetBridgedState;
+
+static bool validate_ifname(const char *ifname)
+{
+xpc_object_t shared_if_list = vmnet_copy_shared_interface_list();
+__block bool match = false;
+
+xpc_array_apply(
+shared_if_list,
+^bool(size_t index, xpc_object_t value) {
+  if (strcmp(xpc_string_get_string_ptr(value), ifname) == 0) {
+  match = true;
+  return false;
+  }
+  return true;
+});
+
+return match;
+}
+
+static const char *get_valid_ifnames(void)
+{
+xpc_object_t shared_if_list = vmnet_copy_shared_interface_list();
+__block char *if_list = NULL;
+
+xpc_array_apply(
+shared_if_list,
+^bool(size_t index, xpc_object_t value) {
+  if_list = g_strconcat(xpc_string_get_string_ptr(value),
+" ",
+if_list,
+NULL);
+  return true;
+});
+
+if (if_list) {
+return if_list;
+}
+return "[no interfaces]";
+}
+
+static xpc_object_t create_if_desc(const Netdev *netdev, Error **errp)
+{
+const NetdevVmnetBridgedOptions *options = &(netdev->u.vmnet_bridged);
+xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
+
+xpc_dictionary_set_uint64(
+if_desc,
+vmnet_operation_mode_key,
+VMNET_BRIDGED_MODE
+);
+
+#if defined(MAC_OS_VERSION_11_0) && \
+MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+xpc_dictionary_set_bool(
+if_desc,
+vmnet_enable_isolation_key,
+options->isolated
+);
+#else
+if (options->has_isolated) {
+error_setg(errp,
+   "vmnet-bridged.isolated feature is "
+   "unavailable: outdated vmnet.framework API");
+}
+#endif
+
+if (validate_ifname(options->ifname)) {
+xpc_dictionary_set_string(if_desc,
+  vmnet_shared_interface_name_key,
+  options->ifname);
+} else {
+return NULL;
+}
+return if_desc;
+}
+
+static NetClientInfo net_vmnet_bridged_info = {
+.type = NET_CLIENT_DRIVER_VMNET_BRIDGED,
+.size = sizeof(VmnetBridgedState),
+.receive = vmnet_receive_common,
+.cleanup = vmnet_cleanup_common,
+};
+
 int net_init_vmnet_bridged(const Netdev *netdev, const char *name,
NetClientState *peer, Error **errp)
 {
-  error_setg(errp, "vmnet-bridged is not implemented yet");
-  return -1;
+NetClientState *nc = qemu_new_net_client(_vmnet_bridged_info,
+ peer, "vmnet-bridged", name);
+xpc_object_t if_desc = create_if_desc(netdev, errp);;
+
+if (!if_desc) {
+error_setg(errp,
+   "unsupported ifname, should be one of: %s",
+   get_valid_ifnames());
+return -1;
+}
+
+return vmnet_if_create(nc, if_desc, errp, NULL);
 }
-- 
2.7.4

[PULL V3 10/13] net/vmnet: implement host mode (vmnet-host)

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 net/vmnet-host.c | 110 ---
 1 file changed, 104 insertions(+), 6 deletions(-)

diff --git a/net/vmnet-host.c b/net/vmnet-host.c
index 4a5ef99..501b677 100644
--- a/net/vmnet-host.c
+++ b/net/vmnet-host.c
@@ -9,16 +9,114 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/uuid.h"
 #include "qapi/qapi-types-net.h"
-#include "vmnet_int.h"
-#include "clients.h"
-#include "qemu/error-report.h"
 #include "qapi/error.h"
+#include "clients.h"
+#include "vmnet_int.h"
 
 #include 
 
+typedef struct VmnetHostState {
+  VmnetCommonState cs;
+  QemuUUID network_uuid;
+} VmnetHostState;
+
+static xpc_object_t create_if_desc(const Netdev *netdev,
+   NetClientState *nc,
+   Error **errp)
+{
+const NetdevVmnetHostOptions *options = &(netdev->u.vmnet_host);
+
+xpc_object_t if_desc = xpc_dictionary_create(NULL, NULL, 0);
+
+xpc_dictionary_set_uint64(
+if_desc,
+vmnet_operation_mode_key,
+VMNET_HOST_MODE
+);
+
+#if defined(MAC_OS_VERSION_11_0) && \
+MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0
+
+VmnetCommonState *cs = DO_UPCAST(VmnetCommonState, nc, nc);
+VmnetHostState *hs = DO_UPCAST(VmnetHostState, cs, cs);
+
+xpc_dictionary_set_bool(
+if_desc,
+vmnet_enable_isolation_key,
+options->isolated
+);
+
+if (options->has_net_uuid) {
+if (qemu_uuid_parse(options->net_uuid, >network_uuid) < 0) {
+error_setg(errp, "Invalid UUID provided in 'net-uuid'");
+}
+
+xpc_dictionary_set_uuid(
+if_desc,
+vmnet_network_identifier_key,
+hs->network_uuid.data
+);
+}
+#else
+if (options->has_isolated) {
+error_setg(errp,
+   "vmnet-host.isolated feature is "
+   "unavailable: outdated vmnet.framework API");
+}
+
+if (options->has_net_uuid) {
+error_setg(errp,
+   "vmnet-host.net-uuid feature is "
+   "unavailable: outdated vmnet.framework API");
+}
+#endif
+
+if (options->has_start_address ||
+options->has_end_address ||
+options->has_subnet_mask) {
+
+if (options->has_start_address &&
+options->has_end_address &&
+options->has_subnet_mask) {
+
+xpc_dictionary_set_string(if_desc,
+  vmnet_start_address_key,
+  options->start_address);
+xpc_dictionary_set_string(if_desc,
+  vmnet_end_address_key,
+  options->end_address);
+xpc_dictionary_set_string(if_desc,
+  vmnet_subnet_mask_key,
+  options->subnet_mask);
+} else {
+error_setg(
+errp,
+"'start-address', 'end-address', 'subnet-mask' "
+"should be provided together"
+);
+}
+}
+
+return if_desc;
+}
+
+static NetClientInfo net_vmnet_host_info = {
+.type = NET_CLIENT_DRIVER_VMNET_HOST,
+.size = sizeof(VmnetHostState),
+.receive = vmnet_receive_common,
+.cleanup = vmnet_cleanup_common,
+};
+
 int net_init_vmnet_host(const Netdev *netdev, const char *name,
-NetClientState *peer, Error **errp) {
-  error_setg(errp, "vmnet-host is not implemented yet");
-  return -1;
+NetClientState *peer, Error **errp)
+{
+NetClientState *nc;
+xpc_object_t if_desc;
+
+nc = qemu_new_net_client(_vmnet_host_info,
+ peer, "vmnet-host", name);
+if_desc = create_if_desc(netdev, nc, errp);
+return vmnet_if_create(nc, if_desc, errp, NULL);
 }
-- 
2.7.4

[PULL V3 09/13] net/vmnet: implement shared mode (vmnet-shared)

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Interaction with vmnet.framework in different modes
differs only at the configuration stage, so we can create
common `send`, `receive`, etc. procedures and reuse them.

vmnet.framework supports iov, but writing more than
one iov into a vmnet interface fails with
'VMNET_INVALID_ARGUMENT'. Collecting the provided iovs into
one and passing it to vmnet works fine. That's the
reason why receive_iov() is left unimplemented. It still
works with good enough performance with only .receive()
implemented.

Also, there is no way to unsubscribe from receiving vmnet
packets other than registering and unregistering the event
callback, or simply dropping packets by ignoring and
not processing them when the related flag is set. Here we
use the second way.

Signed-off-by: Phillip Tennen 
Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 net/vmnet-common.m | 314 +
 net/vmnet-shared.c |  83 +-
 net/vmnet_int.h|  23 
 3 files changed, 416 insertions(+), 4 deletions(-)

diff --git a/net/vmnet-common.m b/net/vmnet-common.m
index f949eb6..e780985 100644
--- a/net/vmnet-common.m
+++ b/net/vmnet-common.m
@@ -10,6 +10,8 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/log.h"
 #include "qapi/qapi-types-net.h"
 #include "vmnet_int.h"
 #include "clients.h"
@@ -17,3 +19,315 @@
 #include "qapi/error.h"
 
 #include 
+#include 
+
+#ifdef DEBUG
+#define D(x) x
+#define D_LOG(...) qemu_log(__VA_ARGS__)
+#else
+#define D(x) do { } while (0)
+#define D_LOG(...) do { } while (0)
+#endif
+
+typedef struct vmpktdesc vmpktdesc_t;
+typedef struct iovec iovec_t;
+
+static void vmnet_set_send_enabled(VmnetCommonState *s, bool enable)
+{
+s->send_enabled = enable;
+}
+
+
+static void vmnet_send_completed(NetClientState *nc, ssize_t len)
+{
+VmnetCommonState *s = DO_UPCAST(VmnetCommonState, nc, nc);
+vmnet_set_send_enabled(s, true);
+}
+
+
+static void vmnet_send(NetClientState *nc,
+   interface_event_t event_id,
+   xpc_object_t event)
+{
+assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE);
+
+VmnetCommonState *s;
+uint64_t packets_available;
+
+struct iovec *iov;
+struct vmpktdesc *packets;
+int pkt_cnt;
+int i;
+
+vmnet_return_t if_status;
+ssize_t size;
+
+s = DO_UPCAST(VmnetCommonState, nc, nc);
+
+packets_available = xpc_dictionary_get_uint64(
+event,
+vmnet_estimated_packets_available_key
+);
+
+pkt_cnt = (packets_available < VMNET_PACKETS_LIMIT) ?
+  packets_available :
+  VMNET_PACKETS_LIMIT;
+
+
+iov = s->iov_buf;
+packets = s->packets_buf;
+
+for (i = 0; i < pkt_cnt; ++i) {
+packets[i].vm_pkt_size = s->max_packet_size;
+packets[i].vm_pkt_iovcnt = 1;
+packets[i].vm_flags = 0;
+}
+
+if_status = vmnet_read(s->vmnet_if, packets, _cnt);
+if (if_status != VMNET_SUCCESS) {
+error_printf("vmnet: read failed: %s\n",
+ vmnet_status_map_str(if_status));
+}
+qemu_mutex_lock_iothread();
+for (i = 0; i < pkt_cnt; ++i) {
+size = qemu_send_packet_async(nc,
+  iov[i].iov_base,
+  packets[i].vm_pkt_size,
+  vmnet_send_completed);
+if (size == 0) {
+vmnet_set_send_enabled(s, false);
+} else if (size < 0) {
+break;
+}
+}
+qemu_mutex_unlock_iothread();
+
+}
+
+
+static void vmnet_register_event_callback(VmnetCommonState *s)
+{
+dispatch_queue_t avail_pkt_q = dispatch_queue_create(
+"org.qemu.vmnet.if_queue",
+DISPATCH_QUEUE_SERIAL
+);
+
+vmnet_interface_set_event_callback(
+s->vmnet_if,
+VMNET_INTERFACE_PACKETS_AVAILABLE,
+avail_pkt_q,
+^(interface_event_t event_id, xpc_object_t event) {
+  if (s->send_enabled) {
+  vmnet_send(>nc, event_id, event);
+  }
+});
+}
+
+
+static void vmnet_bufs_init(VmnetCommonState *s)
+{
+int i;
+struct vmpktdesc *packets;
+struct iovec *iov;
+
+packets = s->packets_buf;
+iov = s->iov_buf;
+
+for (i = 0; i < VMNET_PACKETS_LIMIT; ++i) {
+iov[i].iov_len = s->max_packet_size;
+iov[i].iov_base = g_malloc0(iov[i].iov_len);
+packets[i].vm_pkt_iov = iov + i;
+}
+}
+
+
+const char *vmnet_status_map_str(vmnet_return_t status)
+{
+switch (status) {
+case VMNET_SUCCESS:
+return "success";
+case VMNET_FAILURE:
+return "general failure";
+case VMNET_MEM_FAILURE:
+return "memory allocation failure";
+case VMNET_INVALID_ARGUMENT:
+return "invalid argument specified";
+case VMNET_SETUP_INCOMPLETE:
+return "interface setup is not complete";
+case VMNET_INVALID_ACCESS:
+return "invalid access,

[PULL V3 13/13] net/vmnet: update MAINTAINERS list

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 MAINTAINERS | 5 +
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6ccdec7..1dc9d49 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2645,6 +2645,11 @@ W: http://info.iet.unipi.it/~luigi/netmap/
 S: Maintained
 F: net/netmap.c
 
+Apple vmnet network backends
+M: Vladislav Yaroshchuk 
+S: Maintained
+F: net/vmnet*
+
 Host Memory Backends
 M: David Hildenbrand 
 M: Igor Mammedov 
-- 
2.7.4

[PULL V3 08/13] net/vmnet: add vmnet backends to qapi/net

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Create separate netdevs for each vmnet operating mode:
- vmnet-host
- vmnet-shared
- vmnet-bridged

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 net/clients.h   |  11 +
 net/meson.build |   7 +++
 net/net.c   |  10 
 net/vmnet-bridged.m |  25 ++
 net/vmnet-common.m  |  19 
 net/vmnet-host.c|  24 ++
 net/vmnet-shared.c  |  25 ++
 net/vmnet_int.h |  25 ++
 qapi/net.json   | 133 +++-
 9 files changed, 277 insertions(+), 2 deletions(-)
 create mode 100644 net/vmnet-bridged.m
 create mode 100644 net/vmnet-common.m
 create mode 100644 net/vmnet-host.c
 create mode 100644 net/vmnet-shared.c
 create mode 100644 net/vmnet_int.h

diff --git a/net/clients.h b/net/clients.h
index 92f9b59..c915778 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -63,4 +63,15 @@ int net_init_vhost_user(const Netdev *netdev, const char 
*name,
 
 int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
 NetClientState *peer, Error **errp);
+#ifdef CONFIG_VMNET
+int net_init_vmnet_host(const Netdev *netdev, const char *name,
+  NetClientState *peer, Error **errp);
+
+int net_init_vmnet_shared(const Netdev *netdev, const char *name,
+  NetClientState *peer, Error **errp);
+
+int net_init_vmnet_bridged(const Netdev *netdev, const char *name,
+  NetClientState *peer, Error **errp);
+#endif /* CONFIG_VMNET */
+
 #endif /* QEMU_NET_CLIENTS_H */
diff --git a/net/meson.build b/net/meson.build
index 847bc2a..00a88c4 100644
--- a/net/meson.build
+++ b/net/meson.build
@@ -42,4 +42,11 @@ softmmu_ss.add(when: 'CONFIG_POSIX', if_true: 
files(tap_posix))
 softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
 softmmu_ss.add(when: 'CONFIG_VHOST_NET_VDPA', if_true: files('vhost-vdpa.c'))
 
+vmnet_files = files(
+  'vmnet-common.m',
+  'vmnet-bridged.m',
+  'vmnet-host.c',
+  'vmnet-shared.c'
+)
+softmmu_ss.add(when: vmnet, if_true: vmnet_files)
 subdir('can')
diff --git a/net/net.c b/net/net.c
index f0d14db..1dbb64b 100644
--- a/net/net.c
+++ b/net/net.c
@@ -1021,6 +1021,11 @@ static int (* const 
net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
 #ifdef CONFIG_L2TPV3
 [NET_CLIENT_DRIVER_L2TPV3]= net_init_l2tpv3,
 #endif
+#ifdef CONFIG_VMNET
+[NET_CLIENT_DRIVER_VMNET_HOST] = net_init_vmnet_host,
+[NET_CLIENT_DRIVER_VMNET_SHARED] = net_init_vmnet_shared,
+[NET_CLIENT_DRIVER_VMNET_BRIDGED] = net_init_vmnet_bridged,
+#endif /* CONFIG_VMNET */
 };
 
 
@@ -1107,6 +1112,11 @@ void show_netdevs(void)
 #ifdef CONFIG_VHOST_VDPA
 "vhost-vdpa",
 #endif
+#ifdef CONFIG_VMNET
+"vmnet-host",
+"vmnet-shared",
+"vmnet-bridged",
+#endif
 };
 
 qemu_printf("Available netdev backend types:\n");
diff --git a/net/vmnet-bridged.m b/net/vmnet-bridged.m
new file mode 100644
index 000..4e42a90
--- /dev/null
+++ b/net/vmnet-bridged.m
@@ -0,0 +1,25 @@
+/*
+ * vmnet-bridged.m
+ *
+ * Copyright(c) 2021 Vladislav Yaroshchuk 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-types-net.h"
+#include "vmnet_int.h"
+#include "clients.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+#include 
+
+int net_init_vmnet_bridged(const Netdev *netdev, const char *name,
+   NetClientState *peer, Error **errp)
+{
+  error_setg(errp, "vmnet-bridged is not implemented yet");
+  return -1;
+}
diff --git a/net/vmnet-common.m b/net/vmnet-common.m
new file mode 100644
index 000..f949eb6
--- /dev/null
+++ b/net/vmnet-common.m
@@ -0,0 +1,19 @@
+/*
+ * vmnet-common.m - network client wrapper for Apple vmnet.framework
+ *
+ * Copyright(c) 2021 Vladislav Yaroshchuk 
+ * Copyright(c) 2021 Phillip Tennen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-types-net.h"
+#include "vmnet_int.h"
+#include "clients.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+#include 
diff --git a/net/vmnet-host.c b/net/vmnet-host.c
new file mode 100644
index 000..4a5ef99
--- /dev/null
+++ b/net/vmnet-host.c
@@ -0,0 +1,24 @@
+/*
+ * vmnet-host.c
+ *
+ * Copyright(c) 2021 Vladislav Yaroshchuk 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/qapi-types-net.h"
+#include "vmnet_int.h"
+#include "clients.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+
+#include 
+
+int net_init_vmnet_host(const Netdev *netdev, const char *name,
+NetClientState *peer, Error

[PULL V3 04/13] net/colo-compare.c: Optimize compare order for performance

2022-01-13 Thread Jason Wang

From: Zhang Chen 

COLO-compare uses the glib function g_queue_find_custom to find
the other VM's network packet to compare against. But this function always
starts searching from queue->head (here the newest packet), which
reduces the success rate of the comparison. So this patch reverses
the order of the queues for performance.

Signed-off-by: Zhang Chen 
Reported-by: leirao 
Signed-off-by: Jason Wang 
---
 net/colo-compare.c | 26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index b966e7e..216de5a 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -197,7 +197,7 @@ static void colo_compare_inconsistency_notify(CompareState 
*s)
 /* Use restricted to colo_insert_packet() */
 static gint seq_sorter(Packet *a, Packet *b, gpointer data)
 {
-return a->tcp_seq - b->tcp_seq;
+return b->tcp_seq - a->tcp_seq;
 }
 
 static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
@@ -421,13 +421,13 @@ pri:
 if (g_queue_is_empty(>primary_list)) {
 return;
 }
-ppkt = g_queue_pop_head(>primary_list);
+ppkt = g_queue_pop_tail(>primary_list);
 sec:
 if (g_queue_is_empty(>secondary_list)) {
-g_queue_push_head(>primary_list, ppkt);
+g_queue_push_tail(>primary_list, ppkt);
 return;
 }
-spkt = g_queue_pop_head(>secondary_list);
+spkt = g_queue_pop_tail(>secondary_list);
 
 if (ppkt->tcp_seq == ppkt->seq_end) {
 colo_release_primary_pkt(s, ppkt);
@@ -458,7 +458,7 @@ sec:
 }
 }
 if (!ppkt) {
-g_queue_push_head(>secondary_list, spkt);
+g_queue_push_tail(>secondary_list, spkt);
 goto pri;
 }
 }
@@ -477,7 +477,7 @@ sec:
 if (mark == COLO_COMPARE_FREE_PRIMARY) {
 conn->compare_seq = ppkt->seq_end;
 colo_release_primary_pkt(s, ppkt);
-g_queue_push_head(>secondary_list, spkt);
+g_queue_push_tail(>secondary_list, spkt);
 goto pri;
 } else if (mark == COLO_COMPARE_FREE_SECONDARY) {
 conn->compare_seq = spkt->seq_end;
@@ -490,8 +490,8 @@ sec:
 goto pri;
 }
 } else {
-g_queue_push_head(>primary_list, ppkt);
-g_queue_push_head(>secondary_list, spkt);
+g_queue_push_tail(>primary_list, ppkt);
+g_queue_push_tail(>secondary_list, spkt);
 
 #ifdef DEBUG_COLO_PACKETS
 qemu_hexdump(stderr, "colo-compare ppkt", ppkt->data, ppkt->size);
@@ -673,7 +673,7 @@ static void colo_compare_packet(CompareState *s, Connection 
*conn,
 
 while (!g_queue_is_empty(>primary_list) &&
!g_queue_is_empty(>secondary_list)) {
-pkt = g_queue_pop_head(>primary_list);
+pkt = g_queue_pop_tail(>primary_list);
 result = g_queue_find_custom(>secondary_list,
  pkt, (GCompareFunc)HandlePacket);
 
@@ -689,7 +689,7 @@ static void colo_compare_packet(CompareState *s, Connection 
*conn,
  * timeout, it will trigger a checkpoint request.
  */
 trace_colo_compare_main("packet different");
-g_queue_push_head(>primary_list, pkt);
+g_queue_push_tail(>primary_list, pkt);
 
 colo_compare_inconsistency_notify(s);
 break;
@@ -819,7 +819,7 @@ static int compare_chr_send(CompareState *s,
 entry->buf = g_malloc(size);
 memcpy(entry->buf, buf, size);
 }
-g_queue_push_head(>send_list, entry);
+g_queue_push_tail(>send_list, entry);
 
 if (sendco->done) {
 sendco->co = qemu_coroutine_create(_compare_chr_send, sendco);
@@ -1347,7 +1347,7 @@ static void colo_flush_packets(void *opaque, void 
*user_data)
 Packet *pkt = NULL;
 
 while (!g_queue_is_empty(>primary_list)) {
-pkt = g_queue_pop_head(>primary_list);
+pkt = g_queue_pop_tail(>primary_list);
 compare_chr_send(s,
  pkt->data,
  pkt->size,
@@ -1357,7 +1357,7 @@ static void colo_flush_packets(void *opaque, void 
*user_data)
 packet_destroy_partial(pkt, NULL);
 }
 while (!g_queue_is_empty(>secondary_list)) {
-pkt = g_queue_pop_head(>secondary_list);
+pkt = g_queue_pop_tail(>secondary_list);
 packet_destroy(pkt, NULL);
 }
 }
-- 
2.7.4

[PULL V3 12/13] net/vmnet: update qemu-options.hx

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 qemu-options.hx | 25 +
 1 file changed, 25 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index ec90505..81dd34f 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2733,6 +2733,25 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
 "-netdev vhost-vdpa,id=str,vhostdev=/path/to/dev\n"
 "configure a vhost-vdpa network,Establish a vhost-vdpa 
netdev\n"
 #endif
+#ifdef CONFIG_VMNET
+"-netdev vmnet-host,id=str[,isolated=on|off][,net-uuid=uuid]\n"
+" [,start-address=addr,end-address=addr,subnet-mask=mask]\n"
+"configure a vmnet network backend in host mode with ID 
'str',\n"
+"isolate this interface from others with 'isolated',\n"
+"configure the address range and choose a subnet mask,\n"
+"specify network UUID 'uuid' to disable DHCP and interact 
with\n"
+"vmnet-host interfaces within this isolated network\n"
+"-netdev vmnet-shared,id=str[,isolated=on|off][,nat66-prefix=addr]\n"
+" [,start-address=addr,end-address=addr,subnet-mask=mask]\n"
+"configure a vmnet network backend in shared mode with ID 
'str',\n"
+"configure the address range and choose a subnet mask,\n"
+"set IPv6 ULA prefix (of length 64) to use for internal 
network,\n"
+"isolate this interface from others with 'isolated'\n"
+"-netdev vmnet-bridged,id=str,ifname=name[,isolated=on|off]\n"
+"configure a vmnet network backend in bridged mode with ID 
'str',\n"
+"use 'ifname=name' to select a physical network interface 
to be bridged,\n"
+"isolate this interface from others with 'isolated'\n"
+#endif
 "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
 "configure a hub port on the hub with ID 'n'\n", 
QEMU_ARCH_ALL)
 DEF("nic", HAS_ARG, QEMU_OPTION_nic,
@@ -2752,6 +2771,9 @@ DEF("nic", HAS_ARG, QEMU_OPTION_nic,
 #ifdef CONFIG_POSIX
 "vhost-user|"
 #endif
+#ifdef CONFIG_VMNET
+"vmnet-host|vmnet-shared|vmnet-bridged|"
+#endif
 "socket][,option][,...][mac=macaddr]\n"
 "initialize an on-board / default host NIC (using MAC 
address\n"
 "macaddr) and connect it to the given host network 
backend\n"
@@ -2774,6 +2796,9 @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
 #ifdef CONFIG_NETMAP
 "netmap|"
 #endif
+#ifdef CONFIG_VMNET
+"vmnet-host|vmnet-shared|vmnet-bridged|"
+#endif
 "socket][,option][,option][,...]\n"
 "old way to initialize a host network interface\n"
 "(use the -netdev option if possible instead)\n", 
QEMU_ARCH_ALL)
-- 
2.7.4

[PULL V3 07/13] net/vmnet: add vmnet dependency and customizable option

2022-01-13 Thread Jason Wang

From: Vladislav Yaroshchuk 

The vmnet.framework dependency is added, with a 'vmnet' option
to enable or disable it. The default value is 'auto'.

The vmnet features to be used are available since macOS 11.0;
a corresponding probe is added to meson.build.

Signed-off-by: Vladislav Yaroshchuk 
Signed-off-by: Jason Wang 
---
 meson.build   | 16 +++-
 meson_options.txt |  2 ++
 scripts/meson-buildoptions.sh |  3 +++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/meson.build b/meson.build
index c1b1db1..285fb7b 100644
--- a/meson.build
+++ b/meson.build
@@ -496,6 +496,18 @@ if cocoa.found() and get_option('gtk').enabled()
   error('Cocoa and GTK+ cannot be enabled at the same time')
 endif
 
+vmnet = dependency('appleframeworks', modules: 'vmnet', required: 
get_option('vmnet'))
+if vmnet.found() and not cc.has_header_symbol('vmnet/vmnet.h',
+  'VMNET_BRIDGED_MODE',
+  dependencies: vmnet)
+  vmnet = not_found
+  if get_option('vmnet').enabled()
+error('vmnet.framework API is outdated')
+  else
+warning('vmnet.framework API is outdated, disabling')
+  endif
+endif
+
 seccomp = not_found
 if not get_option('seccomp').auto() or have_system or have_tools
   seccomp = dependency('libseccomp', version: '>=2.3.0',
@@ -1492,6 +1504,7 @@ config_host_data.set('CONFIG_SECCOMP', seccomp.found())
 config_host_data.set('CONFIG_SNAPPY', snappy.found())
 config_host_data.set('CONFIG_USB_LIBUSB', libusb.found())
 config_host_data.set('CONFIG_VDE', vde.found())
+config_host_data.set('CONFIG_VMNET', vmnet.found())
 config_host_data.set('CONFIG_VHOST_USER_BLK_SERVER', 
have_vhost_user_blk_server)
 config_host_data.set('CONFIG_VNC', vnc.found())
 config_host_data.set('CONFIG_VNC_JPEG', jpeg.found())
@@ -3406,7 +3419,8 @@ summary(summary_info, bool_yn: true, section: 'Crypto')
 # Libraries
 summary_info = {}
 if targetos == 'darwin'
-  summary_info += {'Cocoa support':   cocoa}
+  summary_info += {'Cocoa support':   cocoa}
+  summary_info += {'vmnet.framework support': vmnet}
 endif
 summary_info += {'SDL support':   sdl}
 summary_info += {'SDL image support': sdl_image}
diff --git a/meson_options.txt b/meson_options.txt
index 921967e..701e138 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -151,6 +151,8 @@ option('netmap', type : 'feature', value : 'auto',
description: 'netmap network backend support')
 option('vde', type : 'feature', value : 'auto',
description: 'vde network backend support')
+option('vmnet', type : 'feature', value : 'auto',
+   description: 'vmnet.framework network backend support')
 option('virglrenderer', type : 'feature', value : 'auto',
description: 'virgl rendering support')
 option('vnc', type : 'feature', value : 'auto',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index 50bd7be..cdcece4 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -84,6 +84,7 @@ meson_options_help() {
   printf "%s\n" '  u2f U2F emulation support'
   printf "%s\n" '  usb-redir   libusbredir support'
   printf "%s\n" '  vde vde network backend support'
+  printf "%s\n" '  vmnet   vmnet.framework network backend support'
   printf "%s\n" '  vhost-user-blk-server'
   printf "%s\n" '  build vhost-user-blk server'
   printf "%s\n" '  virglrenderer   virgl rendering support'
@@ -248,6 +249,8 @@ _meson_option_parse() {
 --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;;
 --enable-vde) printf "%s" -Dvde=enabled ;;
 --disable-vde) printf "%s" -Dvde=disabled ;;
+--enable-vmnet) printf "%s" -Dvmnet=enabled ;;
+--disable-vmnet) printf "%s" -Dvmnet=disabled ;;
 --enable-vhost-user-blk-server) printf "%s" 
-Dvhost_user_blk_server=enabled ;;
 --disable-vhost-user-blk-server) printf "%s" 
-Dvhost_user_blk_server=disabled ;;
 --enable-virglrenderer) printf "%s" -Dvirglrenderer=enabled ;;
-- 
2.7.4

[PULL V3 05/13] net/colo-compare.c: Update the default value comments

2022-01-13 Thread Jason Wang

From: Zhang Chen 

Make the comments consistent with the REGULAR_PACKET_CHECK_MS.

Signed-off-by: Zhang Chen 
Signed-off-by: Jason Wang 
---
 net/colo-compare.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/colo-compare.c b/net/colo-compare.c
index 216de5a..62554b5 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -1267,7 +1267,7 @@ static void colo_compare_complete(UserCreatable *uc, 
Error **errp)
 }
 
 if (!s->expired_scan_cycle) {
-/* Set default value to 3000 MS */
+/* Set default value to 1000 MS */
 s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS;
 }
 
-- 
2.7.4

[PULL V3 03/13] net: Fix uninitialized data usage

2022-01-13 Thread Jason Wang

From: Peter Foley 

e.g.
1109 15:16:20.151506 Uninitialized bytes in ioctl_common_pre at offset 0 inside 
[0x7ffc516af9b8, 4)
 1109 15:16:20.151659 ==588974==WARNING: MemorySanitizer: 
use-of-uninitialized-value
 1109 15:16:20.312923 #0 0x5639b88acb21 in tap_probe_vnet_hdr_len 
third_party/qemu/net/tap-linux.c:183:9
 1109 15:16:20.312952 #1 0x5639b88afd66 in net_tap_fd_init 
third_party/qemu/net/tap.c:409:9
 1109 15:16:20.312954 #2 0x5639b88b2d1b in net_init_tap_one 
third_party/qemu/net/tap.c:681:19
 1109 15:16:20.312956 #3 0x5639b88b16a8 in net_init_tap 
third_party/qemu/net/tap.c:912:13
 1109 15:16:20.312957 #4 0x5639b8890175 in net_client_init1 
third_party/qemu/net/net.c:1110:9
 1109 15:16:20.312958 #5 0x5639b888f912 in net_client_init 
third_party/qemu/net/net.c:1208:15
 1109 15:16:20.312960 #6 0x5639b8894aa5 in net_param_nic 
third_party/qemu/net/net.c:1588:11
 1109 15:16:20.312961 #7 0x5639b900cd18 in qemu_opts_foreach 
third_party/qemu/util/qemu-option.c:1135:14
 1109 15:16:20.312962 #8 0x5639b889393c in net_init_clients 
third_party/qemu/net/net.c:1612:9
 1109 15:16:20.312964 #9 0x5639b717aaf3 in qemu_create_late_backends 
third_party/qemu/softmmu/vl.c:1962:5
 1109 15:16:20.312965 #10 0x5639b717aaf3 in qemu_init 
third_party/qemu/softmmu/vl.c:3694:5
 1109 15:16:20.312967 #11 0x5639b71083b8 in main 
third_party/qemu/softmmu/main.c:49:5
 1109 15:16:20.312968 #12 0x7f464de1d8d2 in __libc_start_main 
(/usr/grte/v5/lib64/libc.so.6+0x628d2)
 1109 15:16:20.312969 #13 0x5639b6bbd389 in _start 
/usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
 1109 15:16:20.312970
 1109 15:16:20.312975   Uninitialized value was stored to memory at
 1109 15:16:20.313393 #0 0x5639b88acbee in tap_probe_vnet_hdr_len 
third_party/qemu/net/tap-linux.c
 1109 15:16:20.313396 #1 0x5639b88afd66 in net_tap_fd_init 
third_party/qemu/net/tap.c:409:9
 1109 15:16:20.313398 #2 0x5639b88b2d1b in net_init_tap_one 
third_party/qemu/net/tap.c:681:19
 1109 15:16:20.313399 #3 0x5639b88b16a8 in net_init_tap 
third_party/qemu/net/tap.c:912:13
 1109 15:16:20.313400 #4 0x5639b8890175 in net_client_init1 
third_party/qemu/net/net.c:1110:9
 1109 15:16:20.313401 #5 0x5639b888f912 in net_client_init 
third_party/qemu/net/net.c:1208:15
 1109 15:16:20.313403 #6 0x5639b8894aa5 in net_param_nic 
third_party/qemu/net/net.c:1588:11
 1109 15:16:20.313404 #7 0x5639b900cd18 in qemu_opts_foreach 
third_party/qemu/util/qemu-option.c:1135:14
 1109 15:16:20.313405 #8 0x5639b889393c in net_init_clients 
third_party/qemu/net/net.c:1612:9
 1109 15:16:20.313407 #9 0x5639b717aaf3 in qemu_create_late_backends 
third_party/qemu/softmmu/vl.c:1962:5
 1109 15:16:20.313408 #10 0x5639b717aaf3 in qemu_init 
third_party/qemu/softmmu/vl.c:3694:5
 1109 15:16:20.313409 #11 0x5639b71083b8 in main 
third_party/qemu/softmmu/main.c:49:5
 1109 15:16:20.313410 #12 0x7f464de1d8d2 in __libc_start_main 
(/usr/grte/v5/lib64/libc.so.6+0x628d2)
 1109 15:16:20.313412 #13 0x5639b6bbd389 in _start 
/usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
 1109 15:16:20.313413
 1109 15:16:20.313417   Uninitialized value was stored to memory at
 1109 15:16:20.313791 #0 0x5639b88affbd in net_tap_fd_init 
third_party/qemu/net/tap.c:400:26
 1109 15:16:20.313826 #1 0x5639b88b2d1b in net_init_tap_one 
third_party/qemu/net/tap.c:681:19
 1109 15:16:20.313829 #2 0x5639b88b16a8 in net_init_tap 
third_party/qemu/net/tap.c:912:13
 1109 15:16:20.313831 #3 0x5639b8890175 in net_client_init1 
third_party/qemu/net/net.c:1110:9
 1109 15:16:20.313836 #4 0x5639b888f912 in net_client_init 
third_party/qemu/net/net.c:1208:15
 1109 15:16:20.313838 #5 0x5639b8894aa5 in net_param_nic 
third_party/qemu/net/net.c:1588:11
 1109 15:16:20.313839 #6 0x5639b900cd18 in qemu_opts_foreach 
third_party/qemu/util/qemu-option.c:1135:14
 1109 15:16:20.313841 #7 0x5639b889393c in net_init_clients 
third_party/qemu/net/net.c:1612:9
 1109 15:16:20.313843 #8 0x5639b717aaf3 in qemu_create_late_backends 
third_party/qemu/softmmu/vl.c:1962:5
 1109 15:16:20.313844 #9 0x5639b717aaf3 in qemu_init 
third_party/qemu/softmmu/vl.c:3694:5
 1109 15:16:20.313845 #10 0x5639b71083b8 in main 
third_party/qemu/softmmu/main.c:49:5
 1109 15:16:20.313846 #11 0x7f464de1d8d2 in __libc_start_main 
(/usr/grte/v5/lib64/libc.so.6+0x628d2)
 1109 15:16:20.313847 #12 0x5639b6bbd389 in _start 
/usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
 1109 15:16:20.313849
 1109 15:16:20.313851   Uninitialized value was created by an allocation of 
'ifr' in the stack frame of function 'tap_probe_vnet_hdr'
 1109 15:16:20.313855 #0 0x5639b88ac680 in tap_probe_vnet_hdr 
third_party/qemu/net/tap-linux.c:151
 1109 15:16:20.313856
 1109 15:16:20.313878 SUMMARY: MemorySanitizer: use-of-uninitialized-value 
third_party/qemu/net/tap-linux.c:183:9 in tap_probe_vnet_hdr_len

Fixes: dc69004c7d8

[PULL V3 02/13] net/tap: Set return code on failure

2022-01-13 Thread Jason Wang

From: Peter Foley 

Match the other error handling in this function.

Fixes: e7b347d0bf6 ("net: detect errors from probing vnet hdr flag for TAP 
devices")

Reviewed-by: Patrick Venture 
Reviewed-by: Philippe Mathieu-Daudé 
Signed-off-by: Peter Foley 
Signed-off-by: Jason Wang 
---
 net/tap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/tap.c b/net/tap.c
index f716be3..c5cbeaa 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -900,6 +900,7 @@ int net_init_tap(const Netdev *netdev, const char *name,
 if (i == 0) {
 vnet_hdr = tap_probe_vnet_hdr(fd, errp);
 if (vnet_hdr < 0) {
+ret = -1;
 goto free_fail;
 }
 } else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
-- 
2.7.4

[PULL V3 01/13] hw/net/vmxnet3: Log guest-triggerable errors using LOG_GUEST_ERROR

2022-01-13 Thread Jason Wang

From: Philippe Mathieu-Daudé 

The "Interrupt Cause" register (VMXNET3_REG_ICR) is read-only.
Write accesses are ignored. Log them as LOG_GUEST_ERROR
instead of aborting:

  [R +0.239743] writeq 0xe0002031 0x46291a5a55460800
  ERROR:hw/net/vmxnet3.c:1819:vmxnet3_io_bar1_write: code should not be reached
  Thread 1 "qemu-system-i38" received signal SIGABRT, Aborted.
  (gdb) bt
  #3  0x74c397d3 in __GI_abort () at abort.c:79
  #4  0x76d3cd4c in g_assertion_message (domain=, 
file=, line=, func=, 
message=) at ../glib/gtestutils.c:3223
  #5  0x76d9d45f in g_assertion_message_expr
  (domain=0x0, file=0x59fc2e53 "hw/net/vmxnet3.c", line=1819, 
func=0x59fc11e0 <__func__.vmxnet3_io_bar1_write> "vmxnet3_io_bar1_write", 
expr=)
  at ../glib/gtestutils.c:3249
  #6  0x57e80a3a in vmxnet3_io_bar1_write (opaque=0x62814100, addr=56, val=70, 
size=4) at hw/net/vmxnet3.c:1819
  #7  0x58c2d894 in memory_region_write_accessor (mr=0x62816b90, addr=56, 
value=0x7fff9450, size=4, shift=0, mask=4294967295, attrs=...) at 
softmmu/memory.c:492
  #8  0x58c2d1d2 in access_with_adjusted_size (addr=56, value=0x7fff9450, 
size=1, access_size_min=4, access_size_max=4, access_fn=
  0x58c2d290 , mr=0x62816b90, attrs=...) at 
softmmu/memory.c:554
  #9  0x58c2bae7 in memory_region_dispatch_write (mr=0x62816b90, addr=56, 
data=70, op=MO_8, attrs=...) at softmmu/memory.c:1504
  #10 0x58bfd034 in flatview_write_continue (fv=0x606000181700, 
addr=0xe0002038, attrs=..., ptr=0x7fffb9e0, len=1, addr1=56, l=1, mr=0x62816b90)
  at softmmu/physmem.c:2782
  #11 0x58beba00 in flatview_write (fv=0x606000181700, addr=0xe0002031, 
attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2822
  #12 0x58beb589 in address_space_write (as=0x60815f20, addr=0xe0002031, 
attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2914

Reported-by: Dike 
Reported-by: Duhao <504224...@qq.com>
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2032932
Signed-off-by: Philippe Mathieu-Daudé 
Signed-off-by: Jason Wang 
---
 hw/net/vmxnet3.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index f65af4e..0b7acf7 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1816,7 +1816,9 @@ vmxnet3_io_bar1_write(void *opaque,
 case VMXNET3_REG_ICR:
 VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
   val, size);
-g_assert_not_reached();
+qemu_log_mask(LOG_GUEST_ERROR,
+  "%s: write to read-only register VMXNET3_REG_ICR\n",
+  TYPE_VMXNET3);
 break;
 
 /* Event Cause Register */
-- 
2.7.4

[PULL V3 00/13] Net patches

2022-01-13 Thread Jason Wang

The following changes since commit f8d75e10d3e0033a0a29a7a7e4777a4fbc17a016:

  Merge remote-tracking branch 'remotes/legoater/tags/pull-ppc-20220112' into 
staging (2022-01-13 11:18:24 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 818692f0a01587d02220916b31d5bb8e7dced611:

  net/vmnet: update MAINTAINERS list (2022-01-14 12:58:19 +0800)



Changes since V2:

- Try to make vmnet work on some old mac version


Peter Foley (2):
  net/tap: Set return code on failure
  net: Fix uninitialized data usage

Philippe Mathieu-Daudé (1):
  hw/net/vmxnet3: Log guest-triggerable errors using LOG_GUEST_ERROR

Rao Lei (1):
  net/filter: Optimize filter_send to coroutine

Vladislav Yaroshchuk (7):
  net/vmnet: add vmnet dependency and customizable option
  net/vmnet: add vmnet backends to qapi/net
  net/vmnet: implement shared mode (vmnet-shared)
  net/vmnet: implement host mode (vmnet-host)
  net/vmnet: implement bridged mode (vmnet-bridged)
  net/vmnet: update qemu-options.hx
  net/vmnet: update MAINTAINERS list

Zhang Chen (2):
  net/colo-compare.c: Optimize compare order for performance
  net/colo-compare.c: Update the default value comments

 MAINTAINERS   |   5 +
 hw/net/vmxnet3.c  |   4 +-
 meson.build   |  16 +-
 meson_options.txt |   2 +
 net/clients.h |  11 ++
 net/colo-compare.c|  28 ++--
 net/filter-mirror.c   |  66 +++--
 net/meson.build   |   7 +
 net/net.c |  10 ++
 net/tap-linux.c   |   1 +
 net/tap.c |   1 +
 net/vmnet-bridged.m   | 120 +++
 net/vmnet-common.m| 333 ++
 net/vmnet-host.c  | 122 
 net/vmnet-shared.c| 100 +
 net/vmnet_int.h   |  48 ++
 qapi/net.json | 133 -
 qemu-options.hx   |  25 
 scripts/meson-buildoptions.sh |   3 +
 19 files changed, 1004 insertions(+), 31 deletions(-)
 create mode 100644 net/vmnet-bridged.m
 create mode 100644 net/vmnet-common.m
 create mode 100644 net/vmnet-host.c
 create mode 100644 net/vmnet-shared.c
 create mode 100644 net/vmnet_int.h

[PULL V3 06/13] net/filter: Optimize filter_send to coroutine

2022-01-13 Thread Jason Wang

From: Rao Lei 

This patch is to improve the logic of QEMU main thread sleep code in
qemu_chr_write_buffer() where it can be blocked and can't run other
coroutines during COLO IO stress test.

Our approach is to put filter_send() in a coroutine. In this way,
filter_send() will call qemu_coroutine_yield() in qemu_co_sleep_ns(),
so that it can be scheduled out and QEMU main thread has opportunity to
run other tasks.

Signed-off-by: Lei Rao 
Signed-off-by: Zhang Chen 
Reviewed-by: Li Zhijian 
Reviewed-by: Zhang Chen 
Signed-off-by: Jason Wang 
---
 net/filter-mirror.c | 66 ++---
 1 file changed, 53 insertions(+), 13 deletions(-)

diff --git a/net/filter-mirror.c b/net/filter-mirror.c
index f20240c..34a63b5 100644
--- a/net/filter-mirror.c
+++ b/net/filter-mirror.c
@@ -20,6 +20,7 @@
 #include "chardev/char-fe.h"
 #include "qemu/iov.h"
 #include "qemu/sockets.h"
+#include "block/aio-wait.h"
 
 #define TYPE_FILTER_MIRROR "filter-mirror"
 typedef struct MirrorState MirrorState;
@@ -42,20 +43,21 @@ struct MirrorState {
 bool vnet_hdr;
 };
 
-static int filter_send(MirrorState *s,
-   const struct iovec *iov,
-   int iovcnt)
+typedef struct FilterSendCo {
+MirrorState *s;
+char *buf;
+ssize_t size;
+bool done;
+int ret;
+} FilterSendCo;
+
+static int _filter_send(MirrorState *s,
+   char *buf,
+   ssize_t size)
 {
 NetFilterState *nf = NETFILTER(s);
 int ret = 0;
-ssize_t size = 0;
 uint32_t len = 0;
-char *buf;
-
-size = iov_size(iov, iovcnt);
-if (!size) {
-return 0;
-}
 
 len = htonl(size);
 ret = qemu_chr_fe_write_all(>chr_out, (uint8_t *), sizeof(len));
@@ -80,10 +82,7 @@ static int filter_send(MirrorState *s,
 }
 }
 
-buf = g_malloc(size);
-iov_to_buf(iov, iovcnt, 0, buf, size);
 ret = qemu_chr_fe_write_all(>chr_out, (uint8_t *)buf, size);
-g_free(buf);
 if (ret != size) {
 goto err;
 }
@@ -94,6 +93,47 @@ err:
 return ret < 0 ? ret : -EIO;
 }
 
+static void coroutine_fn filter_send_co(void *opaque)
+{
+FilterSendCo *data = opaque;
+
+data->ret = _filter_send(data->s, data->buf, data->size);
+data->done = true;
+g_free(data->buf);
+aio_wait_kick();
+}
+
+static int filter_send(MirrorState *s,
+   const struct iovec *iov,
+   int iovcnt)
+{
+ssize_t size = iov_size(iov, iovcnt);
+char *buf = NULL;
+
+if (!size) {
+return 0;
+}
+
+buf = g_malloc(size);
+iov_to_buf(iov, iovcnt, 0, buf, size);
+
+FilterSendCo data = {
+.s = s,
+.size = size,
+.buf = buf,
+.ret = 0,
+};
+
+Coroutine *co = qemu_coroutine_create(filter_send_co, );
+qemu_coroutine_enter(co);
+
+while (!data.done) {
+aio_poll(qemu_get_aio_context(), true);
+}
+
+return data.ret;
+}
+
 static void redirector_to_filter(NetFilterState *nf,
  const uint8_t *buf,
  int len)
-- 
2.7.4

Re: [PATCH v10 2/3] cpu-throttle: implement virtual CPU throttle

2022-01-13 Thread Peter Xu

On Fri, Jan 14, 2022 at 09:30:39AM +0800, Hyman Huang wrote:
> 
> 
> 在 2022/1/14 0:22, Markus Armbruster 写道:
> > Peter Xu  writes:
> > 
> > > On Fri, Dec 31, 2021 at 12:36:40AM +0800, Hyman Huang wrote:
> > > > > > +struct {
> > > > > > +DirtyLimitState *states;
> > > > > > +int max_cpus;
> > > > > > +unsigned long *bmap; /* running thread bitmap */
> > > > > > +unsigned long nr;
> > > > > > +QemuThread thread;
> > > > > > +} *dirtylimit_state;
> > > > > > +
> > > > > > +static bool dirtylimit_quit = true;
> > > > > 
> > > > > Again, I think "quit" is not a good wording to show "whether 
> > > > > dirtylimit is in
> > > > > service".  How about "dirtylimit_global_enabled"?
> > > > > 
> > > > > You can actually use "dirtylimit_state" to show whether it's enabled 
> > > > > already
> > > > > (then drop the global value) since it's a pointer.  It shouldn't need 
> > > > > to be
> > > > > > init-once-for-all, but we can alloc the structure when dirty limit is 
> > > > > enabled
> > > > > globally, and destroy it (and reset it to NULL) when globally 
> > > > > disabled.
> > > > > 
> > > > > Then "whether it's enabled" is simply to check "!!dirtylimit_state" 
> > > > > under BQL.
> > > > Yes, checking the pointer is fairly straightforward, but since the
> > > > dirtylimit thread also accesses dirtylimit_state when doing the limit,
> > > > if we free dirtylimit_state after the last limited vcpu is canceled, the
> > > > dirtylimit thread would crash when dereferencing a null pointer. So this
> > > > method turns out to need a mutex lock to protect dirtylimit_state.
> > > > Compared with a qatomic operation, which is better?
> > > 
> > > I don't see much difference here on using either atomic or mutex, because 
> > > it's
> > > not a hot path.
> > 
> > Quick interjection without having bothered to understand the details:
> > correct use of atomics and memory barriers is *much* harder than correct
> > use of locks.  Stick to locks unless you *know* they impair performance

Yong,

Just a heads up - You seem to have replied something but there's really nothing
I saw... it happened multiple times, so I hope you didn't miss it by sending
something empty.

I agree with Markus, and that's also what I wanted to express too (it's not a
perf critical path, so we don't necessarily need to use atomics; obviously I
failed again on using English to express myself.. :).  But I don't urge it if
the atomics works pretty simple and well.  I think I'll read the atomic version
you posted first and I'll comment again there.

Thanks,

-- 
Peter Xu

Re: [PATCH 3/3] intel-iommu: PASID support

2022-01-13 Thread Peter Xu

On Fri, Jan 14, 2022 at 10:47:44AM +0800, Jason Wang wrote:
> 
> 在 2022/1/13 下午1:06, Peter Xu 写道:
> > On Wed, Jan 05, 2022 at 12:19:45PM +0800, Jason Wang wrote:
> > > @@ -1725,11 +1780,16 @@ static bool 
> > > vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> > >   cc_entry->context_cache_gen = s->context_cache_gen;
> > >   }
> > > +/* Try to fetch slpte form IOTLB */
> > > +if ((pasid == PCI_NO_PASID) && s->root_scalable) {
> > > +pasid = VTD_CE_GET_RID2PASID();
> > > +}
> > > +
> > >   /*
> > >* We don't need to translate for pass-through context entries.
> > >* Also, let's ignore IOTLB caching as well for PT devices.
> > >*/
> > > -if (vtd_dev_pt_enabled(s, )) {
> > > +if (vtd_dev_pt_enabled(s, , pasid)) {
> > >   entry->iova = addr & VTD_PAGE_MASK_4K;
> > >   entry->translated_addr = entry->iova;
> > >   entry->addr_mask = ~VTD_PAGE_MASK_4K;
> > > @@ -1750,14 +1810,24 @@ static bool 
> > > vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
> > >   return true;
> > >   }
> > > +iotlb_entry = vtd_lookup_iotlb(s, source_id, addr, pasid);
> > > +if (iotlb_entry) {
> > > +trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
> > > + iotlb_entry->domain_id);
> > > +slpte = iotlb_entry->slpte;
> > > +access_flags = iotlb_entry->access_flags;
> > > +page_mask = iotlb_entry->mask;
> > > +goto out;
> > > +}
> > IIUC the iotlb lookup moved down just because the pasid==NO_PASID case then
> > we'll need to fetch the default pasid from the context entry.  That looks
> > reasonable.
> > 
> > It's just a bit of pity because logically it'll slow down iotlb hits due to
> > context entry operations.  When NO_PASID we could have looked up iotlb 
> > without
> > checking pasid at all, assuming that "default pasid" will always match.  But
> > that is a little bit hacky.
> 
> 
> Right, but I think you meant to do this only when scalable mode is disabled.

Yes, IMHO it will definitely suit the !scalable case since that's exactly what
we did before.  What I'm also wondering about is the case where scalable is
enabled but no "real" pasid is used: if all the translations go through the
default pasid that is stored in the device context entry, then maybe we can
skip checking it.  The latter is the "hacky" part mentioned above.

The other thing to mention is, if we postpone the iotlb lookup to be after
context entry, then logically we can have per-device iotlb, that means we can
replace IntelIOMMUState.iotlb with VTDAddressSpace.iotlb in the future, too,
which can also be more efficient.

Not sure whether Michael will have a preference, for me I think either way can
be done on top.

> 
> 
> > 
> > vIOMMU seems to be mostly used for assigned devices and dpdk in production 
> > in
> > the future due to its slowness otherwise.. so maybe not a big deal at all.
> > 
> > [...]
> > 
> > > @@ -2011,7 +2083,52 @@ static void 
> > > vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
> > >   vtd_iommu_lock(s);
> > >   g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, 
> > > );
> > >   vtd_iommu_unlock(s);
> > > -vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
> > > +vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, 
> > > PCI_NO_PASID);
> > > +}
> > > +
> > > +static void vtd_iotlb_page_pasid_invalidate(IntelIOMMUState *s,
> > > +uint16_t domain_id,
> > > +hwaddr addr, uint8_t am,
> > > +uint32_t pasid)
> > > +{
> > > +VTDIOTLBPageInvInfo info;
> > > +
> > > +trace_vtd_inv_desc_iotlb_pasid_pages(domain_id, addr, am, pasid);
> > > +
> > > +assert(am <= VTD_MAMV);
> > > +info.domain_id = domain_id;
> > > +info.addr = addr;
> > > +info.mask = ~((1 << am) - 1);
> > > +info.pasid = pasid;
> > > +vtd_iommu_lock(s);
> > > +g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page_pasid, 
> > > );
> > > +vtd_iommu_unlock(s);
> > > +vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);
> > Hmm, I think indeed we need a notification, but it'll be unnecessary for
> > e.g. vfio map notifiers, because this is 1st level invalidation and at 
> > least so
> > far vfio map notifiers are rewalking only the 2nd level page table, so 
> > it'll be
> > destined to be a no-op and pure overhead.
> 
> 
> > Right, considering we don't implement L1 and we don't have a 1st level
> > abstraction in either vhost or vfio, we can simply remove this.

We probably still need the real pasid invalidation parts in the future?  Either
for vhost (if vhost is going to cache pasid-based translations), or for
compatible assigned devices in the future where the HW can cache it.

I'm not sure what's the best way to do this, yet.

Re: [PATCH 1/3] intel-iommu: don't warn guest errors when getting rid2pasid entry

2022-01-13 Thread Jason Wang




在 2022/1/13 下午3:05, Michael S. Tsirkin 写道:

On Thu, Jan 13, 2022 at 11:35:19AM +0800, Peter Xu wrote:

On Wed, Jan 05, 2022 at 12:19:43PM +0800, Jason Wang wrote:

We used to warn on a wrong rid2pasid entry. But this error could be
triggered by the guest and could happen during initialization. So
let's not warn in this case.

Signed-off-by: Jason Wang 
---
  hw/i386/intel_iommu.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4c6c016388..f2c7a23712 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1524,8 +1524,10 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, 
VTDContextEntry *ce)
  if (s->root_scalable) {
  ret = vtd_ce_get_rid2pasid_entry(s, ce, );
  if (ret) {
-error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
-  __func__, ret);
+/*
+ * This error is guest triggerable. We should assumt PT
+ * not enabled for safety.
+ */
  return false;
  }
  return (VTD_PE_GET_TYPE() == VTD_SM_PASID_ENTRY_PT);
--
2.25.1


No strong opinion, but the thing is that almost all error_report_once() calls in
this file are guest triggerable.  If we remove this one then it's debatable
whether we want to remove them all.

IMHO we used the _once() variant just for this: it won't go into any form of
DoS, meanwhile we'll still get some information (as hypervisor) that the guest
OS may not be trustworthy.

So from that pov it's still useful?  Or is this error very special in some way?

Thanks,


Well we have LOG_GUEST_ERROR for guest errors now.



Ok, but this is not necessarily a guest error. (Inferring from the 
comment in vtd_as_pt_enabled()).


Thanks





--
Peter Xu

Re: [PATCH 1/3] intel-iommu: don't warn guest errors when getting rid2pasid entry

2022-01-13 Thread Jason Wang




在 2022/1/13 下午3:06, Michael S. Tsirkin 写道:

On Wed, Jan 05, 2022 at 12:19:43PM +0800, Jason Wang wrote:

We used to warn on a wrong rid2pasid entry. But this error could be
triggered by the guest and could happen during initialization. So
let's not warn in this case.

Signed-off-by: Jason Wang 
---
  hw/i386/intel_iommu.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 4c6c016388..f2c7a23712 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1524,8 +1524,10 @@ static bool vtd_dev_pt_enabled(IntelIOMMUState *s, 
VTDContextEntry *ce)
  if (s->root_scalable) {
  ret = vtd_ce_get_rid2pasid_entry(s, ce, );
  if (ret) {
-error_report_once("%s: vtd_ce_get_rid2pasid_entry error: %"PRId32,
-  __func__, ret);
+/*
+ * This error is guest triggerable. We should assumt PT

typo

And drop "We should" pls, just use direct voice:
"Assume PT not enabled".



Fixed.

Thanks






+ * not enabled for safety.
+ */
  return false;
  }
  return (VTD_PE_GET_TYPE() == VTD_SM_PASID_ENTRY_PT);
--
2.25.1

Re: [PULL V2 00/13] Net patches

2022-01-13 Thread Jason Wang

On Thu, Jan 13, 2022 at 11:36 PM Philippe Mathieu-Daudé  wrote:
>
> On 13/1/22 15:00, Peter Maydell wrote:
> > On Wed, 12 Jan 2022 at 08:32, Jason Wang  wrote:
> >>
> >> The following changes since commit 
> >> 64c01c7da449bcafc614b27ecf1325bb08031c84:
> >>
> >>Merge remote-tracking branch 'remotes/philmd/tags/sdmmc-20220108' into 
> >> staging (2022-01-11 11:39:31 +0000)
> >>
> >> are available in the git repository at:
> >>
> >>https://github.com/jasowang/qemu.git tags/net-pull-request
> >>
> >> for you to fetch changes up to 99420f216cf5cd2e5c09e0d491b9e44d16030aba:
> >>
> >>net/vmnet: update MAINTAINERS list (2022-01-12 16:27:19 +0800)
> >>
> >> 
> >>
> >
> > Let me know if you want me to apply this or if you're going to update
> > it with Vladislav's v11 vmnet series.
>
> Note, there is also a v12.
>

Will send a new pull request soon (and there's a v13).

Thanks

Re: [PATCH 3/3] intel-iommu: PASID support

2022-01-13 Thread Jason Wang




在 2022/1/13 下午1:06, Peter Xu 写道:

On Wed, Jan 05, 2022 at 12:19:45PM +0800, Jason Wang wrote:

@@ -1725,11 +1780,16 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
  cc_entry->context_cache_gen = s->context_cache_gen;
  }
  
+/* Try to fetch slpte form IOTLB */

+if ((pasid == PCI_NO_PASID) && s->root_scalable) {
+pasid = VTD_CE_GET_RID2PASID();
+}
+
  /*
   * We don't need to translate for pass-through context entries.
   * Also, let's ignore IOTLB caching as well for PT devices.
   */
-if (vtd_dev_pt_enabled(s, )) {
+if (vtd_dev_pt_enabled(s, , pasid)) {
  entry->iova = addr & VTD_PAGE_MASK_4K;
  entry->translated_addr = entry->iova;
  entry->addr_mask = ~VTD_PAGE_MASK_4K;
@@ -1750,14 +1810,24 @@ static bool vtd_do_iommu_translate(VTDAddressSpace 
*vtd_as, PCIBus *bus,
  return true;
  }
  
+iotlb_entry = vtd_lookup_iotlb(s, source_id, addr, pasid);

+if (iotlb_entry) {
+trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
+ iotlb_entry->domain_id);
+slpte = iotlb_entry->slpte;
+access_flags = iotlb_entry->access_flags;
+page_mask = iotlb_entry->mask;
+goto out;
+}

IIUC the iotlb lookup moved down just because the pasid==NO_PASID case then
we'll need to fetch the default pasid from the context entry.  That looks
reasonable.

It's just a bit of pity because logically it'll slow down iotlb hits due to
context entry operations.  When NO_PASID we could have looked up iotlb without
checking pasid at all, assuming that "default pasid" will always match.  But
that is a little bit hacky.



Right, but I think you meant to do this only when scalable mode is disabled.




vIOMMU seems to be mostly used for assigned devices and dpdk in production in
the future due to its slowness otherwise.. so maybe not a big deal at all.

[...]


@@ -2011,7 +2083,52 @@ static void vtd_iotlb_page_invalidate(IntelIOMMUState 
*s, uint16_t domain_id,
  vtd_iommu_lock(s);
  g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, );
  vtd_iommu_unlock(s);
-vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
+vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, PCI_NO_PASID);
+}
+
+static void vtd_iotlb_page_pasid_invalidate(IntelIOMMUState *s,
+uint16_t domain_id,
+hwaddr addr, uint8_t am,
+uint32_t pasid)
+{
+VTDIOTLBPageInvInfo info;
+
+trace_vtd_inv_desc_iotlb_pasid_pages(domain_id, addr, am, pasid);
+
+assert(am <= VTD_MAMV);
+info.domain_id = domain_id;
+info.addr = addr;
+info.mask = ~((1 << am) - 1);
+info.pasid = pasid;
+vtd_iommu_lock(s);
+g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page_pasid, 
);
+vtd_iommu_unlock(s);
+vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid);

Hmm, I think indeed we need a notification, but it'll be unnecessary for
e.g. vfio map notifiers, because this is 1st level invalidation and at least so
far vfio map notifiers are rewalking only the 2nd level page table, so it'll be
destined to be a no-op and pure overhead.



Right, considering we don't implement L1 and we don't have a 1st level 
abstraction in either vhost or vfio, we can simply remove this.






+}
+
+static void vtd_iotlb_pasid_invalidate(IntelIOMMUState *s, uint16_t domain_id,
+   uint32_t pasid)
+{
+VTDIOTLBPageInvInfo info;
+VTDAddressSpace *vtd_as;
+VTDContextEntry ce;
+
+trace_vtd_inv_desc_iotlb_pasid(domain_id, pasid);
+
+info.domain_id = domain_id;
+info.pasid = pasid;
+vtd_iommu_lock(s);
+g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid, );
+vtd_iommu_unlock(s);
+
+QLIST_FOREACH(vtd_as, >vtd_as_with_notifiers, next) {
+if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
+  vtd_as->devfn, ) &&
+domain_id == vtd_get_domain_id(s, , vtd_as->pasid) &&
+pasid == vtd_as->pasid) {
+vtd_sync_shadow_page_table(vtd_as);

Do we need to rewalk the shadow pgtable (which is the 2nd level, afaict) even
if we got the 1st level pgtable invalidated?



It seems not, and this makes me think we should remove the whole PASID-based 
invalidation logic, since it is for L1, which is not implemented in 
this series.






+}
+}
  }

The rest looks mostly good to me; thanks.



Thanks

Re: [PATCH 2/3] intel-iommu: drop VTDBus

2022-01-13 Thread Jason Wang




在 2022/1/13 下午12:12, Peter Xu 写道:

On Wed, Jan 05, 2022 at 12:19:44PM +0800, Jason Wang wrote:

We introduced the VTDBus structure as an intermediate step for searching
the address space. This works well with SID based matching/lookup. But
when we want to support SID plus PASID based address space lookup,
this intermediate step turns out to be a burden. So the patch simply
drops the VTDBus structure and uses the PCIBus and devfn as the key for
the g_hash_table(). This simplifies the code and the future PASID
extension.

This may be slightly slower for the vtd_find_as_from_bus_num() callers
but since they are all called from the control path, we can afford it.

The only one I found that may get affected the most is
vtd_process_device_iotlb_desc(); the rest mostly seem to always traverse all the
address spaces anyway, so it shouldn't hurt.

I think dev-iotlb can be invalidated in IO path too when kernel device drivers
are used?  It shouldn't affect much when the VM has a few devices, but IIUC
it'll further slow down the kernel drivers on vIOMMU.  Maybe it's not a huge
problem either.



Maybe we can keep maintaining a cache to speed up the search 
for NO_PASID.






Signed-off-by: Jason Wang 
---
  hw/i386/intel_iommu.c | 183 +-
  include/hw/i386/intel_iommu.h |  10 +-
  2 files changed, 69 insertions(+), 124 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f2c7a23712..58c682097b 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -61,6 +61,11 @@
  } 
\
  }
  
+struct vtd_as_key {

+PCIBus *bus;
+uint8_t devfn;
+};
+
  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
  
@@ -190,12 +195,18 @@ static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)

  /* GHashTable functions */
  static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
  {
-return *((const uint64_t *)v1) == *((const uint64_t *)v2);
+const struct vtd_as_key *key1 = v1;
+const struct vtd_as_key *key2 = v2;
+
+return (key1->bus == key2->bus) && (key1->devfn == key2->devfn);
  }
  
  static guint vtd_uint64_hash(gconstpointer v)

  {
-return (guint)*(const uint64_t *)v;
+const struct vtd_as_key *key = v;
+guint value = (guint)(uintptr_t)key->bus;
+
+return (guint)(value << 8 | key->devfn);

Note that value is a pointer to PCIBus*.  Just want to check with you that it's
intended to use this hash value (or maybe you wanted to use Source ID so it is
bus number to use not the bus pointer)?



Right, SID should be used here.





  }
  
  static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,

@@ -236,22 +247,14 @@ static gboolean vtd_hash_remove_by_page(gpointer key, 
gpointer value,
  static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
  {
  VTDAddressSpace *vtd_as;
-VTDBus *vtd_bus;
-GHashTableIter bus_it;
-uint32_t devfn_it;
+GHashTableIter as_it;
  
  trace_vtd_context_cache_reset();
  
-g_hash_table_iter_init(_it, s->vtd_as_by_busptr);

+g_hash_table_iter_init(_it, s->vtd_as);
  
-while (g_hash_table_iter_next (_it, NULL, (void**)_bus)) {

-for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
-vtd_as = vtd_bus->dev_as[devfn_it];
-if (!vtd_as) {
-continue;
-}
-vtd_as->context_cache_entry.context_cache_gen = 0;
-}
+while (g_hash_table_iter_next (_it, NULL, (void**)_as)) {
+vtd_as->context_cache_entry.context_cache_gen = 0;
  }
  s->context_cache_gen = 1;
  }
@@ -986,32 +989,6 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, 
uint32_t level)
  return slpte & rsvd_mask;
  }
  
-/* Find the VTD address space associated with a given bus number */

-static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
-{
-VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
-GHashTableIter iter;
-
-if (vtd_bus) {
-return vtd_bus;
-}
-
-/*
- * Iterate over the registered buses to find the one which
- * currently holds this bus number and update the bus_num
- * lookup table.
- */
-g_hash_table_iter_init(, s->vtd_as_by_busptr);
-while (g_hash_table_iter_next(, NULL, (void **)_bus)) {
-if (pci_bus_num(vtd_bus->bus) == bus_num) {
-s->vtd_as_by_bus_num[bus_num] = vtd_bus;
-return vtd_bus;
-}
-}
-
-return NULL;
-}
-
  /* Given the @iova, get relevant @slptep. @slpte_level will be the last level
   * of the translation, can be used for deciding the size of large page.
   */
@@ -1604,18 +1581,12 @@ static bool vtd_switch_address_space(VTDAddressSpace 
*as)
  
  static void vtd_switch_address_space_all(IntelIOMMUState *s)

  {
+VTDAddressSpace *vtd_as;

[PATCH v3 3/3] target/riscv: add support for svpbmt extension

2022-01-13 Thread Weiwei Li

It uses two PTE bits, but otherwise has no effect on QEMU, since QEMU is 
sequentially consistent and doesn't model PMAs currently

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
Tested-by: Heiko Stuebner 
---
 target/riscv/cpu.c| 1 +
 target/riscv/cpu.h| 1 +
 target/riscv/cpu_bits.h   | 3 +++
 target/riscv/cpu_helper.c | 9 -
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 45ac98e06b..4f82bd00a3 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -670,6 +670,7 @@ static Property riscv_cpu_properties[] = {
 
 DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
 DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
+DEFINE_PROP_BOOL("svpbmt", RISCVCPU, cfg.ext_svpbmt, false),
 
 DEFINE_PROP_BOOL("zba", RISCVCPU, cfg.ext_zba, true),
 DEFINE_PROP_BOOL("zbb", RISCVCPU, cfg.ext_zbb, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index c3d1845ca1..53f314c752 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -329,6 +329,7 @@ struct RISCVCPU {
 bool ext_icsr;
 bool ext_svinval;
 bool ext_svnapot;
+bool ext_svpbmt;
 bool ext_zfh;
 bool ext_zfhmin;
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index bc23e3b523..ee294c1d0b 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -486,7 +486,10 @@ typedef enum {
 #define PTE_A   0x040 /* Accessed */
 #define PTE_D   0x080 /* Dirty */
 #define PTE_SOFT0x300 /* Reserved for Software */
+#define PTE_RSVD0x1FC0 /* Reserved for future use */
+#define PTE_PBMT0x6000 /* Page-based memory types */
 #define PTE_N   0x8000 /* NAPOT translation */
+#define PTE_ATTR0xFFC0 /* All attributes bits */
 
 /* Page table PPN shift amount */
 #define PTE_PPN_SHIFT   10
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 58ab85bca3..f90766e026 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -619,16 +619,23 @@ restart:
 return TRANSLATE_FAIL;
 }
 
-hwaddr ppn = (pte & ~(target_ulong)PTE_N) >> PTE_PPN_SHIFT;
+hwaddr ppn = (pte & ~(target_ulong)PTE_ATTR) >> PTE_PPN_SHIFT;
 
 RISCVCPU *cpu = env_archcpu(env);
 if (!cpu->cfg.ext_svnapot && (pte & PTE_N)) {
 return TRANSLATE_FAIL;
+} else if (!cpu->cfg.ext_svpbmt && (pte & PTE_PBMT)) {
+return TRANSLATE_FAIL;
+} else if (pte & PTE_RSVD) {
+return TRANSLATE_FAIL;
 } else if (!(pte & PTE_V)) {
 /* Invalid PTE */
 return TRANSLATE_FAIL;
 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
 /* Inner PTE, continue walking */
+if (pte & (PTE_D | PTE_A | PTE_U | PTE_N | PTE_PBMT)) {
+return TRANSLATE_FAIL;
+}
 base = ppn << PGSHIFT;
 } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
 /* Reserved leaf PTE flags: PTE_W */
-- 
2.17.1

[PATCH v3 2/3] target/riscv: add support for svinval extension

2022-01-13 Thread Weiwei Li

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/cpu.c  |  1 +
 target/riscv/cpu.h  |  1 +
 target/riscv/insn32.decode  |  7 ++
 target/riscv/insn_trans/trans_svinval.c.inc | 75 +
 target/riscv/translate.c|  1 +
 5 files changed, 85 insertions(+)
 create mode 100644 target/riscv/insn_trans/trans_svinval.c.inc

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index ff6c86c85b..45ac98e06b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -668,6 +668,7 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
 DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
 
+DEFINE_PROP_BOOL("svinval", RISCVCPU, cfg.ext_svinval, false),
 DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
 
 DEFINE_PROP_BOOL("zba", RISCVCPU, cfg.ext_zba, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index d3d17cde82..c3d1845ca1 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -327,6 +327,7 @@ struct RISCVCPU {
 bool ext_counters;
 bool ext_ifencei;
 bool ext_icsr;
+bool ext_svinval;
 bool ext_svnapot;
 bool ext_zfh;
 bool ext_zfhmin;
diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode
index 5bbedc254c..7a0351fde2 100644
--- a/target/riscv/insn32.decode
+++ b/target/riscv/insn32.decode
@@ -809,3 +809,10 @@ fcvt_l_h   1100010  00010 . ... . 1010011 @r2_rm
 fcvt_lu_h  1100010  00011 . ... . 1010011 @r2_rm
 fcvt_h_l   1101010  00010 . ... . 1010011 @r2_rm
 fcvt_h_lu  1101010  00011 . ... . 1010011 @r2_rm
+
+# *** Svinval Standard Extension ***
+sinval_vma0001011 . . 000 0 1110011 @sfence_vma
+sfence_w_inval0001100 0 0 000 0 1110011
+sfence_inval_ir   0001100 1 0 000 0 1110011
+hinval_vvma   0011011 . . 000 0 1110011 @hfence_vvma
+hinval_gvma   0111011 . . 000 0 1110011 @hfence_gvma
diff --git a/target/riscv/insn_trans/trans_svinval.c.inc 
b/target/riscv/insn_trans/trans_svinval.c.inc
new file mode 100644
index 00..1dde665661
--- /dev/null
+++ b/target/riscv/insn_trans/trans_svinval.c.inc
@@ -0,0 +1,75 @@
+/*
+ * RISC-V translation routines for the Svinval Standard Instruction Set.
+ *
+ * Copyright (c) 2020-2021 PLCT lab
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see .
+ */
+
+#define REQUIRE_SVINVAL(ctx) do {\
+if (!RISCV_CPU(ctx->cs)->cfg.ext_svinval) {  \
+return false;\
+}\
+} while (0)
+
+static bool trans_sinval_vma(DisasContext *ctx, arg_sinval_vma *a)
+{
+REQUIRE_SVINVAL(ctx);
+/* Do the same as sfence.vma currently */
+REQUIRE_EXT(ctx, RVS);
+#ifndef CONFIG_USER_ONLY
+gen_helper_tlb_flush(cpu_env);
+return true;
+#endif
+return false;
+}
+
+static bool trans_sfence_w_inval(DisasContext *ctx, arg_sfence_w_inval *a)
+{
+REQUIRE_SVINVAL(ctx);
+REQUIRE_EXT(ctx, RVS);
+/* Do nothing currently */
+return true;
+}
+
+static bool trans_sfence_inval_ir(DisasContext *ctx, arg_sfence_inval_ir *a)
+{
+REQUIRE_SVINVAL(ctx);
+REQUIRE_EXT(ctx, RVS);
+/* Do nothing currently */
+return true;
+}
+
+static bool trans_hinval_vvma(DisasContext *ctx, arg_hinval_vvma *a)
+{
+REQUIRE_SVINVAL(ctx);
+/* Do the same as hfence.vvma currently */
+REQUIRE_EXT(ctx, RVH);
+#ifndef CONFIG_USER_ONLY
+gen_helper_hyp_tlb_flush(cpu_env);
+return true;
+#endif
+return false;
+}
+
+static bool trans_hinval_gvma(DisasContext *ctx, arg_hinval_gvma *a)
+{
+REQUIRE_SVINVAL(ctx);
+/* Do the same as hfence.gvma currently */
+REQUIRE_EXT(ctx, RVH);
+#ifndef CONFIG_USER_ONLY
+gen_helper_hyp_gvma_tlb_flush(cpu_env);
+return true;
+#endif
+return false;
+}
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 615048ec87..4e5a9660a4 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -838,6 +838,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, 
target_ulong pc)
 #include "insn_trans/trans_rvb.c.inc"
 #include "insn_trans/trans_rvzfh.c.inc"
 #include "insn_trans/trans_privileged.c.inc"
+#include "insn_trans/trans_svinval.c.inc"
 
 /*

[PATCH v3 1/3] target/riscv: add support for svnapot extension

2022-01-13 Thread Weiwei Li

Signed-off-by: Weiwei Li 
Signed-off-by: Junqiang Wang 
---
 target/riscv/cpu.c|  2 ++
 target/riscv/cpu.h|  1 +
 target/riscv/cpu_bits.h   |  1 +
 target/riscv/cpu_helper.c | 20 
 4 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9bc25d3055..ff6c86c85b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -668,6 +668,8 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_UINT16("vlen", RISCVCPU, cfg.vlen, 128),
 DEFINE_PROP_UINT16("elen", RISCVCPU, cfg.elen, 64),
 
+DEFINE_PROP_BOOL("svnapot", RISCVCPU, cfg.ext_svnapot, false),
+
 DEFINE_PROP_BOOL("zba", RISCVCPU, cfg.ext_zba, true),
 DEFINE_PROP_BOOL("zbb", RISCVCPU, cfg.ext_zbb, true),
 DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 4d63086765..d3d17cde82 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -327,6 +327,7 @@ struct RISCVCPU {
 bool ext_counters;
 bool ext_ifencei;
 bool ext_icsr;
+bool ext_svnapot;
 bool ext_zfh;
 bool ext_zfhmin;
 
diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
index 5a6d49aa64..bc23e3b523 100644
--- a/target/riscv/cpu_bits.h
+++ b/target/riscv/cpu_bits.h
@@ -486,6 +486,7 @@ typedef enum {
 #define PTE_A   0x040 /* Accessed */
 #define PTE_D   0x080 /* Dirty */
 #define PTE_SOFT0x300 /* Reserved for Software */
+#define PTE_N   0x8000 /* NAPOT translation */
 
 /* Page table PPN shift amount */
 #define PTE_PPN_SHIFT   10
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 434a83e66a..58ab85bca3 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -619,9 +619,12 @@ restart:
 return TRANSLATE_FAIL;
 }
 
-hwaddr ppn = pte >> PTE_PPN_SHIFT;
+hwaddr ppn = (pte & ~(target_ulong)PTE_N) >> PTE_PPN_SHIFT;
 
-if (!(pte & PTE_V)) {
+RISCVCPU *cpu = env_archcpu(env);
+if (!cpu->cfg.ext_svnapot && (pte & PTE_N)) {
+return TRANSLATE_FAIL;
+} else if (!(pte & PTE_V)) {
 /* Invalid PTE */
 return TRANSLATE_FAIL;
 } else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
@@ -699,8 +702,17 @@ restart:
 /* for superpage mappings, make a fake leaf PTE for the TLB's
benefit. */
 target_ulong vpn = addr >> PGSHIFT;
-*physical = ((ppn | (vpn & ((1L << ptshift) - 1))) << PGSHIFT) |
-(addr & ~TARGET_PAGE_MASK);
+
+int napot_bits = ((pte & PTE_N) ? (ctzl(ppn) + 1) : 0);
+if (((pte & PTE_N) && ((ppn == 0) || (i != (levels - 1 ||
+(napot_bits != 0 && napot_bits != 4)) {
+return TRANSLATE_FAIL;
+}
+
+*physical = (((ppn & ~(((target_ulong)1 << napot_bits) - 1)) |
+  (vpn & (((target_ulong)1 << napot_bits) - 1)) |
+  (vpn & (((target_ulong)1 << ptshift) - 1))
+) << PGSHIFT) | (addr & ~TARGET_PAGE_MASK);
 
 /* set permissions on the TLB entry */
 if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) {
-- 
2.17.1

[PATCH v3 0/3] support subsets of virtual memory extension

2022-01-13 Thread Weiwei Li

This patchset implements virtual memory related RISC-V extensions: Svnapot 
version 1.0, Svinval version 1.0, Svpbmt version 1.0.

Specification:
https://github.com/riscv/virtual-memory/tree/main/specs

The port is available here:
https://github.com/plctlab/plct-qemu/tree/plct-virtmem-upstream-v3

To test this implementation, specify cpu argument with 
'svinval=true,svnapot=true,svpbmt=true'.

This implementation can pass the riscv-tests for rv64ssvnapot.

v3:
* drop "x-" in exposed properties

v2:
* add extension check for svnapot and svpbmt

Weiwei Li (3):
  target/riscv: add support for svnapot extension
  target/riscv: add support for svinval extension
  target/riscv: add support for svpbmt extension

 target/riscv/cpu.c  |  4 ++
 target/riscv/cpu.h  |  3 +
 target/riscv/cpu_bits.h |  4 ++
 target/riscv/cpu_helper.c   | 27 ++--
 target/riscv/insn32.decode  |  7 ++
 target/riscv/insn_trans/trans_svinval.c.inc | 75 +
 target/riscv/translate.c|  1 +
 7 files changed, 117 insertions(+), 4 deletions(-)
 create mode 100644 target/riscv/insn_trans/trans_svinval.c.inc

-- 
2.17.1

Re: [PATCH v10 2/3] cpu-throttle: implement virtual CPU throttle

2022-01-13 Thread Hyman Huang





在 2022/1/14 0:22, Markus Armbruster 写道:

Peter Xu  writes:


On Fri, Dec 31, 2021 at 12:36:40AM +0800, Hyman Huang wrote:

+struct {
+DirtyLimitState *states;
+int max_cpus;
+unsigned long *bmap; /* running thread bitmap */
+unsigned long nr;
+QemuThread thread;
+} *dirtylimit_state;
+
+static bool dirtylimit_quit = true;


Again, I think "quit" is not a good wording to show "whether dirtylimit is in
service".  How about "dirtylimit_global_enabled"?

You can actually use "dirtylimit_state" to show whether it's enabled already
(then drop the global value) since it's a pointer.  It shouldn't need to be
init-once-for-all, but we can alloc the structure when dirty limit is enabled
globally, and destroy it (and reset it to NULL) when globally disabled.

Then "whether it's enabled" is simply to check "!!dirtylimit_state" under BQL.

Yes, checking the pointer is fairly straightforward, but since the dirtylimit
thread also accesses dirtylimit_state when doing the limit, if we free
dirtylimit_state after the last limited vcpu is canceled, the dirtylimit thread
would crash when dereferencing the null pointer. So this method turns out to
require a mutex lock to protect dirtylimit_state; compared with a qatomic
operation, which is better?


I don't see much difference here on using either atomic or mutex, because it's
not a hot path.


Quick interjection without having bothered to understand the details:
correct use of atomics and memory barriers is *much* harder than correct
use of locks.  Stick to locks unless you *know* they impair performance.

Ok, I get it. I removed most of the atomic operations in v11 and use the
lock instead. But still, thanks for the advice :)

[...]



--
Best regard

Hyman Huang(黄勇)

[PATCH] fdc: check for illegal dma length calculation

2022-01-13 Thread Jon Maloy

The function fdctrl_start_transfer() calculates the dma data length
wrongly when certain boundary conditions are fulfilled. We have
noticed that if ((fdctrl->fifo[5] - fdctrl->fifo[6]) > 1) we get
a dma length that will be interpreted as negative by the next function
in the chain, fdctrl_transfer_handler(). This leads to a crash.

Rather than trying to fix this obscure calculation, we just check if
the harmful condition is fulfilled, and return without action if that
is the case. Since this is a condition that can only be created by a
malicious user we deem this solution safe.

This fix is intended to address CVE-2021-3507.

Signed-off-by: Jon Maloy 
---
 hw/block/fdc.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 21d18ac2e3..80a1f1750a 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -1532,6 +1532,11 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int 
direction)
 if (fdctrl->fifo[0] & 0x80)
 tmp += fdctrl->fifo[6];
 fdctrl->data_len *= tmp;
+if (tmp < 0) {
+FLOPPY_DPRINTF("calculated illegal data_len=%u, tmp=%i\n",
+   fdctrl->data_len, tmp);
+return;
+}
 }
 fdctrl->eot = fdctrl->fifo[6];
 if (fdctrl->dor & FD_DOR_DMAEN) {
-- 
2.31.1

[PATCH 2/2] qemu-timer: Skip empty timer lists before locking in qemu_clock_deadline_ns_all

2022-01-13 Thread Idan Horowitz

This decreases qemu_clock_deadline_ns_all's share from 23.2% to 13% in a
profile of icount-enabled aarch64-softmmu.

Signed-off-by: Idan Horowitz 
---
 util/qemu-timer.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index f36c75e594..e56895ef55 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -261,6 +261,9 @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int 
attr_mask)
 }
 
 QLIST_FOREACH(timer_list, >timerlists, list) {
+if (!qatomic_read(_list->active_timers)) {
+continue;
+}
 qemu_mutex_lock(_list->active_timers_lock);
 ts = timer_list->active_timers;
 /* Skip all external timers */
-- 
2.34.1

[PATCH] target/arm: Allow only specific instructions based on the SCTLR_EL1.UCI bit

2022-01-13 Thread Idan Horowitz

The SCTLR_EL1.UCI bit only affects a subset of cache maintenance
instructions as specified by the specification. Any other cache
maintenance instructions must still be trapped from EL0.

Signed-off-by: Idan Horowitz 
---
 target/arm/helper.c | 68 ++---
 1 file changed, 52 insertions(+), 16 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index cfca0f5ba6..ac75a268aa 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -4217,7 +4217,7 @@ static const ARMCPRegInfo ssbs_reginfo = {
 .readfn = aa64_ssbs_read, .writefn = aa64_ssbs_write
 };
 
-static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
+static CPAccessResult aa64_cacheop_poc_uci_access(CPUARMState *env,
   const ARMCPRegInfo *ri,
   bool isread)
 {
@@ -4239,7 +4239,7 @@ static CPAccessResult aa64_cacheop_poc_access(CPUARMState 
*env,
 return CP_ACCESS_OK;
 }
 
-static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env,
+static CPAccessResult aa64_cacheop_pou_uci_access(CPUARMState *env,
   const ARMCPRegInfo *ri,
   bool isread)
 {
@@ -4261,6 +4261,42 @@ static CPAccessResult 
aa64_cacheop_pou_access(CPUARMState *env,
 return CP_ACCESS_OK;
 }
 
+static CPAccessResult aa64_cacheop_poc_access(CPUARMState *env,
+  const ARMCPRegInfo *ri,
+  bool isread)
+{
+/* Cache invalidate/clean to Point of Coherency or Persistence...  */
+switch (arm_current_el(env)) {
+case 0:
+return CP_ACCESS_TRAP;
+case 1:
+/* ... EL1 must trap to EL2 if HCR_EL2.TPCP is set.  */
+if (arm_hcr_el2_eff(env) & HCR_TPCP) {
+return CP_ACCESS_TRAP_EL2;
+}
+break;
+}
+return CP_ACCESS_OK;
+}
+
+static CPAccessResult aa64_cacheop_pou_access(CPUARMState *env,
+  const ARMCPRegInfo *ri,
+  bool isread)
+{
+/* Cache invalidate/clean to Point of Unification... */
+switch (arm_current_el(env)) {
+case 0:
+return CP_ACCESS_TRAP;
+case 1:
+/* ... EL1 must trap to EL2 if HCR_EL2.TPU is set.  */
+if (arm_hcr_el2_eff(env) & HCR_TPU) {
+return CP_ACCESS_TRAP_EL2;
+}
+break;
+}
+return CP_ACCESS_OK;
+}
+
 /* See: D4.7.2 TLB maintenance requirements and the TLB maintenance 
instructions
  * Page D4-1736 (DDI0487A.b)
  */
@@ -4846,7 +4882,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
 { .name = "IC_IVAU", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 5, .opc2 = 1,
   .access = PL0_W, .type = ARM_CP_NOP,
-  .accessfn = aa64_cacheop_pou_access },
+  .accessfn = aa64_cacheop_pou_uci_access },
 { .name = "DC_IVAC", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 6, .opc2 = 1,
   .access = PL1_W, .accessfn = aa64_cacheop_poc_access,
@@ -4857,18 +4893,18 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
 { .name = "DC_CVAC", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 10, .opc2 = 1,
   .access = PL0_W, .type = ARM_CP_NOP,
-  .accessfn = aa64_cacheop_poc_access },
+  .accessfn = aa64_cacheop_poc_uci_access },
 { .name = "DC_CSW", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 10, .opc2 = 2,
   .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
 { .name = "DC_CVAU", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 11, .opc2 = 1,
   .access = PL0_W, .type = ARM_CP_NOP,
-  .accessfn = aa64_cacheop_pou_access },
+  .accessfn = aa64_cacheop_pou_uci_access },
 { .name = "DC_CIVAC", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 14, .opc2 = 1,
   .access = PL0_W, .type = ARM_CP_NOP,
-  .accessfn = aa64_cacheop_poc_access },
+  .accessfn = aa64_cacheop_poc_uci_access },
 { .name = "DC_CISW", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 0, .crn = 7, .crm = 14, .opc2 = 2,
   .access = PL1_W, .accessfn = access_tsw, .type = ARM_CP_NOP },
@@ -7102,7 +7138,7 @@ static const ARMCPRegInfo dcpop_reg[] = {
 { .name = "DC_CVAP", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 12, .opc2 = 1,
   .access = PL0_W, .type = ARM_CP_NO_RAW | ARM_CP_SUPPRESS_TB_END,
-  .accessfn = aa64_cacheop_poc_access, .writefn = dccvap_writefn },
+  .accessfn = aa64_cacheop_poc_uci_access, .writefn = dccvap_writefn },
 REGINFO_SENTINEL
 };
 
@@ -7110,7 +7146,7 @@ static const ARMCPRegInfo dcpodp_reg[] = {
 { .name = "DC_CVADP", .state = ARM_CP_STATE_AA64,
   .opc0 = 1, .opc1 = 3, .crn = 7, .crm = 13, .opc2 = 1,
   .access =

[PATCH 1/2] softmmu/cpus: Check if the cpu work list is empty atomically

2022-01-13 Thread Idan Horowitz

Instead of taking the lock of the cpu work list in order to check if it's
empty, we can just read the head pointer atomically. This decreases
cpu_work_list_empty's share from 5% to 1.3% in a profile of icount-enabled
aarch64-softmmu.

Signed-off-by: Idan Horowitz 
---
 softmmu/cpus.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 23bca46b07..035395ae13 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -73,12 +73,7 @@ bool cpu_is_stopped(CPUState *cpu)
 
 bool cpu_work_list_empty(CPUState *cpu)
 {
-bool ret;
-
-qemu_mutex_lock(>work_mutex);
-ret = QSIMPLEQ_EMPTY(>work_list);
-qemu_mutex_unlock(>work_mutex);
-return ret;
+return QSIMPLEQ_EMPTY_ATOMIC(>work_list);
 }
 
 bool cpu_thread_is_idle(CPUState *cpu)
-- 
2.34.1

Re: ppc pbr403 vmstate

2022-01-13 Thread David Gibson

On Thu, Jan 13, 2022 at 06:51:56PM +, Dr. David Alan Gilbert wrote:
> Hi,
>   Is there any easy way of getting a machine where the pbr403 vmstate
> would be generated?

The condition in pbr403_needed is...

return (pvr & 0xffff0000) == 0x00200000;

.. which looks to be the PVR for ppc403 models.  That makes sense with
the section name... but not so much with the fact that it's under
cpu/tlb6xx.  The 6xx MMU is basically unrelated to the 40x MMU.  But
it looks like the vmstate_tlbemb might be shared between them, because
of bad ideas of the past.

But in any case, we already dropped what little 403 support we ever
had - there's nothing with that PVR even listed in
target/ppc/cpu-models.h.

So I think we should just drop it.

>   Given my vague understanding of vmstate subsection naming, I think
> we need:
> 
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 756d8de5d8..e535edb7c4 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -718,7 +718,7 @@ static bool pbr403_needed(void *opaque)
>  }
>  
>  static const VMStateDescription vmstate_pbr403 = {
> -.name = "cpu/pbr403",
> +.name = "cpu/tlb6xx/pbr403",
>  .version_id = 1,
>  .minimum_version_id = 1,
>  .needed = pbr403_needed,
> 
> to fit the rule where the name of a subsection is prefixed
> by the parent name. (Something a new check I added just triggered).
> 
> Dave
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

signature.asc
Description: PGP signature

Re: [PATCH 21/30] bsd-user/signal.c: force_sig

2022-01-13 Thread Kyle Evans

On Thu, Jan 13, 2022 at 2:53 PM Peter Maydell  wrote:
>
> On Thu, 13 Jan 2022 at 20:29, Peter Maydell  wrote:
> >
> > On Sun, 9 Jan 2022 at 16:44, Warner Losh  wrote:
> > >
> > > Force delivering a signal and generating a core file.
>
> > > +/* Abort execution with signal. */
> > > +void QEMU_NORETURN force_sig(int target_sig)
> >
> > In linux-user we call this dump_core_and_abort(), which is
> > a name that better describes what it's actually doing.
> >
> > (Today's linux-user's force_sig() does what the Linux kernel's
> > function of that name does -- it's a wrapper around
> > queue_signal() which delivers a signal to the guest with
> > .si_code = SI_KERNEL , si_pid = si_uid = 0.
> > Whether you want one of those or not depends on what BSD
> > kernels do in that kind of "we have to kill this process"
> > situation.)
>
> It looks like the FreeBSD kernel uses sigexit() as its equivalent
> function to Linux's force_sig(), incidentally. Not sure if
> you/we would prefer the bsd-user code to follow the naming that
> FreeBSD's kernel uses or the naming linux-user takes from
> the Linux kernel.
>

My $.02: let's go with linux-inherited linux-user names and drop in a
comment with the FreeBSD name, if they're functionally similar enough
(in general, not just for this specific case). My gut feeling is that
it'll be more useful in the long run if we can more quickly identify
parallels between the two, so changes affecting linux-user that may
benefit bsd-user are more easily identified and exchanged (and
vice-versa).

Thanks,

Kyle Evans

Re: [PATCH] Mark remaining global TypeInfo instances as const

2022-01-13 Thread Alistair Francis

On Fri, Jan 14, 2022 at 3:33 AM Bernhard Beschow  wrote:
>
> More than 1k of TypeInfo instances are already marked as const. Mark the
> remaining ones, too.
>
> Signed-off-by: Bernhard Beschow 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  hw/core/generic-loader.c   | 2 +-
>  hw/core/guest-loader.c | 2 +-
>  hw/display/bcm2835_fb.c| 2 +-
>  hw/display/i2c-ddc.c   | 2 +-
>  hw/display/macfb.c | 4 ++--
>  hw/display/virtio-vga.c| 2 +-
>  hw/dma/bcm2835_dma.c   | 2 +-
>  hw/i386/pc_piix.c  | 2 +-
>  hw/i386/sgx-epc.c  | 2 +-
>  hw/intc/bcm2835_ic.c   | 2 +-
>  hw/intc/bcm2836_control.c  | 2 +-
>  hw/ipmi/ipmi.c | 4 ++--
>  hw/mem/nvdimm.c| 2 +-
>  hw/mem/pc-dimm.c   | 2 +-
>  hw/misc/bcm2835_mbox.c | 2 +-
>  hw/misc/bcm2835_powermgt.c | 2 +-
>  hw/misc/bcm2835_property.c | 2 +-
>  hw/misc/bcm2835_rng.c  | 2 +-
>  hw/misc/pvpanic-isa.c  | 2 +-
>  hw/misc/pvpanic-pci.c  | 2 +-
>  hw/net/fsl_etsec/etsec.c   | 2 +-
>  hw/ppc/prep_systemio.c | 2 +-
>  hw/ppc/spapr_iommu.c   | 2 +-
>  hw/s390x/s390-pci-bus.c| 2 +-
>  hw/s390x/sclp.c| 2 +-
>  hw/s390x/tod-kvm.c | 2 +-
>  hw/s390x/tod-tcg.c | 2 +-
>  hw/s390x/tod.c | 2 +-
>  hw/scsi/lsi53c895a.c   | 2 +-
>  hw/sd/allwinner-sdhost.c   | 2 +-
>  hw/sd/aspeed_sdhci.c   | 2 +-
>  hw/sd/bcm2835_sdhost.c | 2 +-
>  hw/sd/cadence_sdhci.c  | 2 +-
>  hw/sd/npcm7xx_sdhci.c  | 2 +-
>  hw/usb/dev-mtp.c   | 2 +-
>  hw/usb/host-libusb.c   | 2 +-
>  hw/vfio/igd.c  | 2 +-
>  hw/virtio/virtio-pmem.c| 2 +-
>  qom/object.c   | 4 ++--
>  39 files changed, 42 insertions(+), 42 deletions(-)
>
> diff --git a/hw/core/generic-loader.c b/hw/core/generic-loader.c
> index 9a24ffb880..eaafc416f4 100644
> --- a/hw/core/generic-loader.c
> +++ b/hw/core/generic-loader.c
> @@ -207,7 +207,7 @@ static void generic_loader_class_init(ObjectClass *klass, 
> void *data)
>  set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>  }
>
> -static TypeInfo generic_loader_info = {
> +static const TypeInfo generic_loader_info = {
>  .name = TYPE_GENERIC_LOADER,
>  .parent = TYPE_DEVICE,
>  .instance_size = sizeof(GenericLoaderState),
> diff --git a/hw/core/guest-loader.c b/hw/core/guest-loader.c
> index d3f9d1a06e..391c875a29 100644
> --- a/hw/core/guest-loader.c
> +++ b/hw/core/guest-loader.c
> @@ -129,7 +129,7 @@ static void guest_loader_class_init(ObjectClass *klass, 
> void *data)
>  set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>  }
>
> -static TypeInfo guest_loader_info = {
> +static const TypeInfo guest_loader_info = {
>  .name = TYPE_GUEST_LOADER,
>  .parent = TYPE_DEVICE,
>  .instance_size = sizeof(GuestLoaderState),
> diff --git a/hw/display/bcm2835_fb.c b/hw/display/bcm2835_fb.c
> index 2be77bdd3a..088fc3d51c 100644
> --- a/hw/display/bcm2835_fb.c
> +++ b/hw/display/bcm2835_fb.c
> @@ -454,7 +454,7 @@ static void bcm2835_fb_class_init(ObjectClass *klass, 
> void *data)
>  dc->vmsd = _bcm2835_fb;
>  }
>
> -static TypeInfo bcm2835_fb_info = {
> +static const TypeInfo bcm2835_fb_info = {
>  .name  = TYPE_BCM2835_FB,
>  .parent= TYPE_SYS_BUS_DEVICE,
>  .instance_size = sizeof(BCM2835FBState),
> diff --git a/hw/display/i2c-ddc.c b/hw/display/i2c-ddc.c
> index 13eb529fc1..146489518c 100644
> --- a/hw/display/i2c-ddc.c
> +++ b/hw/display/i2c-ddc.c
> @@ -113,7 +113,7 @@ static void i2c_ddc_class_init(ObjectClass *oc, void 
> *data)
>  isc->send = i2c_ddc_tx;
>  }
>
> -static TypeInfo i2c_ddc_info = {
> +static const TypeInfo i2c_ddc_info = {
>  .name = TYPE_I2CDDC,
>  .parent = TYPE_I2C_SLAVE,
>  .instance_size = sizeof(I2CDDCState),
> diff --git a/hw/display/macfb.c b/hw/display/macfb.c
> index 4bd7c3ad6a..69c2ea2b6e 100644
> --- a/hw/display/macfb.c
> +++ b/hw/display/macfb.c
> @@ -783,14 +783,14 @@ static void macfb_nubus_class_init(ObjectClass *klass, 
> void *data)
>  device_class_set_props(dc, macfb_nubus_properties);
>  }
>
> -static TypeInfo macfb_sysbus_info = {
> +static const TypeInfo macfb_sysbus_info = {
>  .name  = TYPE_MACFB,
>  .parent= TYPE_SYS_BUS_DEVICE,
>  .instance_size = sizeof(MacfbSysBusState),
>  .class_init= macfb_sysbus_class_init,
>  };
>
> -static TypeInfo macfb_nubus_info = {
> +static const TypeInfo macfb_nubus_info = {
>  .name  = TYPE_NUBUS_MACFB,
>  .parent= TYPE_NUBUS_DEVICE,
>  .instance_size = sizeof(MacfbNubusState),
> diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
> index b23a75a04b..5a2f7a4540 100644
> --- a/hw/display/virtio-vga.c
> +++ b/hw/display/virtio-vga.c
> @@ -220,7 +220,7 @@ static void virtio_vga_base_class_init(ObjectClass 
> *klass, void *data)
> virtio_vga_set_big_endian_fb);
>  }
>
> -static TypeInfo virtio_vga_base_info = {
> +static const TypeInfo

Re: [PATCH 21/30] bsd-user/signal.c: force_sig

2022-01-13 Thread Peter Maydell

On Thu, 13 Jan 2022 at 20:29, Peter Maydell  wrote:
>
> On Sun, 9 Jan 2022 at 16:44, Warner Losh  wrote:
> >
> > Force delivering a signal and generating a core file.

> > +/* Abort execution with signal. */
> > +void QEMU_NORETURN force_sig(int target_sig)
>
> In linux-user we call this dump_core_and_abort(), which is
> a name that better describes what it's actually doing.
>
> (Today's linux-user's force_sig() does what the Linux kernel's
> function of that name does -- it's a wrapper around
> queue_signal() which delivers a signal to the guest with
> .si_code = SI_KERNEL , si_pid = si_uid = 0.
> Whether you want one of those or not depends on what BSD
> kernels do in that kind of "we have to kill this process"
> situation.)

It looks like the FreeBSD kernel uses sigexit() as its equivalent
function to Linux's force_sig(), incidentally. Not sure if
you/we would prefer the bsd-user code to follow the naming that
FreeBSD's kernel uses or the naming linux-user takes from
the Linux kernel.

-- PMM

Re: [PATCH v2 3/3] migration: Perform vmsd structure check during tests

2022-01-13 Thread Peter Maydell

On Thu, 13 Jan 2022 at 19:45, Dr. David Alan Gilbert (git)
 wrote:
>
> From: "Dr. David Alan Gilbert" 
>
> Perform a check on vmsd structures during test runs in the hope
> of catching any missing terminators and other simple screwups.
>
> Signed-off-by: Dr. David Alan Gilbert 
> ---
>  migration/savevm.c | 39 +++
>  1 file changed, 39 insertions(+)

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 20/30] bsd-user/signal.c: core_dump_signal

2022-01-13 Thread Peter Maydell

On Thu, 13 Jan 2022 at 20:28, Warner Losh  wrote:
> On Thu, Jan 13, 2022 at 1:22 PM Peter Maydell  
> wrote:
>> Code is fine, but since this is a static function with no callers
>> the compiler is going to emit a warning about that. It's a small
>> function, so the easiest thing is just to squash this into the
>> following patch which is what adds the code that calls it.
>
>
> Sure thing. I'm still trying to get a feel for right-sizing the chunking...
> Since the warning didn't fail the compile, I thought it would be OK,
> but can easily fold this in with the first patch to use it.

Ah yes, we don't currently default-enable -Werror for BSD hosts
in configure (only for Linux and for mingw32). So in this particular
case it doesn't matter much, but we might as well do it the way we would
for code that's not BSD-specific.

thanks
-- PMM

Re: [PATCH v2 1/3] ppc: Fix vmstate_pbr403 subsection name

2022-01-13 Thread Peter Maydell

On Thu, 13 Jan 2022 at 19:45, Dr. David Alan Gilbert (git)
 wrote:
>
> From: "Dr. David Alan Gilbert" 
>
> The pbr403 subsection is part of the tlb6xx state, so I believe its
> name needs to be:
>
> .name = "cpu/tlb6xx/pbr403",
>
> Signed-off-by: Dr. David Alan Gilbert 
> ---
>  target/ppc/machine.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 756d8de5d8..e535edb7c4 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -718,7 +718,7 @@ static bool pbr403_needed(void *opaque)
>  }
>
>  static const VMStateDescription vmstate_pbr403 = {
> -.name = "cpu/pbr403",
> +.name = "cpu/tlb6xx/pbr403",
>  .version_id = 1,
>  .minimum_version_id = 1,
>  .needed = pbr403_needed,
> --
> 2.34.1

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH v2 2/3] migration: Add canary to VMSTATE_END_OF_LIST

2022-01-13 Thread Peter Maydell

On Thu, 13 Jan 2022 at 19:45, Dr. David Alan Gilbert (git)
 wrote:
>
> From: "Dr. David Alan Gilbert" 
>
> We fairly regularly forget VMSTATE_END_OF_LIST markers off descriptions;
> given that the current check is only for ->name being NULL, sometimes
> we get unlucky and the code apparently works and no one spots the error.
>
> Explicitly add a flag, VMS_END that should be set, and assert it is
> set during the traversal.
>
> Note: This can't go in until we update the copy of vmstate.h in slirp.
>
> Suggested-by: Peter Maydell 
> Signed-off-by: Dr. David Alan Gilbert 
> ---

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 3/3] Link new ppc-spapr-hotplug.rst file to pseries.rst.

2022-01-13 Thread Daniel Henrique Barboza





On 1/12/22 10:52, lagar...@linux.ibm.com wrote:

From: Leonardo Garcia 

Signed-off-by: Leonardo Garcia 
---


Reviewed-by: Daniel Henrique Barboza 


  docs/system/ppc/pseries.rst | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/system/ppc/pseries.rst b/docs/system/ppc/pseries.rst
index ead33e6764..1120b21d95 100644
--- a/docs/system/ppc/pseries.rst
+++ b/docs/system/ppc/pseries.rst
@@ -110,13 +110,13 @@ can  also be found in QEMU documentation:
  .. toctree::
 :maxdepth: 1
  
+   ../../specs/ppc-spapr-hotplug.rst

 ../../specs/ppc-spapr-hcalls.rst
 ../../specs/ppc-spapr-numa.rst
 ../../specs/ppc-spapr-xive.rst
  
  Other documentation available in QEMU docs directory:
  
-* Hot plug (``/docs/specs/ppc-spapr-hotplug.txt``).

  * Hypervisor calls needed by the Ultravisor
(``/docs/specs/ppc-spapr-uv-hcalls.txt``).

Re: [PATCH 2/3] docs: Rename ppc-spapr-hotplug.txt to ppc-spapr-hotplug.rst.

2022-01-13 Thread Daniel Henrique Barboza





On 1/12/22 10:52, lagar...@linux.ibm.com wrote:

From: Leonardo Garcia 

Signed-off-by: Leonardo Garcia 
---


Reviewed-by: Daniel Henrique Barboza 


  docs/specs/{ppc-spapr-hotplug.txt => ppc-spapr-hotplug.rst} | 0
  1 file changed, 0 insertions(+), 0 deletions(-)
  rename docs/specs/{ppc-spapr-hotplug.txt => ppc-spapr-hotplug.rst} (100%)

diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.rst
similarity index 100%
rename from docs/specs/ppc-spapr-hotplug.txt
rename to docs/specs/ppc-spapr-hotplug.rst

Re: [PATCH 22/30] bsd-user/signal.c: Fill in queue_signal

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:51, Warner Losh  wrote:
>
> Fill in queue signal implementation, as well as routines to allocate and
> delete elements of the signal queue.

See reply to patch 18 for why you probably don't want to do this.

> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---

> +/*
> + * FreeBSD signals are always queued.  Linux only queues real time 
> signals.
> + * XXX this code is not thread safe.  "What lock protects ts->sigtab?"
> + */

ts->sigtab shouldn't need a lock, because it is per-thread,
like all of TaskState. (The TaskState structure is pointed
to by the CPUState 'opaque' field. CPUStates are per-thread;
the TaskState for a new thread's new CPUState is allocated
and initialized as part of the emulating of whatever the
"create new thread" syscall is. For Linux this is in
do_fork() for the CLONE_VM case. The TaskState for the
initial thread is allocated in main.c.) We do need to deal
with the fact that ts->sigtab can be updated by a signal
handler (which always runs in the thread corresponding to
that guest CPU): the linux-user process_pending_signals()
has been written with that in mind.

thanks
-- PMM

Re: [PATCH 1/3] docs: rSTify ppc-spapr-hotplug.txt.

2022-01-13 Thread Daniel Henrique Barboza





On 1/12/22 10:52, lagar...@linux.ibm.com wrote:

From: Leonardo Garcia 

While working on this file, also removed an unused reference at the end of the 
file. The reference in the text was removed by commit 9f992cca93d (spapr: 
update spapr hotplug documentation), but the link in the end of the document 
was not removed then.

Signed-off-by: Leonardo Garcia 
---


Reviewed-by: Daniel Henrique Barboza 


  docs/specs/ppc-spapr-hotplug.txt | 759 +--
  1 file changed, 430 insertions(+), 329 deletions(-)

diff --git a/docs/specs/ppc-spapr-hotplug.txt b/docs/specs/ppc-spapr-hotplug.txt
index d4fb2d46d9..f84dc55ad9 100644
--- a/docs/specs/ppc-spapr-hotplug.txt
+++ b/docs/specs/ppc-spapr-hotplug.txt
@@ -1,224 +1,316 @@
-= sPAPR Dynamic Reconfiguration =
+=============================
+sPAPR Dynamic Reconfiguration
+=============================
  
-sPAPR/"pseries" guests make use of a facility called dynamic-reconfiguration

-to handle hotplugging of dynamic "physical" resources like PCI cards, or
-"logical"/paravirtual resources like memory, CPUs, and "physical"
+sPAPR or pSeries guests make use of a facility called dynamic reconfiguration
+to handle hot plugging of dynamic "physical" resources like PCI cards, or
+"logical"/para-virtual resources like memory, CPUs, and "physical"
  host-bridges, which are generally managed by the host/hypervisor and provided
-to guests as virtualized resources. The specifics of dynamic-reconfiguration
-are documented extensively in PAPR+ v2.7, Section 13.1. This document
-provides a summary of that information as it applies to the implementation
-within QEMU.
+to guests as virtualized resources. The specifics of dynamic reconfiguration
+are documented extensively in section 13 of the Linux on Power Architecture
+Reference document ([LoPAR]_). This document provides a summary of that
+information as it applies to the implementation within QEMU.
  
-== Dynamic-reconfiguration Connectors ==

+Dynamic-reconfiguration Connectors
+==================================
  
-To manage hotplug/unplug of these resources, a firmware abstraction known as

+To manage hot plug/unplug of these resources, a firmware abstraction known as
  a Dynamic Resource Connector (DRC) is used to assign a particular dynamic
  resource to the guest, and provide an interface for the guest to manage
  configuration/removal of the resource associated with it.
  
-== Device-tree description of DRCs ==

+Device tree description of DRCs
+===============================
  
-A set of 4 Open Firmware device tree array properties are used to describe

+A set of four Open Firmware device tree array properties are used to describe
  the name/index/power-domain/type of each DRC allocated to a guest at
-boot-time. There may be multiple sets of these arrays, rooted at different
+boot time. There may be multiple sets of these arrays, rooted at different
  paths in the device tree depending on the type of resource the DRCs manage.
  
  In some cases, the DRCs themselves may be provided by a dynamic resource,

-such as the DRCs managing PCI slots on a hotplugged PHB. In this case the
+such as the DRCs managing PCI slots on a hot plugged PHB. In this case the
  arrays would be fetched as part of the device tree retrieval interfaces
-for hotplugged resources described under "Guest->Host interface".
+for hot plugged resources described under :ref:`guest-host-interface`.
  
  The array properties are described below. Each entry/element in an array

  describes the DRC identified by the element in the corresponding position
-of ibm,drc-indexes:
-
-ibm,drc-names:
-  first 4-bytes: BE-encoded integer denoting the number of entries
-  each entry: a NULL-terminated <name> string encoded as a byte array
-
-  <name> values for logical/virtual resources are defined in PAPR+ v2.7,
-  Section 13.5.2.4, and basically consist of the type of the resource
-  followed by a space and a numerical value that's unique across resources
-  of that type.
-
-  <name> values for "physical" resources such as PCI or VIO devices are
-  defined as being "location codes", which are the "location labels" of
-  each encapsulating device, starting from the chassis down to the
-  individual slot for the device, concatenated by a hyphen. This provides
-  a mapping of resources to a physical location in a chassis for debugging
-  purposes. For QEMU, this mapping is less important, so we assign a
-  location code that conforms to naming specifications, but is simply a
-  location label for the slot by itself to simplify the implementation.
-  The naming convention for location labels is documented in detail in
-  PAPR+ v2.7, Section 12.3.1.5, and in our case amounts to using "C<n>"
-  for PCI/VIO device slots, where <n> is unique across all PCI/VIO
-  device slots.
-
-ibm,drc-indexes:
-  first 4-bytes: BE-encoded integer denoting the number of entries
-  each 4-byte entry: BE-encoded <index> integer that is unique across all DRCs
-in the machine
-
-   is

Re: [PATCH 21/30] bsd-user/signal.c: force_sig

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:44, Warner Losh  wrote:
>
> Force delivering a signal and generating a core file.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/qemu.h |  1 +
>  bsd-user/signal.c   | 59 +
>  bsd-user/syscall_defs.h |  1 +
>  3 files changed, 61 insertions(+)
>
> diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
> index 7c54a933eb8..e12617f5d69 100644
> --- a/bsd-user/qemu.h
> +++ b/bsd-user/qemu.h
> @@ -223,6 +223,7 @@ void queue_signal(CPUArchState *env, int sig, 
> target_siginfo_t *info);
>  abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong 
> sp);
>  int target_to_host_signal(int sig);
>  int host_to_target_signal(int sig);
> +void QEMU_NORETURN force_sig(int target_sig);
>
>  /* mmap.c */
>  int target_mprotect(abi_ulong start, abi_ulong len, int prot);
> diff --git a/bsd-user/signal.c b/bsd-user/signal.c
> index 824535be8b8..97f42f9c45e 100644
> --- a/bsd-user/signal.c
> +++ b/bsd-user/signal.c
> @@ -109,6 +109,65 @@ static int core_dump_signal(int sig)
>  }
>  }
>
> +/* Abort execution with signal. */
> +void QEMU_NORETURN force_sig(int target_sig)

In linux-user we call this dump_core_and_abort(), which is
a name that better describes what it's actually doing.

(Today's linux-user's force_sig() does what the Linux kernel's
function of that name does -- it's a wrapper around
queue_signal() which delivers a signal to the guest with
.si_code = SI_KERNEL , si_pid = si_uid = 0.
Whether you want one of those or not depends on what BSD
kernels do in that kind of "we have to kill this process"
situation.)

> +{
> +CPUArchState *env = thread_cpu->env_ptr;
> +CPUState *cpu = env_cpu(env);
> +TaskState *ts = cpu->opaque;
> +int core_dumped = 0;
> +int host_sig;
> +struct sigaction act;
> +
> +host_sig = target_to_host_signal(target_sig);
> +gdb_signalled(env, target_sig);
> +
> +/* Dump core if supported by target binary format */
> +if (core_dump_signal(target_sig) && (ts->bprm->core_dump != NULL)) {
> +stop_all_tasks();
> +core_dumped =
> +((*ts->bprm->core_dump)(target_sig, env) == 0);
> +}
> +if (core_dumped) {
> +struct rlimit nodump;
> +
> +/*
> + * We already dumped the core of target process, we don't want
> + * a coredump of qemu itself.
> + */
> > + getrlimit(RLIMIT_CORE, &nodump);
> > + nodump.rlim_cur = 0;
> > + setrlimit(RLIMIT_CORE, &nodump);
> + (void) fprintf(stderr, "qemu: uncaught target signal %d (%s) "
> + "- %s\n", target_sig, strsignal(host_sig), "core dumped");
> +}
> +
> +/*
> + * The proper exit code for dying from an uncaught signal is
> + * -.  The kernel doesn't allow exit() or _exit() to pass
> + * a negative value.  To get the proper exit code we need to
> + * actually die from an uncaught signal.  Here the default signal
> + * handler is installed, we send ourself a signal and we wait for
> + * it to arrive.
> + */
> > +memset(&act, 0, sizeof(act));
> > +sigfillset(&act.sa_mask);
> > +act.sa_handler = SIG_DFL;
> > +sigaction(host_sig, &act, NULL);
> +
> +kill(getpid(), host_sig);
> +
> +/*
> + * Make sure the signal isn't masked (just reuse the mask inside
> + * of act).
> + */
> > +sigdelset(&act.sa_mask, host_sig);
> > +sigsuspend(&act.sa_mask);
> +
> +/* unreachable */
> +abort();
> +}
> +
>  /*
>   * Queue a signal so that it will be send to the virtual CPU as soon as
>   * possible.
> diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h
> index 04a1a886d7b..62b472b990b 100644
> --- a/bsd-user/syscall_defs.h
> +++ b/bsd-user/syscall_defs.h
> @@ -21,6 +21,7 @@
>  #define _SYSCALL_DEFS_H_
>
> >  #include <sys/syscall.h>
> > +#include <sys/resource.h>
>
>  #include "errno_defs.h"
>

-- PMM

Re: [PATCH 20/30] bsd-user/signal.c: core_dump_signal

2022-01-13 Thread Warner Losh

On Thu, Jan 13, 2022 at 1:22 PM Peter Maydell 
wrote:

> On Sun, 9 Jan 2022 at 16:48, Warner Losh  wrote:
> >
> > Returns 1 for signals that cause core files.
> >
> > Signed-off-by: Stacey Son 
> > Signed-off-by: Kyle Evans 
> > Signed-off-by: Warner Losh 
> > ---
> >  bsd-user/signal.c | 17 +
> >  1 file changed, 17 insertions(+)
> >
> > diff --git a/bsd-user/signal.c b/bsd-user/signal.c
> > index a6e07277fb2..824535be8b8 100644
> > --- a/bsd-user/signal.c
> > +++ b/bsd-user/signal.c
> > @@ -92,6 +92,23 @@ static inline void
> host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
> >  }
> >  }
> >
> > +/* Returns 1 if given signal should dump core if not handled. */
> > +static int core_dump_signal(int sig)
> > +{
> > +switch (sig) {
> > +case TARGET_SIGABRT:
> > +case TARGET_SIGFPE:
> > +case TARGET_SIGILL:
> > +case TARGET_SIGQUIT:
> > +case TARGET_SIGSEGV:
> > +case TARGET_SIGTRAP:
> > +case TARGET_SIGBUS:
> > +return 1;
> > +default:
> > +return 0;
> > +}
> > +}
>
> Code is fine, but since this is a static function with no callers
> the compiler is going to emit a warning about that. It's a small
> function, so the easiest thing is just to squash this into the
> following patch which is what adds the code that calls it.
>

Sure thing. I'm still trying to get a feel for right-sizing the chunking...
Since the warning didn't fail the compile, I thought it would be OK,
but can easily fold this in with the first patch to use it.

Warner

Re: [PATCH 20/30] bsd-user/signal.c: core_dump_signal

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:48, Warner Losh  wrote:
>
> Returns 1 for signals that cause core files.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/signal.c | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/bsd-user/signal.c b/bsd-user/signal.c
> index a6e07277fb2..824535be8b8 100644
> --- a/bsd-user/signal.c
> +++ b/bsd-user/signal.c
> @@ -92,6 +92,23 @@ static inline void 
> host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
>  }
>  }
>
> +/* Returns 1 if given signal should dump core if not handled. */
> +static int core_dump_signal(int sig)
> +{
> +switch (sig) {
> +case TARGET_SIGABRT:
> +case TARGET_SIGFPE:
> +case TARGET_SIGILL:
> +case TARGET_SIGQUIT:
> +case TARGET_SIGSEGV:
> +case TARGET_SIGTRAP:
> +case TARGET_SIGBUS:
> +return 1;
> +default:
> +return 0;
> +}
> +}

Code is fine, but since this is a static function with no callers
the compiler is going to emit a warning about that. It's a small
function, so the easiest thing is just to squash this into the
following patch which is what adds the code that calls it.

thanks
-- PMM

Re: [PATCH 19/30] bsd-user/strace.c: print_taken_signal

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:46, Warner Losh  wrote:
>
> print_taken_signal() prints signals when we're tracing signals.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 

Reviewed-by: Peter Maydell 

thanks
-- PMM

[PATCH v2 1/2] target/riscv: iterate over a table of decoders

2022-01-13 Thread Philipp Tomsich

To split up the decoder into multiple functions (both to support
vendor-specific opcodes in separate files and to simplify maintenance
of orthogonal extensions), this changes decode_op to iterate over a
table of decoders predicated on guard functions.

This commit only adds the new structure and the table, allowing for
the easy addition of additional decoders in the future.

Signed-off-by: Philipp Tomsich 
---

Changes in v2:
- (new patch) iterate over a table of guarded decoder functions

 target/riscv/translate.c | 38 --
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 615048ec87..2cbf9cbb6f 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -116,6 +116,12 @@ static inline bool has_ext(DisasContext *ctx, uint32_t ext)
 return ctx->misa_ext & ext;
 }
 
+static inline bool always_true_p(CPURISCVState *env  
__attribute__((__unused__)),
+ DisasContext *ctx  
__attribute__((__unused__)))
+{
+return true;
+}
+
 #ifdef TARGET_RISCV32
 #define get_xl(ctx)MXL_RV32
 #elif defined(CONFIG_USER_ONLY)
@@ -844,16 +850,28 @@ static uint32_t opcode_at(DisasContextBase *dcbase, 
target_ulong pc)
 
 static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
 {
-/* check for compressed insn */
+/* If not handled, we'll raise an illegal instruction exception */
+bool handled = false;
+
+/*
+ * A table with predicate (i.e., guard) functions and decoder functions
+ * that are tested in-order until a decoder matches onto the opcode.
+ */
+const struct {
+bool (*guard_func)(CPURISCVState *, DisasContext *);
+bool (*decode_func)(DisasContext *, uint32_t);
+} decoders[] = {
+{ always_true_p,  decode_insn32 },
+};
+
+/* Check for compressed insn */
 if (extract16(opcode, 0, 2) != 3) {
 if (!has_ext(ctx, RVC)) {
 gen_exception_illegal(ctx);
 } else {
 ctx->opcode = opcode;
 ctx->pc_succ_insn = ctx->base.pc_next + 2;
-if (!decode_insn16(ctx, opcode)) {
-gen_exception_illegal(ctx);
-}
+handled = decode_insn16(ctx, opcode);
 }
 } else {
 uint32_t opcode32 = opcode;
@@ -862,10 +880,18 @@ static void decode_opc(CPURISCVState *env, DisasContext 
*ctx, uint16_t opcode)
  ctx->base.pc_next + 2));
 ctx->opcode = opcode32;
 ctx->pc_succ_insn = ctx->base.pc_next + 4;
-if (!decode_insn32(ctx, opcode32)) {
-gen_exception_illegal(ctx);
+
+for (size_t i = 0; i < ARRAY_SIZE(decoders); ++i) {
+if (!decoders[i].guard_func(env, ctx))
+continue;
+
+if ((handled = decoders[i].decode_func(ctx, opcode32)))
+break;
 }
 }
+
+if (!handled)
+gen_exception_illegal(ctx);
 }
 
 static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
-- 
2.33.1

[PATCH v2 2/2] target/riscv: Add XVentanaCondOps custom extension

2022-01-13 Thread Philipp Tomsich

This adds the decoder and translation for the XVentanaCondOps custom
extension (vendor-defined by Ventana Micro Systems), which is
documented at 
https://github.com/ventanamicro/ventana-custom-extensions/releases/download/v1.0.0/ventana-custom-extensions-v1.0.0.pdf

This commit then also adds a guard-function (has_XVentanaCondOps_p)
and the decoder function to the table of decoders, enabling the
support for the XVentanaCondOps extension.

Signed-off-by: Philipp Tomsich 

---

Changes in v2:
- Split off decode table into XVentanaCondOps.decode
- Wire up XVentanaCondOps in the decoder-table

 target/riscv/XVentanaCondOps.decode   | 25 
 target/riscv/cpu.c|  3 ++
 target/riscv/cpu.h|  3 ++
 .../insn_trans/trans_xventanacondops.inc  | 39 +++
 target/riscv/meson.build  |  1 +
 target/riscv/translate.c  | 13 +++
 6 files changed, 84 insertions(+)
 create mode 100644 target/riscv/XVentanaCondOps.decode
 create mode 100644 target/riscv/insn_trans/trans_xventanacondops.inc

diff --git a/target/riscv/XVentanaCondOps.decode 
b/target/riscv/XVentanaCondOps.decode
new file mode 100644
index 0000000000..5aef7c3d72
--- /dev/null
+++ b/target/riscv/XVentanaCondOps.decode
@@ -0,0 +1,25 @@
+#
+# RISC-V translation routines for the XVentanaCondOps extension
+#
+# Copyright (c) 2022 Dr. Philipp Tomsich, philipp.toms...@vrull.eu
+#
+# SPDX-License-Identifier: LGPL-2.1-or-later
+#
+# Reference: VTx-family custom instructions
+#Custom ISA extensions for Ventana Micro Systems RISC-V cores
+#
(https://github.com/ventanamicro/ventana-custom-extensions/releases/download/v1.0.0/ventana-custom-extensions-v1.0.0.pdf)
+
+# Fields
+%rs2  20:5
+%rs1  15:5
+%rd7:5
+
+# Argument sets
+rd rs1 rs2  !extern
+
+# Formats
+@r ...  . . ... . ... %rs2 %rs1 
%rd
+
+# *** RV64 Custom-3 Extension ***
+vt_maskc   000  . . 110 . 011 @r
+vt_maskcn  000  . . 111 . 011 @r
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 9bc25d3055..fc8ab1dc2b 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -673,6 +673,9 @@ static Property riscv_cpu_properties[] = {
 DEFINE_PROP_BOOL("zbc", RISCVCPU, cfg.ext_zbc, true),
 DEFINE_PROP_BOOL("zbs", RISCVCPU, cfg.ext_zbs, true),
 
+/* Vendor-specific custom extensions */
+DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, 
false),
+
 /* These are experimental so mark with 'x-' */
 DEFINE_PROP_BOOL("x-j", RISCVCPU, cfg.ext_j, false),
 /* ePMP 0.9.3 */
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 4d63086765..ffde94fd1a 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -330,6 +330,9 @@ struct RISCVCPU {
 bool ext_zfh;
 bool ext_zfhmin;
 
+/* Vendor-specific custom extensions */
+bool ext_XVentanaCondOps;
+
 char *priv_spec;
 char *user_spec;
 char *bext_spec;
diff --git a/target/riscv/insn_trans/trans_xventanacondops.inc 
b/target/riscv/insn_trans/trans_xventanacondops.inc
new file mode 100644
index 0000000000..b8a5d031b5
--- /dev/null
+++ b/target/riscv/insn_trans/trans_xventanacondops.inc
@@ -0,0 +1,39 @@
+/*
+ * RISC-V translation routines for the XVentanaCondOps extension.
+ *
+ * Copyright (c) 2021-2022 VRULL GmbH.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+static bool gen_condmask(DisasContext *ctx, arg_r *a, TCGCond cond)
+{
+TCGv dest = dest_gpr(ctx, a->rd);
+TCGv src1 = get_gpr(ctx, a->rs1, EXT_NONE);
+TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE);
+
+tcg_gen_movcond_tl(cond, dest, src2, ctx->zero, src1, ctx->zero);
+
+gen_set_gpr(ctx, a->rd, dest);
+return true;
+}
+
+static bool trans_vt_maskc(DisasContext *ctx, arg_r *a)
+{
+return gen_condmask(ctx, a, TCG_COND_NE);
+}
+
+static bool trans_vt_maskcn(DisasContext *ctx, arg_r *a)
+{
+return gen_condmask(ctx, a, TCG_COND_EQ);
+}
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index a32158da93..1f3a15398b 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -4,6 +4,7 @@ dir = meson.current_source_dir()
 gen = [
   decodetree.process('insn16.decode', extra_args: 
['--static-decode=decode_insn16', '--insnwidth=16']),

Re: [PATCH 18/30] bsd-user/signal.c: Implement host_signal_handler

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:40, Warner Losh  wrote:
>
> Implement host_signal_handler to handle signals generated by the host
> and to do safe system calls.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/signal.c | 105 ++
>  1 file changed, 105 insertions(+)
>
> diff --git a/bsd-user/signal.c b/bsd-user/signal.c
> index b1331f63d61..a6e07277fb2 100644
> --- a/bsd-user/signal.c
> +++ b/bsd-user/signal.c
> @@ -142,6 +142,111 @@ void force_sig_fault(int sig, int code, abi_ulong addr)
>
>  static void host_signal_handler(int host_sig, siginfo_t *info, void *puc)
>  {
> +CPUState *cpu = thread_cpu;
> +CPUArchState *env = cpu->env_ptr;
> +int sig;
> +target_siginfo_t tinfo;
> +ucontext_t *uc = puc;
> +uintptr_t pc = 0;
> +bool sync_sig = false;
> +
> +/*
> + * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special
> + * handling wrt signal blocking and unwinding.
> + */
> +if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) {
> +MMUAccessType access_type;
> +uintptr_t host_addr;
> +abi_ptr guest_addr;
> +bool is_write;
> +
> +host_addr = (uintptr_t)info->si_addr;
> +
> +/*
> + * Convert forcefully to guest address space: addresses outside
> + * reserved_va are still valid to report via SEGV_MAPERR.
> + */
> +guest_addr = h2g_nocheck(host_addr);
> +
> +pc = host_signal_pc(uc);
> +is_write = host_signal_write(info, uc);
> > +access_type = adjust_signal_pc(&pc, is_write);
> +
> +if (host_sig == SIGSEGV) {
> +bool maperr = true;
> +
> +if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) {
> +/* If this was a write to a TB protected page, restart. */
> +if (is_write &&
> > +handle_sigsegv_accerr_write(cpu, &uc->uc_sigmask,
> +pc, guest_addr)) {
> +return;
> +}
> +
> +/*
> + * With reserved_va, the whole address space is PROT_NONE,
> + * which means that we may get ACCERR when we want MAPERR.
> + */
> +if (page_get_flags(guest_addr) & PAGE_VALID) {
> +maperr = false;
> +} else {
> +info->si_code = SEGV_MAPERR;
> +}
> +}
> +
> > +sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
> +cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc);
> +} else {
> > +sigprocmask(SIG_SETMASK, &uc->uc_sigmask, NULL);
> +if (info->si_code == BUS_ADRALN) {
> +cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc);
> +}
> +}
> +
> +sync_sig = true;
> +}
> +
> +/* Get the target signal number. */
> +sig = host_to_target_signal(host_sig);
> +if (sig < 1 || sig > TARGET_NSIG) {
> +return;
> +}
> +trace_user_host_signal(cpu, host_sig, sig);
> +
> > +host_to_target_siginfo_noswap(&tinfo, info);
> > +
> > +queue_signal(env, sig, &tinfo);   /* XXX how to cope with failure? */

queue_signal() can't fail, so there is nothing to cope with.
(Your bsd-user version even has the right 'void' type --
linux-user's returns 1 always and we never look at the return
value, so we should really switch that to void return too.)

> +/*
> + * Linux does something else here -> the queue signal may be wrong, but
> + * maybe not.  And then it does the rewind_if_in_safe_syscall
> + */

I think you have here a bit of a mix of linux-user's current design
and some older (broken) version. This is how linux-user works today:

 * queue_signal() is a little bit misnamed, because there is no
   "queue" here: there can only be at most one "queued" signal,
   and it lives in the TaskState struct (which is user-only specific
   information that hangs off the guest CPU struct) as the
   TaskState::sync_signal field. The reason
   we only have one at once is that queue_signal() is used only
   for signals generated by QEMU itself by calling queue_signal()
   directly or indirectly from the cpu_loop() code. The cpu loop
   always calls process_pending_signals() at the end of its loop,
   which will pick up a queued signal. We never call queue_signal()
   twice in a row before getting back to process_pending_signals(),
   so there's only ever at most one thing in the "queue".
 * for all signals we get from the host except SIGSEGV/SIGBUS,
   we track whether there's a host signal pending in the
   TaskState::sigtab[] array (which is indexed by signal number).
   We block all host signals except SIGSEGV/SIGBUS before calling
   cpu_exit(), so we know we're not going to get more than one
   of these at once (and it won't clash with a queue_signal()

Re: [PATCH 1/1] virtio: fix the condition for iommu_platform not supported

2022-01-13 Thread Halil Pasic

On Thu, 13 Jan 2022 12:11:42 -0500
"Michael S. Tsirkin"  wrote:

> On Thu, Jan 13, 2022 at 05:51:31PM +0100, Halil Pasic wrote:
> > The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > unsupported") claims to fail the device hotplug when iommu_platform
> > is requested, but not supported by the (vhost) device. On the first
> > glance the condition for detecting that situation looks perfect, but
> > because a certain peculiarity of virtio_platform it ain't.
> > 
> > In fact the aforementioned commit introduces a regression. It breaks
> > virtio-fs support for Secure Execution, and most likely also for AMD SEV
> > or any other confidential guest scenario that relies on encrypted guest
> > memory.  The same also applies to any other vhost device that does not
> > negotiate _F_ACCESS_PLATFORM.
> > 
> > The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates
> > "device can not access all of the guest ram" and "iova != gpa, thus
> > device needs to translate iova".
> > 
> > Confidential guest technologies currently rely on the device/hypervisor
> > offering _F_ACCESS_PLATFORM to grant access to whatever the device needs
> > to see, because of the first. But, generally, they don't care for the
> > second.
> > 
> > This is the very reason for commit 7ef7e6e3b ("vhost: correctly
> > turn on VIRTIO_F_IOMMU_PLATFORM"), which fences _F_ACCESS_PLATFORM
> > from the vhost device that does not need it, because on the vhost
> > interface it only means "I/O address translation is needed".
> > 
> > This patch takes inspiration from 7ef7e6e3b ("vhost: correctly turn on
> > VIRTIO_F_IOMMU_PLATFORM"),  
> 
> Strange, I could not find this commit. Did you mean f7ef7e6e3b?
> 

Right! Copy-paste error.


 
 static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
@@ -765,6 +772,9 @@ static int vhost_dev_set_features(struct vhost_dev *dev,
 if (enable_log) {
 features |= 0x1ULL << VHOST_F_LOG_ALL;
 }
+if (!vhost_dev_has_iommu(dev)) {
+features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM);
+}
 r = dev->vhost_ops->vhost_set_features(dev, features);
 if (r < 0) {
 VHOST_OPS_DEBUG("vhost_set_features failed");

> > and uses the same condition for detecting the
> > situation when _F_ACCESS_PLATFORM is requested, but no I/O translation
> > by the device, and thus no device capability is needed.
> >
> > In this
> > situation claiming that the device does not support iommu_platform=on
> > is counter-productive. So let us stop doing that!
> > 
> > Signed-off-by: Halil Pasic 
> > Reported-by: Jakob Naucke 
> > Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but
> > unsupported")
> > Cc: Kevin Wolf 
> > Cc: qemu-sta...@nongnu.org
> > 
> > ---
> > 
> > @Kevin: Can you please verify, that I don't break your fix?  
> 
> So which configurations did you test for this?

I tested it with virtio-fs and qemu-system-s390x with -device
vhost-user-fs-ccw,iommu_platform=on,...

Regards,
Halil

> 
> > ---
> >  hw/virtio/virtio-bus.c | 11 ++-
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> > index d23db98c56..c1578f3de2 100644
> > --- a/hw/virtio/virtio-bus.c
> > +++ b/hw/virtio/virtio-bus.c
> > @@ -69,11 +69,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
> > **errp)
> >  return;
> >  }
> >  
> > -if (has_iommu && !virtio_host_has_feature(vdev, 
> > VIRTIO_F_IOMMU_PLATFORM)) {
> > -error_setg(errp, "iommu_platform=true is not supported by the 
> > device");
> > -return;
> > -}
> > -
> >  if (klass->device_plugged != NULL) {
> >  klass->device_plugged(qbus->parent, _err);
> >  }
> > @@ -88,6 +83,12 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error 
> > **errp)
> >  } else {
> >  vdev->dma_as = _space_memory;
> >  }
> > +
> > +if (has_iommu && vdev->dma_as != _space_memory
> > +  && !virtio_host_has_feature(vdev, 
> > VIRTIO_F_IOMMU_PLATFORM)) {
> > +error_setg(errp, "iommu_platform=true is not supported by the 
> > device");
> > +return;
> > +}
> >  }
> >  /* Reset the virtio_bus */
> > 
> > base-commit: f8d75e10d3e0033a0a29a7a7e4777a4fbc17a016
> > -- 
> > 2.32.0  
> 
>

[PATCH v2 2/3] migration: Add canary to VMSTATE_END_OF_LIST

2022-01-13 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

We fairly regularly forget VMSTATE_END_OF_LIST markers off descriptions;
given that the current check is only for ->name being NULL, sometimes
we get unlucky and the code apparently works and no one spots the error.

Explicitly add a flag, VMS_END that should be set, and assert it is
set during the traversal.

Note: This can't go in until we update the copy of vmstate.h in slirp.

Suggested-by: Peter Maydell 
Signed-off-by: Dr. David Alan Gilbert 
---
 include/migration/vmstate.h | 7 ++-
 migration/savevm.c  | 1 +
 migration/vmstate.c | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 017c03675c..b50708e57a 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -147,6 +147,9 @@ enum VMStateFlags {
  * VMStateField.struct_version_id to tell which version of the
  * structure we are referencing to use. */
 VMS_VSTRUCT   = 0x8000,
+
+/* Marker for end of list */
+VMS_END = 0x1
 };
 
 typedef enum {
@@ -1163,7 +1166,9 @@ extern const VMStateInfo vmstate_info_qlist;
 VMSTATE_UNUSED_BUFFER(_test, 0, _size)
 
 #define VMSTATE_END_OF_LIST() \
-{}
+{ \
+.flags = VMS_END, \
+}
 
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, int version_id);
diff --git a/migration/savevm.c b/migration/savevm.c
index 0bef031acb..8077393d11 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -620,6 +620,7 @@ static void dump_vmstate_vmsd(FILE *out_file,
 field++;
 first = false;
 }
+assert(field->flags == VMS_END);
 fprintf(out_file, "\n%*s]", indent, "");
 }
 if (vmsd->subsections != NULL) {
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 05f87cdddc..181ba08c7d 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -160,6 +160,7 @@ int vmstate_load_state(QEMUFile *f, const 
VMStateDescription *vmsd,
 }
 field++;
 }
+assert(field->flags == VMS_END);
 ret = vmstate_subsection_load(f, vmsd, opaque);
 if (ret != 0) {
 return ret;
@@ -413,6 +414,7 @@ int vmstate_save_state_v(QEMUFile *f, const 
VMStateDescription *vmsd,
 }
 field++;
 }
+assert(field->flags == VMS_END);
 
 if (vmdesc) {
 json_writer_end_array(vmdesc);
-- 
2.34.1

[PATCH v2 0/3] vmsd checks

2022-01-13 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

After the patch the other day where I added a missing END_OF_LIST,
Peter suggested adding something more robust.

Here I:
  add a check for a flag at the end of the list
  add a check that's performed in vmstate_register_with_alias_id
only within qtest recursively for that canary and for
subsection naming constraints.
  Fix a ppc issue that the vmstate naming constraint caught
(Waiting for a reply from the PPC folk to check that).

The checks can't go in until I get the def into libslirp.

Signed-off-by: Dr. David Alan Gilbert 


Dr. David Alan Gilbert (3):
  ppc: Fix vmstate_pbr403 subsection name
  migration: Add canary to VMSTATE_END_OF_LIST
  migration: Perform vmsd structure check during tests

 include/migration/vmstate.h |  7 ++-
 migration/savevm.c  | 40 +
 migration/vmstate.c |  2 ++
 target/ppc/machine.c|  2 +-
 4 files changed, 49 insertions(+), 2 deletions(-)

-- 
2.34.1

[PATCH v2 3/3] migration: Perform vmsd structure check during tests

2022-01-13 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

Perform a check on vmsd structures during test runs in the hope
of catching any missing terminators and other simple screwups.

Signed-off-by: Dr. David Alan Gilbert 
---
 migration/savevm.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/migration/savevm.c b/migration/savevm.c
index 8077393d11..97a4471220 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -66,6 +66,7 @@
 #include "net/announce.h"
 #include "qemu/yank.h"
 #include "yank_functions.h"
+#include "sysemu/qtest.h"
 
 const unsigned int postcopy_ram_discard_version;
 
@@ -839,6 +840,39 @@ void unregister_savevm(VMStateIf *obj, const char *idstr, 
void *opaque)
 }
 }
 
+/*
+ * Perform some basic checks on vmsd's at registration
+ * time.
+ */
+static void vmstate_check(const VMStateDescription *vmsd)
+{
+const VMStateField *field = vmsd->fields;
+const VMStateDescription **subsection = vmsd->subsections;
+
+if (field) {
+while (field->name) {
+if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
+/* Recurse to sub structures */
+vmstate_check(field->vmsd);
+}
+/* Carry on */
+field++;
+}
+/* Check for the end of field list canary */
+assert(field->flags == VMS_END);
+}
+
+while (subsection && *subsection) {
+/*
+ * The name of a subsection should start with the name of the
+ * current object.
+ */
+assert(!strncmp(vmsd->name, (*subsection)->name, strlen(vmsd->name)));
+vmstate_check(*subsection);
+subsection++;
+}
+}
+
 int vmstate_register_with_alias_id(VMStateIf *obj, uint32_t instance_id,
const VMStateDescription *vmsd,
void *opaque, int alias_id,
@@ -884,6 +918,11 @@ int vmstate_register_with_alias_id(VMStateIf *obj, 
uint32_t instance_id,
 } else {
 se->instance_id = instance_id;
 }
+
+/* Perform a recursive sanity check during the test runs */
+if (qtest_enabled()) {
+vmstate_check(vmsd);
+}
 assert(!se->compat || se->instance_id == 0);
 savevm_state_handler_insert(se);
 return 0;
-- 
2.34.1

Re: [PATCH 17/30] bsd-user/signal.c: Implement rewind_if_in_safe_syscall

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:32, Warner Losh  wrote:
>
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/qemu.h   |  2 ++
>  bsd-user/signal.c | 12 
>  2 files changed, 14 insertions(+)

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 16/30] bsd-user/signal.c: host_to_target_siginfo_noswap

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:41, Warner Losh  wrote:
>
> Implement conversion of host to target siginfo.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/signal.c | 37 +
>  1 file changed, 37 insertions(+)
>
> diff --git a/bsd-user/signal.c b/bsd-user/signal.c
> index 7168d851be8..3fe8b2d9898 100644
> --- a/bsd-user/signal.c
> +++ b/bsd-user/signal.c
> @@ -43,6 +43,43 @@ int target_to_host_signal(int sig)
>  return sig;
>  }
>
> +/* Siginfo conversion. */
> +static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
> +const siginfo_t *info)
> +{
> +int sig, code;
> +
> +sig = host_to_target_signal(info->si_signo);
> +/* XXX should have host_to_target_si_code() */
> +code = tswap32(info->si_code);
> +tinfo->si_signo = sig;
> +tinfo->si_errno = info->si_errno;
> +tinfo->si_code = info->si_code;
> +tinfo->si_pid = info->si_pid;
> +tinfo->si_uid = info->si_uid;
> +tinfo->si_status = info->si_status;
> +tinfo->si_addr = (abi_ulong)(unsigned long)info->si_addr;
> +/* si_value is opaque to kernel */
> +tinfo->si_value.sival_ptr =
> +(abi_ulong)(unsigned long)info->si_value.sival_ptr;
> +if (SIGILL == sig || SIGFPE == sig || SIGSEGV == sig || SIGBUS == sig ||

Don't use yoda-conditions, please. sig == SIGILL, etc.

> +SIGTRAP == sig) {
> +tinfo->_reason._fault._trapno = info->_reason._fault._trapno;
> +}
> +#ifdef SIGPOLL
> +if (SIGPOLL == sig) {
> +tinfo->_reason._poll._band = info->_reason._poll._band;
> +}
> +#endif
> +if (SI_TIMER == code) {
> +int timerid;
> +
> +timerid = info->_reason._timer._timerid;
> +tinfo->_reason._timer._timerid = timerid;
> +tinfo->_reason._timer._overrun = info->_reason._timer._overrun;
> +}
> +}

I think this will only compile on FreeBSD (the other BSDs having
notably different target_siginfo_t structs); I guess we're OK
with that ?

I also commented on the general setup linux-user has for this
function back in patch 2; I'll let you figure out whether what
you have here is the right thing for BSD.

-- PMM

Re: [PATCH 15/30] bsd-user: Add trace events for bsd-usr

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:36, Warner Losh  wrote:
>
> Add the bsd-user specific events and infrastructure. Only include the
> linux-user trace events for linux-user, not bsd-user.
>
> Signed-off-by: Stacey Son 
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 

Typo in subject: should be 'bsd-user', not 'bsd-usr'.
Otherwise
Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 14/30] bsd-user: Add host signals to the build

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:32, Warner Losh  wrote:
>
> Start to add the host signal functionality to the build.
>
> Signed-off-by: Warner Losh 
> ---
>  bsd-user/meson.build | 1 +
>  bsd-user/signal.c| 1 +
>  meson.build  | 1 +
>  3 files changed, 3 insertions(+)

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 12/30] bsd-user/host/i386/host-signal.h: Implement host_signal_*

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:29, Warner Losh  wrote:
>
> Implement host_signal_pc, host_signal_set_pc and host_signal_write for
> i386.
>
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 

Reviewed-by: Peter Maydell 

thanks
-- PMM

Re: [PATCH 13/30] bsd-user/host/x86_64/host-signal.h: Implement host_signal_*

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:37, Warner Losh  wrote:
>
> Implement host_signal_pc, host_signal_set_pc and host_signal_write for
> x86_64.
>
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 
> ---

Reviewed-by: Peter Maydell 

thanks
-- PMM

[PATCH v2 1/3] ppc: Fix vmstate_pbr403 subsection name

2022-01-13 Thread Dr. David Alan Gilbert (git)

From: "Dr. David Alan Gilbert" 

The pbr403 subsection is part of the tlb6xx state, so I believe its
name needs to be:

.name = "cpu/tlb6xx/pbr403",

Signed-off-by: Dr. David Alan Gilbert 
---
 target/ppc/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 756d8de5d8..e535edb7c4 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -718,7 +718,7 @@ static bool pbr403_needed(void *opaque)
 }
 
 static const VMStateDescription vmstate_pbr403 = {
-.name = "cpu/pbr403",
+.name = "cpu/tlb6xx/pbr403",
 .version_id = 1,
 .minimum_version_id = 1,
 .needed = pbr403_needed,
-- 
2.34.1

[PATCH 17/17] ppc/pnv: rename pnv_pec_stk_update_map()

2022-01-13 Thread Daniel Henrique Barboza

This function does not use 'stack' anymore. Rename it to
pnv_pec_phb_update_map().

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index a9ec42ce2c..d27b62a50a 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -892,7 +892,7 @@ static void pnv_phb4_update_regions(PnvPHB4 *phb)
 pnv_phb4_check_all_mbt(phb);
 }
 
-static void pnv_pec_stk_update_map(PnvPHB4 *phb)
+static void pnv_pec_phb_update_map(PnvPHB4 *phb)
 {
 PnvPhb4PecState *pec = phb->pec;
 MemoryRegion *sysmem = get_system_memory();
@@ -1043,7 +1043,7 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, 
hwaddr addr,
 break;
 case PEC_NEST_STK_BAR_EN:
 phb->nest_regs[reg] = val & 0xf000ull;
-pnv_pec_stk_update_map(phb);
+pnv_pec_phb_update_map(phb);
 break;
 case PEC_NEST_STK_DATA_FRZ_TYPE:
 case PEC_NEST_STK_PBCQ_TUN_BAR:
-- 
2.33.1

Re: [PATCH 11/30] bsd-user/host/arm/host-signal.h: Implement host_signal_*

2022-01-13 Thread Peter Maydell

On Sun, 9 Jan 2022 at 16:33, Warner Losh  wrote:
>
> Implement host_signal_pc, host_signal_set_pc and host_signal_write for
> arm.
>
> Signed-off-by: Kyle Evans 
> Signed-off-by: Warner Losh 

> +static inline bool host_signal_write(siginfo_t *info, ucontext_t *uc)
> +{
> +/*
> + * In the FSR, bit 11 is WnR. FreeBSD returns this as part of the
> + * si_info.si_trapno which we don't have access to here.  We assume that 
> uc
> + * is part of a trapframe and reach around to get to the si_info that's 
> in
> + * the sigframe just before it, though this may be unwise.
> + */

Yeah, that's pretty nasty. But this function is passed a
siginfo_t pointer -- isn't that the one you need ?

> +siginfo_t *si;
> +si = &((siginfo_t *)uc)[-1];
> +uint32_t fsr = si->si_trapno;
> +
> +return extract32(fsr, 11, 1);
> +}

thanks
-- PMM

[PATCH 15/17] ppc/pnv: convert pec->stacks[] into pec->phbs[]

2022-01-13 Thread Daniel Henrique Barboza

This patch changes the design of the PEC device to use PHB4s instead of
PecStacks. After all the recent changes, PHB4s now contain all the
information needed for their proper functioning, not relying on PecStack
in any capacity.

All changes are being made in a single patch to avoid renaming parts of
the PecState and leaving the code in a strange way. E.g. rename
PecClass->num_stacks to num_phbs, which would then read a
pnv_pec_num_stacks[] array. To avoid mixing the old and new design more
than necessary it's clearer to do these changes in a single step.

The name changes made are:

- in PnvPhb4PecState, rename PHB4_PEC_MAX_STACKS to PHB4_PEC_MAX_PHBS,
'num_stacks' to 'num_phbs' and convert "PnvPhb4PecStack
stacks[PHB4_PEC_MAX_STACKS]" to "PnvPHB4 *phbs[PHB4_PEC_MAX_PHBS]";

- in PnvPhb4PecClass, rename *num_stacks to *num_phbs;

- pnv_pec_num_stacks[] is renamed to pnv_pec_num_phbs[].

The logical changes:

- pnv_pec_default_phb_realize():
  * init the PnvPHB4 qdev and assign it to the corresponding
pec->phbs[phb_number];
  * do not use stack->phb anymore;

- pnv_pec_realize():
  * use the new default_phb_realize() to init/realize each PHB if
running with defaults;

- pnv_pec_instance_init(): removed since we're creating the PHBs during
pec_realize();

- pnv_phb4_get_stack():
  * renamed to pnv_phb4_get_pec() and returns a PnvPhb4PecState*;
  * assign the right pec->phbs[] pointer to the phb;
  * set 'phb_number' of the PHB given that the information is already
available;

- pnv_phb4_realize(): use 'phb->pec' instead of 'stack'.

This design change shouldn't cause any behavioral change in the runtime
of the machine.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 31 +++
 hw/pci-host/pnv_phb4_pec.c | 71 ++
 include/hw/pci-host/pnv_phb4.h | 10 ++---
 3 files changed, 40 insertions(+), 72 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 25b4248776..a9ec42ce2c 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1360,7 +1360,7 @@ int pnv_phb4_pec_get_phb_id(PnvPhb4PecState *pec, int 
stack_index)
 int offset = 0;
 
 while (index--) {
-offset += pecc->num_stacks[index];
+offset += pecc->num_phbs[index];
 }
 
 return offset + stack_index;
@@ -1510,8 +1510,8 @@ static void pnv_phb4_instance_init(Object *obj)
 object_initialize_child(obj, "source", >xsrc, TYPE_XIVE_SOURCE);
 }
 
-static PnvPhb4PecStack *pnv_phb4_get_stack(PnvChip *chip, PnvPHB4 *phb,
-   Error **errp)
+static PnvPhb4PecState *pnv_phb4_get_pec(PnvChip *chip, PnvPHB4 *phb,
+ Error **errp)
 {
 Pnv9Chip *chip9 = PNV9_CHIP(chip);
 int chip_id = phb->chip_id;
@@ -1520,14 +1520,19 @@ static PnvPhb4PecStack *pnv_phb4_get_stack(PnvChip 
*chip, PnvPHB4 *phb,
 
 for (i = 0; i < chip->num_pecs; i++) {
 /*
- * For each PEC, check the amount of stacks it supports
- * and see if the given phb4 index matches a stack.
+ * For each PEC, check the amount of phbs it supports
+ * and see if the given phb4 index matches an index.
  */
 PnvPhb4PecState *pec = >pecs[i];
 
-for (j = 0; j < pec->num_stacks; j++) {
+for (j = 0; j < pec->num_phbs; j++) {
 if (index == pnv_phb4_pec_get_phb_id(pec, j)) {
-return >stacks[j];
+pec->phbs[j] = phb;
+
+/* Set phb-number now since we already have it */
+object_property_set_int(OBJECT(phb), "phb-number",
+   j, _abort);
+return pec;
 }
 }
 }
@@ -1552,7 +1557,6 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
 if (!phb->pec) {
 PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
 PnvChip *chip = pnv_get_chip(pnv, phb->chip_id);
-PnvPhb4PecStack *stack;
 PnvPhb4PecClass *pecc;
 BusState *s;
 
@@ -1561,23 +1565,16 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-stack = pnv_phb4_get_stack(chip, phb, _err);
+phb->pec = pnv_phb4_get_pec(chip, phb, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
 }
 
-/*
- * All other phb properties but 'pec', 'version' and
- * 'phb-number' are already set.
- */
-object_property_set_link(OBJECT(phb), "pec", OBJECT(stack->pec),
- _abort);
+/* All other phb properties are already set */
 pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
 object_property_set_int(OBJECT(phb), "version", pecc->version,
 _fatal);
-object_property_set_int(OBJECT(phb), "phb-number",
-stack->stack_no,

[PATCH 14/17] ppc/pnv: move default_phb_realize() to pec_realize()

2022-01-13 Thread Daniel Henrique Barboza

This is the last step before making the PEC device use PHB4s directly.
Move the current pnv_pec_stk_default_phb_realize() call to
pec_realize(), renaming the function to pnv_pec_default_phb_realize(),
and set the PHB attributes using the PEC object directly.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4_pec.c | 67 --
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 1de0eb9adc..3339e0ea3d 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -112,6 +112,32 @@ static const MemoryRegionOps pnv_pec_pci_xscom_ops = {
 .endianness = DEVICE_BIG_ENDIAN,
 };
 
+static void pnv_pec_default_phb_realize(PnvPhb4PecStack *stack,
+int phb_number,
+Error **errp)
+{
+PnvPhb4PecState *pec = stack->pec;
+PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
+int phb_id = pnv_phb4_pec_get_phb_id(pec, phb_number);
+
+stack->phb = PNV_PHB4(qdev_new(TYPE_PNV_PHB4));
+
+object_property_set_int(OBJECT(stack->phb), "phb-number", phb_number,
+_abort);
+object_property_set_link(OBJECT(stack->phb), "pec", OBJECT(pec),
+ _abort);
+object_property_set_int(OBJECT(stack->phb), "chip-id", pec->chip_id,
+_fatal);
+object_property_set_int(OBJECT(stack->phb), "index", phb_id,
+_fatal);
+object_property_set_int(OBJECT(stack->phb), "version", pecc->version,
+_fatal);
+
+if (!sysbus_realize(SYS_BUS_DEVICE(stack->phb), errp)) {
+return;
+}
+}
+
 static void pnv_pec_instance_init(Object *obj)
 {
 PnvPhb4PecState *pec = PNV_PHB4_PEC(obj);
@@ -144,6 +170,15 @@ static void pnv_pec_realize(DeviceState *dev, Error **errp)
 
 object_property_set_int(stk_obj, "stack-no", i, _abort);
 object_property_set_link(stk_obj, "pec", OBJECT(pec), _abort);
+
+if (defaults_enabled()) {
+pnv_pec_default_phb_realize(stack, i, errp);
+}
+
+/*
+ * qdev gets angry if we don't realize 'stack' here, even
+ * if stk_realize() is now empty.
+ */
 if (!qdev_realize(DEVICE(stk_obj), NULL, errp)) {
 return;
 }
@@ -276,40 +311,8 @@ static const TypeInfo pnv_pec_type_info = {
 }
 };
 
-static void pnv_pec_stk_default_phb_realize(PnvPhb4PecStack *stack,
-Error **errp)
-{
-PnvPhb4PecState *pec = stack->pec;
-PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
-int phb_id = pnv_phb4_pec_get_phb_id(pec, stack->stack_no);
-
-stack->phb = PNV_PHB4(qdev_new(TYPE_PNV_PHB4));
-
-object_property_set_int(OBJECT(stack->phb), "phb-number", stack->stack_no,
-_abort);
-object_property_set_link(OBJECT(stack->phb), "pec", OBJECT(pec),
- _abort);
-object_property_set_int(OBJECT(stack->phb), "chip-id", pec->chip_id,
-_fatal);
-object_property_set_int(OBJECT(stack->phb), "index", phb_id,
-_fatal);
-object_property_set_int(OBJECT(stack->phb), "version", pecc->version,
-_fatal);
-
-if (!sysbus_realize(SYS_BUS_DEVICE(stack->phb), errp)) {
-return;
-}
-}
-
 static void pnv_pec_stk_realize(DeviceState *dev, Error **errp)
 {
-PnvPhb4PecStack *stack = PNV_PHB4_PEC_STACK(dev);
-
-if (!defaults_enabled()) {
-return;
-}
-
-pnv_pec_stk_default_phb_realize(stack, errp);
 }
 
 static Property pnv_pec_stk_properties[] = {
-- 
2.33.1

[PATCH 13/17] ppc/pnv: remove stack pointer from PnvPHB4

2022-01-13 Thread Daniel Henrique Barboza

This pointer was being used for two reasons: pnv_phb4_update_regions()
was using it to access the PHB and phb4_realize() was using it as a way
to determine if the PHB was user created.

We can determine if the PHB is user created via phb->pec, introduced in
the previous patch, and pnv_phb4_update_regions() is no longer using
stack->phb.

Remove the pointer from the PnvPHB4 device.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 17 +
 hw/pci-host/pnv_phb4_pec.c |  2 --
 include/hw/pci-host/pnv_phb4.h |  2 --
 3 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index c9117221b2..25b4248776 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -1549,9 +1549,10 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
 char name[32];
 
 /* User created PHB */
-if (!phb->stack) {
+if (!phb->pec) {
 PnvMachineState *pnv = PNV_MACHINE(qdev_get_machine());
 PnvChip *chip = pnv_get_chip(pnv, phb->chip_id);
+PnvPhb4PecStack *stack;
 PnvPhb4PecClass *pecc;
 BusState *s;
 
@@ -1560,7 +1561,7 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-phb->stack = pnv_phb4_get_stack(chip, phb, _err);
+stack = pnv_phb4_get_stack(chip, phb, _err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
@@ -1570,19 +1571,13 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
  * All other phb properties but 'pec', 'version' and
  * 'phb-number' are already set.
  */
-object_property_set_link(OBJECT(phb), "pec", OBJECT(phb->stack->pec),
+object_property_set_link(OBJECT(phb), "pec", OBJECT(stack->pec),
  _abort);
 pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
 object_property_set_int(OBJECT(phb), "version", pecc->version,
 _fatal);
 object_property_set_int(OBJECT(phb), "phb-number",
-phb->stack->stack_no, _abort);
-
-/*
- * Assign stack->phb since pnv_phb4_update_regions() uses it
- * to access the phb.
- */
-phb->stack->phb = phb;
+stack->stack_no, _abort);
 
 /*
  * Reparent user created devices to the chip to build
@@ -1686,8 +1681,6 @@ static Property pnv_phb4_properties[] = {
 DEFINE_PROP_UINT32("index", PnvPHB4, phb_id, 0),
 DEFINE_PROP_UINT32("chip-id", PnvPHB4, chip_id, 0),
 DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
-DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
- PnvPhb4PecStack *),
 DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
  PnvPhb4PecState *),
 DEFINE_PROP_END_OF_LIST(),
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 36cc4ffe7c..1de0eb9adc 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -295,8 +295,6 @@ static void pnv_pec_stk_default_phb_realize(PnvPhb4PecStack 
*stack,
 _fatal);
 object_property_set_int(OBJECT(stack->phb), "version", pecc->version,
 _fatal);
-object_property_set_link(OBJECT(stack->phb), "stack", OBJECT(stack),
- _abort);
 
 if (!sysbus_realize(SYS_BUS_DEVICE(stack->phb), errp)) {
 return;
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index f66bc76b78..90eb4575f8 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -154,8 +154,6 @@ struct PnvPHB4 {
 XiveSource xsrc;
 qemu_irq *qirqs;
 
-PnvPhb4PecStack *stack;
-
 QLIST_HEAD(, PnvPhb4DMASpace) dma_spaces;
 };
 
-- 
2.33.1

[PATCH 16/17] ppc/pnv: remove PnvPhb4PecStack object

2022-01-13 Thread Daniel Henrique Barboza

All the complexity that was scattered between PnvPhb4PecStack and
PnvPHB4 is now centered in the PnvPHB4 device. PnvPhb4PecStack does not
serve any purpose in the current code base.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4_pec.c | 34 --
 include/hw/pci-host/pnv_phb4.h | 20 
 2 files changed, 54 deletions(-)

diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 61d7add25a..02e7689372 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -282,43 +282,9 @@ static const TypeInfo pnv_pec_type_info = {
 }
 };
 
-static void pnv_pec_stk_realize(DeviceState *dev, Error **errp)
-{
-}
-
-static Property pnv_pec_stk_properties[] = {
-DEFINE_PROP_UINT32("stack-no", PnvPhb4PecStack, stack_no, 0),
-DEFINE_PROP_LINK("pec", PnvPhb4PecStack, pec, TYPE_PNV_PHB4_PEC,
- PnvPhb4PecState *),
-DEFINE_PROP_END_OF_LIST(),
-};
-
-static void pnv_pec_stk_class_init(ObjectClass *klass, void *data)
-{
-DeviceClass *dc = DEVICE_CLASS(klass);
-
-device_class_set_props(dc, pnv_pec_stk_properties);
-dc->realize = pnv_pec_stk_realize;
-dc->user_creatable = false;
-
-/* TODO: reset regs ? */
-}
-
-static const TypeInfo pnv_pec_stk_type_info = {
-.name  = TYPE_PNV_PHB4_PEC_STACK,
-.parent= TYPE_DEVICE,
-.instance_size = sizeof(PnvPhb4PecStack),
-.class_init= pnv_pec_stk_class_init,
-.interfaces= (InterfaceInfo[]) {
-{ TYPE_PNV_XSCOM_INTERFACE },
-{ }
-}
-};
-
 static void pnv_pec_register_types(void)
 {
 type_register_static(_pec_type_info);
-type_register_static(_pec_stk_type_info);
 }
 
 type_init(pnv_pec_register_types);
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 170de2e752..96e8583e48 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -167,26 +167,6 @@ extern const MemoryRegionOps pnv_phb4_xscom_ops;
 #define TYPE_PNV_PHB4_PEC "pnv-phb4-pec"
 OBJECT_DECLARE_TYPE(PnvPhb4PecState, PnvPhb4PecClass, PNV_PHB4_PEC)
 
-#define TYPE_PNV_PHB4_PEC_STACK "pnv-phb4-pec-stack"
-OBJECT_DECLARE_SIMPLE_TYPE(PnvPhb4PecStack, PNV_PHB4_PEC_STACK)
-
-/* Per-stack data */
-struct PnvPhb4PecStack {
-DeviceState parent;
-
-/* My own stack number */
-uint32_t stack_no;
-
-/* The owner PEC */
-PnvPhb4PecState *pec;
-
-/*
- * PHB4 pointer. pnv_phb4_update_regions() needs to access
- * the PHB4 via a PnvPhb4PecStack pointer.
- */
-PnvPHB4 *phb;
-};
-
 struct PnvPhb4PecState {
 DeviceState parent;
 
-- 
2.33.1

[PATCH 08/17] ppc/pnv: change pnv_pec_stk_update_map() to use PnvPHB4

2022-01-13 Thread Daniel Henrique Barboza

stack->nest_regs_mr wasn't migrated to PnvPHB4 together with phb->nest_regs[] in
the previous patch. We were unable to cleanly convert its write MemoryRegionOps,
pnv_pec_stk_nest_xscom_write(), to use PnvPHB4 instead of PnvPhb4PecStack due to
pnv_pec_stk_update_map() using a stack. Thing is, we're now able to convert
pnv_pec_stk_update_map() because of what we did in the previous patch.

The need for this intermediate step is a good example of the interconnected
relationship between stack and phb that we aim to clean up.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 916a7a3cf0..0f4464ec67 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -893,10 +893,10 @@ static void pnv_phb4_update_regions(PnvPHB4 *phb)
 pnv_phb4_check_all_mbt(phb);
 }
 
-static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
+static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 {
+PnvPhb4PecStack *stack = phb->stack;
 PnvPhb4PecState *pec = stack->pec;
-PnvPHB4 *phb = stack->phb;
 MemoryRegion *sysmem = get_system_memory();
 uint64_t bar_en = phb->nest_regs[PEC_NEST_STK_BAR_EN];
 uint64_t bar, mask, size;
@@ -1046,7 +1046,7 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, 
hwaddr addr,
 break;
 case PEC_NEST_STK_BAR_EN:
 phb->nest_regs[reg] = val & 0xf000ull;
-pnv_pec_stk_update_map(stack);
+pnv_pec_stk_update_map(phb);
 break;
 case PEC_NEST_STK_DATA_FRZ_TYPE:
 case PEC_NEST_STK_PBCQ_TUN_BAR:
-- 
2.33.1

[PATCH 05/17] ppc/pnv: change pnv_phb4_update_regions() to use PnvPHB4

2022-01-13 Thread Daniel Henrique Barboza

The function does not rely on stack for anything it does anymore. This
is also one less instance of 'stack->phb' that we need to worry about.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index fbc475f27a..034721f159 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -868,10 +868,8 @@ static uint64_t pnv_pec_stk_nest_xscom_read(void *opaque, 
hwaddr addr,
 return stack->nest_regs[reg];
 }
 
-static void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
+static void pnv_phb4_update_regions(PnvPHB4 *phb)
 {
-PnvPHB4 *phb = stack->phb;
-
 /* Unmap first always */
 if (memory_region_is_mapped(>mr_regs)) {
 memory_region_del_subregion(>phbbar, >mr_regs);
@@ -930,7 +928,7 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 }
 
 /* Update PHB */
-pnv_phb4_update_regions(stack);
+pnv_phb4_update_regions(phb);
 
 /* Handle maps */
 if (!memory_region_is_mapped(>mmbar0) &&
@@ -977,7 +975,7 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 }
 
 /* Update PHB */
-pnv_phb4_update_regions(stack);
+pnv_phb4_update_regions(phb);
 }
 
 static void pnv_pec_stk_nest_xscom_write(void *opaque, hwaddr addr,
-- 
2.33.1

[PATCH 12/17] ppc/pnv: introduce PnvPHB4 'pec' property

2022-01-13 Thread Daniel Henrique Barboza

This property will track the owner PEC of this PHB. For now it's
redundant since we can retrieve the PEC via phb->stack->pec but it
will not be redundant when we get rid of the stack device.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 20 +++-
 hw/pci-host/pnv_phb4_pec.c |  2 ++
 include/hw/pci-host/pnv_phb4.h |  3 +++
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 44f3087913..c9117221b2 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -894,8 +894,7 @@ static void pnv_phb4_update_regions(PnvPHB4 *phb)
 
 static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 {
-PnvPhb4PecStack *stack = phb->stack;
-PnvPhb4PecState *pec = stack->pec;
+PnvPhb4PecState *pec = phb->pec;
 MemoryRegion *sysmem = get_system_memory();
 uint64_t bar_en = phb->nest_regs[PEC_NEST_STK_BAR_EN];
 uint64_t bar, mask, size;
@@ -969,7 +968,7 @@ static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 bar = phb->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
 size = PNV_PHB4_MAX_INTs << 16;
 snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-int",
- stack->pec->chip_id, stack->pec->index, phb->phb_number);
+ phb->pec->chip_id, phb->pec->index, phb->phb_number);
 memory_region_init(&phb->intbar, OBJECT(phb), name, size);
 memory_region_add_subregion(sysmem, bar, &phb->intbar);
 }
@@ -982,7 +981,7 @@ static void pnv_pec_stk_nest_xscom_write(void *opaque, 
hwaddr addr,
  uint64_t val, unsigned size)
 {
 PnvPHB4 *phb = PNV_PHB4(opaque);
-PnvPhb4PecState *pec = phb->stack->pec;
+PnvPhb4PecState *pec = phb->pec;
 uint32_t reg = addr >> 3;
 
 switch (reg) {
@@ -1458,8 +1457,7 @@ static AddressSpace *pnv_phb4_dma_iommu(PCIBus *bus, void 
*opaque, int devfn)
 
 static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
 {
-PnvPhb4PecStack *stack = phb->stack;
-PnvPhb4PecState *pec = stack->pec;
+PnvPhb4PecState *pec = phb->pec;
 PnvPhb4PecClass *pecc = PNV_PHB4_PEC_GET_CLASS(pec);
 uint32_t pec_nest_base;
 uint32_t pec_pci_base;
@@ -1569,10 +1567,12 @@ static void pnv_phb4_realize(DeviceState *dev, Error 
**errp)
 }
 
 /*
- * All other phb properties but 'version' and 'phb-number'
- * are already set.
+ * All other phb properties but 'pec', 'version' and
+ * 'phb-number' are already set.
  */
-pecc = PNV_PHB4_PEC_GET_CLASS(phb->stack->pec);
+object_property_set_link(OBJECT(phb), "pec", OBJECT(phb->stack->pec),
+ &error_abort);
+pecc = PNV_PHB4_PEC_GET_CLASS(phb->pec);
 object_property_set_int(OBJECT(phb), "version", pecc->version,
 &error_fatal);
 object_property_set_int(OBJECT(phb), "phb-number",
@@ -1688,6 +1688,8 @@ static Property pnv_phb4_properties[] = {
 DEFINE_PROP_UINT64("version", PnvPHB4, version, 0),
 DEFINE_PROP_LINK("stack", PnvPHB4, stack, TYPE_PNV_PHB4_PEC_STACK,
  PnvPhb4PecStack *),
+DEFINE_PROP_LINK("pec", PnvPHB4, pec, TYPE_PNV_PHB4_PEC,
+ PnvPhb4PecState *),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/pci-host/pnv_phb4_pec.c b/hw/pci-host/pnv_phb4_pec.c
index 7c4b4023df..36cc4ffe7c 100644
--- a/hw/pci-host/pnv_phb4_pec.c
+++ b/hw/pci-host/pnv_phb4_pec.c
@@ -287,6 +287,8 @@ static void pnv_pec_stk_default_phb_realize(PnvPhb4PecStack 
*stack,
 
 object_property_set_int(OBJECT(stack->phb), "phb-number", stack->stack_no,
 &error_abort);
+object_property_set_link(OBJECT(stack->phb), "pec", OBJECT(pec),
+ &error_abort);
 object_property_set_int(OBJECT(stack->phb), "chip-id", pec->chip_id,
 &error_fatal);
 object_property_set_int(OBJECT(stack->phb), "index", phb_id,
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index fc7807be1c..f66bc76b78 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -87,6 +87,9 @@ struct PnvPHB4 {
 /* My own PHB number */
 uint32_t phb_number;
 
+/* The owner PEC */
+PnvPhb4PecState *pec;
+
 char bus_path[8];
 
 /* Main register images */
-- 
2.33.1

[PATCH 11/17] ppc/pnv: introduce PnvPHB4 'phb_number' property

2022-01-13 Thread Daniel Henrique Barboza

One of the remaining dependencies we have on the PnvPhb4PecStack object
is the stack->stack_no property. This is set as the position the stack
occupies in the pec->stacks[] array.

We need a way to report this same value in the PnvPHB4. This patch
creates a new property called 'phb_number' to be used in existing code
in all instances where stack->stack_no is currently being used.

The 'phb_number' name is an indication of our future intention to convert
the pec->stacks[] array into a pec->phbs[] array, when the PEC object will
deal directly with phb4 objects.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 28 +---
 hw/pci-host/pnv_phb4_pec.c |  2 ++
 include/hw/pci-host/pnv_phb4.h |  3 +++
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index b5045fca64..44f3087913 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -937,7 +937,7 @@ static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
 size = ((~mask) >> 8) + 1;
 snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio0",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 memory_region_init(&phb->mmbar0, OBJECT(phb), name, size);
 memory_region_add_subregion(sysmem, bar, &phb->mmbar0);
 phb->mmio0_base = bar;
@@ -949,7 +949,7 @@ static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 mask = phb->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
 size = ((~mask) >> 8) + 1;
 snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio1",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 memory_region_init(&phb->mmbar1, OBJECT(phb), name, size);
 memory_region_add_subregion(sysmem, bar, &phb->mmbar1);
 phb->mmio1_base = bar;
@@ -960,7 +960,7 @@ static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 bar = phb->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
 size = PNV_PHB4_NUM_REGS << 3;
 snprintf(name, sizeof(name), "pec-%d.%d-phb-%d",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 memory_region_init(&phb->phbbar, OBJECT(phb), name, size);
 memory_region_add_subregion(sysmem, bar, &phb->phbbar);
 }
@@ -969,7 +969,7 @@ static void pnv_pec_stk_update_map(PnvPHB4 *phb)
 bar = phb->nest_regs[PEC_NEST_STK_INT_BAR] >> 8;
 size = PNV_PHB4_MAX_INTs << 16;
 snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-int",
- stack->pec->chip_id, stack->pec->index, stack->stack_no);
+ stack->pec->chip_id, stack->pec->index, phb->phb_number);
 memory_region_init(&phb->intbar, OBJECT(phb), name, size);
 memory_region_add_subregion(sysmem, bar, &phb->intbar);
 }
@@ -1469,20 +1469,20 @@ static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
 
 /* Initialize the XSCOM regions for the stack registers */
 snprintf(name, sizeof(name), "xscom-pec-%d.%d-nest-phb-%d",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 pnv_xscom_region_init(&phb->nest_regs_mr, OBJECT(phb),
   &pnv_pec_stk_nest_xscom_ops, phb, name,
   PHB4_PEC_NEST_STK_REGS_COUNT);
 
 snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 pnv_xscom_region_init(&phb->pci_regs_mr, OBJECT(phb),
   &pnv_pec_stk_pci_xscom_ops, phb, name,
   PHB4_PEC_PCI_STK_REGS_COUNT);
 
 /* PHB pass-through */
 snprintf(name, sizeof(name), "xscom-pec-%d.%d-pci-phb-%d",
- pec->chip_id, pec->index, stack->stack_no);
+ pec->chip_id, pec->index, phb->phb_number);
 pnv_xscom_region_init(&phb->phb_regs_mr, OBJECT(phb),
   &pnv_phb4_xscom_ops, phb, name, 0x40);
 
@@ -1491,14 +1491,14 @@ static void pnv_phb4_xscom_realize(PnvPHB4 *phb)
 
 /* Populate the XSCOM address space. */
 pnv_xscom_add_subregion(pec->chip,
-pec_nest_base + 0x40 * (stack->stack_no + 1),
+pec_nest_base + 0x40 * (phb->phb_number + 1),
 &phb->nest_regs_mr);
 pnv_xscom_add_subregion(pec->chip,
-pec_pci_base + 0x40 * (stack->stack_no + 1),
+pec_pci_base + 0x40 * (phb->phb_number + 1),
 &phb->pci_regs_mr);
 pnv_xscom_add_subregion(pec->chip,
 pec_pci_base + PNV9_XSCOM_PEC_PCI_STK0 +
-0x40 * stack->stack_no,
+0x40 * phb->phb_number,

[PATCH 03/17] ppc/pnv: move phbbar to PnvPHB4

2022-01-13 Thread Daniel Henrique Barboza

This MemoryRegion is simple enough to be moved in a single step.

A 'stack->phb' pointer had to be introduced in pnv_pec_stk_update_map()
because this function isn't ready to be fully converted to use a PnvPHB4
pointer instead. This will be dealt with in the following patches.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 19 ++-
 include/hw/pci-host/pnv_phb4.h |  4 +++-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index fd9f6af4b3..00eaf91fca 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -874,15 +874,15 @@ static void pnv_phb4_update_regions(PnvPhb4PecStack 
*stack)
 
 /* Unmap first always */
 if (memory_region_is_mapped(&phb->mr_regs)) {
-memory_region_del_subregion(&stack->phbbar, &phb->mr_regs);
+memory_region_del_subregion(&phb->phbbar, &phb->mr_regs);
 }
 if (memory_region_is_mapped(&phb->xsrc.esb_mmio)) {
 memory_region_del_subregion(&stack->intbar, &phb->xsrc.esb_mmio);
 }
 
 /* Map registers if enabled */
-if (memory_region_is_mapped(&stack->phbbar)) {
-memory_region_add_subregion(&stack->phbbar, 0, &phb->mr_regs);
+if (memory_region_is_mapped(&phb->phbbar)) {
+memory_region_add_subregion(&phb->phbbar, 0, &phb->mr_regs);
 }
 }
 
 /* Map ESB if enabled */
@@ -897,6 +897,7 @@ static void pnv_phb4_update_regions(PnvPhb4PecStack *stack)
 static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 {
 PnvPhb4PecState *pec = stack->pec;
+PnvPHB4 *phb = stack->phb;
 MemoryRegion *sysmem = get_system_memory();
 uint64_t bar_en = stack->nest_regs[PEC_NEST_STK_BAR_EN];
 uint64_t bar, mask, size;
@@ -919,9 +920,9 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
 memory_region_del_subregion(sysmem, &stack->mmbar1);
 }
-if (memory_region_is_mapped(&stack->phbbar) &&
+if (memory_region_is_mapped(&phb->phbbar) &&
 !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
-memory_region_del_subregion(sysmem, &stack->phbbar);
+memory_region_del_subregion(sysmem, &phb->phbbar);
 }
 if (memory_region_is_mapped(&stack->intbar) &&
 !(bar_en & PEC_NEST_STK_BAR_EN_INT)) {
@@ -956,14 +957,14 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 stack->mmio1_base = bar;
 stack->mmio1_size = size;
 }
-if (!memory_region_is_mapped(&stack->phbbar) &&
+if (!memory_region_is_mapped(&phb->phbbar) &&
 (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
 bar = stack->nest_regs[PEC_NEST_STK_PHB_REGS_BAR] >> 8;
 size = PNV_PHB4_NUM_REGS << 3;
-snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-phb",
+snprintf(name, sizeof(name), "pec-%d.%d-phb-%d",
  pec->chip_id, pec->index, stack->stack_no);
-memory_region_init(&stack->phbbar, OBJECT(stack), name, size);
-memory_region_add_subregion(sysmem, bar, &stack->phbbar);
+memory_region_init(&phb->phbbar, OBJECT(phb), name, size);
+memory_region_add_subregion(sysmem, bar, &phb->phbbar);
 }
 if (!memory_region_is_mapped(&stack->intbar) &&
 (bar_en & PEC_NEST_STK_BAR_EN_INT)) {
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index 4487c3a6e2..b11fa80e81 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -112,6 +112,9 @@ struct PnvPHB4 {
 uint64_t pci_regs[PHB4_PEC_PCI_STK_REGS_COUNT];
 MemoryRegion pci_regs_mr;
 
+/* Memory windows from PowerBus to PHB */
+MemoryRegion phbbar;
+
 /* On-chip IODA tables */
 uint64_t ioda_LIST[PNV_PHB4_MAX_LSIs];
 uint64_t ioda_MIST[PNV_PHB4_MAX_MIST];
@@ -166,7 +169,6 @@ struct PnvPhb4PecStack {
 /* Memory windows from PowerBus to PHB */
 MemoryRegion mmbar0;
 MemoryRegion mmbar1;
-MemoryRegion phbbar;
 MemoryRegion intbar;
 uint64_t mmio0_base;
 uint64_t mmio0_size;
-- 
2.33.1

[PATCH 06/17] ppc/pnv: move mmbar0/mmbar1 and friends to PnvPHB4

2022-01-13 Thread Daniel Henrique Barboza

These 2 MemoryRegions, together with mmio(0|1)_base and mmio(0|1)_size
variables, are used together in the same functions. We're better off
moving them all in a single step.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/pci-host/pnv_phb4.c | 52 +-
 include/hw/pci-host/pnv_phb4.h | 14 -
 2 files changed, 32 insertions(+), 34 deletions(-)

diff --git a/hw/pci-host/pnv_phb4.c b/hw/pci-host/pnv_phb4.c
index 034721f159..dc4db091e4 100644
--- a/hw/pci-host/pnv_phb4.c
+++ b/hw/pci-host/pnv_phb4.c
@@ -228,16 +228,16 @@ static void pnv_phb4_check_mbt(PnvPHB4 *phb, uint32_t 
index)
 /* TODO: Figure out how to implemet/decode AOMASK */
 
 /* Check if it matches an enabled MMIO region in the PEC stack */
-if (memory_region_is_mapped(&phb->stack->mmbar0) &&
-base >= phb->stack->mmio0_base &&
-(base + size) <= (phb->stack->mmio0_base + phb->stack->mmio0_size)) {
-parent = &phb->stack->mmbar0;
-base -= phb->stack->mmio0_base;
-} else if (memory_region_is_mapped(&phb->stack->mmbar1) &&
-base >= phb->stack->mmio1_base &&
-(base + size) <= (phb->stack->mmio1_base + phb->stack->mmio1_size)) {
-parent = &phb->stack->mmbar1;
-base -= phb->stack->mmio1_base;
+if (memory_region_is_mapped(&phb->mmbar0) &&
+base >= phb->mmio0_base &&
+(base + size) <= (phb->mmio0_base + phb->mmio0_size)) {
+parent = &phb->mmbar0;
+base -= phb->mmio0_base;
+} else if (memory_region_is_mapped(&phb->mmbar1) &&
+base >= phb->mmio1_base &&
+(base + size) <= (phb->mmio1_base + phb->mmio1_size)) {
+parent = &phb->mmbar1;
+base -= phb->mmio1_base;
 } else {
 phb_error(phb, "PHB MBAR %d out of parent bounds", index);
 return;
@@ -910,13 +910,13 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
  */
 
 /* Handle unmaps */
-if (memory_region_is_mapped(&stack->mmbar0) &&
+if (memory_region_is_mapped(&phb->mmbar0) &&
 !(bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
-memory_region_del_subregion(sysmem, &stack->mmbar0);
+memory_region_del_subregion(sysmem, &phb->mmbar0);
 }
-if (memory_region_is_mapped(&stack->mmbar1) &&
+if (memory_region_is_mapped(&phb->mmbar1) &&
 !(bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
-memory_region_del_subregion(sysmem, &stack->mmbar1);
+memory_region_del_subregion(sysmem, &phb->mmbar1);
 }
 if (memory_region_is_mapped(&phb->phbbar) &&
 !(bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
@@ -931,29 +931,29 @@ static void pnv_pec_stk_update_map(PnvPhb4PecStack *stack)
 pnv_phb4_update_regions(phb);
 
 /* Handle maps */
-if (!memory_region_is_mapped(&stack->mmbar0) &&
+if (!memory_region_is_mapped(&phb->mmbar0) &&
 (bar_en & PEC_NEST_STK_BAR_EN_MMIO0)) {
 bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0] >> 8;
 mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR0_MASK];
 size = ((~mask) >> 8) + 1;
-snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio0",
+snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio0",
  pec->chip_id, pec->index, stack->stack_no);
-memory_region_init(&stack->mmbar0, OBJECT(stack), name, size);
-memory_region_add_subregion(sysmem, bar, &stack->mmbar0);
-stack->mmio0_base = bar;
-stack->mmio0_size = size;
+memory_region_init(&phb->mmbar0, OBJECT(phb), name, size);
+memory_region_add_subregion(sysmem, bar, &phb->mmbar0);
+phb->mmio0_base = bar;
+phb->mmio0_size = size;
 }
-if (!memory_region_is_mapped(&stack->mmbar1) &&
+if (!memory_region_is_mapped(&phb->mmbar1) &&
 (bar_en & PEC_NEST_STK_BAR_EN_MMIO1)) {
 bar = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1] >> 8;
 mask = stack->nest_regs[PEC_NEST_STK_MMIO_BAR1_MASK];
 size = ((~mask) >> 8) + 1;
-snprintf(name, sizeof(name), "pec-%d.%d-stack-%d-mmio1",
+snprintf(name, sizeof(name), "pec-%d.%d-phb-%d-mmio1",
  pec->chip_id, pec->index, stack->stack_no);
-memory_region_init(&stack->mmbar1, OBJECT(stack), name, size);
-memory_region_add_subregion(sysmem, bar, &stack->mmbar1);
-stack->mmio1_base = bar;
-stack->mmio1_size = size;
+memory_region_init(&phb->mmbar1, OBJECT(phb), name, size);
+memory_region_add_subregion(sysmem, bar, &phb->mmbar1);
+phb->mmio1_base = bar;
+phb->mmio1_size = size;
 }
 if (!memory_region_is_mapped(&phb->phbbar) &&
 (bar_en & PEC_NEST_STK_BAR_EN_PHB)) {
diff --git a/include/hw/pci-host/pnv_phb4.h b/include/hw/pci-host/pnv_phb4.h
index cf5dd4009c..4a8f510f6d 100644
--- a/include/hw/pci-host/pnv_phb4.h
+++ b/include/hw/pci-host/pnv_phb4.h
@@ -115,6 +115,12 @@ struct PnvPHB4 {
 /* Memory windows from PowerBus to PHB */
 MemoryRegion phbbar;
 MemoryRegion intbar;
+MemoryRegion mmbar0;
+MemoryRegion mmbar1;
+uint64_t mmio0_base;
+uint64_t mmio0_size;
+uint64_t mmio1_base;
+uint64_t mmio1_size;
 
 /*

1 2 3 >

1 - 100 of 274 matches

Mail list logo