Hi Thomas,

> Subject: Re: [RFC][PATCH] drm/virtgpu: Make vblank event dependent on the
> host resource
> 
> Hi
> 
> Am 17.10.25 um 08:03 schrieb Kasireddy, Vivek:
> > Hi Thomas,
> >
> >> Subject: [RFC][PATCH] drm/virtgpu: Make vblank event dependent on the
> host
> >> resource
> >>
> >> For each plane, store the buffer object's host backing in the state
> >> of the plane's respective CRTC. The host synchronizes output of buffer
> >> objects with a host resource to its own refresh cycle; thus avoiding
> >> tearing. During the CRTC's atomic flush, ignore the vblank timer if
> >> any of the CRTC's plane's buffer object has a host resource. Instead
> >> send the vblank event immdiatelly. Avoids corner cases where a vblank
> >> event happens too late for the next frame to be page flipped in time.
> >>
> >> The host synchronizes a plane's output to the host repaint cycle if
> >> the plane's buffer object has an associated host resource. An atomic
> >> commit blocks until the host cycle completes and then arms the vblank
> >> event. The guest compositor is thereby synchronized to the host's
> >> output rate.
> >>
> >> To avoid delays, send out the vblank event immediately instead of
> >> just arming it. Otherwise the event might be too late to page flip
> >> the compositor's next frame.
> >>
> >> The vblank timer is still active and fires in regular intervals
> >> according to the guest display refresh. This rate limits clients
> >> that only wait for the next vblank to occur, such as fbcon. These
> >> clients would otherwise produce a very high number of frames per
> >> second.
> >>
> >> For commits without host resource, the vblank timer rate-limits the
> >> guest output by generating vblank events at the guest display refresh
> >> rate as before.
> >>
> >> Signed-off-by: Thomas Zimmermann <[email protected]>
> >> ---
> >> There was a discussion about interference between vblank timers and
> >> the host repaint cycle at [1]. This patch address a possible corner
> >> case were the timing gets bad.
> >>
> >> [1] https://lore.kernel.org/dri-
> >>
> devel/[email protected]
> >> mprd11.prod.outlook.com/
> >> ---
> >>   drivers/gpu/drm/virtio/virtgpu_display.c | 72 ++++++++++++++++++++++--
> >>   drivers/gpu/drm/virtio/virtgpu_drv.h     | 15 +++++
> >>   drivers/gpu/drm/virtio/virtgpu_plane.c   | 16 +++++-
> >>   3 files changed, 98 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c
> >> b/drivers/gpu/drm/virtio/virtgpu_display.c
> >> index e972d9b015a9..43df1fa7d629 100644
> >> --- a/drivers/gpu/drm/virtio/virtgpu_display.c
> >> +++ b/drivers/gpu/drm/virtio/virtgpu_display.c
> >> @@ -49,14 +49,76 @@
> >>   #define drm_connector_to_virtio_gpu_output(x) \
> >>    container_of(x, struct virtio_gpu_output, conn)
> >>
> >> +static void virtio_gpu_crtc_state_destroy(struct virtio_gpu_crtc_state
> >> *vgcrtc_state)
> >> +{
> >> +  __drm_atomic_helper_crtc_destroy_state(&vgcrtc_state->base);
> >> +
> >> +  kfree(vgcrtc_state);
> >> +}
> >> +
> >> +static bool virtio_gpu_crtc_state_send_event_on_flush(struct
> >> virtio_gpu_crtc_state *vgcrtc_state)
> >> +{
> >> +  struct drm_crtc_state *crtc_state = &vgcrtc_state->base;
> >> +
> >> +  /*
> >> +   * The CRTC's output is vsync'ed if at least one plane's output is
> >> +   * sync'ed to the host refresh.
> >> +   */
> >> +  return vgcrtc_state->send_event_on_flush & crtc_state->plane_mask;
> >> +}
> >> +
> >> +static void virtio_gpu_crtc_reset(struct drm_crtc *crtc)
> >> +{
> >> +  struct virtio_gpu_crtc_state *vgcrtc_state;
> >> +
> >> +  if (crtc->state)
> >> +          virtio_gpu_crtc_state_destroy(to_virtio_gpu_crtc_state(crtc-
> >>> state));
> >> +
> >> +  vgcrtc_state = kzalloc(sizeof(*vgcrtc_state), GFP_KERNEL);
> >> +  if (vgcrtc_state) {
> >> +          vgcrtc_state->send_event_on_flush = 0ul;
> >> +          __drm_atomic_helper_crtc_reset(crtc, &vgcrtc_state->base);
> >> +  } else {
> >> +          __drm_atomic_helper_crtc_reset(crtc, NULL);
> >> +  }
> >> +}
> >> +
> >> +static struct drm_crtc_state
> *virtio_gpu_crtc_atomic_duplicate_state(struct
> >> drm_crtc *crtc)
> >> +{
> >> +  struct drm_device *dev = crtc->dev;
> >> +  struct drm_crtc_state *crtc_state = crtc->state;
> >> +  struct virtio_gpu_crtc_state *new_vgcrtc_state;
> >> +  struct virtio_gpu_crtc_state *vgcrtc_state;
> >> +
> >> +  if (drm_WARN_ON(dev, !crtc_state))
> >> +          return NULL;
> >> +
> >> +  new_vgcrtc_state = kzalloc(sizeof(*new_vgcrtc_state), GFP_KERNEL);
> >> +  if (!new_vgcrtc_state)
> >> +          return NULL;
> >> +
> >> +  vgcrtc_state = to_virtio_gpu_crtc_state(crtc_state);
> >> +
> >> +  __drm_atomic_helper_crtc_duplicate_state(crtc, &new_vgcrtc_state-
> >>> base);
> >> +  vgcrtc_state->send_event_on_flush = vgcrtc_state-
> >>> send_event_on_flush;
> >> +
> >> +  return &new_vgcrtc_state->base;
> >> +}
> >> +
> >> +static void virtio_gpu_crtc_atomic_destroy_state(struct drm_crtc *crtc,
> >> +                                           struct drm_crtc_state 
> >> *crtc_state)
> >> +{
> >> +  virtio_gpu_crtc_state_destroy(to_virtio_gpu_crtc_state(crtc_state));
> >> +}
> >> +
> >>   static const struct drm_crtc_funcs virtio_gpu_crtc_funcs = {
> >>    .set_config             = drm_atomic_helper_set_config,
> >>    .destroy                = drm_crtc_cleanup,
> >>
> >>    .page_flip              = drm_atomic_helper_page_flip,
> >> -  .reset                  = drm_atomic_helper_crtc_reset,
> >> -  .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
> >> -  .atomic_destroy_state   = drm_atomic_helper_crtc_destroy_state,
> >> +  .reset                  = virtio_gpu_crtc_reset,
> >> +  .atomic_duplicate_state = virtio_gpu_crtc_atomic_duplicate_state,
> >> +  .atomic_destroy_state   = virtio_gpu_crtc_atomic_destroy_state,
> >>    DRM_CRTC_VBLANK_TIMER_FUNCS,
> >>   };
> >>
> >> @@ -129,7 +191,9 @@ static void virtio_gpu_crtc_atomic_flush(struct
> >> drm_crtc *crtc,
> >>   {
> >>    struct drm_device *dev = crtc->dev;
> >>    struct drm_crtc_state *crtc_state =
> >> drm_atomic_get_new_crtc_state(state, crtc);
> >> +  struct virtio_gpu_crtc_state *vgcrtc_state =
> >> to_virtio_gpu_crtc_state(crtc_state);
> >>    struct virtio_gpu_output *output =
> >> drm_crtc_to_virtio_gpu_output(crtc);
> >> +  bool send_event_on_flush =
> >> virtio_gpu_crtc_state_send_event_on_flush(vgcrtc_state);
> >>    struct drm_pending_vblank_event *event;
> >>
> >>    /*
> >> @@ -147,7 +211,7 @@ static void virtio_gpu_crtc_atomic_flush(struct
> >> drm_crtc *crtc,
> >>    crtc_state->event = NULL;
> >>
> >>    if (event) {
> >> -          if (drm_crtc_vblank_get(crtc) == 0)
> >> +          if (!send_event_on_flush && drm_crtc_vblank_get(crtc) == 0)
> >>                    drm_crtc_arm_vblank_event(crtc, event);
> >>            else
> >>                    drm_crtc_send_vblank_event(crtc, event);
> > As suspected, before applying this patch, the frame rate was halved:
> > root@localhost:/weston_upstream/weston# ./build/clients/weston-simple-
> egl -o &
> > Using config: r8g8b8a0
> > has EGL_EXT_buffer_age and EGL_EXT_swap_buffers_with_damage
> > has EGL_EXT_surface_compression
> > 150 frames in 5 seconds: 30.000000 fps
> > 149 frames in 5 seconds: 29.799999 fps
> > 152 frames in 5 seconds: 30.400000 fps
> >
> > And, after applying this patch:
> > root@localhost:/weston_upstream/weston# ./build/clients/weston-simple-
> egl -o &
> > Using config: r8g8b8a0
> > has EGL_EXT_buffer_age and EGL_EXT_swap_buffers_with_damage
> > has EGL_EXT_surface_compression
> > 277 frames in 5 seconds: 55.400002 fps
> > 273 frames in 5 seconds: 54.599998 fps
> > 298 frames in 5 seconds: 59.599998 fps
> > 284 frames in 5 seconds: 56.799999 fps
> > 287 frames in 5 seconds: 57.400002 fps
> > 272 frames in 5 seconds: 54.400002 fps
> > 289 frames in 5 seconds: 57.799999 fps
> > 287 frames in 5 seconds: 57.400002 fps
> > 289 frames in 5 seconds: 57.799999 fps
> > 272 frames in 5 seconds: 54.400002 fps
> > 266 frames in 5 seconds: 53.200001 fps
> > 261 frames in 5 seconds: 52.200001 fps
> > 277 frames in 5 seconds: 55.400002 fps
> > 256 frames in 5 seconds: 51.200001 fps
> > 179 frames in 5 seconds: 35.799999 fps
> > 169 frames in 5 seconds: 33.799999 fps
> > 178 frames in 5 seconds: 35.599998 fps
> > 211 frames in 5 seconds: 42.200001 fps
> > 255 frames in 5 seconds: 51.000000 fps
> >
> > As you can see, the frame rate/performance improves but it occasionally
> > drops into the 30s and 40s, which is a bit concerning because if I revert
> > this patch and a036f5fceedb ("drm/virtgpu: Use vblank timer") and test
> > again, I do not see this drop.
> >
> > This suggests that the vblank event is still delayed in some other corner
> > cases, which might be challenging to figure out.
> 
> There's little difference between the current event handling and the one
> where no vblanks have been set up. I suspect that the vblank timer
> callback interferes with the locking in atomic_flush. That would also
> explain why the fps drop at no clear pattern.
> 
> Could you please test the attached patch? It enables/disables the vblank
> timer depending on the buffer resources; as you suggested before.  Does
> this make a difference?
Tested the attached patch and the frame rate/FPS is now consistent:
root@localhost:/weston_upstream/weston# ./build/clients/weston-simple-egl -o &
Using config: r8g8b8a0
has EGL_EXT_buffer_age and EGL_EXT_swap_buffers_with_damage
has EGL_EXT_surface_compression
283 frames in 5 seconds: 56.599998 fps
300 frames in 5 seconds: 60.000000 fps
301 frames in 5 seconds: 60.200001 fps
300 frames in 5 seconds: 60.000000 fps
299 frames in 5 seconds: 59.799999 fps
300 frames in 5 seconds: 60.000000 fps
300 frames in 5 seconds: 60.000000 fps
301 frames in 5 seconds: 60.200001 fps
300 frames in 5 seconds: 60.000000 fps
297 frames in 5 seconds: 59.400002 fps
301 frames in 5 seconds: 60.200001 fps
300 frames in 5 seconds: 60.000000 fps
301 frames in 5 seconds: 60.200001 fps
301 frames in 5 seconds: 60.200001 fps
300 frames in 5 seconds: 60.000000 fps
301 frames in 5 seconds: 60.200001 fps

Thanks,
Vivek

> 
> Best regards
> Thomas
> 
> >
> > Tested by running Gnome Wayland after launching Qemu with following
> > (relevant) parameters:
> > qemu-system-x86_64 -m 4096m -enable-kvm .........
> > -device vfio-pci,host=0000:03:00.1
> > -device virtio-gpu,max_outputs=1,xres=1920,yres=1080,blob=true
> > -display gtk,gl=on
> > -object memory-backend-memfd,id=mem1,size=4096M
> > -machine q35,accel=kvm,memory-backend=mem1
> >
> > Thanks,
> > Vivek
> >
> >> diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h
> >> b/drivers/gpu/drm/virtio/virtgpu_drv.h
> >> index f17660a71a3e..671fc3b61bc6 100644
> >> --- a/drivers/gpu/drm/virtio/virtgpu_drv.h
> >> +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
> >> @@ -195,6 +195,21 @@ struct virtio_gpu_framebuffer {
> >>   #define to_virtio_gpu_framebuffer(x) \
> >>    container_of(x, struct virtio_gpu_framebuffer, base)
> >>
> >> +struct virtio_gpu_crtc_state {
> >> +  struct drm_crtc_state base;
> >> +
> >> +  /*
> >> +   * Send vblank event immediately from atomic_flush. Set from each
> >> +   * plane's atomic check and depends on the buffer object. Buffers
> >> +   * with host backing are vsync'd already and should send immediately;
> >> +   * others should wait for the VBLANK timer.
> >> +   */
> >> +  u32 send_event_on_flush;
> >> +};
> >> +
> >> +#define to_virtio_gpu_crtc_state(x) \
> >> +  container_of(x, struct virtio_gpu_crtc_state, base)
> >> +
> >>   struct virtio_gpu_plane_state {
> >>    struct drm_plane_state base;
> >>    struct virtio_gpu_fence *fence;
> >> diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c
> >> b/drivers/gpu/drm/virtio/virtgpu_plane.c
> >> index 29e4b458ae57..d04721c5d505 100644
> >> --- a/drivers/gpu/drm/virtio/virtgpu_plane.c
> >> +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
> >> @@ -104,7 +104,8 @@ static int virtio_gpu_plane_atomic_check(struct
> >> drm_plane *plane,
> >>
> >> plane);
> >>    bool is_cursor = plane->type == DRM_PLANE_TYPE_CURSOR;
> >>    struct drm_crtc_state *crtc_state;
> >> -  int ret;
> >> +  struct virtio_gpu_crtc_state *vgcrtc_state;
> >> +  int ret, i;
> >>
> >>    if (!new_plane_state->fb || WARN_ON(!new_plane_state->crtc))
> >>            return 0;
> >> @@ -126,6 +127,19 @@ static int virtio_gpu_plane_atomic_check(struct
> >> drm_plane *plane,
> >>                                              DRM_PLANE_NO_SCALING,
> >>                                              DRM_PLANE_NO_SCALING,
> >>                                              is_cursor, true);
> >> +
> >> +  vgcrtc_state = to_virtio_gpu_crtc_state(crtc_state);
> >> +  vgcrtc_state->send_event_on_flush &= ~drm_plane_mask(plane);
> >> +
> >> +  for (i = 0; i < new_plane_state->fb->format->num_planes; ++i) {
> >> +          struct virtio_gpu_object *bo =
> >> gem_to_virtio_gpu_obj(new_plane_state->fb->obj[i]);
> >> +
> >> +          if (bo->host3d_blob || bo->guest_blob) {
> >> +                  vgcrtc_state->send_event_on_flush |=
> >> drm_plane_mask(plane);
> >> +                  break; /* only need to find one */
> >> +          }
> >> +  }
> >> +
> >>    return ret;
> >>   }
> >>
> >> --
> >> 2.51.0
> 
> --
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Frankenstrasse 146, 90461 Nuernberg, Germany
> GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman
> HRB 36809 (AG Nuernberg)


Reply via email to