On Tue, 24 Mar 2020 11:54:55 -0700, Umesh Nerlige Ramappa wrote:
>
> From: Lionel Landwerlin <lionel.g.landwer...@intel.com>
>
> We're about to introduce an options to open the perf stream, giving
> the user ability to configure how often it wants the kernel to poll
> the OA registers for available data.
>
> Right now the workaround against the OA tail pointer race condition
> requires at least twice the internal kernel polling timer to make any
> data available.
>
> This changes introduce checks on the OA data written into the circular
> buffer to make as much data as possible available on the first
> iteration of the polling timer.

Reviewed-by: Ashutosh Dixit <ashutosh.di...@intel.com>

>
> v2: Use OA_TAKEN macro without the gtt_offset (Lionel)
> v3: (Umesh)
> - Rebase
> - Change report to report32 from below review
>   https://patchwork.freedesktop.org/patch/330704/?series=66697&rev=1
> v4: (Ashutosh, Lionel)
> - Fix checkpatch errors
> - Fix aging_timestamp initialization
> - Check for only one valid landed report
> - Fix check for unlanded report
> v5: (Ashutosh)
> - Fix bug in accurately determining landed report.
> - Optimize the check for landed reports by going as far as the
>   previously determined aged tail.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.rama...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c       | 204 ++++++++++---------------
>  drivers/gpu/drm/i915/i915_perf_types.h |  28 ++--
>  2 files changed, 97 insertions(+), 135 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 3222f6cd8255..4583ae9b77be 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -223,26 +223,17 @@
>   *
>   * Although this can be observed explicitly while copying reports to 
> userspace
>   * by checking for a zeroed report-id field in tail reports, we want to 
> account
> - * for this earlier, as part of the oa_buffer_check to avoid lots of 
> redundant
> - * read() attempts.
> - *
> - * In effect we define a tail pointer for reading that lags the real tail
> - * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
> - * time for the corresponding reports to become visible to the CPU.
> - *
> - * To manage this we actually track two tail pointers:
> - *  1) An 'aging' tail with an associated timestamp that is tracked until we
> - *     can trust the corresponding data is visible to the CPU; at which point
> - *     it is considered 'aged'.
> - *  2) An 'aged' tail that can be used for read()ing.
> - *
> - * The two separate pointers let us decouple read()s from tail pointer aging.
> - *
> - * The tail pointers are checked and updated at a limited rate within a 
> hrtimer
> - * callback (the same callback that is used for delivering EPOLLIN events)
> - *
> - * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
> - * indicates that an updated tail pointer is needed.
> + * for this earlier, as part of the oa_buffer_check_unlocked to avoid lots of
> + * redundant read() attempts.
> + *
> + * We workaround this issue in oa_buffer_check_unlocked() by reading the 
> reports
> + * in the OA buffer, starting from the tail reported by the HW until we find 
> a
> + * report with its first 2 dwords not 0 meaning its previous report is
> + * completely in memory and ready to be read. Those dwords are also set to 0
> + * once read and the whole buffer is cleared upon OA buffer initialization. 
> The
> + * first dword is the reason for this report while the second is the 
> timestamp,
> + * making the chances of having those 2 fields at 0 fairly unlikely. A more
> + * detailed explanation is available in oa_buffer_check_unlocked().
>   *
>   * Most of the implementation details for this workaround are in
>   * oa_buffer_check_unlocked() and _append_oa_reports()
> @@ -454,8 +445,8 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream 
> *stream)
>   * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
>   *
>   * Besides returning true when there is data available to read() this 
> function
> - * also has the side effect of updating the oa_buffer.tails[], 
> .aging_timestamp
> - * and .aged_tail_idx state used for reading.
> + * also updates the tail, aging_tail and aging_timestamp in the oa_buffer
> + * object.
>   *
>   * Note: It's safe to read OA config state here unlocked, assuming that this 
> is
>   * only called while the stream is enabled, while the global OA configuration
> @@ -465,28 +456,18 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream 
> *stream)
>   */
>  static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
>  {
> +     u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
>       int report_size = stream->oa_buffer.format_size;
>       unsigned long flags;
> -     unsigned int aged_idx;
> -     u32 head, hw_tail, aged_tail, aging_tail;
> +     u32 hw_tail;
>       u64 now;
>
>       /* We have to consider the (unlikely) possibility that read() errors
> -      * could result in an OA buffer reset which might reset the head,
> -      * tails[] and aged_tail state.
> +      * could result in an OA buffer reset which might reset the head and
> +      * tail state.
>        */
>       spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
> -     /* NB: The head we observe here might effectively be a little out of
> -      * date (between head and tails[aged_idx].offset if there is currently
> -      * a read() in progress.
> -      */
> -     head = stream->oa_buffer.head;
> -
> -     aged_idx = stream->oa_buffer.aged_tail_idx;
> -     aged_tail = stream->oa_buffer.tails[aged_idx].offset;
> -     aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
> -
>       hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
>
>       /* The tail pointer increases in 64 byte increments,
> @@ -496,64 +477,61 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
>
>       now = ktime_get_mono_fast_ns();
>
> -     /* Update the aged tail
> -      *
> -      * Flip the tail pointer available for read()s once the aging tail is
> -      * old enough to trust that the corresponding data will be visible to
> -      * the CPU...
> -      *
> -      * Do this before updating the aging pointer in case we may be able to
> -      * immediately start aging a new pointer too (if new data has become
> -      * available) without needing to wait for a later hrtimer callback.
> -      */
> -     if (aging_tail != INVALID_TAIL_PTR &&
> -         ((now - stream->oa_buffer.aging_timestamp) >
> -          OA_TAIL_MARGIN_NSEC)) {
> -
> -             aged_idx ^= 1;
> -             stream->oa_buffer.aged_tail_idx = aged_idx;
> +     if (hw_tail == stream->oa_buffer.aging_tail &&
> +         (now - stream->oa_buffer.aging_timestamp) > OA_TAIL_MARGIN_NSEC) {
> +             /* If the HW tail hasn't move since the last check and the HW
> +              * tail has been aging for long enough, declare it the new
> +              * tail.
> +              */
> +             stream->oa_buffer.tail = stream->oa_buffer.aging_tail;
> +     } else {
> +             u32 head, tail, aged_tail;
>
> -             aged_tail = aging_tail;
> +             /* NB: The head we observe here might effectively be a little
> +              * out of date. If a read() is in progress, the head could be
> +              * anywhere between this head and stream->oa_buffer.tail.
> +              */
> +             head = stream->oa_buffer.head - gtt_offset;
> +             aged_tail = stream->oa_buffer.tail - gtt_offset;
> +
> +             hw_tail -= gtt_offset;
> +             tail = hw_tail;
> +
> +             /* Walk the stream backward until we find a report with dword 0
> +              * & 1 not at 0. Since the circular buffer pointers progress by
> +              * increments of 64 bytes and that reports can be up to 256
> +              * bytes long, we can't tell whether a report has fully landed
> +              * in memory before the first 2 dwords of the following report
> +              * have effectively landed.
> +              *
> +              * This is assuming that the writes of the OA unit land in
> +              * memory in the order they were written to.
> +              * If not : (╯°□°)╯︵ ┻━┻
> +              */
> +             while (OA_TAKEN(tail, aged_tail) >= report_size) {
> +                     u32 *report32 = (void *)(stream->oa_buffer.vaddr + 
> tail);
>
> -             /* Mark that we need a new pointer to start aging... */
> -             stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
> -             aging_tail = INVALID_TAIL_PTR;
> -     }
> +                     if (report32[0] != 0 || report32[1] != 0)
> +                             break;
>
> -     /* Update the aging tail
> -      *
> -      * We throttle aging tail updates until we have a new tail that
> -      * represents >= one report more data than is already available for
> -      * reading. This ensures there will be enough data for a successful
> -      * read once this new pointer has aged and ensures we will give the new
> -      * pointer time to age.
> -      */
> -     if (aging_tail == INVALID_TAIL_PTR &&
> -         (aged_tail == INVALID_TAIL_PTR ||
> -          OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
> -             struct i915_vma *vma = stream->oa_buffer.vma;
> -             u32 gtt_offset = i915_ggtt_offset(vma);
> -
> -             /* Be paranoid and do a bounds check on the pointer read back
> -              * from hardware, just in case some spurious hardware condition
> -              * could put the tail out of bounds...
> -              */
> -             if (hw_tail >= gtt_offset &&
> -                 hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
> -                     stream->oa_buffer.tails[!aged_idx].offset =
> -                             aging_tail = hw_tail;
> -                     stream->oa_buffer.aging_timestamp = now;
> -             } else {
> -                     drm_err(&stream->perf->i915->drm,
> -                             "Ignoring spurious out of range OA buffer tail 
> pointer = %x\n",
> -                             hw_tail);
> +                     tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
>               }
> +
> +             if (OA_TAKEN(hw_tail, tail) > report_size &&
> +                 __ratelimit(&stream->perf->tail_pointer_race))
> +                     DRM_NOTE("unlanded report(s) head=0x%x "
> +                              "tail=0x%x hw_tail=0x%x\n",
> +                              head, tail, hw_tail);
> +
> +             stream->oa_buffer.tail = gtt_offset + tail;
> +             stream->oa_buffer.aging_tail = gtt_offset + hw_tail;
> +             stream->oa_buffer.aging_timestamp = now;
>       }
>
>       spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> -     return aged_tail == INVALID_TAIL_PTR ?
> -             false : OA_TAKEN(aged_tail, head) >= report_size;
> +     return OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> +                     stream->oa_buffer.head - gtt_offset) >= report_size;
>  }
>
>  /**
> @@ -671,7 +649,6 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
> *stream,
>       u32 mask = (OA_BUFFER_SIZE - 1);
>       size_t start_offset = *offset;
>       unsigned long flags;
> -     unsigned int aged_tail_idx;
>       u32 head, tail;
>       u32 taken;
>       int ret = 0;
> @@ -682,18 +659,10 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>       spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
>       head = stream->oa_buffer.head;
> -     aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> -     tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> +     tail = stream->oa_buffer.tail;
>
>       spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> -     /*
> -      * An invalid tail pointer here means we're still waiting for the poll
> -      * hrtimer callback to give us a pointer
> -      */
> -     if (tail == INVALID_TAIL_PTR)
> -             return -EAGAIN;
> -
>       /*
>        * NB: oa_buffer.head/tail include the gtt_offset which we don't want
>        * while indexing relative to oa_buf_base.
> @@ -827,13 +796,11 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>               }
>
>               /*
> -              * The above reason field sanity check is based on
> -              * the assumption that the OA buffer is initially
> -              * zeroed and we reset the field after copying so the
> -              * check is still meaningful once old reports start
> -              * being overwritten.
> +              * Clear out the first 2 dword as a mean to detect unlanded
> +              * reports.
>                */
>               report32[0] = 0;
> +             report32[1] = 0;
>       }
>
>       if (start_offset != *offset) {
> @@ -974,7 +941,6 @@ static int gen7_append_oa_reports(struct i915_perf_stream 
> *stream,
>       u32 mask = (OA_BUFFER_SIZE - 1);
>       size_t start_offset = *offset;
>       unsigned long flags;
> -     unsigned int aged_tail_idx;
>       u32 head, tail;
>       u32 taken;
>       int ret = 0;
> @@ -985,17 +951,10 @@ static int gen7_append_oa_reports(struct 
> i915_perf_stream *stream,
>       spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
>
>       head = stream->oa_buffer.head;
> -     aged_tail_idx = stream->oa_buffer.aged_tail_idx;
> -     tail = stream->oa_buffer.tails[aged_tail_idx].offset;
> +     tail = stream->oa_buffer.tail;
>
>       spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> -     /* An invalid tail pointer here means we're still waiting for the poll
> -      * hrtimer callback to give us a pointer
> -      */
> -     if (tail == INVALID_TAIL_PTR)
> -             return -EAGAIN;
> -
>       /* NB: oa_buffer.head/tail include the gtt_offset which we don't want
>        * while indexing relative to oa_buf_base.
>        */
> @@ -1053,13 +1012,11 @@ static int gen7_append_oa_reports(struct 
> i915_perf_stream *stream,
>               if (ret)
>                       break;
>
> -             /* The above report-id field sanity check is based on
> -              * the assumption that the OA buffer is initially
> -              * zeroed and we reset the field after copying so the
> -              * check is still meaningful once old reports start
> -              * being overwritten.
> +             /* Clear out the first 2 dwords as a mean to detect unlanded
> +              * reports.
>                */
>               report32[0] = 0;
> +             report32[1] = 0;
>       }
>
>       if (start_offset != *offset) {
> @@ -1438,8 +1395,8 @@ static void gen7_init_oa_buffer(struct i915_perf_stream 
> *stream)
>                          gtt_offset | OABUFFER_SIZE_16M);
>
>       /* Mark that we need updated tail pointers to read from... */
> -     stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> -     stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> +     stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
> +     stream->oa_buffer.tail = gtt_offset;
>
>       spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -1492,8 +1449,8 @@ static void gen8_init_oa_buffer(struct i915_perf_stream 
> *stream)
>       intel_uncore_write(uncore, GEN8_OATAILPTR, gtt_offset & 
> GEN8_OATAILPTR_MASK);
>
>       /* Mark that we need updated tail pointers to read from... */
> -     stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> -     stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> +     stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
> +     stream->oa_buffer.tail = gtt_offset;
>
>       /*
>        * Reset state used to recognise context switches, affecting which
> @@ -1548,8 +1505,8 @@ static void gen12_init_oa_buffer(struct 
> i915_perf_stream *stream)
>                          gtt_offset & GEN12_OAG_OATAILPTR_MASK);
>
>       /* Mark that we need updated tail pointers to read from... */
> -     stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
> -     stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
> +     stream->oa_buffer.aging_tail = INVALID_TAIL_PTR;
> +     stream->oa_buffer.tail = gtt_offset;
>
>       /*
>        * Reset state used to recognise context switches, affecting which
> @@ -4398,6 +4355,11 @@ void i915_perf_init(struct drm_i915_private *i915)
>               ratelimit_set_flags(&perf->spurious_report_rs,
>                                   RATELIMIT_MSG_ON_RELEASE);
>
> +             ratelimit_state_init(&perf->tail_pointer_race,
> +                                  5 * HZ, 10);
> +             ratelimit_set_flags(&perf->tail_pointer_race,
> +                                 RATELIMIT_MSG_ON_RELEASE);
> +
>               atomic64_set(&perf->noa_programming_delay,
>                            500 * 1000 /* 500us */);
>
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
> b/drivers/gpu/drm/i915/i915_perf_types.h
> index 32289cbda648..c3ab184c604a 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -273,21 +273,10 @@ struct i915_perf_stream {
>               spinlock_t ptr_lock;
>
>               /**
> -              * @tails: One 'aging' tail pointer and one 'aged' tail pointer 
> ready to
> -              * used for reading.
> -              *
> -              * Initial values of 0xffffffff are invalid and imply that an
> -              * update is required (and should be ignored by an attempted
> -              * read)
> +              * @aging_tail: The last HW tail reported by HW. The data
> +              * might not have made it to memory yet though.
>                */
> -             struct {
> -                     u32 offset;
> -             } tails[2];
> -
> -             /**
> -              * @aged_tail_idx: Index for the aged tail ready to read() data 
> up to.
> -              */
> -             unsigned int aged_tail_idx;
> +             u32 aging_tail;
>
>               /**
>                * @aging_timestamp: A monotonic timestamp for when the current 
> aging tail pointer
> @@ -303,6 +292,11 @@ struct i915_perf_stream {
>                * OA buffer data to userspace.
>                */
>               u32 head;
> +
> +             /**
> +              * @tail: The last verified tail that can be read by userspace.
> +              */
> +             u32 tail;
>       } oa_buffer;
>
>       /**
> @@ -420,6 +414,12 @@ struct i915_perf {
>        */
>       struct ratelimit_state spurious_report_rs;
>
> +     /**
> +      * For rate limiting any notifications of tail pointer
> +      * race.
> +      */
> +     struct ratelimit_state tail_pointer_race;
> +
>       u32 gen7_latched_oastatus1;
>       u32 ctx_oactxctrl_offset;
>       u32 ctx_flexeu0_offset;
> --
> 2.20.1
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to