Thiago Jung Bauermann [bauer...@linux.vnet.ibm.com] wrote:
> POWER9 introduces a new version of the hypervisor API to access the 24x7
> perf counters. The new version changed some of the structures used for
> requests and results.
> 
> Signed-off-by: Thiago Jung Bauermann <bauer...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/perf/hv-24x7.c            | 145 
> +++++++++++++++++++++++++++------
>  arch/powerpc/perf/hv-24x7.h            |  59 ++++++++++++--
>  arch/powerpc/platforms/pseries/Kconfig |   2 +-
>  3 files changed, 173 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index 043cbc78be98..95c44f1d2fd2 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -18,6 +18,7 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
> 
> +#include <asm/cputhreads.h>
>  #include <asm/firmware.h>
>  #include <asm/hvcall.h>
>  #include <asm/io.h>
> @@ -27,6 +28,9 @@
>  #include "hv-24x7-catalog.h"
>  #include "hv-common.h"
> 
> +/* Version of the 24x7 hypervisor API that we should use in this machine. */
> +static int interface_version;
> +
>  static bool domain_is_valid(unsigned domain)
>  {
>       switch (domain) {
> @@ -74,7 +78,11 @@ static const char *domain_name(unsigned domain)
> 
>  static bool catalog_entry_domain_is_valid(unsigned domain)
>  {
> -     return is_physical_domain(domain);
> +     /* POWER8 doesn't support virtual domains. */
> +     if (interface_version == 1)
> +             return is_physical_domain(domain);
> +     else
> +             return domain_is_valid(domain);
>  }
> 
>  /*
> @@ -166,9 +174,12 @@ DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
>  DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
>  DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
> 
> -#define MAX_NUM_REQUESTS     ((H24x7_DATA_BUFFER_SIZE -                     \
> +#define MAX_NUM_REQUESTS_V1  ((H24x7_DATA_BUFFER_SIZE -                     \
> +                                     sizeof(struct hv_24x7_request_buffer)) \
> +                                     / H24x7_REQUEST_SIZE_V1)
> +#define MAX_NUM_REQUESTS_V2  ((H24x7_DATA_BUFFER_SIZE -                     \
>                                       sizeof(struct hv_24x7_request_buffer)) \
> -                                     / sizeof(struct hv_24x7_request))
> +                                     / H24x7_REQUEST_SIZE_V2)

Nit: Can we define MAX_NUM_REQUESTS(version) - with a version parameter ? It
will...
> 
>  static char *event_name(struct hv_24x7_event_data *ev, int *len)
>  {
> @@ -1052,7 +1063,7 @@ static void init_24x7_request(struct 
> hv_24x7_request_buffer *request_buffer,
>       memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
>       memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
> 
> -     request_buffer->interface_version = HV_24X7_IF_VERSION_CURRENT;
> +     request_buffer->interface_version = interface_version;
>       /* memset above set request_buffer->num_requests to 0 */
>  }
> 
> @@ -1077,7 +1088,7 @@ static int make_24x7_request(struct 
> hv_24x7_request_buffer *request_buffer,
>       if (ret) {
>               struct hv_24x7_request *req;
> 
> -             req = &request_buffer->requests[0];
> +             req = request_buffer->requests;
>               pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 
> 0x%lx (%ld) detail=0x%x failing ix=%x\n",
>                                     req->performance_domain, req->data_offset,
>                                     req->starting_ix, req->starting_lpar_ix,
> @@ -1101,9 +1112,13 @@ static int add_event_to_24x7_request(struct perf_event 
> *event,
>  {
>       u16 idx;
>       int i;
> +     size_t req_size;
>       struct hv_24x7_request *req;
> 
> -     if (request_buffer->num_requests >= MAX_NUM_REQUESTS) {
> +     if ((request_buffer->interface_version == 1
> +          && request_buffer->num_requests >= MAX_NUM_REQUESTS_V1)
> +         || (request_buffer->interface_version > 1
> +             && request_buffer->num_requests >= MAX_NUM_REQUESTS_V2)) {
>               pr_devel("Too many requests for 24x7 HCALL %d\n",

...simplify this check to

        if (request->buffer->num_requests >= MAX_NUM_REQUESTS(version))

>                               request_buffer->num_requests);
>               return -EINVAL;
> @@ -1120,8 +1135,11 @@ static int add_event_to_24x7_request(struct perf_event 
> *event,
>               idx = event_get_vcpu(event);
>       }
> 
> +     req_size = request_buffer->interface_version == 1 ?
> +                        H24x7_REQUEST_SIZE_V1 : H24x7_REQUEST_SIZE_V2;
> +

Maybe similarly, with H24x7_REQUEST_SIZE(version) ?

>       i = request_buffer->num_requests++;
> -     req = &request_buffer->requests[i];
> +     req = (void *) request_buffer->requests + i * req_size;
> 
>       req->performance_domain = event_get_domain(event);
>       req->data_size = cpu_to_be16(8);
> @@ -1131,14 +1149,97 @@ static int add_event_to_24x7_request(struct 
> perf_event *event,
>       req->starting_ix = cpu_to_be16(idx);
>       req->max_ix = cpu_to_be16(1);
> 
> +     if (request_buffer->interface_version > 1 &&
> +         req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
> +             req->starting_thread_group_ix = idx % 2;
> +             req->max_num_thread_groups = 1;
> +     }
> +
>       return 0;
>  }
> 
> +/**
> + * get_count_from_result - get event count from the given result
> + *
> + * @event:   Event associated with @res.
> + * @resb:    Result buffer containing @res.
> + * @res:     Result to work on.
> + * @countp:  Output variable containing the event count.
> + * @next:    Optional output variable pointing to the next result in @resb.
> + */
> +static int get_count_from_result(struct perf_event *event,
> +                              struct hv_24x7_data_result_buffer *resb,
> +                              struct hv_24x7_result *res, u64 *countp,
> +                              struct hv_24x7_result **next)
> +{
> +     u16 num_elements = be16_to_cpu(res->num_elements_returned);
> +     u16 data_size = be16_to_cpu(res->result_element_data_size);
> +     unsigned int data_offset;
> +     void *element_data;
> +     int ret = 0;
> +
> +     /*
> +      * We can bail out early if the result is empty.
> +      */
> +     if (!num_elements) {
> +             pr_debug("Result of request %hhu is empty, nothing to do\n",
> +                      res->result_ix);
> +
> +             if (next)
> +                     *next = (struct hv_24x7_result *) res->elements;
> +
> +             return -ENODATA;
> +     }
> +
> +     /*
> +      * This code assumes that a result has only one element.
> +      */
> +     if (num_elements != 1) {
> +             pr_debug("Error: result of request %hhu has %hu elements\n",
> +                      res->result_ix, num_elements);

Could this happen due to an user request or would this indicate a bug
in the way we submitted the request (perf should submit separate request
for each lpar/index - we set ->max_ix and ->max_num_lpars to cpu_be16(1).

Minor inconsistency with proceeding, is that if the next element passes,
this return code is lost/over written. IOW, h_24x7_event_commit_txn()'s
return value depends on the last element we process, even if intermediate
ones encounter an error.

> +
> +             if (!next)
> +                     return -ENOTSUPP;
> +
> +             /*
> +              * We still need to go through the motions so that we can return
> +              * a pointer to the next result.
> +              */
> +             ret = -ENOTSUPP;
> +     }
> +
> +     if (data_size != sizeof(u64)) {
> +             pr_debug("Error: result of request %hhu has data of %hu 
> bytes\n",
> +                      res->result_ix, data_size);
> +
> +             if (!next)
> +                     return -ENOTSUPP;
> +
> +             ret = -ENOTSUPP;
> +     }
> +
> +     if (resb->interface_version == 1)
> +             data_offset = offsetof(struct hv_24x7_result_element_v1,
> +                                    element_data);
> +     else
> +             data_offset = offsetof(struct hv_24x7_result_element_v2,
> +                                    element_data);
> +
> +     element_data = res->elements + data_offset;
> +
> +     if (!ret)
> +             *countp = be64_to_cpu(*((u64 *) element_data));
> +
> +     /* The next result is after the result element. */
> +     if (next)
> +             *next = element_data + data_size;
> +
> +     return ret;
> +}
> +
>  static int single_24x7_request(struct perf_event *event, u64 *count)
>  {
>       int ret;
> -     u16 num_elements;
> -     struct hv_24x7_result *result;
>       struct hv_24x7_request_buffer *request_buffer;
>       struct hv_24x7_data_result_buffer *result_buffer;
> 
> @@ -1158,14 +1259,9 @@ static int single_24x7_request(struct perf_event 
> *event, u64 *count)
>       if (ret)
>               goto out;
> 
> -     result = result_buffer->results;
> -
> -     /* This code assumes that a result has only one element. */
> -     num_elements = be16_to_cpu(result->num_elements_returned);
> -     WARN_ON_ONCE(num_elements != 1);
> -
>       /* process result from hcall */
> -     *count = be64_to_cpu(result->elements[0].element_data[0]);
> +     ret = get_count_from_result(event, result_buffer,
> +                                 result_buffer->results, count, NULL);
> 
>  out:
>       put_cpu_var(hv_24x7_reqb);
> @@ -1425,16 +1521,13 @@ static int h_24x7_event_commit_txn(struct pmu *pmu)
>       for (i = 0, res = result_buffer->results;
>            i < result_buffer->num_results; i++, res = next_res) {
>               struct perf_event *event = h24x7hw->events[res->result_ix];
> -             u16 num_elements = be16_to_cpu(res->num_elements_returned);
> -             u16 data_size = be16_to_cpu(res->result_element_data_size);
> 
> -             /* This code assumes that a result has only one element. */
> -             WARN_ON_ONCE(num_elements != 1);
> +             ret = get_count_from_result(event, result_buffer, res, &count,
> +                                         &next_res);
> +             if (ret)
> +                     continue;
> 
> -             count = be64_to_cpu(res->elements[0].element_data[0]);
>               update_event_count(event, count);
> -
> -             next_res = (void *) res->elements[0].element_data + data_size;
>       }
> 
>       put_cpu_var(hv_24x7_hw);
> @@ -1486,6 +1579,12 @@ static int hv_24x7_init(void)
>               return -ENODEV;
>       }
> 
> +     /* POWER8 only supports v1, while POWER9 only supports v2. */
> +     if (cpu_has_feature(CPU_FTR_ARCH_300))
> +             interface_version = 2;
> +     else
> +             interface_version = 1;
> +
>       hret = hv_perf_caps_get(&caps);
>       if (hret) {
>               pr_debug("could not obtain capabilities, not enabling, 
> rc=%ld\n",
> diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
> index b95909400b2a..149af6e9538f 100644
> --- a/arch/powerpc/perf/hv-24x7.h
> +++ b/arch/powerpc/perf/hv-24x7.h
> @@ -10,6 +10,9 @@ enum hv_perf_domains {
>       HV_PERF_DOMAIN_MAX,
>  };
> 
> +#define H24x7_REQUEST_SIZE_V1        16
> +#define H24x7_REQUEST_SIZE_V2        32
> +
>  struct hv_24x7_request {
>       /* PHYSICAL domains require enabling via phyp/hmc. */
>       __u8 performance_domain;
> @@ -42,19 +45,47 @@ struct hv_24x7_request {
>       /* chip, core, or virtual processor based on @performance_domain */
>       __be16 starting_ix;
>       __be16 max_ix;
> +
> +     /* The following fields were added in v2 of the 24x7 interface. */
> +
> +     __u8 starting_thread_group_ix;
> +
> +     /* -1 means all thread groups starting at @starting_thread_group_ix */
> +     __u8 max_num_thread_groups;
> +
> +     __u8 reserved2[0xE];
>  } __packed;
> 
>  struct hv_24x7_request_buffer {
>       /* 0 - ? */
>       /* 1 - ? */
> -#define HV_24X7_IF_VERSION_CURRENT 0x01
>       __u8 interface_version;
>       __u8 num_requests;
>       __u8 reserved[0xE];
> -     struct hv_24x7_request requests[1];
> +     struct hv_24x7_request requests[];
> +} __packed;
> +
> +struct hv_24x7_result_element_v1 {
> +     __be16 lpar_ix;
> +
> +     /*
> +      * represents the core, chip, or virtual processor based on the
> +      * request's @performance_domain
> +      */
> +     __be16 domain_ix;
> +
> +     /* -1 if @performance_domain does not refer to a virtual processor */
> +     __be32 lpar_cfg_instance_id;
> +
> +     /* size = @result_element_data_size of containing result. */
> +     __u64 element_data[];
>  } __packed;
> 
> -struct hv_24x7_result_element {
> +/*
> + * We need a separate struct for v2 because the offset of @element_data 
> changed
> + * between versions.
> + */
> +struct hv_24x7_result_element_v2 {
>       __be16 lpar_ix;
> 
>       /*
> @@ -66,8 +97,12 @@ struct hv_24x7_result_element {
>       /* -1 if @performance_domain does not refer to a virtual processor */
>       __be32 lpar_cfg_instance_id;
> 
> +     __u8 thread_group_ix;
> +
> +     __u8 reserved[7];
> +
>       /* size = @result_element_data_size of containing result. */
> -     __u64 element_data[1];
> +     __u64 element_data[];
>  } __packed;
> 
>  struct hv_24x7_result {
> @@ -94,10 +129,16 @@ struct hv_24x7_result {
>       __be16 result_element_data_size;
>       __u8 reserved[0x2];
> 
> -     /* WARNING: only valid for first result element due to variable sizes
> -      *          of result elements */
> -     /* struct hv_24x7_result_element[@num_elements_returned] */
> -     struct hv_24x7_result_element elements[1];
> +     /*
> +      * Either
> +      *      struct hv_24x7_result_element_v1[@num_elements_returned]
> +      * or
> +      *      struct hv_24x7_result_element_v2[@num_elements_returned]
> +      *
> +      * depending on the interface_version field of the
> +      * struct hv_24x7_data_result_buffer containing this result.
> +      */
> +     char elements[];
>  } __packed;
> 
>  struct hv_24x7_data_result_buffer {
> @@ -113,7 +154,7 @@ struct hv_24x7_data_result_buffer {
>       __u8 reserved2[0x8];
>       /* WARNING: only valid for the first result due to variable sizes of
>        *          results */
> -     struct hv_24x7_result results[1]; /* [@num_results] */
> +     struct hv_24x7_result results[]; /* [@num_results] */
>  } __packed;
> 
>  #endif
> diff --git a/arch/powerpc/platforms/pseries/Kconfig 
> b/arch/powerpc/platforms/pseries/Kconfig
> index 913c54e23eea..3a6dfd14f64b 100644
> --- a/arch/powerpc/platforms/pseries/Kconfig
> +++ b/arch/powerpc/platforms/pseries/Kconfig
> @@ -124,7 +124,7 @@ config HV_PERF_CTRS
>         Enable access to hypervisor supplied counters in perf. Currently,
>         this enables code that uses the hcall GetPerfCounterInfo and 24x7
>         interfaces to retrieve counters. GPCI exists on Power 6 and later
> -       systems. 24x7 is available on Power 8 systems.
> +       systems. 24x7 is available on Power 8 and later systems.
> 
>            If unsure, select Y.
> 
> -- 
> 2.7.4

Reply via email to