From: Kan Liang <kan.li...@linux.intel.com>

Add perf core PMU support for the Intel Tremont CPU.

The init code is based on Goldmont Plus.

General-purpose counter 0 and fixed counter 0 have less skid.
Force :ppp events onto general-purpose counter 0, and always force
instruction:ppp onto fixed counter 0.
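
For illustration (not part of this patch), the reduced-skid placement
can be exercised from user space with the standard :ppp event modifier
(./workload is a placeholder):

  # instruction:ppp is constrained to fixed counter 0; any other
  # :ppp event is constrained to general-purpose counter 0 (PMC0).
  perf record -e instructions:ppp -- ./workload
  perf record -e branch-instructions:ppp -- ./workload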

Update the LLC cache event table and the OFFCORE_RESPONSE event mask.
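
For example, the generic LLC cache events backed by this table can be
counted as follows (again a sketch, ./workload is a placeholder):

  # These generic events resolve through tnt_hw_cache_extra_regs and
  # the OFFCORE_RESPONSE extra registers defined below.
  perf stat -e LLC-loads,LLC-load-misses,LLC-stores,LLC-store-misses ./workload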

Adaptive PEBS, which is already enabled on ICL, is also supported on
Tremont; no extra code is required.

Signed-off-by: Kan Liang <kan.li...@linux.intel.com>
---
 arch/x86/events/intel/core.c | 88 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f34d92b..f478831 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -1856,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs
        },
 };
 
+#define TNT_LOCAL_DRAM                 BIT_ULL(26)
+#define TNT_DEMAND_READ                        GLM_DEMAND_DATA_RD
+#define TNT_DEMAND_WRITE               GLM_DEMAND_RFO
+#define TNT_LLC_ACCESS                 GLM_ANY_RESPONSE
+#define TNT_SNP_ANY                    (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
+                                        SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
+#define TNT_LLC_MISS                   (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
+
+static __initconst const u64 tnt_hw_cache_extra_regs
+                               [PERF_COUNT_HW_CACHE_MAX]
+                               [PERF_COUNT_HW_CACHE_OP_MAX]
+                               [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+       [C(LL)] = {
+               [C(OP_READ)] = {
+                       [C(RESULT_ACCESS)]      = TNT_DEMAND_READ|
+                                                 TNT_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = TNT_DEMAND_READ|
+                                                 TNT_LLC_MISS,
+               },
+               [C(OP_WRITE)] = {
+                       [C(RESULT_ACCESS)]      = TNT_DEMAND_WRITE|
+                                                 TNT_LLC_ACCESS,
+                       [C(RESULT_MISS)]        = TNT_DEMAND_WRITE|
+                                                 TNT_LLC_MISS,
+               },
+               [C(OP_PREFETCH)] = {
+                       [C(RESULT_ACCESS)]      = 0x0,
+                       [C(RESULT_MISS)]        = 0x0,
+               },
+       },
+};
+
+static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
+       /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+       EVENT_EXTRA_END
+};
+
 #define KNL_OT_L2_HITE         BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF         BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL       BIT_ULL(21)
@@ -3451,6 +3490,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
        return c;
 }
 
+static struct event_constraint *
+tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+                         struct perf_event *event)
+{
+       struct event_constraint *c;
+
+       /*
+        * :ppp means to do reduced skid PEBS,
+        * which is available at PMC0 and fixed counter 0.
+        */
+       if (event->attr.precise_ip == 3) {
+               /* Force instruction:ppp in Fixed counter 0 */
+               if (event->hw.config == X86_CONFIG(.event=0xc0))
+                       return &fixed_counter0_constraint;
+
+               return &counter0_constraint;
+       }
+
+       c = intel_get_event_constraints(cpuc, idx, event);
+
+       return c;
+}
+
 static bool allow_tsx_force_abort = true;
 
 static struct event_constraint *
@@ -4530,6 +4592,32 @@ __init int intel_pmu_init(void)
                name = "goldmont_plus";
                break;
 
+       case INTEL_FAM6_ATOM_TREMONT_X:
+               x86_pmu.late_ack = true;
+               memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+               hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+               intel_pmu_lbr_init_skl();
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.extra_regs = intel_tnt_extra_regs;
+               /*
+                * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+                * for precise cycles.
+                */
+               x86_pmu.pebs_aliases = NULL;
+               x86_pmu.pebs_prec_dist = true;
+               x86_pmu.lbr_pt_coexist = true;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.get_event_constraints = tnt_get_event_constraints;
+               extra_attr = slm_format_attr;
+               pr_cont("Tremont events, ");
+               name = "Tremont";
+               break;
+
        case INTEL_FAM6_WESTMERE:
        case INTEL_FAM6_WESTMERE_EP:
        case INTEL_FAM6_WESTMERE_EX:
-- 
2.7.4
