From 5b9675cdec1eda745702352044728e03ac65c04e Mon Sep 17 00:00:00 2001 From: Stephane Eranian <eran...@google.com> Date: Mon, 4 Apr 2011 13:38:30 -0400 Subject: [PATCH] libpfm4: Add AMD Family 15h cpu support
This patch is based on the already published libpfm3 patch except that northbridge events are dropped. These events need to live in a separate event table. Signed-off-by: Robert Richter <robert.rich...@amd.com> --- include/perfmon/pfmlib.h | 2 + lib/Makefile | 2 +- lib/events/amd64_events_fam15h.h | 1198 ++++++++++++++++++++++++++++++++++++++ lib/pfmlib_amd64.c | 4 +- lib/pfmlib_amd64_priv.h | 6 +- lib/pfmlib_common.c | 1 + lib/pfmlib_priv.h | 1 + 7 files changed, 1211 insertions(+), 3 deletions(-) create mode 100644 lib/events/amd64_events_fam15h.h diff --git a/include/perfmon/pfmlib.h b/include/perfmon/pfmlib.h index e1b80b9..bfcf1b0 100644 --- a/include/perfmon/pfmlib.h +++ b/include/perfmon/pfmlib.h @@ -142,6 +142,8 @@ typedef enum { PFM_PMU_TORRENT, /* IBM Torrent hub chip */ PFM_PMU_INTEL_SNB, /* Intel Sandy Bridge (single socket) */ + PFM_PMU_AMD64_FAM15H_INTERLAGOS,/* AMD AMD64 Fam15h Interlagos */ + /* MUST ADD NEW PMU MODELS HERE */ PFM_PMU_MAX /* end marker */ diff --git a/lib/Makefile b/lib/Makefile index c8bb681..2a45367 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -54,7 +54,7 @@ INCARCH = $(INC_X86) SRCS += pfmlib_amd64.c pfmlib_intel_core.c pfmlib_intel_x86.c pfmlib_intel_x86_arch.c pfmlib_intel_atom.c \ pfmlib_intel_nhm_unc.c pfmlib_intel_coreduo.c pfmlib_intel_p6.c pfmlib_intel_nhm.c \ pfmlib_intel_wsm.c pfmlib_amd64_k7.c pfmlib_amd64_k8.c pfmlib_amd64_fam10h.c pfmlib_intel_netburst.c \ - pfmlib_intel_snb.c + pfmlib_intel_snb.c pfmlib_amd64_fam15h.c CFLAGS += -DCONFIG_PFMLIB_ARCH_X86 endif diff --git a/lib/events/amd64_events_fam15h.h b/lib/events/amd64_events_fam15h.h new file mode 100644 index 0000000..aff365f --- /dev/null +++ b/lib/events/amd64_events_fam15h.h @@ -0,0 +1,1198 @@ +/* + * Copyright (c) 2010 Advanced Micro Devices, Inc. 
+ * Contributed by Robert Richter <robert.rich...@amd.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * This file is part of libpfm, a performance monitoring support library for + * applications on Linux. 
+ */ + +/* History + * + * Dec 09 2010 -- Robert Richter, robert.rich...@amd.com: + * + * Family 15h Microarchitecture performance monitor events + * + * Source: BIOS and Kernel Developer's Guide for the AMD Family 15h + * Processors, Rev 0.90, May 18, 2010 + */ + +static const amd64_entry_t amd64_fam15h_pe[]={ + {.name = "DISPATCHED_FPU_OPS", + .code = 0x00, + .desc = "FPU Pipe Assignment", + .flags = 0, + .numasks = 9, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "OPS_PIPE0", + .udesc = "Total number uops assigned to Pipe 0", + .ucode = 1 << 0, + }, + { .uname = "OPS_PIPE1", + .udesc = "Total number uops assigned to Pipe 1", + .ucode = 1 << 1, + }, + { .uname = "OPS_PIPE2", + .udesc = "Total number uops assigned to Pipe 2", + .ucode = 1 << 2, + }, + { .uname = "OPS_PIPE3", + .udesc = "Total number uops assigned to Pipe 3", + .ucode = 1 << 3, + }, + { .uname = "OPS_DUAL_PIPE0", + .udesc = "Total number dual-pipe uops assigned to Pipe 0", + .ucode = 1 << 4, + }, + { .uname = "OPS_DUAL_PIPE1", + .udesc = "Total number dual-pipe uops assigned to Pipe 1", + .ucode = 1 << 5, + }, + { .uname = "OPS_DUAL_PIPE2", + .udesc = "Total number dual-pipe uops assigned to Pipe 2", + .ucode = 1 << 6, + }, + { .uname = "OPS_DUAL_PIPE3", + .udesc = "Total number dual-pipe uops assigned to Pipe 3", + .ucode = 1 << 7, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xFF, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "CYCLES_FPU_EMPTY", + .code = 0x01, + .desc = "FP Scheduler Empty", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_SSE_OPS", + .code = 0x03, + .desc = "Retired SSE/BNI Ops", + .flags = 0, + .numasks = 9, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "SINGLE_ADD_SUB_OPS", + .udesc = "Single-precision add/subtract FLOPS", + .ucode = 1 << 0, + }, + { .uname = "SINGLE_MUL_OPS", + .udesc = "Single-precision multiply FLOPS", + .ucode = 1 << 1, + }, + { .uname = 
"SINGLE_DIV_OPS", + .udesc = "Single-precision divide/square root FLOPS", + .ucode = 1 << 2, + }, + { .uname = "SINGLE_MUL_ADD_OPS", + .udesc = "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", + .ucode = 1 << 3, + }, + { .uname = "DOUBLE_ADD_SUB_OPS", + .udesc = "Double precision add/subtract FLOPS", + .ucode = 1 << 4, + }, + { .uname = "DOUBLE_MUL_OPS", + .udesc = "Double precision multiply FLOPS", + .ucode = 1 << 5, + }, + { .uname = "DOUBLE_DIV_OPS", + .udesc = "Double precision divide/square root FLOPS", + .ucode = 1 << 6, + }, + { .uname = "DOUBLE_MUL_ADD_OPS", + .udesc = "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS", + .ucode = 1 << 7, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xFF, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "MOVE_SCALAR_OPTIMIZATION", + .code = 0x04, + .desc = "Number of Move Elimination and Scalar Op Optimization", + .flags = 0, + .numasks = 5, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "SSE_MOVE_OPS", + .udesc = "Number of SSE Move Ops", + .ucode = 1 << 0, + }, + { .uname = "SSE_MOVE_OPS_ELIM", + .udesc = "Number of SSE Move Ops eliminated", + .ucode = 1 << 1, + }, + { .uname = "OPT_CAND", + .udesc = "Number of Ops that are candidates for optimization (Z-bit set or pass)", + .ucode = 1 << 2, + }, + { .uname = "SCALAR_OPS_OPTIMIZED", + .udesc = "Number of Scalar ops optimized", + .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x0F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "RETIRED_SERIALIZING_OPS", + .code = 0x05, + .desc = "Retired Serializing Ops", + .flags = 0, + .numasks = 5, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "SSE_RETIRED", + .udesc = "SSE bottom-executing uops retired", + .ucode = 1 << 0, + }, + { .uname = "SSE_MISPREDICTED", + .udesc = "SSE control word mispredict traps due to mispredictions", + 
.ucode = 1 << 1, + }, + { .uname = "X87_RETIRED", + .udesc = "x87 bottom-executing uops retired", + .ucode = 1 << 2, + }, + { .uname = "X87_MISPREDICTED", + .udesc = "x87 control word mispredict traps due to mispredictions", + .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x0F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "BOTTOM_EXECUTE_OP", + .code = 0x06, + .desc = "Number of Cycles that a Bottom-Execute uop is in the FP Scheduler", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "SEGMENT_REGISTER_LOADS", + .code = 0x20, + .desc = "Segment Register Loads", + .flags = 0, + .numasks = 8, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "ES", + .udesc = "ES", + .ucode = 1 << 0, + }, + { .uname = "CS", + .udesc = "CS", + .ucode = 1 << 1, + }, + { .uname = "SS", + .udesc = "SS", + .ucode = 1 << 2, + }, + { .uname = "DS", + .udesc = "DS", + .ucode = 1 << 3, + }, + { .uname = "FS", + .udesc = "FS", + .ucode = 1 << 4, + }, + { .uname = "GS", + .udesc = "GS", + .ucode = 1 << 5, + }, + { .uname = "HS", + .udesc = "HS", + .ucode = 1 << 6, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x7F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "PIPELINE_RESTART_DUE_TO_SELF_MODIFYING_CODE", + .code = 0x21, + .desc = "Pipeline Restart Due to Self-Modifying Code", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "PIPELINE_RESTART_DUE_TO_PROBE_HIT", + .code = 0x22, + .desc = "Pipeline Restart Due to Probe Hit", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "LOAD_Q_STORE_Q_FULL", + .code = 0x23, + .desc = "Load Queue/Store Queue Full", + .flags = 0, + .numasks = 3, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "LOAD_QUEUE", + .udesc = "The number of cycles that the load buffer is full", + .ucode = 1 << 0, + }, + { .uname = "STORE_QUEUE", + .udesc = "The number of cycles that the store buffer is full", + .ucode = 1 << 1, + 
}, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x03, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "LOCKED_OPS", + .code = 0x24, + .desc = "Locked Operations", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "EXECUTED", + .udesc = "Number of locked instructions executed", + .ucode = 1 << 0, + }, + { .uname = "CYCLES_NON_SPECULATIVE_PHASE", + .udesc = "Number of cycles spent non-speculative phase (including cache miss penalty)", + .ucode = 1 << 2, + }, + { .uname = "CYCLES_WAITING", + .udesc = "Number of cycles waiting for a cache hit (cache miss penalty)", + .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x0D, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "RETIRED_CLFLUSH_INSTRUCTIONS", + .code = 0x26, + .desc = "Retired CLFLUSH Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_CPUID_INSTRUCTIONS", + .code = 0x27, + .desc = "Retired CPUID Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "CANCELLED_STORE_TO_LOAD", + .code = 0x2A, + .desc = "Canceled Store to Load Forward Operations", + .flags = 0, + .numasks = 2, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "SIZE_ADDRESS_MISMATCHES", + .udesc = "Store is smaller than load or different starting byte but partial overlap", + .ucode = 1 << 0, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x01, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "SMIS_RECEIVED", + .code = 0x2B, + .desc = "SMIs Received", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DATA_CACHE_ACCESSES", + .code = 0x40, + .desc = "Data Cache Accesses", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DATA_CACHE_MISSES", + .code = 0x41, + .desc = "Data Cache Misses", + .flags = 0, + .numasks = 3, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = 
"DC_MISS_STREAMING_STORE", + .udesc = "First data cache miss or streaming store to a 64B cache line", + .ucode = 1 << 0, + }, + { .uname = "STREAMING_STORE", + .udesc = "First streaming store to a 64B cache line", + .ucode = 1 << 1, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x03, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "DATA_CACHE_REFILLS_FROM_L2_OR_NORTHBRIDGE", + .code = 0x42, + .desc = "Data Cache Refills from L2 or System", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "GOOD", + .udesc = "Fill with good data. (Final valid status is valid)", + .ucode = 1 << 0, + }, + { .uname = "INVALID", + .udesc = "Early valid status turned out to be invalid", + .ucode = 1 << 1, + }, + { .uname = "READ_ERROR", + .udesc = "Fill with read data error", + .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x0B, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "DATA_CACHE_REFILLS_FROM_NORTHBRIDGE", + .code = 0x43, + .desc = "Data Cache Refills from System", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "UNIFIED_TLB_HIT", + .code = 0x45, + .desc = "Unified TLB Hit", + .flags = 0, + .numasks = 7, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "4K_DATA", + .udesc = "4 KB unified TLB hit for data", + .ucode = 1 << 0, + }, + { .uname = "2M_DATA", + .udesc = "2 MB unified TLB hit for data", + .ucode = 1 << 1, + }, + { .uname = "1G_DATA", + .udesc = "1 GB unified TLB hit for data", + .ucode = 1 << 2, + }, + { .uname = "4K_INST", + .udesc = "4 KB unified TLB hit for instruction", + .ucode = 1 << 4, + }, + { .uname = "2M_INST", + .udesc = "2 MB unified TLB hit for instruction", + .ucode = 1 << 5, + }, + { .uname = "1G_INST", + .udesc = "1 GB unified TLB hit for instruction", + .ucode = 1 << 6, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x77, + .uflags = 
AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "UNIFIED_TLB_MISS", + .code = 0x46, + .desc = "Unified TLB Miss", + .flags = 0, + .numasks = 7, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "4K_DATA", + .udesc = "4 KB unified TLB miss for data", + .ucode = 1 << 0, + }, + { .uname = "2M_DATA", + .udesc = "2 MB unified TLB miss for data", + .ucode = 1 << 1, + }, + { .uname = "1GB_DATA", + .udesc = "1 GB unified TLB miss for data", + .ucode = 1 << 2, + }, + { .uname = "4K_INST", + .udesc = "4 KB unified TLB miss for instruction", + .ucode = 1 << 4, + }, + { .uname = "2M_INST", + .udesc = "2 MB unified TLB miss for instruction", + .ucode = 1 << 5, + }, + { .uname = "1G_INST", + .udesc = "1 GB unified TLB miss for instruction", + .ucode = 1 << 6, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x77, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "MISALIGNED_ACCESSES", + .code = 0x47, + .desc = "Misaligned Accesses", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "PREFETCH_INSTRUCTIONS_DISPATCHED", + .code = 0x4B, + .desc = "Prefetch Instructions Dispatched", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "LOAD", + .udesc = "Load (Prefetch, PrefetchT0/T1/T2)", + .ucode = 1 << 0, + }, + { .uname = "STORE", + .udesc = "Store (PrefetchW)", + .ucode = 1 << 1, + }, + { .uname = "NTA", + .udesc = "NTA (PrefetchNTA)", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "INEFFECTIVE_SW_PREFETCHES", + .code = 0x52, + .desc = "Ineffective Software Prefetches", + .flags = 0, + .numasks = 3, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "SW_PREFETCH_HIT_IN_L1", + .udesc = "Software prefetch hit in the L1.", + .ucode = 1 << 0, + }, + { .uname = "SW_PREFETCH_HIT_IN_L2", + .udesc = "Software prefetch hit in L2.", 
+ .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x09, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "MEMORY_REQUESTS", + .code = 0x65, + .desc = "Memory Requests by Type", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "NON_CACHEABLE", + .udesc = "Requests to non-cacheable (UC) memory", + .ucode = 1 << 0, + }, + { .uname = "WRITE_COMBINING", + .udesc = "Requests to non-cacheable (WC, but not WC+/SS) memory", + .ucode = 1 << 1, + }, + { .uname = "STREAMING_STORE", + .udesc = "Requests to non-cacheable (WC+/SS, but not WC) memory", + .ucode = 1 << 7, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x83, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "DATA_PREFETCHER", + .code = 0x67, + .desc = "Data Prefetcher", + .flags = 0, + .numasks = 2, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "ATTEMPTED", + .udesc = "Prefetch attempts", + .ucode = 1 << 1, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x02, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "MAB_REQS", + .code = 0x68, + .desc = "MAB Requests", + .flags = 0, + .numasks = 9, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "BUFFER_BIT_0", + .udesc = "Buffer entry index bit 0", + .ucode = 1 << 0, + }, + { .uname = "BUFFER_BIT_1", + .udesc = "Buffer entry index bit 1", + .ucode = 1 << 1, + }, + { .uname = "BUFFER_BIT_2", + .udesc = "Buffer entry index bit 2", + .ucode = 1 << 2, + }, + { .uname = "BUFFER_BIT_3", + .udesc = "Buffer entry index bit 3", + .ucode = 1 << 3, + }, + { .uname = "BUFFER_BIT_4", + .udesc = "Buffer entry index bit 4", + .ucode = 1 << 4, + }, + { .uname = "BUFFER_BIT_5", + .udesc = "Buffer entry index bit 5", + .ucode = 1 << 5, + }, + { .uname = "BUFFER_BIT_6", + .udesc = "Buffer entry index bit 6", + .ucode = 1 << 6, + }, + { 
.uname = "BUFFER_BIT_7", + .udesc = "Buffer entry index bit 7", + .ucode = 1 << 7, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xFF, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "MAB_WAIT", + .code = 0x69, + .desc = "MAB Wait Cycles", + .flags = 0, + .numasks = 9, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "BUFFER_BIT_0", + .udesc = "Buffer entry index bit 0", + .ucode = 1 << 0, + }, + { .uname = "BUFFER_BIT_1", + .udesc = "Buffer entry index bit 1", + .ucode = 1 << 1, + }, + { .uname = "BUFFER_BIT_2", + .udesc = "Buffer entry index bit 2", + .ucode = 1 << 2, + }, + { .uname = "BUFFER_BIT_3", + .udesc = "Buffer entry index bit 3", + .ucode = 1 << 3, + }, + { .uname = "BUFFER_BIT_4", + .udesc = "Buffer entry index bit 4", + .ucode = 1 << 4, + }, + { .uname = "BUFFER_BIT_5", + .udesc = "Buffer entry index bit 5", + .ucode = 1 << 5, + }, + { .uname = "BUFFER_BIT_6", + .udesc = "Buffer entry index bit 6", + .ucode = 1 << 6, + }, + { .uname = "BUFFER_BIT_7", + .udesc = "Buffer entry index bit 7", + .ucode = 1 << 7, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0xFF, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "SYSTEM_READ_RESPONSES", + .code = 0x6C, + .desc = "Response From System on Cache Refills", + .flags = 0, + .numasks = 7, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "EXCLUSIVE", + .udesc = "Exclusive", + .ucode = 1 << 0, + }, + { .uname = "MODIFIED", + .udesc = "Modified (D18F0x68[ATMModeEn]==0), Modified written (D18F0x68[ATMModeEn]==1)", + .ucode = 1 << 1, + }, + { .uname = "SHARED", + .udesc = "Shared", + .ucode = 1 << 2, + }, + { .uname = "OWNED", + .udesc = "Owned", + .ucode = 1 << 3, + }, + { .uname = "DATA_ERROR", + .udesc = "Data Error", + .ucode = 1 << 4, + }, + { .uname = "MODIFIED_UNWRITTEN", + .udesc = "Modified unwritten", + .ucode = 1 << 5, + }, + { .uname = "ALL", + .udesc = "All sub-events 
selected", + .ucode = 0x3F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "OCTWORD_WRITE_TRANSFERS", + .code = 0x6D, + .desc = "Octwords Written to System", + .flags = 0, + .numasks = 2, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "OCTWORD_WRITE_TRANSFER", + .udesc = "OW write transfer", + .ucode = 1 << 0, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x01, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "CPU_CLK_UNHALTED", + .code = 0x76, + .desc = "CPU Clocks not Halted", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "REQUESTS_TO_L2", + .code = 0x7D, + .desc = "Requests to L2 Cache", + .flags = 0, + .numasks = 7, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "INSTRUCTIONS", + .udesc = "IC fill", + .ucode = 1 << 0, + }, + { .uname = "DATA", + .udesc = "DC fill", + .ucode = 1 << 1, + }, + { .uname = "TLB_WALK", + .udesc = "TLB fill (page table walks)", + .ucode = 1 << 2, + }, + { .uname = "SNOOP", + .udesc = "NB probe request", + .ucode = 1 << 3, + }, + { .uname = "CANCELLED", + .udesc = "Canceled request", + .ucode = 1 << 4, + }, + { .uname = "PREFETCHER", + .udesc = "L2 cache prefetcher request", + .ucode = 1 << 6, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x5F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "L2_CACHE_MISS", + .code = 0x7E, + .desc = "L2 Cache Misses", + .flags = 0, + .numasks = 5, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "INSTRUCTIONS", + .udesc = "IC fill", + .ucode = 1 << 0, + }, + { .uname = "DATA", + .udesc = "DC fill (includes possible replays, whereas PMCx041 does not)", + .ucode = 1 << 1, + }, + { .uname = "TLB_WALK", + .udesc = "TLB page table walk", + .ucode = 1 << 2, + }, + { .uname = "PREFETCHER", + .udesc = "L2 Cache Prefetcher request", + .ucode = 1 << 4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode 
= 0x17, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "L2_CACHE_FILL_WRITEBACK", + .code = 0x7F, + .desc = "L2 Fill/Writeback", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "L2_FILLS", + .udesc = "L2 fills from system", + .ucode = 1 << 0, + }, + { .uname = "L2_WRITEBACKS", + .udesc = "L2 Writebacks to system (Clean and Dirty)", + .ucode = 1 << 1, + }, + { .uname = "L2_WRITEBACKS_CLEAN", + .udesc = "L2 Clean Writebacks to system", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "PAGE_SPLINTERING", + .code = 0x165, + .desc = "Page Splintering", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "GUEST_LARGER", + .udesc = "Guest page size is larger than host page size when nested paging is enabled", + .ucode = 1 << 0, + }, + { .uname = "MTRR_MISMATCH", + .udesc = "Splintering due to MTRRs, IORRs, APIC, TOMs or other special address region", + .ucode = 1 << 1, + }, + { .uname = "HOST_LARGER", + .udesc = "Host page size is larger than the guest page size", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "INSTRUCTION_CACHE_FETCHES", + .code = 0x80, + .desc = "Instruction Cache Fetches", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_CACHE_MISSES", + .code = 0x81, + .desc = "Instruction Cache Misses", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_CACHE_REFILLS_FROM_L2", + .code = 0x82, + .desc = "Instruction Cache Refills from L2", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_CACHE_REFILLS_FROM_SYSTEM", + .code = 0x83, + .desc = "Instruction Cache Refills from System", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "L1_ITLB_MISS_AND_L2_ITLB_HIT", + .code = 0x84, + 
.desc = "L1 ITLB Miss, L2 ITLB Hit", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "L1_ITLB_MISS_AND_L2_ITLB_MISS", + .code = 0x85, + .desc = "L1 ITLB Miss, L2 ITLB Miss", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "4K_PAGE_FETCHES", + .udesc = "Instruction fetches to a 4 KB page", + .ucode = 1 << 0, + }, + { .uname = "2M_PAGE_FETCHES", + .udesc = "Instruction fetches to a 2 MB page", + .ucode = 1 << 1, + }, + { .uname = "1G_PAGE_FETCHES", + .udesc = "Instruction fetches to a 1 GB page", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "PIPELINE_RESTART_DUE_TO_INSTRUCTION_STREAM_PROBE", + .code = 0x86, + .desc = "Pipeline Restart Due to Instruction Stream Probe", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_FETCH_STALL", + .code = 0x87, + .desc = "Instruction Fetch Stall", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETURN_STACK_HITS", + .code = 0x88, + .desc = "Return Stack Hits", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETURN_STACK_OVERFLOWS", + .code = 0x89, + .desc = "Return Stack Overflows", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_CACHE_VICTIMS", + .code = 0x8B, + .desc = "Instruction Cache Victims", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INSTRUCTION_CACHE_INVALIDATED", + .code = 0x8C, + .desc = "Instruction Cache Lines Invalidated", + .flags = 0, + .numasks = 5, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "NON_SMC_PROBE_MISS", + .udesc = "Non-SMC invalidating probe that missed on in-flight instructions", + .ucode = 1 << 0, + }, + { .uname = "NON_SMC_PROBE_HIT", + .udesc = "Non-SMC invalidating probe that hit on in-flight instructions", + .ucode = 1 << 1, + }, + { .uname = "SMC_PROBE_MISS", + .udesc = "SMC invalidating probe that missed on in-flight instructions", + .ucode = 1 << 2, + }, + { 
.uname = "SMC_PROBE_HIT", + .udesc = "SMC invalidating probe that hit on in-flight instructions", + .ucode = 1 << 3, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x0F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "ITLB_RELOADS", + .code = 0x99, + .desc = "ITLB Reloads", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "ITLB_RELOADS_ABORTED", + .code = 0x9A, + .desc = "ITLB Reloads Aborted", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_INSTRUCTIONS", + .code = 0xC0, + .desc = "Retired Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_UOPS", + .code = 0xC1, + .desc = "Retired uops", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_BRANCH_INSTRUCTIONS", + .code = 0xC2, + .desc = "Retired Branch Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_MISPREDICTED_BRANCH_INSTRUCTIONS", + .code = 0xC3, + .desc = "Retired Mispredicted Branch Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS", + .code = 0xC4, + .desc = "Retired Taken Branch Instructions", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_TAKEN_BRANCH_INSTRUCTIONS_MISPREDICTED", + .code = 0xC5, + .desc = "Retired Taken Branch Instructions Mispredicted", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_FAR_CONTROL_TRANSFERS", + .code = 0xC6, + .desc = "Retired Far Control Transfers", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_BRANCH_RESYNCS", + .code = 0xC7, + .desc = "Retired Branch Resyncs", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_NEAR_RETURNS", + .code = 0xC8, + .desc = "Retired Near Returns", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_NEAR_RETURNS_MISPREDICTED", + .code = 0xC9, + .desc = "Retired Near Returns Mispredicted", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_INDIRECT_BRANCHES_MISPREDICTED", + .code = 0xCA, + .desc = "Retired Indirect Branches Mispredicted", + .modmsk = 
AMD64_FAM15H_ATTRS, + }, + {.name = "RETIRED_MMX_FP_INSTRUCTIONS", + .code = 0xCB, + .desc = "Retired MMX/FP Instructions", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "X87", + .udesc = "x87 instructions", + .ucode = 1 << 0, + }, + { .uname = "MMX", + .udesc = "MMX(tm) instructions", + .ucode = 1 << 1, + }, + { .uname = "SSE", + .udesc = "SSE instructions (SSE,SSE2,SSE3,SSSE3,SSE4A,SSE4.1,SSE4.2,AVX,XOP,FMA4)", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "INTERRUPTS_MASKED_CYCLES", + .code = 0xCD, + .desc = "Interrupts-Masked Cycles", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INTERRUPTS_MASKED_CYCLES_WITH_INTERRUPT_PENDING", + .code = 0xCE, + .desc = "Interrupts-Masked Cycles with Interrupt Pending", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "INTERRUPTS_TAKEN", + .code = 0xCF, + .desc = "Interrupts Taken", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DECODER_EMPTY", + .code = 0xD0, + .desc = "Decoder Empty", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALLS", + .code = 0xD1, + .desc = "Dispatch Stalls", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALL_FOR_SERIALIZATION", + .code = 0xD3, + .desc = "Microsequencer Stall due to Serialization", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALL_FOR_RETIRE_QUEUE_FULL", + .code = 0xD5, + .desc = "Dispatch Stall for Instruction Retire Q Full", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALL_FOR_INT_SCHED_QUEUE_FULL", + .code = 0xD6, + .desc = "Dispatch Stall for Integer Scheduler Queue Full", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALL_FOR_FPU_FULL", + .code = 0xD7, + .desc = "Dispatch Stall for FP Scheduler Queue Full", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DISPATCH_STALL_FOR_LDQ_FULL", + .code = 0xD8, + .desc = "Dispatch Stall for LDQ 
Full", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "MICROSEQ_STALL_WAITING_FOR_ALL_QUIET", + .code = 0xD9, + .desc = "Microsequencer Stall Waiting for All Quiet", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "FPU_EXCEPTIONS", + .code = 0xDB, + .desc = "FPU Exceptions", + .flags = 0, + .numasks = 6, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "TOTAL_FAULTS", + .udesc = "Total microfaults", + .ucode = 1 << 0, + }, + { .uname = "TOTAL_TRAPS", + .udesc = "Total microtraps", + .ucode = 1 << 1, + }, + { .uname = "INT2EXT_FAULTS", + .udesc = "Int2Ext faults", + .ucode = 1 << 2, + }, + { .uname = "EXT2INT_FAULTS", + .udesc = "Ext2Int faults", + .ucode = 1 << 3, + }, + { .uname = "BYPASS_FAULTS", + .udesc = "Bypass faults", + .ucode = 1 << 4, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x1F, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, + }, + }, + {.name = "DR0_BREAKPOINTS", + .code = 0xDC, + .desc = "DR0 Breakpoint Match", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DR1_BREAKPOINTS", + .code = 0xDD, + .desc = "DR1 Breakpoint Match", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DR2_BREAKPOINTS", + .code = 0xDE, + .desc = "DR2 Breakpoint Match", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "DR3_BREAKPOINTS", + .code = 0xDF, + .desc = "DR3 Breakpoint Match", + .modmsk = AMD64_FAM15H_ATTRS, + }, + {.name = "IBS_OPS_TAGGED", + .code = 0x1CF, + .desc = "Tagged IBS Ops", + .flags = 0, + .numasks = 4, + .ngrp = 1, + .modmsk = AMD64_FAM15H_ATTRS, + .umasks = { + { .uname = "TAGGED", + .udesc = "Number of ops tagged by IBS", + .ucode = 1 << 0, + }, + { .uname = "RETIRED", + .udesc = "Number of ops tagged by IBS that retired", + .ucode = 1 << 1, + }, + { .uname = "IGNORED", + .udesc = "Number of times op could not be tagged due to other tagged op active in pipe", + .ucode = 1 << 2, + }, + { .uname = "ALL", + .udesc = "All sub-events selected", + .ucode = 0x07, + .uflags = AMD64_FL_DFL|AMD64_FL_NCOMBO, + }, 
+ }, + }, +}; +#define PME_AMD64_FAM15H_EVENT_COUNT (sizeof(amd64_fam15h_pe)/sizeof(amd64_entry_t)) diff --git a/lib/pfmlib_amd64.c b/lib/pfmlib_amd64.c index 476bdbc..493541e 100644 --- a/lib/pfmlib_amd64.c +++ b/lib/pfmlib_amd64.c @@ -152,7 +152,9 @@ amd64_get_revision(pfm_amd64_config_t *cfg) default: rev = PFM_PMU_AMD64_FAM10H_BARCELONA; } - } + } else if (cfg->family == 21) { + rev = PFM_PMU_AMD64_FAM15H_INTERLAGOS; + } cfg->revision = rev; } diff --git a/lib/pfmlib_amd64_priv.h b/lib/pfmlib_amd64_priv.h index e260f13..d620f5a 100644 --- a/lib/pfmlib_amd64_priv.h +++ b/lib/pfmlib_amd64_priv.h @@ -25,7 +25,7 @@ #ifndef __PFMLIB_AMD64_PRIV_H__ #define __PFMLIB_AMD64_PRIV_H__ -#define AMD64_MAX_UMASKS 12 +#define AMD64_MAX_UMASKS 16 #define AMD64_MAX_GRP 4 /* must be < 32 (int) */ typedef struct { @@ -92,6 +92,7 @@ extern pfm_amd64_config_t pfm_amd64_cfg; #define AMD64_FL_IBSOP 0x04 /* IBS op */ #define AMD64_FL_DFL 0x08 /* unit mask is default choice */ #define AMD64_FL_OMIT 0x10 /* umask can be omitted */ +#define AMD64_FL_FAM15H_NB 0x20 /* fam15h Northbridge event */ #define AMD64_FL_TILL_K8_REV_C AMD64_TILL_REV(AMD64_K8_REV_C) #define AMD64_FL_K8_REV_D AMD64_FROM_REV(AMD64_K8_REV_D) @@ -124,6 +125,9 @@ extern pfm_amd64_config_t pfm_amd64_cfg; #define AMD64_K8_ATTRS (AMD64_BASIC_ATTRS) #define AMD64_FAM10H_ATTRS (AMD64_BASIC_ATTRS|_AMD64_ATTR_H|_AMD64_ATTR_G) +#define AMD64_FAM15H_ATTRS (AMD64_BASIC_ATTRS|_AMD64_ATTR_H|_AMD64_ATTR_G) +#define AMD64_FAM10H_ATTRS_IBSFE (_AMD64_ATTR_R) +#define AMD64_FAM10H_ATTRS_IBSOP (0) /* * AMD64 MSR definitions diff --git a/lib/pfmlib_common.c b/lib/pfmlib_common.c index 7257158..df10a63 100644 --- a/lib/pfmlib_common.c +++ b/lib/pfmlib_common.c @@ -67,6 +67,7 @@ static pfmlib_pmu_t *pfmlib_pmus[]= &amd64_fam10h_barcelona_support, &amd64_fam10h_shanghai_support, &amd64_fam10h_istanbul_support, + &amd64_fam15h_interlagos_support, &intel_core_support, &intel_atom_support, &intel_nhm_support, diff --git a/lib/pfmlib_priv.h 
b/lib/pfmlib_priv.h index a0e725a..f8290bf 100644 --- a/lib/pfmlib_priv.h +++ b/lib/pfmlib_priv.h @@ -196,6 +196,7 @@ extern pfmlib_pmu_t amd64_k8_revg_support; extern pfmlib_pmu_t amd64_fam10h_barcelona_support; extern pfmlib_pmu_t amd64_fam10h_shanghai_support; extern pfmlib_pmu_t amd64_fam10h_istanbul_support; +extern pfmlib_pmu_t amd64_fam15h_interlagos_support; extern pfmlib_pmu_t intel_p6_support; extern pfmlib_pmu_t intel_ppro_support; extern pfmlib_pmu_t intel_pii_support; -- 1.7.3.4 -- Advanced Micro Devices, Inc. Operating System Research Center ------------------------------------------------------------------------------ Xperia(TM) PLAY It's a major breakthrough. An authentic gaming smartphone on the nation's most reliable network. And it wants your games. http://p.sf.net/sfu/verizon-sfdev _______________________________________________ perfmon2-devel mailing list perfmon2-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/perfmon2-devel