Commit 772c1d06 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf updates from Ingo Molnar:
 "Kernel side changes:

   - Improved kprobes robustness

   - Intel PEBS support for PT hardware tracing

   - Other Intel PT improvements: high order pages memory footprint
     reduction and various related cleanups

   - Misc cleanups

  The perf tooling side has been very busy in this cycle, with over 300
  commits. This is an incomplete high-level summary of the many
  improvements done by over 30 developers:

   - Lots of updates to the following tools:

      'perf c2c'
      'perf config'
      'perf record'
      'perf report'
      'perf script'
      'perf test'
      'perf top'
      'perf trace'

   - Updates to libperf and libtraceevent, and a consolidation of the
     proliferation of x86 instruction decoder libraries.

   - Vendor event updates for Intel and PowerPC CPUs,

   - Updates to hardware tracing tooling for ARM and Intel CPUs,

   - ... and lots of other changes and cleanups - see the shortlog and
     Git log for details"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (322 commits)
  kprobes: Prohibit probing on BUG() and WARN() address
  perf/x86: Make more stuff static
  x86, perf: Fix the dependency of the x86 insn decoder selftest
  objtool: Ignore intentional differences for the x86 insn decoder
  objtool: Update sync-check.sh from perf's check-headers.sh
  perf build: Ignore intentional differences for the x86 insn decoder
  perf intel-pt: Use shared x86 insn decoder
  perf intel-pt: Remove inat.c from build dependency list
  perf: Update .gitignore file
  objtool: Move x86 insn decoder to a common location
  perf metricgroup: Support multiple events for metricgroup
  perf metricgroup: Scale the metric result
  perf pmu: Change convert_scale from static to global
  perf symbols: Move mem_info and branch_info out of symbol.h
  perf auxtrace: Uninline functions that touch perf_session
  perf tools: Remove needless evlist.h include directives
  perf tools: Remove needless evlist.h include directives
  perf tools: Remove needless thread_map.h include directives
  perf tools: Remove needless thread.h include directives
  perf tools: Remove needless map.h include directives
  ...
parents c7eba51c e336b402
......@@ -171,7 +171,7 @@ config HAVE_MMIOTRACE_SUPPORT
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
depends on DEBUG_KERNEL && KPROBES
depends on DEBUG_KERNEL && INSTRUCTION_DECODER
depends on !COMPILE_TEST
---help---
Perform x86 instruction decoder selftests at build time.
......
......@@ -1005,6 +1005,27 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
/* current number of events already accepted */
n = cpuc->n_events;
if (!cpuc->n_events)
cpuc->pebs_output = 0;
if (!cpuc->is_fake && leader->attr.precise_ip) {
/*
* For PEBS->PT, if !aux_event, the group leader (PT) went
* away, the group was broken down and this singleton event
* can't schedule any more.
*/
if (is_pebs_pt(leader) && !leader->aux_event)
return -EINVAL;
/*
* pebs_output: 0: no PEBS so far, 1: PT, 2: DS
*/
if (cpuc->pebs_output &&
cpuc->pebs_output != is_pebs_pt(leader) + 1)
return -EINVAL;
cpuc->pebs_output = is_pebs_pt(leader) + 1;
}
if (is_x86_event(leader)) {
if (n >= max_count)
......@@ -2241,6 +2262,17 @@ static int x86_pmu_check_period(struct perf_event *event, u64 value)
return 0;
}
/*
 * ->aux_output_match() callback of the x86 core PMU.
 *
 * Reports "no match" (0) unless this PMU advertises
 * PERF_PMU_CAP_AUX_OUTPUT and a vendor-specific matcher is installed in
 * x86_pmu; in that case the vendor matcher's verdict on @event is returned.
 */
static int x86_pmu_aux_output_match(struct perf_event *event)
{
	if ((pmu.capabilities & PERF_PMU_CAP_AUX_OUTPUT) &&
	    x86_pmu.aux_output_match)
		return x86_pmu.aux_output_match(event);

	return 0;
}
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
......@@ -2266,6 +2298,8 @@ static struct pmu pmu = {
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,
};
void arch_perf_update_userpage(struct perf_event *event,
......
......@@ -18,6 +18,7 @@
#include <asm/cpufeature.h>
#include <asm/hardirq.h>
#include <asm/intel-family.h>
#include <asm/intel_pt.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
......@@ -3298,6 +3299,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
}
if (event->attr.aux_output) {
if (!event->attr.precise_ip)
return -EINVAL;
event->hw.flags |= PERF_X86_EVENT_PEBS_VIA_PT;
}
if (event->attr.type != PERF_TYPE_RAW)
return 0;
......@@ -3816,6 +3824,14 @@ static int intel_pmu_check_period(struct perf_event *event, u64 value)
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
}
/*
 * Intel matcher installed as x86_pmu.aux_output_match.
 *
 * Nothing matches when the pebs_output_pt_available capability bit is
 * clear; otherwise the result is is_intel_pt_event()'s answer for @event.
 */
static int intel_pmu_aux_output_match(struct perf_event *event)
{
	if (x86_pmu.intel_cap.pebs_output_pt_available)
		return is_intel_pt_event(event);

	return 0;
}
PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
......@@ -3940,6 +3956,8 @@ static __initconst const struct x86_pmu intel_pmu = {
.sched_task = intel_pmu_sched_task,
.check_period = intel_pmu_check_period,
.aux_output_match = intel_pmu_aux_output_match,
};
static __init void intel_clovertown_quirk(void)
......
......@@ -446,7 +446,7 @@ static int cstate_cpu_init(unsigned int cpu)
return 0;
}
const struct attribute_group *core_attr_update[] = {
static const struct attribute_group *core_attr_update[] = {
&group_cstate_core_c1,
&group_cstate_core_c3,
&group_cstate_core_c6,
......@@ -454,7 +454,7 @@ const struct attribute_group *core_attr_update[] = {
NULL,
};
const struct attribute_group *pkg_attr_update[] = {
static const struct attribute_group *pkg_attr_update[] = {
&group_cstate_pkg_c2,
&group_cstate_pkg_c3,
&group_cstate_pkg_c6,
......
......@@ -902,6 +902,9 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
*/
static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
{
	/*
	 * Not needed when every PEBS event on this CPU is a PEBS-via-PT
	 * event, nor when there are no PEBS events at all.
	 */
	if (cpuc->n_pebs == cpuc->n_pebs_via_pt || !cpuc->n_pebs)
		return false;

	/* Otherwise needed only if all PEBS events are large-PEBS ones. */
	return cpuc->n_pebs == cpuc->n_large_pebs;
}
......@@ -919,6 +922,9 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
u64 threshold;
int reserved;
if (cpuc->n_pebs_via_pt)
return;
if (x86_pmu.flags & PMU_FL_PEBS_ALL)
reserved = x86_pmu.max_pebs_events + x86_pmu.num_counters_fixed;
else
......@@ -1059,10 +1065,40 @@ void intel_pmu_pebs_add(struct perf_event *event)
cpuc->n_pebs++;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
cpuc->n_pebs_via_pt++;
pebs_update_state(needed_cb, cpuc, event, true);
}
/*
 * PEBS-via-PT part of disabling PEBS for @event.
 *
 * Ignores events that are not PEBS-via-PT. For those that are, the
 * PEBS_VIA_PT_MASK bits are dropped from this CPU's pebs_enabled image
 * once no bit outside that mask is left set in it.
 */
static void intel_pmu_pebs_via_pt_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (!is_pebs_pt(event))
		return;

	/* some other bit is still set: keep the PT output bits */
	if (cpuc->pebs_enabled & ~PEBS_VIA_PT_MASK)
		return;

	cpuc->pebs_enabled &= ~PEBS_VIA_PT_MASK;
}
/*
 * PEBS-via-PT part of enabling PEBS for @event.
 *
 * Ignores events that are not PEBS-via-PT. Otherwise sets PEBS_OUTPUT_PT
 * in this CPU's pebs_enabled image (plus PEBS_PMI_AFTER_EACH_RECORD when
 * the event is not a large-PEBS one) and writes the event's reset value
 * from the debug store into the reload MSR selected by the counter index.
 */
static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct hw_perf_event *hwc = &event->hw;
	struct debug_store *ds = cpuc->ds;

	if (!is_pebs_pt(event))
		return;

	cpuc->pebs_enabled |= PEBS_OUTPUT_PT;
	if (!(hwc->flags & PERF_X86_EVENT_LARGE_PEBS))
		cpuc->pebs_enabled |= PEBS_PMI_AFTER_EACH_RECORD;

	wrmsrl(MSR_RELOAD_PMC0 + hwc->idx, ds->pebs_event_reset[hwc->idx]);
}
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
......@@ -1100,6 +1136,8 @@ void intel_pmu_pebs_enable(struct perf_event *event)
} else {
ds->pebs_event_reset[hwc->idx] = 0;
}
intel_pmu_pebs_via_pt_enable(event);
}
void intel_pmu_pebs_del(struct perf_event *event)
......@@ -1111,6 +1149,8 @@ void intel_pmu_pebs_del(struct perf_event *event)
cpuc->n_pebs--;
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
if (hwc->flags & PERF_X86_EVENT_PEBS_VIA_PT)
cpuc->n_pebs_via_pt--;
pebs_update_state(needed_cb, cpuc, event, false);
}
......@@ -1120,7 +1160,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (cpuc->n_pebs == cpuc->n_large_pebs)
if (cpuc->n_pebs == cpuc->n_large_pebs &&
cpuc->n_pebs != cpuc->n_pebs_via_pt)
intel_pmu_drain_pebs_buffer();
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
......@@ -1131,6 +1172,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
intel_pmu_pebs_via_pt_disable(event);
if (cpuc->enabled)
wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
......@@ -2031,6 +2074,12 @@ void __init intel_ds_init(void)
PERF_SAMPLE_REGS_INTR);
}
pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
if (x86_pmu.intel_cap.pebs_output_pt_available) {
pr_cont("PEBS-via-PT, ");
x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
}
break;
default:
......
......@@ -273,7 +273,7 @@ static inline bool lbr_from_signext_quirk_needed(void)
return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
}
DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
/* If quirk is enabled, ensure sign extension is 63 bits: */
inline u64 lbr_from_signext_quirk_wr(u64 val)
......
......@@ -545,33 +545,62 @@ static void pt_config_buffer(void *buf, unsigned int topa_idx,
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
/*
* Keep ToPA table-related metadata on the same page as the actual table,
* taking up a few words from the top
*/
#define TENTS_PER_PAGE (((PAGE_SIZE - 40) / sizeof(struct topa_entry)) - 1)
/**
* struct topa - page-sized ToPA table with metadata at the top
* @table: actual ToPA table entries, as understood by PT hardware
* struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
* @phys: physical address of this page
* @offset: offset of the first entry in this table in the buffer
* @size: total size of all entries in this table
* @last: index of the last initialized entry in this table
* @z_count: how many times the first entry repeats
*/
struct topa {
struct topa_entry table[TENTS_PER_PAGE];
struct list_head list;
u64 phys;
u64 offset;
size_t size;
int last;
unsigned int z_count;
};
/*
* Keep ToPA table-related metadata on the same page as the actual table,
* taking up a few words from the top
*/
#define TENTS_PER_PAGE \
((PAGE_SIZE - sizeof(struct topa)) / sizeof(struct topa_entry))
/**
* struct topa_page - page-sized ToPA table with metadata at the top
* @table: actual ToPA table entries, as understood by PT hardware
* @topa: metadata
*/
struct topa_page {
struct topa_entry table[TENTS_PER_PAGE];
struct topa topa;
};
/* Map ToPA metadata back to the struct topa_page that embeds it. */
static inline struct topa_page *topa_to_page(struct topa *topa)
{
	struct topa_page *tp = container_of(topa, struct topa_page, topa);

	return tp;
}
/*
 * Find the struct topa_page that holds @te by rounding the entry's address
 * down to a page boundary (the table is the first member of
 * struct topa_page).
 */
static inline struct topa_page *topa_entry_to_page(struct topa_entry *te)
{
	unsigned long page_start = (unsigned long)te & PAGE_MASK;

	return (struct topa_page *)page_start;
}
/* Page frame number of the page containing @topa and its table. */
static inline phys_addr_t topa_pfn(struct topa *topa)
{
	struct topa_page *tp = topa_to_page(topa);

	return PFN_DOWN(virt_to_phys(tp));
}
/* make -1 stand for the last table entry */
#define TOPA_ENTRY(t, i) ((i) == -1 ? &(t)->table[(t)->last] : &(t)->table[(i)])
#define TOPA_ENTRY(t, i) \
((i) == -1 \
? &topa_to_page(t)->table[(t)->last] \
: &topa_to_page(t)->table[(i)])
#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
/**
* topa_alloc() - allocate page-sized ToPA table
......@@ -583,27 +612,26 @@ struct topa {
static struct topa *topa_alloc(int cpu, gfp_t gfp)
{
	int node = cpu_to_node(cpu);
	struct topa_page *tp;
	struct page *p;

	/* one zeroed page on @cpu's node holds both the table and its metadata */
	p = alloc_pages_node(node, gfp | __GFP_ZERO, 0);
	if (!p)
		return NULL;

	tp = page_address(p);
	tp->topa.last = 0;

	/*
	 * In case of single-entry ToPA, always put the self-referencing END
	 * link as the 2nd entry in the table
	 */
	if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries)) {
		/*
		 * ->base stores the address shifted right by TOPA_SHIFT, like
		 * every other ->base assignment in this file; storing the raw
		 * physical address would make the END link point elsewhere.
		 */
		TOPA_ENTRY(&tp->topa, 1)->base = page_to_phys(p) >> TOPA_SHIFT;
		TOPA_ENTRY(&tp->topa, 1)->end = 1;
	}

	/* callers work with the metadata; topa_to_page() gets back to the page */
	return &tp->topa;
}
/**
......@@ -643,7 +671,7 @@ static void topa_insert_table(struct pt_buffer *buf, struct topa *topa)
BUG_ON(last->last != TENTS_PER_PAGE - 1);
TOPA_ENTRY(last, -1)->base = topa->phys >> TOPA_SHIFT;
TOPA_ENTRY(last, -1)->base = topa_pfn(topa);
TOPA_ENTRY(last, -1)->end = 1;
}
......@@ -670,7 +698,7 @@ static bool topa_table_full(struct topa *topa)
*
* Return: 0 on success or error code.
*/
static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
static int topa_insert_pages(struct pt_buffer *buf, int cpu, gfp_t gfp)
{
struct topa *topa = buf->last;
int order = 0;
......@@ -681,13 +709,18 @@ static int topa_insert_pages(struct pt_buffer *buf, gfp_t gfp)
order = page_private(p);
if (topa_table_full(topa)) {
topa = topa_alloc(buf->cpu, gfp);
topa = topa_alloc(cpu, gfp);
if (!topa)
return -ENOMEM;
topa_insert_table(buf, topa);
}
if (topa->z_count == topa->last - 1) {
if (order == TOPA_ENTRY(topa, topa->last - 1)->size)
topa->z_count++;
}
TOPA_ENTRY(topa, -1)->base = page_to_phys(p) >> TOPA_SHIFT;
TOPA_ENTRY(topa, -1)->size = order;
if (!buf->snapshot &&
......@@ -713,23 +746,26 @@ static void pt_topa_dump(struct pt_buffer *buf)
struct topa *topa;
list_for_each_entry(topa, &buf->tables, list) {
struct topa_page *tp = topa_to_page(topa);
int i;
pr_debug("# table @%p (%016Lx), off %llx size %zx\n", topa->table,
topa->phys, topa->offset, topa->size);
pr_debug("# table @%p, off %llx size %zx\n", tp->table,
topa->offset, topa->size);
for (i = 0; i < TENTS_PER_PAGE; i++) {
pr_debug("# entry @%p (%lx sz %u %c%c%c) raw=%16llx\n",
&topa->table[i],
(unsigned long)topa->table[i].base << TOPA_SHIFT,
sizes(topa->table[i].size),
topa->table[i].end ? 'E' : ' ',
topa->table[i].intr ? 'I' : ' ',
topa->table[i].stop ? 'S' : ' ',
*(u64 *)&topa->table[i]);
&tp->table[i],
(unsigned long)tp->table[i].base << TOPA_SHIFT,
sizes(tp->table[i].size),
tp->table[i].end ? 'E' : ' ',
tp->table[i].intr ? 'I' : ' ',
tp->table[i].stop ? 'S' : ' ',
*(u64 *)&tp->table[i]);
if ((intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) &&
topa->table[i].stop) ||
topa->table[i].end)
tp->table[i].stop) ||
tp->table[i].end)
break;
if (!i && topa->z_count)
i += topa->z_count;
}
}
}
......@@ -771,7 +807,7 @@ static void pt_update_head(struct pt *pt)
/* offset of the current output region within this table */
for (topa_idx = 0; topa_idx < buf->cur_idx; topa_idx++)
base += sizes(buf->cur->table[topa_idx].size);
base += TOPA_ENTRY_SIZE(buf->cur, topa_idx);
if (buf->snapshot) {
local_set(&buf->data_size, base);
......@@ -791,7 +827,7 @@ static void pt_update_head(struct pt *pt)
*/
static void *pt_buffer_region(struct pt_buffer *buf)
{
return phys_to_virt(buf->cur->table[buf->cur_idx].base << TOPA_SHIFT);
return phys_to_virt(TOPA_ENTRY(buf->cur, buf->cur_idx)->base << TOPA_SHIFT);
}
/**
......@@ -800,7 +836,7 @@ static void *pt_buffer_region(struct pt_buffer *buf)
*/
static size_t pt_buffer_region_size(struct pt_buffer *buf)
{
return sizes(buf->cur->table[buf->cur_idx].size);
return TOPA_ENTRY_SIZE(buf->cur, buf->cur_idx);
}
/**
......@@ -830,7 +866,7 @@ static void pt_handle_status(struct pt *pt)
* know.
*/
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) ||
buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
buf->output_off == pt_buffer_region_size(buf)) {
perf_aux_output_flag(&pt->handle,
PERF_AUX_FLAG_TRUNCATED);
advance++;
......@@ -868,9 +904,11 @@ static void pt_handle_status(struct pt *pt)
static void pt_read_offset(struct pt_buffer *buf)
{
u64 offset, base_topa;
struct topa_page *tp;
rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
buf->cur = phys_to_virt(base_topa);
tp = phys_to_virt(base_topa);
buf->cur = &tp->topa;
rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
/* offset within current output region */
......@@ -879,29 +917,97 @@ static void pt_read_offset(struct pt_buffer *buf)
buf->cur_idx = (offset & 0xffffff80) >> 7;
}
/**
* pt_topa_next_entry() - obtain index of the first page in the next ToPA entry
* @buf: PT buffer.
* @pg: Page offset in the buffer.
*
* When advancing to the next output region (ToPA entry), given a page offset
* into the buffer, we need to find the offset of the first page in the next
* region.
*/
static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
static struct topa_entry *
pt_topa_entry_for_page(struct pt_buffer *buf, unsigned int pg)
{
struct topa_entry *te = buf->topa_index[pg];
struct topa_page *tp;
struct topa *topa;
unsigned int idx, cur_pg = 0, z_pg = 0, start_idx = 0;
/* one region */
if (buf->first == buf->last && buf->first->last == 1)
return pg;
/*
* Indicates a bug in the caller.
*/
if (WARN_ON_ONCE(pg >= buf->nr_pages))
return NULL;
/*
* First, find the ToPA table where @pg fits. With high
* order allocations, there shouldn't be many of these.
*/
list_for_each_entry(topa, &buf->tables, list) {
if (topa->offset + topa->size > pg << PAGE_SHIFT)
goto found;
}
/*
* Hitting this means we have a problem in the ToPA
* allocation code.
*/
WARN_ON_ONCE(1);
do {
pg++;
pg &= buf->nr_pages - 1;
} while (buf->topa_index[pg] == te);
return NULL;
return pg;
found:
/*
* Indicates a problem in the ToPA allocation code.
*/
if (WARN_ON_ONCE(topa->last == -1))
return NULL;
tp = topa_to_page(topa);
cur_pg = PFN_DOWN(topa->offset);
if (topa->z_count) {
z_pg = TOPA_ENTRY_PAGES(topa, 0) * (topa->z_count + 1);
start_idx = topa->z_count + 1;
}
/*
* Multiple entries at the beginning of the table have the same size,
* ideally all of them; if @pg falls there, the search is done.
*/
if (pg >= cur_pg && pg < cur_pg + z_pg) {
idx = (pg - cur_pg) / TOPA_ENTRY_PAGES(topa, 0);
return &tp->table[idx];
}
/*
* Otherwise, slow path: iterate through the remaining entries.
*/
for (idx = start_idx, cur_pg += z_pg; idx < topa->last; idx++) {
if (cur_pg + TOPA_ENTRY_PAGES(topa, idx) > pg)
return &tp->table[idx];
cur_pg += TOPA_ENTRY_PAGES(topa, idx);
}
/*
* Means we couldn't find a ToPA entry in the table that does match.
*/
WARN_ON_ONCE(1);
return NULL;
}
static struct topa_entry *
pt_topa_prev_entry(struct pt_buffer *buf, struct topa_entry *te)
{
unsigned long table = (unsigned long)te & ~(PAGE_SIZE - 1);
struct topa_page *tp;
struct topa *topa;
tp = (struct topa_page *)table;
if (tp->table != te)
return --te;
topa = &tp->topa;
if (topa == buf->first)
topa = buf->last;
else
topa = list_prev_entry(topa, list);
tp = topa_to_page(topa);
return &tp->table[topa->last - 1];
}
/**
......@@ -925,8 +1031,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long idx, npages, wakeup;
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 <
sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) {
if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
return -EINVAL;
}
......@@ -937,9 +1042,13 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
return 0;
/* clear STOP and INT from current entry */
buf->topa_index[buf->stop_pos]->stop = 0;
buf->topa_index[buf->stop_pos]->intr = 0;
buf->topa_index[buf->intr_pos]->intr = 0;
if (buf->stop_te) {
buf->stop_te->stop = 0;
buf->stop_te->intr = 0;
}