Commit 713eee84 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull more perf tools updates from Arnaldo Carvalho de Melo:
 "Fixes:
   - Fixes for 'perf bench numa'.

   - Always memset source before memcpy in 'perf bench mem'.

   - Quote CC and CXX for their arguments to fix build in environments
     using those variables to pass more than just the compiler names.

   - Fix module symbol processing, addressing regression detected via
     "perf test".

   - Allow multiple probes in record+script_probe_vfs_getname.sh 'perf
     test' entry.

  Improvements:
   - Add script to autogenerate socket family name id->string table from
     copy of kernel header, used so far in 'perf trace'.

   - 'perf ftrace' improvements to provide similar options for this
     utility so that one can go from 'perf record', 'perf trace', etc to
     'perf ftrace' just by changing the name of the subcommand.

   - Prefer new "sched:sched_waking" trace event when it exists in 'perf
     sched' post processing.

   - Update POWER9 metrics to utilize other metrics.

   - Fall back to querying debuginfod if debuginfo not found locally.

  Miscellaneous:
   - Sync various kvm headers with kernel sources"

* tag 'perf-tools-2020-08-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (40 commits)
  perf ftrace: Make option description initials all capital letters
  perf build-ids: Fall back to debuginfod query if debuginfo not found
  perf bench numa: Remove dead code in parse_nodes_opt()
  perf stat: Update POWER9 metrics to utilize other metrics
  perf ftrace: Add change log
  perf: ftrace: Add set_tracing_options() to set all trace options
  perf ftrace: Add option --tid to filter by thread id
  perf ftrace: Add option -D/--delay to delay tracing
  perf: ftrace: Allow set graph depth by '--graph-opts'
  perf ftrace: Add support for trace option tracing_thresh
  perf ftrace: Add option 'verbose' to show more info for graph tracer
  perf ftrace: Add support for tracing option 'irq-info'
  perf ftrace: Add support for trace option funcgraph-irqs
  perf ftrace: Add support for trace option sleep-time
  perf ftrace: Add support for tracing option 'func_stack_trace'
  perf tools: Add general function to parse sublevel options
  perf ftrace: Add option '--inherit' to trace children processes
  perf ftrace: Show trace column header
  perf ftrace: Add option '-m/--buffer-size' to set per-cpu buffer size
  perf ftrace: Factor out function write_tracing_file_int()
  ...
parents 50f6c7db 492e4edb
......@@ -13566,6 +13566,7 @@ F: arch/*/kernel/perf_event*.c
F: include/linux/perf_event.h
F: include/uapi/linux/perf_event.h
F: kernel/events/*
F: tools/lib/perf/
F: tools/perf/
 
PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
......
......@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
......@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
__u8 padding2[192]; /* sdnx needs to be 256byte aligned */
__u64 diag318; /* diagnose 0x318 info */
__u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
......
......@@ -8,7 +8,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC=$(CC) CXX=$(CXX) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
......@@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \
llvm-version \
clang \
libbpf \
libpfm4
libpfm4 \
libdebuginfod
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
......
......@@ -26,6 +26,7 @@ FILES= \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
test-libelf-mmap.bin \
test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
......@@ -157,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
$(OUTPUT)test-libelf-getshdrstrndx.bin:
$(BUILD) -lelf
$(OUTPUT)test-libdebuginfod.bin:
$(BUILD) -ldebuginfod
$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
......
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/debuginfod.h>
/*
 * Build-time feature probe for libdebuginfod: it is compiled and linked
 * against -ldebuginfod, so it only builds when the header and library exist.
 */
int main(void)
{
/* Reference a libdebuginfod symbol so the link step needs the library. */
debuginfod_client* c = debuginfod_begin();
/* Return the handle as an integer so 'c' is used; presumably the probe is only built, not run - TODO confirm. */
return (long)c;
}
......@@ -289,6 +289,7 @@ struct kvm_run {
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
__u32 cpu;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
......@@ -1031,6 +1032,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SECURE_GUEST 181
#define KVM_CAP_HALT_POLL 182
#define KVM_CAP_ASYNC_PF_INT 183
#define KVM_CAP_LAST_CPU 184
#define KVM_CAP_SMALLER_MAXPHYADDR 185
#define KVM_CAP_S390_DIAG318 186
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -91,6 +91,8 @@
/* Use message type V2 */
#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
/* IOTLB can accept batching hints */
#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
......
......@@ -7,13 +7,13 @@ libperf-counting - counting interface
DESCRIPTION
-----------
The counting interface provides API to meassure and get count for specific perf events.
The counting interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
The `counting.c` comes with libbperf package and can be compiled and run like:
The `counting.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
......@@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
The `counting.c` example monitors two events on the current process and displays
their count, in a nutshell it:
* creates events
* adds them to the event list
......@@ -152,7 +153,7 @@ Configure event list with the thread map and open events:
--
Both events are created as disabled (note the `disabled = 1` assignment above),
so we need to enable the whole list explicitely (both events).
so we need to enable the whole list explicitly (both events).
From this moment events are counting and we can do our workload.
......@@ -167,7 +168,8 @@ When we are done we disable the events list.
79 perf_evlist__disable(evlist);
--
Now we need to get the counts from events, following code iterates throught the events list and read counts:
Now we need to get the counts from events, the following code iterates through the
events list and reads counts:
[source,c]
--
......@@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the
85 }
--
And finaly cleanup.
And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
......
......@@ -8,13 +8,13 @@ libperf-sampling - sampling interface
DESCRIPTION
-----------
The sampling interface provides API to meassure and get count for specific perf events.
The sampling interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
The `sampling.c` comes with libbperf package and can be compiled and run like:
The `sampling.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
......@@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176
It requires root access, because it uses hardware cycles event.
The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a
nutshell it:
- creates events
- adds them to the event list
......@@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p
36 };
--
Next step is to prepare cpus map.
Next step is to prepare CPUs map.
In this case we will monitor all the available CPUs:
......@@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers:
--
The event is created as disabled (note the `disabled = 1` assignment above),
so we need to enable the events list explicitely.
so we need to enable the events list explicitly.
From this moment the cycles event is sampling.
......@@ -212,7 +213,7 @@ Each sample needs to get parsed:
106 cpu, pid, tid, ip, period);
--
And finaly cleanup.
And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
......
......@@ -29,7 +29,7 @@ SYNOPSIS
void libperf_init(libperf_print_fn_t fn);
--
*API to handle cpu maps:*
*API to handle CPU maps:*
[source,c]
--
......@@ -217,7 +217,7 @@ Following objects are key to the libperf interface:
[horizontal]
struct perf_cpu_map:: Provides a cpu list abstraction.
struct perf_cpu_map:: Provides a CPU list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
......
......@@ -614,8 +614,9 @@ trace.*::
ftrace.*::
ftrace.tracer::
Can be used to select the default tracer. Possible values are
'function' and 'function_graph'.
Can be used to select the default tracer when neither -G nor
-F option is specified. Possible values are 'function' and
'function_graph'.
llvm.*::
llvm.clang-path::
......
......@@ -24,16 +24,28 @@ OPTIONS
-t::
--tracer=::
Tracer to use: function_graph or function.
Tracer to use when neither -G nor -F option is
specified: function_graph or function.
-v::
--verbose=::
Verbosity level.
-F::
--funcs::
List all available functions to trace.
-p::
--pid=::
Trace on existing process id (comma separated list).
--tid=::
Trace on existing thread id (comma separated list).
-D::
--delay::
Time (ms) to wait before starting tracing after program start.
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
......@@ -48,39 +60,58 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
-m::
--buffer-size::
Set the size of per-cpu tracing buffer, <size> is expected to
be a number with appended unit character - B/K/M/G.
--inherit::
Trace children processes spawned by our target.
-T::
--trace-funcs=::
Only trace functions given by the argument. Multiple functions
can be given by using this option more than once. The function
argument also can be a glob pattern. It will be passed to
'set_ftrace_filter' in tracefs.
Select function tracer and set function filter on the given
function (or a glob pattern). Multiple functions can be given
by using this option more than once. The function argument also
can be a glob pattern. It will be passed to 'set_ftrace_filter'
in tracefs.
-N::
--notrace-funcs=::
Do not trace functions given by the argument. Like -T option,
this can be used more than once to specify multiple functions
(or glob patterns). It will be passed to 'set_ftrace_notrace'
in tracefs.
Select function tracer and do not trace functions given by the
argument. Like -T option, this can be used more than once to
specify multiple functions (or glob patterns). It will be
passed to 'set_ftrace_notrace' in tracefs.
--func-opts::
List of options allowed to set:
call-graph - Display kernel stack trace for function tracer.
irq-info - Display irq context info for function tracer.
-G::
--graph-funcs=::
Set graph filter on the given function (or a glob pattern).
This is useful for the function_graph tracer only and enables
tracing for functions executed from the given function.
This can be used more than once to specify multiple functions.
It will be passed to 'set_graph_function' in tracefs.
Select function_graph tracer and set graph filter on the given
function (or a glob pattern). This is useful for tracing
functions executed from the given function. This can be used more
than once to specify multiple functions. It will be passed to
'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
Set graph notrace filter on the given function (or a glob pattern).
Like -G option, this is useful for the function_graph tracer only
and disables tracing for function executed from the given function.
This can be used more than once to specify multiple functions.
It will be passed to 'set_graph_notrace' in tracefs.
Select function_graph tracer and set graph notrace filter on the
given function (or a glob pattern). Like -G option, this is useful
for the function_graph tracer only and disables tracing for functions
executed from the given function. This can be used more than once to
specify multiple functions. It will be passed to 'set_graph_notrace'
in tracefs.
-D::
--graph-depth=::
Set max depth for function graph tracer to follow
--graph-opts::
List of options allowed to set:
nosleep-time - Measure on-CPU time only for function_graph tracer.
noirqs - Ignore functions that happen inside interrupt.
verbose - Show process names, PIDs, timestamps, etc.
thresh=<n> - Setup trace duration threshold in microseconds.
depth=<n> - Set max depth for function graph tracer to follow.
SEE ALSO
--------
......
......@@ -501,6 +501,14 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
endif
ifndef NO_LIBDEBUGINFOD
$(call feature_check,libdebuginfod)
ifeq ($(feature-libdebuginfod), 1)
CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
EXTLIBS += -ldebuginfod
endif
endif
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
......
......@@ -124,6 +124,8 @@ include ../scripts/utilities.mak
#
# Define LIBPFM4 to enable libpfm4 events extension.
#
# Define NO_LIBDEBUGINFOD if you do not want debuginfod support
#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
......@@ -418,6 +420,7 @@ export INSTALL SHELL_PATH
SHELL = $(SHELL_PATH)
beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
......@@ -501,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
socket_arrays := $(beauty_outdir)/socket_arrays.c
socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
$(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
......@@ -697,6 +706,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(socket_ipproto_array) \
$(socket_arrays) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
......@@ -1006,6 +1016,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(socket_ipproto_array) \
$(OUTPUT)$(socket_arrays) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
......
......@@ -17,9 +17,9 @@ static unsigned int inner_iterations = 100000;
static const struct option options[] = {
OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
"Number of outerer iterations used"),
"Number of outer iterations used"),
OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
"Number of outerer iterations used"),
"Number of inner iterations used"),
OPT_END()
};
......
......@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
memcpy_t fn = r->fn.memcpy;
int i;
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
memset(src, 0, size);
......@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
* to not measure page fault overhead:
*/
fn(dst, src, size);
}
static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
{
u64 cycle_start = 0ULL, cycle_end = 0ULL;
memcpy_t fn = r->fn.memcpy;
int i;
memcpy_prefault(fn, size, src, dst);
cycle_start = get_cycles();
for (i = 0; i < nr_loops; ++i)
......@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
memcpy_t fn = r->fn.memcpy;
int i;
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
fn(dst, src, size);
memcpy_prefault(fn, size, src, dst);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < nr_loops; ++i)
......
......@@ -247,17 +247,22 @@ static int is_node_present(int node)
*/
static bool node_has_cpus(int node)
{
struct bitmask *cpu = numa_allocate_cpumask();
unsigned int i;
struct bitmask *cpumask = numa_allocate_cpumask();
bool ret = false; /* fall back to nocpus */
int cpu;
if (cpu && !numa_node_to_cpus(node, cpu)) {
for (i = 0; i < cpu->size; i++) {
if (numa_bitmask_isbitset(cpu, i))
return true;
BUG_ON(!cpumask);
if (!numa_node_to_cpus(node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu)) {
ret = true;
break;
}
}
}
numa_free_cpumask(cpumask);
return false; /* lets fall back to nocpus safely */
return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
......@@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
BUG_ON(!cpus_per_node);
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
......@@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node)
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
int cpu_start = (target_node + 0) * cpus_per_node;
int cpu_stop = (target_node + 1) * cpus_per_node;
struct bitmask *cpumask = numa_allocate_cpumask();
BUG_ON(cpu_stop > g->p.nr_cpus);
for (cpu = cpu_start; cpu < cpu_stop; cpu++)
CPU_SET(cpu, &mask);
BUG_ON(!cpumask);
if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
CPU_SET(cpu, &mask);
}
}
numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
......@@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return -1;
return parse_node_list(arg);
return 0;
}
#define BIT(x) (1ul << x)
......@@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
/* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
/* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
......@@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
d = data + off - 1;
d1 = data + words;
/* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
......@@ -1733,12 +1734,12 @@ err:
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */
{ "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
{ "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM },
{ "RAM-bw-local-NOTHP,",
"mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
{ "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
{ "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "1", OPT_BW_RAM },
/* 2-stream NUMA bandwidth measurements: */
......@@ -1755,7 +1756,7 @@ static const char *tests[][MAX_ARGS] = {
{ " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
{ " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
{ " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
{ " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
{ " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
{ " 4x4-convergence-NOTHP,",
......@@ -1780,24 +1781,24 @@ static const char *tests[][MAX_ARGS] = {
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
{ "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
{ " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
{ " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
{ "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
{ "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
{ " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
{ " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
{ "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
{ "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
{ " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
{ " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
{ " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
{ " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
{ " 4x8-bw-thread-NOTHP,",
{ " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
{ " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
{ " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
{ " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
{ " 4x8-bw-process-NOTHP,",
"mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
{ " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
{ " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
{ " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
{ " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },