Commit b9277295 authored by Ionela Voinescu

arm64: unify counter and cpufreq-based frequency invariance


The frequency scale factor will always be a normalized ratio between
the current performance and maximum performance.

Given that the maximum performance is known at boot time, it can be
cached in a per-cpu variable. This is already done for counter-based
frequency invariance (FI) and can be done for cpufreq-driven FI as
well.

As for the current performance, at the time the scale factor is
computed we can either receive this value from the cpufreq framework
or compute it using performance counters.

Given this, a scale computation function can be generalised to be used
for both cpufreq and counter-driven FI. Here, this is called
topology_set_freq_scale_unified() and it's used at the moment for arm64
only, with the intention of making this generic and available in the
arch_topology driver in the future.

Also, the initialisation code previously used for AMU validation and
initialisation (init_amu_fie()) can now be used for cpufreq FI as well,
under the name init_freq_inv().

Signed-off-by: Ionela Voinescu <ionela.voinescu@arm.com>
parent 1e45443e
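As a rough out-of-kernel sketch of the computation the commit message describes (plain C with illustrative names; set_max_perf(), compute_scale() and max_perf_ratio are not part of the patch): the boot-time maximum is cached once as a ratio, and the same arithmetic then serves both the cpufreq path, where the deltas are the current frequency and 1, and the counter path, where the deltas are the core and constant cycle deltas since the last tick.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define SCHED_CAPACITY_SCALE	(1ULL << SCHED_CAPACITY_SHIFT)	/* 1024, as in the kernel */

/* Illustrative stand-in for the per-cpu arch_max_freq_scale cache. */
static uint64_t max_perf_ratio;

/* Cache the boot-time maximum: max_rate / ref_rate * SCHED_CAPACITY_SCALE. */
static void set_max_perf(uint64_t max_rate, uint64_t ref_rate)
{
	max_perf_ratio = (max_rate << SCHED_CAPACITY_SHIFT) / ref_rate;
}

/*
 * Normalised scale factor in [0, SCHED_CAPACITY_SCALE]:
 *   cpufreq path: cur_delta = current frequency in kHz, ref_delta = 1
 *   counter path: cur_delta = core cycle delta, ref_delta = constant cycle delta
 */
static unsigned long compute_scale(uint64_t cur_delta, uint64_t ref_delta)
{
	uint64_t scale = (cur_delta << (2 * SCHED_CAPACITY_SHIFT)) /
			 (ref_delta * max_perf_ratio);

	return scale > SCHED_CAPACITY_SCALE ? SCHED_CAPACITY_SCALE : scale;
}

int main(void)
{
	/* cpufreq path: 2 GHz maximum (ref_rate fixed at 1000), running at 1 GHz */
	set_max_perf(2000000000ULL, 1000);
	printf("cpufreq scale: %lu\n", compute_scale(1000000, 1));

	/* counter path: 25 MHz reference counter, 4 ms tick window at 1 GHz */
	set_max_perf(2000000000ULL, 25000000);
	printf("counter scale: %lu\n", compute_scale(4000000, 100000));

	return 0;
}

With these illustrative numbers both calls print 512, i.e. half of SCHED_CAPACITY_SCALE for a CPU running at half its maximum frequency.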
@@ -17,18 +17,14 @@ int pcibus_to_node(struct pci_bus *bus);
 #include <linux/arch_topology.h>
 
 void update_freq_counters_refs(void);
 
-#ifdef CONFIG_ARM64_AMU_EXTN
-/*
- * Replace task scheduler's default counter-based
- * frequency-invariance scale factor setting.
- */
-void topology_scale_freq_tick(void);
-#define arch_scale_freq_tick topology_scale_freq_tick
-#endif /* CONFIG_ARM64_AMU_EXTN */
+void topology_set_freq_scale_unified(const struct cpumask *cpus,
+				     unsigned long cur_perf,
+				     unsigned long max_perf);
 
 /* Replace task scheduler's default frequency-invariant accounting */
-#define arch_set_freq_scale topology_set_freq_scale
+#define arch_set_freq_scale topology_set_freq_scale_unified
+#define arch_scale_freq_tick() topology_set_freq_scale_unified(NULL, 0, 0)
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
......
@@ -142,7 +142,6 @@ COUNTER_READ_STORE(constcnt, 0);
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale);
 static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
 static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
-static cpumask_var_t amu_fie_cpus;
 
 void update_freq_counters_refs(void)
 {
@@ -183,19 +182,18 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
 	 * Pre-compute the fixed ratio between the frequency of the constant
 	 * reference counter and the maximum frequency of the CPU.
 	 *
-	 *			    ref_rate
-	 * arch_max_freq_scale =   ---------- * SCHED_CAPACITY_SCALE²
 	 *			    max_rate
+	 * arch_max_freq_scale =   ---------- * SCHED_CAPACITY_SCALE
+	 *			    ref_rate
 	 *
-	 * We use a factor of 2 * SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE²
+	 * We use a factor of SCHED_CAPACITY_SHIFT -> SCHED_CAPACITY_SCALE
 	 * in order to ensure a good resolution for arch_max_freq_scale for
-	 * very low reference frequencies (down to the KHz range which should
-	 * be unlikely).
+	 * similar maximum and reference rates.
 	 */
-	ratio = ref_rate << (2 * SCHED_CAPACITY_SHIFT);
-	ratio = div64_u64(ratio, max_rate);
+	ratio = max_rate << SCHED_CAPACITY_SHIFT;
+	ratio = div64_u64(ratio, ref_rate);
 	if (!ratio) {
-		WARN_ONCE(1, "Reference frequency too low.\n");
+		WARN_ONCE(1, "Maximum frequency scale factor cannot be 0.\n");
 		return -EINVAL;
 	}
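As a hedged worked example of the new ratio (illustrative numbers, not taken from the patch): with a 2 GHz maximum CPU frequency and a 25 MHz constant reference counter, arch_max_freq_scale = (2,000,000,000 / 25,000,000) * 1024 = 81920. On the cpufreq path, where ref_rate is the fixed 1000 passed by init_freq_inv() and max_rate is the maximum frequency in Hz, the same 2 GHz CPU gives (2,000,000,000 / 1000) * 1024 = 2,048,000,000, i.e. the maximum frequency in kHz scaled by SCHED_CAPACITY_SCALE.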
@@ -204,76 +202,40 @@ static int freq_inv_set_max_ratio(int cpu, u64 max_rate, u64 ref_rate)
 	return 0;
 }
 
-static inline bool
-enable_policy_freq_counters(int cpu, cpumask_var_t valid_cpus)
-{
-	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
-
-	if (!policy) {
-		pr_debug("CPU%d: No cpufreq policy found.\n", cpu);
-		return false;
-	}
-
-	if (cpumask_subset(policy->related_cpus, valid_cpus))
-		cpumask_or(amu_fie_cpus, policy->related_cpus,
-			   amu_fie_cpus);
-
-	cpufreq_cpu_put(policy);
-
-	return true;
-}
+enum freq_inv_source {FREQ_INV_CPUFREQ, FREQ_INV_COUNTERS};
+static DEFINE_PER_CPU(enum freq_inv_source, inv_source) = FREQ_INV_CPUFREQ;
 
 static DEFINE_STATIC_KEY_FALSE(amu_fie_key);
 #define amu_freq_invariant() static_branch_unlikely(&amu_fie_key)
 
-static int __init init_amu_fie(void)
+static int __init init_freq_inv(void)
 {
 	bool invariance_status = topology_scale_freq_invariant();
-	cpumask_var_t valid_cpus;
-	bool have_policy = false;
-	int ret = 0;
-	int cpu;
+	int counter_cpus, cpu;
+	u64 max_rate_hz;
 
-	if (!zalloc_cpumask_var(&valid_cpus, GFP_KERNEL))
-		return -ENOMEM;
-
-	if (!zalloc_cpumask_var(&amu_fie_cpus, GFP_KERNEL)) {
-		ret = -ENOMEM;
-		goto free_valid_mask;
-	}
-
 	for_each_present_cpu(cpu) {
-		if (!freq_counters_valid(cpu) ||
-		    freq_inv_set_max_ratio(cpu,
-					   cpufreq_get_hw_max_freq(cpu) * 1000,
-					   arch_timer_get_rate()))
+		max_rate_hz = cpufreq_get_hw_max_freq(cpu) * 1000;
+
+		if (freq_counters_valid(cpu) &&
+		    !freq_inv_set_max_ratio(cpu, max_rate_hz,
+					    arch_timer_get_rate())) {
+			per_cpu(inv_source, cpu) = FREQ_INV_COUNTERS;
+			counter_cpus++;
 			continue;
+		}
 
-		cpumask_set_cpu(cpu, valid_cpus);
-		have_policy |= enable_policy_freq_counters(cpu, valid_cpus);
+		if (!cpufreq_supports_freq_invariance() ||
+		    WARN_ON_ONCE(freq_inv_set_max_ratio(cpu, max_rate_hz,
+							1000))) {
+			pr_debug("CPU%d: cpufreq-based FIE not supported", cpu);
+			return 0;
+		}
 	}
 
-	/*
-	 * If we are not restricted by cpufreq policies, we only enable
-	 * the use of the AMU feature for FIE if all CPUs support AMU.
-	 * Otherwise, enable_policy_freq_counters has already enabled
-	 * policy cpus.
-	 */
-	if (!have_policy && cpumask_equal(valid_cpus, cpu_present_mask))
-		cpumask_or(amu_fie_cpus, amu_fie_cpus, valid_cpus);
-
-	if (!cpumask_empty(amu_fie_cpus)) {
-		pr_info("CPUs[%*pbl]: counters will be used for FIE.",
-			cpumask_pr_args(amu_fie_cpus));
+	/* Optimisation: Flag fully counter driven frequency invariance */
+	if (counter_cpus == num_present_cpus())
 		static_branch_enable(&amu_fie_key);
-	}
-
-	/*
-	 * If the system is not fully invariant after AMU init, disable
-	 * partial use of counters for frequency invariance.
-	 */
-	if (!topology_scale_freq_invariant())
-		static_branch_disable(&amu_fie_key);
 
 	/*
 	 * Task scheduler behavior depends on frequency invariance support,
@@ -284,62 +246,69 @@ static int __init init_amu_fie(void)
 	if (invariance_status != topology_scale_freq_invariant())
 		rebuild_sched_domains_energy();
 
-free_valid_mask:
-	free_cpumask_var(valid_cpus);
-
-	return ret;
+	return 0;
 }
-late_initcall_sync(init_amu_fie);
+late_initcall_sync(init_freq_inv);
 
-bool arch_freq_counters_available(const struct cpumask *cpus)
+void topology_set_freq_scale_unified(const struct cpumask *cpus,
+				     unsigned long cur_freq,
+				     unsigned long max_freq)
 {
-	return amu_freq_invariant() &&
-	       cpumask_subset(cpus, amu_fie_cpus);
-}
-
-void topology_scale_freq_tick(void)
-{
-	u64 prev_core_cnt, prev_const_cnt;
-	u64 core_cnt, const_cnt, scale;
-	int cpu = smp_processor_id();
+	u64 core_delta = cur_freq, const_delta = 1;
+	unsigned long scale;
+	int cpu;
 
-	if (!amu_freq_invariant())
-		return;
+	/* CPUfreq path: cpufreq provides cur_freq for cpus */
+	if (!amu_freq_invariant() && cpus) {
+		if (WARN_ON_ONCE(!cur_freq))
+			return;
+		cpu = cpumask_first(cpus);
+		goto compute_scale;
+	}
 
-	if (!cpumask_test_cpu(cpu, amu_fie_cpus))
+	/* Counters path: cur_freq obtained from counters */
+	cpu = smp_processor_id();
+	if (per_cpu(inv_source, cpu) != FREQ_INV_COUNTERS)
 		return;
 
-	prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
-	prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
+	core_delta = this_cpu_read(arch_core_cycles_prev);
+	const_delta = this_cpu_read(arch_const_cycles_prev);
 
 	update_freq_counters_refs();
 
-	const_cnt = this_cpu_read(arch_const_cycles_prev);
-	core_cnt = this_cpu_read(arch_core_cycles_prev);
+	core_delta = this_cpu_read(arch_core_cycles_prev) - core_delta;
+	const_delta = this_cpu_read(arch_const_cycles_prev) - const_delta;
 
-	if (unlikely(core_cnt <= prev_core_cnt ||
-		     const_cnt <= prev_const_cnt))
-		goto store_and_exit;
+	if (unlikely(!core_delta || !const_delta))
+		return;
 
+	cpus = NULL;
+
+compute_scale:
 	/*
-	 *	    /\core    arch_max_freq_scale
-	 * scale =  ------- * --------------------
-	 *	    /\const   SCHED_CAPACITY_SCALE
+	 *	    /\core    SCHED_CAPACITY_SCALE²
+	 * scale =  ------- * ---------------------
+	 *	    /\const    arch_max_freq_scale
 	 *
-	 * See validate_cpu_freq_invariance_counters() for details on
-	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SHIFT.
+	 * See freq_inv_set_max_ratio() for details on
+	 * arch_max_freq_scale and the use of SCHED_CAPACITY_SCALE.
	 */
-	scale = core_cnt - prev_core_cnt;
-	scale *= this_cpu_read(arch_max_freq_scale);
-	scale = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
-			  const_cnt - prev_const_cnt);
+	const_delta *= per_cpu(arch_max_freq_scale, cpu);
+	core_delta = core_delta << (2 * SCHED_CAPACITY_SHIFT);
+	scale = div64_u64(core_delta, const_delta);
 
 	scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
-	this_cpu_write(freq_scale, (unsigned long)scale);
 
-store_and_exit:
-	this_cpu_write(arch_core_cycles_prev, core_cnt);
-	this_cpu_write(arch_const_cycles_prev, const_cnt);
+	if (unlikely(!scale))
+		return;
+
+	if (cpus) {
+		for_each_cpu(cpu, cpus)
+			if (per_cpu(inv_source, cpu) == FREQ_INV_CPUFREQ)
+				per_cpu(freq_scale, cpu) = scale;
+	} else {
+		this_cpu_write(freq_scale, scale);
+	}
 }
 
 #ifdef CONFIG_ACPI_CPPC_LIB
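To illustrate how both paths land on the same scale factor (again with the illustrative 2 GHz / 25 MHz numbers from the earlier example, not values from the patch): on the counter path, a CPU running at 1 GHz over a 4 ms tick sees core_delta = 4,000,000 and const_delta = 100,000, so scale = 4,000,000 * 1024² / (100,000 * 81920) = 512; on the cpufreq path, cur_freq = 1,000,000 (kHz) with const_delta = 1 and arch_max_freq_scale = 2,048,000,000 gives scale = 1,000,000 * 1024² / 2,048,000,000 = 512, i.e. half of SCHED_CAPACITY_SCALE in both cases.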
......