Commit c5f12fdb authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 apic updates from Thomas Gleixner:

 - Cleanup the apic IPI implementation by removing duplicated code and
   consolidating the functions into the APIC core.

 - Implement a safe variant of the IPI broadcast mode. Contrary to
   earlier attempts this uses the core tracking of which CPUs have been
   brought online at least once so that a broadcast does not end up in
   some dead end in BIOS/SMM code when the CPU is still waiting for
   init. Once all CPUs have been brought up once, IPI broadcasting is
   enabled. Before that regular one by one IPIs are issued.

 - Drop the paravirt CR8 related functions as they have no user anymore

 - Initialize the APIC TPR to block interrupt 16-31 as they are reserved
   for CPU exceptions and should never be raised by any well behaving
   device.

 - Emit a warning when vector space exhaustion breaks the admin set
   affinity of an interrupt.

 - Make sure to use the NMI fallback when shutdown via reboot vector IPI
   fails. The original code had conditions which prevent the code path
   to be reached.

 - Annotate various APIC config variables as RO after init.

[ The ipi broadcase change came in earlier through the cpu hotplug
  branch, but I left the explanation in the commit message since it was
  shared between the two different branches    - Linus ]

* 'x86-apic-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (28 commits)
  x86/apic/vector: Warn when vector space exhaustion breaks affinity
  x86/apic: Annotate global config variables as "read-only after init"
  x86/apic/x2apic: Implement IPI shorthands support
  x86/apic/flat64: Remove the IPI shorthand decision logic
  x86/apic: Share common IPI helpers
  x86/apic: Remove the shorthand decision logic
  x86/smp: Enhance native_send_call_func_ipi()
  x86/smp: Move smp_function_call implementations into IPI code
  x86/apic: Provide and use helper for send_IPI_allbutself()
  x86/apic: Add static key to Control IPI shorthands
  x86/apic: Move no_ipi_broadcast() out of 32bit
  x86/apic: Add NMI_VECTOR wait to IPI shorthand
  x86/apic: Remove dest argument from __default_send_IPI_shortcut()
  x86/hotplug: Silence APIC and NMI when CPU is dead
  x86/cpu: Move arch_smt_update() to a neutral place
  x86/apic/uv: Make x2apic_extra_bits static
  x86/apic: Consolidate the apic local headers
  x86/apic: Move apic_flat_64 header into apic directory
  x86/apic: Move ipi header into apic directory
  x86/apic: Cleanup the include maze
  ...
parents a572ba63 743dac49
......@@ -136,6 +136,7 @@ extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void apic_soft_disable(void);
extern void lapic_shutdown(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
......@@ -176,6 +177,8 @@ extern void lapic_online(void);
extern void lapic_offline(void);
extern bool apic_needs_pit(void);
extern void apic_send_IPI_allbutself(unsigned int vector);
#else /* !CONFIG_X86_LOCAL_APIC */
static inline void lapic_shutdown(void) { }
#define local_apic_timer_c2_ok 1
......@@ -465,12 +468,6 @@ static inline unsigned default_get_apic_id(unsigned long x)
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
#ifdef CONFIG_X86_64
extern void apic_send_IPI_self(int vector);
DECLARE_PER_CPU(int, x2apic_extra_bits);
#endif
extern void generic_bigsmp_probe(void);
#ifdef CONFIG_X86_LOCAL_APIC
......@@ -506,8 +503,10 @@ extern int default_check_phys_apicid_present(int phys_apicid);
#ifdef CONFIG_SMP
bool apic_id_is_primary_thread(unsigned int id);
void apic_smt_update(void);
#else
static inline bool apic_id_is_primary_thread(unsigned int id) { return false; }
static inline void apic_smt_update(void) { }
#endif
extern void irq_enter(void);
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_APIC_FLAT_64_H
#define _ASM_X86_APIC_FLAT_64_H
extern void flat_init_apic_ldr(void);
#endif
......@@ -18,4 +18,6 @@ int ppro_with_ram_bug(void);
static inline int ppro_with_ram_bug(void) { return 0; }
#endif
extern void cpu_bugs_smt_update(void);
#endif /* _ASM_X86_BUGS_H */
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _ASM_X86_IPI_H
#define _ASM_X86_IPI_H
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Copyright 2004 James Cleverdon, IBM.
*
* Generic APIC InterProcessor Interrupt code.
*
* Moved to include file by James Cleverdon from
* arch/x86-64/kernel/smp.c
*
* Copyrights from kernel/smp.c:
*
* (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
* (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
* (c) 2002,2003 Andi Kleen, SuSE Labs.
*/
#include <asm/hw_irq.h>
#include <asm/apic.h>
#include <asm/smp.h>
/*
* the following functions deal with sending IPIs between CPUs.
*
* We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
*/
static inline unsigned int __prepare_ICR(unsigned int shortcut, int vector,
unsigned int dest)
{
unsigned int icr = shortcut | dest;
switch (vector) {
default:
icr |= APIC_DM_FIXED | vector;
break;
case NMI_VECTOR:
icr |= APIC_DM_NMI;
break;
}
return icr;
}
static inline int __prepare_ICR2(unsigned int mask)
{
return SET_APIC_DEST_FIELD(mask);
}
static inline void __xapic_wait_icr_idle(void)
{
while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
cpu_relax();
}
void __default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest);
/*
* This is used to send an IPI with no shorthand notation (the destination is
* specified in bits 56 to 63 of the ICR).
*/
void __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest);
extern void default_send_IPI_single(int cpu, int vector);
extern void default_send_IPI_single_phys(int cpu, int vector);
extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
int vector);
/* Avoid include hell */
#define NMI_VECTOR 0x02
extern int no_broadcast;
static inline void __default_local_send_IPI_allbutself(int vector)
{
if (no_broadcast || vector == NMI_VECTOR)
apic->send_IPI_mask_allbutself(cpu_online_mask, vector);
else
__default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical);
}
static inline void __default_local_send_IPI_all(int vector)
{
if (no_broadcast || vector == NMI_VECTOR)
apic->send_IPI_mask(cpu_online_mask, vector);
else
__default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical);
}
#ifdef CONFIG_X86_32
extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_mask_logical(const struct cpumask *mask,
int vector);
extern void default_send_IPI_allbutself(int vector);
extern void default_send_IPI_all(int vector);
extern void default_send_IPI_self(int vector);
#endif
#endif
#endif /* _ASM_X86_IPI_H */
......@@ -139,18 +139,6 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
return PVOP_CALL0(unsigned long, cpu.read_cr8);
}
static inline void write_cr8(unsigned long x)
{
PVOP_VCALL1(cpu.write_cr8, x);
}
#endif
static inline void arch_safe_halt(void)
{
PVOP_VCALL0(irq.safe_halt);
......
......@@ -119,11 +119,6 @@ struct pv_cpu_ops {
void (*write_cr4)(unsigned long);
#ifdef CONFIG_X86_64
unsigned long (*read_cr8)(void);
void (*write_cr8)(unsigned long);
#endif
/* Segment descriptor handling */
void (*load_tr_desc)(void);
void (*load_gdt)(const struct desc_ptr *);
......
......@@ -143,6 +143,7 @@ void play_dead_common(void);
void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
......
......@@ -73,20 +73,6 @@ static inline unsigned long native_read_cr4(void)
void native_write_cr4(unsigned long val);
#ifdef CONFIG_X86_64
static inline unsigned long native_read_cr8(void)
{
unsigned long cr8;
asm volatile("movq %%cr8,%0" : "=r" (cr8));
return cr8;
}
static inline void native_write_cr8(unsigned long val)
{
asm volatile("movq %0,%%cr8" :: "r" (val) : "memory");
}
#endif
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
static inline u32 rdpkru(void)
{
......@@ -200,16 +186,6 @@ static inline void wbinvd(void)
#ifdef CONFIG_X86_64
static inline unsigned long read_cr8(void)
{
return native_read_cr8();
}
static inline void write_cr8(unsigned long x)
{
native_write_cr8(x);
}
static inline void load_gs_index(unsigned selector)
{
native_load_gs_index(selector);
......
......@@ -34,7 +34,7 @@ struct saved_context {
*/
unsigned long kernelmode_gs_base, usermode_gs_base, fs_base;
unsigned long cr0, cr2, cr3, cr4, cr8;
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
struct saved_msrs saved_msrs;
......
......@@ -65,10 +65,10 @@ unsigned int num_processors;
unsigned disabled_cpus;
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
u8 boot_cpu_apic_version;
u8 boot_cpu_apic_version __ro_after_init;
/*
* The highest APIC ID seen during enumeration.
......@@ -85,13 +85,13 @@ physid_mask_t phys_cpu_present_map;
* disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
* avoid undefined behaviour caused by sending INIT from AP to BSP.
*/
static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;
/*
* This variable controls which CPUs receive external NMIs. By default,
* external NMIs are delivered only to the BSP.
*/
static int apic_extnmi = APIC_EXTNMI_BSP;
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;
/*
* Map cpu index to physical APIC ID
......@@ -114,7 +114,7 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
static int enabled_via_apicbase __ro_after_init;
/*
* Handle interrupt mode configuration register (IMCR).
......@@ -172,23 +172,23 @@ static __init int setup_apicpmtimer(char *s)
__setup("apicpmtimer", setup_apicpmtimer);
#endif
unsigned long mp_lapic_addr;
int disable_apic;
unsigned long mp_lapic_addr __ro_after_init;
int disable_apic __ro_after_init;
/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;
/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok;
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
/*
* Debug level, exported for io_apic.c
*/
int apic_verbosity;
int apic_verbosity __ro_after_init;
int pic_mode;
int pic_mode __ro_after_init;
/* Have we found an MP table */
int smp_found_config;
int smp_found_config __ro_after_init;
static struct resource lapic_resource = {
.name = "Local APIC",
......@@ -199,7 +199,7 @@ unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
static unsigned long apic_phys;
static unsigned long apic_phys __ro_after_init;
/*
* Get the LAPIC version
......@@ -1224,25 +1224,38 @@ void clear_local_APIC(void)
}
/**
* disable_local_APIC - clear and disable the local APIC
* apic_soft_disable - Clears and software disables the local APIC on hotplug
*
* Contrary to disable_local_APIC() this does not touch the enable bit in
* MSR_IA32_APICBASE. Clearing that bit on systems based on the 3 wire APIC
* bus would require a hardware reset as the APIC would lose track of bus
* arbitration. On systems with FSB delivery APICBASE could be disabled,
* but it has to be guaranteed that no interrupt is sent to the APIC while
* in that state and it's not clear from the SDM whether it still responds
* to INIT/SIPI messages. Stay on the safe side and use software disable.
*/
void disable_local_APIC(void)
void apic_soft_disable(void)
{
unsigned int value;
/* APIC hasn't been mapped yet */
if (!x2apic_mode && !apic_phys)
return;
u32 value;
clear_local_APIC();
/*
* Disable APIC (implies clearing of registers
* for 82489DX!).
*/
/* Soft disable APIC (implies clearing of registers for 82489DX!). */
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
}
/**
* disable_local_APIC - clear and disable the local APIC
*/
void disable_local_APIC(void)
{
/* APIC hasn't been mapped yet */
if (!x2apic_mode && !apic_phys)
return;
apic_soft_disable();
#ifdef CONFIG_X86_32
/*
......@@ -1307,7 +1320,7 @@ void __init sync_Arb_IDs(void)
APIC_INT_LEVELTRIG | APIC_DM_INIT);
}
enum apic_intr_mode_id apic_intr_mode;
enum apic_intr_mode_id apic_intr_mode __ro_after_init;
static int __init apic_intr_mode_select(void)
{
......@@ -1495,54 +1508,72 @@ static void lapic_setup_esr(void)
oldvalue, value);
}
static void apic_pending_intr_clear(void)
#define APIC_IR_REGS APIC_ISR_NR
#define APIC_IR_BITS (APIC_IR_REGS * 32)
#define APIC_IR_MAPSIZE (APIC_IR_BITS / BITS_PER_LONG)
union apic_ir {
unsigned long map[APIC_IR_MAPSIZE];
u32 regs[APIC_IR_REGS];
};
static bool apic_check_and_ack(union apic_ir *irr, union apic_ir *isr)
{
long long max_loops = cpu_khz ? cpu_khz : 1000000;
unsigned long long tsc = 0, ntsc;
unsigned int queued;
unsigned long value;
int i, j, acked = 0;
int i, bit;
/* Read the IRRs */
for (i = 0; i < APIC_IR_REGS; i++)
irr->regs[i] = apic_read(APIC_IRR + i * 0x10);
/* Read the ISRs */
for (i = 0; i < APIC_IR_REGS; i++)
isr->regs[i] = apic_read(APIC_ISR + i * 0x10);
if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
/*
* After a crash, we no longer service the interrupts and a pending
* interrupt from previous kernel might still have ISR bit set.
*
* Most probably by now CPU has serviced that pending interrupt and
* it might not have done the ack_APIC_irq() because it thought,
* interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
* does not clear the ISR bit and cpu thinks it has already serivced
* the interrupt. Hence a vector might get locked. It was noticed
* for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
* If the ISR map is not empty. ACK the APIC and run another round
* to verify whether a pending IRR has been unblocked and turned
* into a ISR.
*/
do {
queued = 0;
for (i = APIC_ISR_NR - 1; i >= 0; i--)
queued |= apic_read(APIC_IRR + i*0x10);
for (i = APIC_ISR_NR - 1; i >= 0; i--) {
value = apic_read(APIC_ISR + i*0x10);
for_each_set_bit(j, &value, 32) {
ack_APIC_irq();
acked++;
}
}
if (acked > 256) {
pr_err("LAPIC pending interrupts after %d EOI\n", acked);
break;
}
if (queued) {
if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
max_loops = (long long)cpu_khz << 10;
max_loops -= ntsc - tsc;
} else {
max_loops--;
}
}
} while (queued && max_loops > 0);
WARN_ON(max_loops <= 0);
if (!bitmap_empty(isr->map, APIC_IR_BITS)) {
/*
* There can be multiple ISR bits set when a high priority
* interrupt preempted a lower priority one. Issue an ACK
* per set bit.
*/
for_each_set_bit(bit, isr->map, APIC_IR_BITS)
ack_APIC_irq();
return true;
}
return !bitmap_empty(irr->map, APIC_IR_BITS);
}
/*
* After a crash, we no longer service the interrupts and a pending
* interrupt from previous kernel might still have ISR bit set.
*
* Most probably by now the CPU has serviced that pending interrupt and it
* might not have done the ack_APIC_irq() because it thought, interrupt
* came from i8259 as ExtInt. LAPIC did not get EOI so it does not clear
* the ISR bit and cpu thinks it has already serivced the interrupt. Hence
* a vector might get locked. It was noticed for timer irq (vector
* 0x31). Issue an extra EOI to clear ISR.
*
* If there are pending IRR bits they turn into ISR bits after a higher
* priority ISR bit has been acked.
*/
static void apic_pending_intr_clear(void)
{
union apic_ir irr, isr;
unsigned int i;
/* 512 loops are way oversized and give the APIC a chance to obey. */
for (i = 0; i < 512; i++) {
if (!apic_check_and_ack(&irr, &isr))
return;
}
/* Dump the IRR/ISR content if that failed */
pr_warn("APIC: Stale IRR: %256pb ISR: %256pb\n", irr.map, isr.map);
}
/**
......@@ -1559,12 +1590,19 @@ static void setup_local_APIC(void)
int logical_apicid, ldr_apicid;
#endif
if (disable_apic) {
disable_ioapic_support();
return;
}
/*
* If this comes from kexec/kcrash the APIC might be enabled in
* SPIV. Soft disable it before doing further initialization.
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && apic->disable_esr) {
......@@ -1574,8 +1612,6 @@ static void setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
perf_events_lapic_init();
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
......@@ -1603,13 +1639,17 @@ static void setup_local_APIC(void)
#endif
/*
* Set Task Priority to 'accept all'. We never change this
* later on.
* Set Task Priority to 'accept all except vectors 0-31'. An APIC
* vector in the 16-31 range could be delivered if TPR == 0, but we
* would think it's an exception and terrible things will happen. We
* never change this later on.
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
value |= 0x10;
apic_write(APIC_TASKPRI, value);
/* Clear eventually stale ISR/IRR bits */
apic_pending_intr_clear();
/*
......@@ -1656,6 +1696,8 @@ static void setup_local_APIC(void)
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
perf_events_lapic_init();
/*
* Set up LVT0, LVT1:
*
......
......@@ -8,21 +8,14 @@
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
*/
#include <linux/acpi.h>
#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/ctype.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/acpi.h>
#include <asm/smp.h>
#include <asm/ipi.h>
#include <asm/apic.h>
#include <asm/apic_flat_64.h>
#include <asm/jailhouse_para.h>
#include <asm/apic.h>
#include "local.h"
static struct apic apic_physflat;
static struct apic apic_flat;
......@@ -83,35 +76,6 @@ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
_flat_send_IPI_mask(mask, vector);
}
static void flat_send_IPI_allbutself(int vector)
{
int cpu = smp_processor_id();
if (IS_ENABLED(CONFIG_HOTPLUG_CPU) || vector == NMI_VECTOR) {
if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
unsigned long mask = cpumask_bits(cpu_online_mask)[0];
if (cpu < BITS_PER_LONG)
__clear_bit(cpu, &mask);
_flat_send_IPI_mask(mask, vector);
}
} else if (num_online_cpus() > 1) {
__default_send_IPI_shortcut(APIC_DEST_ALLBUT,
vector, apic->dest_logical);
}
}
static void flat_send_IPI_all(int vector)
{
if (vector == NMI_VECTOR) {
flat_send_IPI_mask(cpu_online_mask, vector);
} else {