// SPDX-License-Identifier: GPL-2.0-only
/*
 *	Local APIC handling, local APIC timers
 *
 *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively.
 *	Maciej W. Rozycki	:	Various updates and fixes.
 *	Mikael Pettersson	:	Power Management for UP-APIC.
 *	Pavel Machek and
 *	Mikael Pettersson	:	PM converted to driver model.
 */

#include <linux/perf_event.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/memblock.h>
#include <linux/ftrace.h>
#include <linux/ioport.h>
#include <linux/export.h>
#include <linux/syscore_ops.h>
#include <linux/delay.h>
#include <linux/timex.h>
#include <linux/i8253.h>
#include <linux/dmar.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>

#include <asm/trace/irq_vectors.h>
#include <asm/irq_remapping.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <linux/atomic.h>
#include <asm/mpspec.h>
#include <asm/i8259.h>
#include <asm/proto.h>
#include <asm/traps.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
#include <asm/mtrr.h>
#include <asm/time.h>
#include <asm/smp.h>
#include <asm/mce.h>
#include <asm/tsc.h>
#include <asm/hypervisor.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
#include <asm/irq_regs.h>
63
/* Number of processors successfully enumerated so far (BSP + APs). */
unsigned int num_processors;

/* Number of CPUs that were present but not brought up. */
unsigned disabled_cpus;

/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid __ro_after_init = -1U;
EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);

/* Local APIC version of the boot CPU, cached for early use. */
u8 boot_cpu_apic_version __ro_after_init;

/*
 * The highest APIC ID seen during enumeration.
 */
static unsigned int max_physical_apicid;

/*
 * Bitmask of physically existing CPUs:
 */
physid_mask_t phys_cpu_present_map;

/*
 * Processor to be disabled specified by kernel parameter
 * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
 * avoid undefined behaviour caused by sending INIT from AP to BSP.
 */
static unsigned int disabled_cpu_apicid __ro_after_init = BAD_APICID;

/*
 * This variable controls which CPUs receive external NMIs.  By default,
 * external NMIs are delivered only to the BSP.
 */
static int apic_extnmi __ro_after_init = APIC_EXTNMI_BSP;

/*
 * Map cpu index to physical APIC ID
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
/* Map cpu index to the ACPI processor UID (distinct from the APIC ID). */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, U32_MAX);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
105

Yinghai Lu's avatar
Yinghai Lu committed
106
#ifdef CONFIG_X86_32

/*
 * On x86_32, the mapping between cpu and logical apicid may vary
 * depending on apic in use.  The following early percpu variable is
 * used for the mapping.  This is where the behaviors of x86_64 and 32
 * actually diverge.  Let's keep it ugly for now.
 */
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);

/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase __ro_after_init;
Yinghai Lu's avatar
Yinghai Lu committed
118

119
120
121
122
123
124
125
126
/*
 * Handle interrupt mode configuration register (IMCR).
 * This register controls whether the interrupt signals
 * that reach the BSP come from the master PIC or from the
 * local APIC. Before entering Symmetric I/O Mode, either
 * the BIOS or the operating system must switch out of
 * PIC Mode by changing the IMCR.
 */
127
/* Switch IMCR routing so interrupts reach the BSP via the local APIC. */
static inline void imcr_pic_to_apic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go through APIC */
	outb(0x01, 0x23);
}

135
/* Switch IMCR routing back so interrupts reach the BSP directly (PIC mode). */
static inline void imcr_apic_to_pic(void)
{
	/* select IMCR register */
	outb(0x70, 0x22);
	/* NMI and 8259 INTR go directly to BSP */
	outb(0x00, 0x23);
}
Yinghai Lu's avatar
Yinghai Lu committed
142
143
#endif

144
145
146
147
148
149
/*
 * Knob to control our willingness to enable the local APIC.
 *
 * +1=force-enable
 */
static int force_enable_local_apic __initdata;
150

151
152
153
154
155
/*
 * APIC command line parameters:
 *   "lapic"		  (32-bit only) force-enable the local APIC
 *   "lapic=notscdeadline" disable use of the TSC-deadline timer mode
 */
static int __init parse_lapic(char *arg)
{
	if (IS_ENABLED(CONFIG_X86_32) && !arg)
		force_enable_local_apic = 1;
	else if (arg && !strncmp(arg, "notscdeadline", 13))
		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	return 0;
}
early_param("lapic", parse_lapic);

Yinghai Lu's avatar
Yinghai Lu committed
164
#ifdef CONFIG_X86_64
165
static int apic_calibrate_pmtmr __initdata;
Yinghai Lu's avatar
Yinghai Lu committed
166
167
168
169
170
171
172
173
174
static __init int setup_apicpmtimer(char *s)
{
	apic_calibrate_pmtmr = 1;
	notsc_setup(NULL);
	return 0;
}
__setup("apicpmtimer", setup_apicpmtimer);
#endif

175
176
/* Physical address of the local APIC registers (from MP/ACPI tables). */
unsigned long mp_lapic_addr __ro_after_init;
/* Non-zero when the local APIC is administratively disabled. */
int disable_apic __ro_after_init;

/* Disable local APIC timer from the kernel commandline or via dmi quirk */
static int disable_apic_timer __initdata;

/* Local APIC timer works in C2 */
int local_apic_timer_c2_ok __ro_after_init;
EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);

/*
 * Debug level, exported for io_apic.c
 */
int apic_verbosity __ro_after_init;

/* Non-zero when the platform is in legacy PIC mode. */
int pic_mode __ro_after_init;

/* Have we found an MP table */
int smp_found_config __ro_after_init;

/* Resource used to claim the local APIC MMIO range in the iomem tree. */
static struct resource lapic_resource = {
	.name = "Local APIC",
	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
};

/* APIC timer ticks per jiffy; filled in by calibration or the platform. */
unsigned int lapic_timer_period = 0;

static void apic_pm_activate(void);

/* Cached physical base address of the local APIC. */
static unsigned long apic_phys __ro_after_init;
203

204
205
206
207
/*
 * Get the LAPIC version
 */
static inline int lapic_get_version(void)
{
	/* Version field of the Local APIC Version Register. */
	return GET_APIC_VERSION(apic_read(APIC_LVR));
}

212
/*
 * Check, if the APIC is integrated or a separate chip
 * (non-zero: integrated on-chip APIC; zero: discrete 82489DX).
 */
static inline int lapic_is_integrated(void)
{
	return APIC_INTEGRATED(lapic_get_version());
}

/*
221
 * Check, whether this is a modern or a first generation APIC
222
 */
223
static int modern_apic(void)
224
{
225
226
227
228
	/* AMD systems use old APIC versions, so check the CPU */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86 >= 0xf)
		return 1;
Pu Wen's avatar
Pu Wen committed
229
230
231
232
233

	/* Hygon systems use modern APIC */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
		return 1;

234
	return lapic_get_version() >= 0x14;
235
236
}

237
/*
 * Switch to the no-op APIC driver: right after this call apic becomes
 * NOOP driven, so apic->write/read doesn't do anything.
 */
static void __init apic_disable(void)
{
	pr_info("APIC: switched to apic NOOP\n");
	apic = &apic_noop;
}

Yinghai Lu's avatar
Yinghai Lu committed
247
/* Busy-wait (unbounded) until the ICR delivery-status bit reports idle. */
void native_apic_wait_icr_idle(void)
{
	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
		cpu_relax();
}

Yinghai Lu's avatar
Yinghai Lu committed
253
/*
 * Bounded variant of native_apic_wait_icr_idle(): poll the ICR busy bit
 * up to 1000 times with 100us delays (~100ms total) and give up instead
 * of spinning forever.
 *
 * Returns 0 when the ICR went idle, APIC_ICR_BUSY on timeout.
 */
u32 native_safe_apic_wait_icr_idle(void)
{
	u32 send_status;
	int timeout;

	timeout = 0;
	do {
		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
		if (!send_status)
			break;
		/* Account the retry for /proc/interrupts style statistics. */
		inc_irq_stat(icr_read_retry_count);
		udelay(100);
	} while (timeout++ < 1000);

	return send_status;
}

Yinghai Lu's avatar
Yinghai Lu committed
270
/*
 * Write the 64-bit ICR as two 32-bit halves, destination (ICR2) first.
 * Interrupts are disabled so the two register writes cannot be torn
 * apart by an interrupt that also touches the ICR.
 */
void native_apic_icr_write(u32 low, u32 id)
{
	unsigned long flags;

	local_irq_save(flags);
	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
	apic_write(APIC_ICR, low);
	local_irq_restore(flags);
}

Yinghai Lu's avatar
Yinghai Lu committed
280
/* Read both 32-bit ICR halves and combine them into one 64-bit value. */
u64 native_apic_icr_read(void)
{
	u32 icr1, icr2;

	icr2 = apic_read(APIC_ICR2);
	icr1 = apic_read(APIC_ICR);

	/* ICR2 holds the high half (destination field). */
	return icr1 | ((u64)icr2 << 32);
}

290
291
292
293
294
295
296
297
298
299
#ifdef CONFIG_X86_32
/**
 * get_physical_broadcast - Get number of physical broadcast IDs
 *
 * Modern APICs use the full 8-bit destination (0xff), first generation
 * parts only a 4-bit one (0xf).
 */
int get_physical_broadcast(void)
{
	if (modern_apic())
		return 0xff;

	return 0xf;
}
#endif

300
301
302
/**
 * lapic_get_maxlvt - get the maximum number of local vector table entries
 */
int lapic_get_maxlvt(void)
{
	/*
	 * - we always have APIC integrated on 64bit mode
	 * - 82489DXs do not report # of LVT entries
	 */
	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
}

312
313
314
315
/*
 * Local APIC timer
 */

/* Clock divisor */
#define APIC_DIVISOR 16
/* TSC-deadline deltas are programmed in units of TSC_DIVISOR TSC cycles. */
#define TSC_DIVISOR  8
319

320
321
322
323
324
325
326
327
328
329
330
/*
 * This function sets up the local APIC timer, with a timeout of
 * 'clocks' APIC bus clock. During calibration we actually call
 * this function twice on the boot CPU, once with a bogus timeout
 * value, second time for real. The other (noncalibrating) CPUs
 * call this function only once, with the real, calibrated value.
 *
 * We do reads before writes even if unnecessary, to get around the
 * P5 APIC double write bug.
 *
 * @clocks: initial count in APIC bus clocks (pre-division)
 * @oneshot: 0 = periodic mode, non-zero = one-shot (or TSC-deadline
 *           when the CPU supports it)
 * @irqen:  0 = keep the LVT entry masked, non-zero = unmask it
 */
static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
{
	unsigned int lvtt_value, tmp_value;

	lvtt_value = LOCAL_TIMER_VECTOR;
	if (!oneshot)
		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;

	/* 82489DX needs the timer base encoded in the LVT entry. */
	if (!lapic_is_integrated())
		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);

	if (!irqen)
		lvtt_value |= APIC_LVT_MASKED;

	apic_write(APIC_LVTT, lvtt_value);

	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
		/*
		 * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode,
		 * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized.
		 * According to Intel, MFENCE can do the serialization here.
		 */
		asm volatile("mfence" : : : "memory");

		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
		return;
	}

	/*
	 * Divide PICLK by 16
	 */
	tmp_value = apic_read(APIC_TDCR);
	apic_write(APIC_TDCR,
		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
		APIC_TDR_DIV_16);

	/* Periodic mode: arm the initial count now. */
	if (!oneshot)
		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
}

372
/*
373
 * Setup extended LVT, AMD specific
374
 *
375
376
377
378
379
 * Software should use the LVT offsets the BIOS provides.  The offsets
 * are determined by the subsystems using it like those for MCE
 * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
 * are supported. Beginning with family 10h at least 4 offsets are
 * available.
380
 *
381
382
383
384
385
386
387
388
389
 * Since the offsets must be consistent for all cores, we keep track
 * of the LVT offsets in software and reserve the offset for the same
 * vector also to be used on other cores. An offset is freed by
 * setting the entry to APIC_EILVT_MASKED.
 *
 * If the BIOS is right, there should be no conflicts. Otherwise a
 * "[Firmware Bug]: ..." error message is generated. However, if
 * software does not properly determines the offsets, it is not
 * necessarily a BIOS bug.
390
 */
391

392
393
394
395
396
397
398
399
400
401
402
/* Per-offset record of the EILVT entry value reserved system-wide. */
static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];

/* May the EILVT entry holding @old be reprogrammed to @new? */
static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
{
	/* A masked entry can always be reprogrammed. */
	if (old & APIC_EILVT_MASKED)
		return 1;
	/* Masking an entry is always allowed. */
	if (new == APIC_EILVT_MASKED)
		return 1;
	/* Otherwise only the identical vector is acceptable. */
	return (new & ~APIC_EILVT_MASKED) == old;
}

/*
 * Atomically reserve EILVT @offset for @new (vector plus flags).
 * Returns @new on success, or the conflicting value already recorded
 * for this offset. A lost cmpxchg race simply retries with the
 * freshly-read value.
 */
static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
{
	unsigned int rsvd, vector;

	if (offset >= APIC_EILVT_NR_MAX)
		return ~0;

	rsvd = atomic_read(&eilvt_offsets[offset]);
	do {
		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
		if (vector && !eilvt_entry_is_changeable(vector, new))
			/* may not change if vectors are different */
			return rsvd;
		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
	} while (rsvd != new);

	/* Log when an already-assigned vector is being replaced. */
	rsvd &= ~APIC_EILVT_MASKED;
	if (rsvd && rsvd != vector)
		pr_info("LVT offset %d assigned for vector 0x%02x\n",
			offset, rsvd);

	return new;
}

/*
 * If mask=1, the LVT entry does not generate interrupts while mask=0
 * enables the vector. See also the BKDGs. Must be called with
 * preemption disabled.
 *
 * Returns 0 on success, -EINVAL when the offset is already reserved for
 * a different vector on another CPU, -EBUSY when this CPU's register
 * holds a conflicting value that may not be changed.
 */
int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
{
	unsigned long reg = APIC_EILVTn(offset);
	unsigned int new, old, reserved;

	new = (mask << 16) | (msg_type << 8) | vector;
	old = apic_read(reg);
	reserved = reserve_eilvt_offset(offset, new);

	/* Cross-CPU conflict: offset reserved for a different vector. */
	if (reserved != new) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on another cpu\n",
		       smp_processor_id(), reg, offset, new, reserved);
		return -EINVAL;
	}

	/* Local conflict: the hardware register holds an immutable value. */
	if (!eilvt_entry_is_changeable(old, new)) {
		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
		       "vector 0x%x, but the register is already in use for "
		       "vector 0x%x on this cpu\n",
		       smp_processor_id(), reg, offset, new, old);
		return -EBUSY;
	}

	apic_write(reg, new);

	return 0;
}
EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
461

462
463
464
465
466
/*
 * Program the next event, relative to now
 */
static int lapic_next_event(unsigned long delta,
			    struct clock_event_device *evt)
{
	/* Writing the initial-count register (re)arms the timer. */
	apic_write(APIC_TMICT, delta);
	return 0;
}

472
473
474
475
476
static int lapic_next_deadline(unsigned long delta,
			       struct clock_event_device *evt)
{
	u64 tsc;

477
	tsc = rdtsc();
478
479
480
481
	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
	return 0;
}

482
/* clockevents shutdown callback: mask the timer LVT and clear the count. */
static int lapic_timer_shutdown(struct clock_event_device *evt)
{
	unsigned int v;

	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	v = apic_read(APIC_LVTT);
	v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
	apic_write(APIC_LVTT, v);
	apic_write(APIC_TMICT, 0);
	return 0;
}
496

497
498
499
500
501
502
/*
 * Common state-change helper: (re)program the LVT timer entry for
 * periodic or one-shot mode with interrupts enabled.
 */
static inline int
lapic_timer_set_periodic_oneshot(struct clock_event_device *evt, bool oneshot)
{
	/* Lapic used as dummy for broadcast ? */
	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
		return 0;

	__setup_APIC_LVTT(lapic_timer_period, oneshot, 1);
	return 0;
}

/* clockevents set_state_periodic callback. */
static int lapic_timer_set_periodic(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, false);
}

/* clockevents set_state_oneshot callback. */
static int lapic_timer_set_oneshot(struct clock_event_device *evt)
{
	return lapic_timer_set_periodic_oneshot(evt, true);
}

Linus Torvalds's avatar
Linus Torvalds committed
518
/*
 * Local APIC timer broadcast function: kick the timer vector on all
 * CPUs in @mask via IPI (SMP only).
 */
static void lapic_timer_broadcast(const struct cpumask *mask)
{
#ifdef CONFIG_SMP
	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
#endif
}
Linus Torvalds's avatar
Linus Torvalds committed
527

528
529
530
531
532

/*
 * The local apic timer can be used for any function which is CPU local.
 *
 * Starts out rated 100 and flagged CLOCK_EVT_FEAT_DUMMY; successful
 * calibration clears the DUMMY flag, and setup_APIC_timer() raises the
 * rating / drops C3STOP on CPUs with an always-running timer (ARAT).
 */
static struct clock_event_device lapic_clockevent = {
	.name				= "lapic",
	.features			= CLOCK_EVT_FEAT_PERIODIC |
					  CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP
					  | CLOCK_EVT_FEAT_DUMMY,
	.shift				= 32,
	.set_state_shutdown		= lapic_timer_shutdown,
	.set_state_periodic		= lapic_timer_set_periodic,
	.set_state_oneshot		= lapic_timer_set_oneshot,
	.set_state_oneshot_stopped	= lapic_timer_shutdown,
	.set_next_event			= lapic_next_event,
	.broadcast			= lapic_timer_broadcast,
	.rating				= 100,
	.irq				= -1,
};

/* Per-CPU copy of lapic_clockevent, registered by setup_APIC_timer(). */
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

549
550
551
552
553
554
555
556
/*
 * Entry helpers for deadline_match[]: _FUNC stores a callback computing
 * the required microcode revision from the stepping, _REV stores the
 * revision directly. apic_check_deadline_errata() distinguishes the two
 * by the sign of driver_data (function pointers have the MSB set).
 */
#define DEADLINE_MODEL_MATCH_FUNC(model, func)	\
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&func }

#define DEADLINE_MODEL_MATCH_REV(model, rev)	\
	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)rev }

static u32 hsx_deadline_rev(void)
{
557
	switch (boot_cpu_data.x86_stepping) {
558
559
560
561
562
563
564
565
566
	case 0x02: return 0x3a; /* EP */
	case 0x04: return 0x0f; /* EX */
	}

	return ~0U;
}

static u32 bdx_deadline_rev(void)
{
567
	switch (boot_cpu_data.x86_stepping) {
568
569
570
571
572
573
574
575
576
	case 0x02: return 0x00000011;
	case 0x03: return 0x0700000e;
	case 0x04: return 0x0f00000c;
	case 0x05: return 0x0e000003;
	}

	return ~0U;
}

577
578
static u32 skx_deadline_rev(void)
{
579
	switch (boot_cpu_data.x86_stepping) {
580
581
582
583
	case 0x03: return 0x01000136;
	case 0x04: return 0x02000014;
	}

584
585
586
	if (boot_cpu_data.x86_stepping > 4)
		return 0;

587
588
589
	return ~0U;
}

590
591
592
/*
 * CPU models whose TSC-deadline timer needs at least the given microcode
 * revision (consumed by apic_check_deadline_errata()).
 */
static const struct x86_cpu_id deadline_match[] = {
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_D,	bdx_deadline_rev),
	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL,		0x22),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_L,	0x20),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL,	0x25),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_G,	0x17),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_L,	0xb2),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE,		0xb2),

	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE_L,	0x52),
	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_KABYLAKE,		0x52),

	{},
};

/*
 * Disable the TSC-deadline timer feature on CPUs listed in
 * deadline_match[] whose microcode is older than the required revision.
 * Skipped entirely when running under a hypervisor.
 */
static void apic_check_deadline_errata(void)
{
	const struct x86_cpu_id *m;
	u32 rev;

	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return;

	m = x86_match_cpu(deadline_match);
	if (!m)
		return;

	/*
	 * Function pointers will have the MSB set due to address layout,
	 * immediate revisions will not.
	 */
	if ((long)m->driver_data < 0)
		rev = ((u32 (*)(void))(m->driver_data))();
	else
		rev = (u32)m->driver_data;

	if (boot_cpu_data.microcode >= rev)
		return;

	setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
	pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
	       "please update microcode to version: 0x%x (or later)\n", rev);
}

642
/*
 * Setup the local APIC timer for this CPU. Copy the initialized values
 * of the boot CPU and register the clock event in the framework.
 */
static void setup_APIC_timer(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	/* ARAT: timer keeps running in deep C-states. */
	if (this_cpu_has(X86_FEATURE_ARAT)) {
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
		/* Make LAPIC timer preferrable over percpu HPET */
		lapic_clockevent.rating = 150;
	}

	memcpy(levt, &lapic_clockevent, sizeof(*levt));
	levt->cpumask = cpumask_of(smp_processor_id());

	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
		/* Deadline mode: one-shot only, driven off the TSC. */
		levt->name = "lapic-deadline";
		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
				    CLOCK_EVT_FEAT_DUMMY);
		levt->set_next_event = lapic_next_deadline;
		clockevents_config_and_register(levt,
						tsc_khz * (1000 / TSC_DIVISOR),
						0xF, ~0UL);
	} else
		clockevents_register_device(levt);
}
Linus Torvalds's avatar
Linus Torvalds committed
670

671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
/*
 * Install the updated TSC frequency from recalibration at the TSC
 * deadline clockevent devices.
 */
static void __lapic_update_tsc_freq(void *info)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);

	/* Only TSC-deadline devices derive their frequency from the TSC. */
	if (!this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return;

	clockevents_update_freq(levt, tsc_khz * (1000 / TSC_DIVISOR));
}

void lapic_update_tsc_freq(void)
{
	/*
	 * The clockevent device's ->mult and ->shift can both be
	 * changed. In order to avoid races, schedule the frequency
	 * update code on each CPU.
	 */
	on_each_cpu(__lapic_update_tsc_freq, NULL, 0);
}

695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
/*
 * In this functions we calibrate APIC bus clocks to the external timer.
 *
 * We want to do the calibration only once since we want to have local timer
 * irqs syncron. CPUs connected by the same APIC bus have the very same bus
 * frequency.
 *
 * This was previously done by reading the PIT/HPET and waiting for a wrap
 * around to find out, that a tick has elapsed. I have a box, where the PIT
 * readout is broken, so it never gets out of the wait loop again. This was
 * also reported by others.
 *
 * Monitoring the jiffies value is inaccurate and the clockevents
 * infrastructure allows us to do a simple substitution of the interrupt
 * handler.
 *
 * The calibration routine also uses the pm_timer when possible, as the PIT
 * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
 * back to normal later in the boot process).
 */

/* Number of calibration samples: one tenth of a second worth of ticks. */
#define LAPIC_CAL_LOOPS		(HZ/10)

/* Calibration loop counter; -1 means calibration has not started. */
static __initdata int lapic_cal_loops = -1;
/* APIC timer counter readings at the start/end of the window. */
static __initdata long lapic_cal_t1, lapic_cal_t2;
/* TSC readings at the start/end of the window. */
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
/* ACPI PM timer readings at the start/end of the window. */
static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
/* jiffies readings at the start/end of the window. */
static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;

/*
 * Temporary interrupt handler and polled calibration function.
 *
 * Samples the APIC timer count, the TSC (when present) and the ACPI PM
 * timer on every invocation; the first (loop 0) and last
 * (loop LAPIC_CAL_LOOPS) samples are recorded in the lapic_cal_* state.
 */
static void __init lapic_cal_handler(struct clock_event_device *dev)
{
	unsigned long long tsc = 0;
	long tapic = apic_read(APIC_TMCCT);
	unsigned long pm = acpi_pm_read_early();

	if (boot_cpu_has(X86_FEATURE_TSC))
		tsc = rdtsc();

	switch (lapic_cal_loops++) {
	case 0:
		lapic_cal_t1 = tapic;
		lapic_cal_tsc1 = tsc;
		lapic_cal_pm1 = pm;
		lapic_cal_j1 = jiffies;
		break;

	case LAPIC_CAL_LOOPS:
		lapic_cal_t2 = tapic;
		lapic_cal_tsc2 = tsc;
		/* Compensate for a PM timer wraparound within the window. */
		if (pm < lapic_cal_pm1)
			pm += ACPI_PM_OVRRUN;
		lapic_cal_pm2 = pm;
		lapic_cal_j2 = jiffies;
		break;
	}
}

755
756
/*
 * Cross-check (and if necessary correct) the APIC and TSC calibration
 * deltas against the ACPI PM timer.
 *
 * @deltapm:  PM timer ticks elapsed over the calibration window
 * @delta:    in/out - APIC timer delta, rescaled when inconsistent
 * @deltatsc: in/out - TSC delta, rescaled when inconsistent
 *
 * Returns 0 when the PM timer was usable (deltas possibly adjusted),
 * -1 when no PM timer reference is available.
 */
static int __init
calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
{
	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
	const long pm_thresh = pm_100ms / 100;
	unsigned long mult;
	u64 res;

#ifndef CONFIG_X86_PM_TIMER
	return -1;
#endif

	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);

	/* Check, if the PM timer is available */
	if (!deltapm)
		return -1;

	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);

	/* Within 1% of the expected 100ms: calibration is consistent. */
	if (deltapm > (pm_100ms - pm_thresh) &&
	    deltapm < (pm_100ms + pm_thresh)) {
		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
		return 0;
	}

	res = (((u64)deltapm) *  mult) >> 22;
	do_div(res, 1000000);
	pr_warning("APIC calibration not consistent "
		   "with PM-Timer: %ldms instead of 100ms\n",(long)res);

	/* Correct the lapic counter value */
	res = (((u64)(*delta)) * pm_100ms);
	do_div(res, deltapm);
	pr_info("APIC delta adjusted to PM-Timer: "
		"%lu (%ld)\n", (unsigned long)res, *delta);
	*delta = (long)res;

	/* Correct the tsc counter value */
	if (boot_cpu_has(X86_FEATURE_TSC)) {
		res = (((u64)(*deltatsc)) * pm_100ms);
		do_div(res, deltapm);
		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
					  "PM-Timer: %lu (%ld)\n",
					(unsigned long)res, *deltatsc);
		*deltatsc = (long)res;
	}

	return 0;
}

806
807
/*
 * Fill in lapic_clockevent's mult/shift and min/max deltas from
 * lapic_timer_period. Returns -1 when the period is not yet known.
 */
static int __init lapic_init_clockevent(void)
{
	if (!lapic_timer_period)
		return -1;

	/* Calculate the scaled math multiplication factor */
	lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR,
					TICK_NSEC, lapic_clockevent.shift);
	lapic_clockevent.max_delta_ns =
		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
	lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
	lapic_clockevent.min_delta_ns =
		clockevent_delta2ns(0xF, &lapic_clockevent);
	lapic_clockevent.min_delta_ticks = 0xF;

	return 0;
}

824
825
826
827
828
829
830
831
832
833
834
835
836
/*
 * Decide whether the PIT must be kept operational, either for timer
 * calibration or as the fallback clockevent device.
 */
bool __init apic_needs_pit(void)
{
	/*
	 * If the frequencies are not known, PIT is required for both TSC
	 * and apic timer calibration.
	 */
	if (!tsc_khz || !cpu_khz)
		return true;

	/* Is there an APIC at all? */
	if (!boot_cpu_has(X86_FEATURE_APIC))
		return true;

	/* Virt guests may lack ARAT, but still have DEADLINE */
	if (!boot_cpu_has(X86_FEATURE_ARAT))
		return true;

	/* Deadline timer is based on TSC so no further PIT action required */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return false;

	/* APIC timer disabled? */
	if (disable_apic_timer)
		return true;
	/*
	 * The APIC timer frequency is known already, no PIT calibration
	 * required. If unknown, let the PIT be initialized.
	 */
	return lapic_timer_period == 0;
}

855
856
/*
 * Calibrate the local APIC timer on the boot CPU.
 *
 * Lets the free-running APIC counter tick for LAPIC_CAL_LOOPS sample
 * periods (terminated by the TSC or, lacking a known TSC frequency, by
 * jiffies), derives lapic_timer_period, cross-checks it against the ACPI
 * PM timer and - if that was unavailable - verifies it against jiffies
 * with a real periodic interrupt run.
 *
 * Returns 0 on success, -1 when the result is implausible or failed
 * verification (the clockevent then stays a dummy).
 */
static int __init calibrate_APIC_clock(void)
{
	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
	u64 tsc_perj = 0, tsc_start = 0;
	unsigned long jif_start;
	unsigned long deltaj;
	long delta, deltatsc;
	int pm_referenced = 0;

	/* TSC-deadline mode needs no bus-clock calibration at all. */
	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
		return 0;

	/*
	 * Check if lapic timer has already been calibrated by platform
	 * specific routine, such as tsc calibration code. If so just fill
	 * in the clockevent structure and return.
	 */
	if (!lapic_init_clockevent()) {
		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
			    lapic_timer_period);
		/*
		 * Direct calibration methods must have an always running
		 * local APIC timer, no need for broadcast timer.
		 */
		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
		return 0;
	}

	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
		    "calibrating APIC timer ...\n");

	/*
	 * There are platforms w/o global clockevent devices. Instead of
	 * making the calibration conditional on that, use a polling based
	 * approach everywhere.
	 */
	local_irq_disable();

	/*
	 * Setup the APIC counter to maximum. There is no way the lapic
	 * can underflow in the 100ms detection time frame
	 */
	__setup_APIC_LVTT(0xffffffff, 0, 0);

	/*
	 * Methods to terminate the calibration loop:
	 *  1) Global clockevent if available (jiffies)
	 *  2) TSC if available and frequency is known
	 */
	jif_start = READ_ONCE(jiffies);

	if (tsc_khz) {
		tsc_start = rdtsc();
		tsc_perj = div_u64((u64)tsc_khz * 1000, HZ);
	}

	/*
	 * Enable interrupts so the tick can fire, if a global
	 * clockevent device is available
	 */
	local_irq_enable();

	while (lapic_cal_loops <= LAPIC_CAL_LOOPS) {
		/* Wait for a tick to elapse */
		while (1) {
			if (tsc_khz) {
				u64 tsc_now = rdtsc();
				if ((tsc_now - tsc_start) >= tsc_perj) {
					tsc_start += tsc_perj;
					break;
				}
			} else {
				unsigned long jif_now = READ_ONCE(jiffies);

				if (time_after(jif_now, jif_start)) {
					jif_start = jif_now;
					break;
				}
			}
			cpu_relax();
		}

		/* Invoke the calibration routine */
		local_irq_disable();
		lapic_cal_handler(NULL);
		local_irq_enable();
	}

	local_irq_disable();

	/* Build delta t1-t2 as apic timer counts down */
	delta = lapic_cal_t1 - lapic_cal_t2;
	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);

	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);

	/* we trust the PM based calibration if possible */
	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
					&delta, &deltatsc);

	lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
	lapic_init_clockevent();

	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
		    lapic_timer_period);

	if (boot_cpu_has(X86_FEATURE_TSC)) {
		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
			    "%ld.%04ld MHz.\n",
			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
	}

	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
		    "%u.%04u MHz.\n",
		    lapic_timer_period / (1000000 / HZ),
		    lapic_timer_period % (1000000 / HZ));

	/*
	 * Do a sanity check on the APIC calibration result
	 */
	if (lapic_timer_period < (1000000 / HZ)) {
		local_irq_enable();
		pr_warning("APIC frequency too slow, disabling apic timer\n");
		return -1;
	}

	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;

	/*
	 * PM timer calibration failed or not turned on so lets try APIC
	 * timer based calibration, if a global clockevent device is
	 * available.
	 */
	if (!pm_referenced && global_clock_event) {
		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");

		/*
		 * Setup the apic timer manually
		 */
		levt->event_handler = lapic_cal_handler;
		lapic_timer_set_periodic(levt);
		lapic_cal_loops = -1;

		/* Let the interrupts run */
		local_irq_enable();

		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
			cpu_relax();

		/* Stop the lapic timer */
		local_irq_disable();
		lapic_timer_shutdown(levt);

		/* Jiffies delta */
		deltaj = lapic_cal_j2 - lapic_cal_j1;
		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);

		/* Check, if the jiffies result is consistent */
		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
		else
			levt->features |= CLOCK_EVT_FEAT_DUMMY;
	}
	local_irq_enable();

	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
		pr_warning("APIC timer disabled due to verification failure\n");
		return -1;
	}

	return 0;
}

Hiroshi Shimamoto's avatar
Hiroshi Shimamoto committed
1031
1032
1033
1034
1035
/*
 * Setup the boot APIC
 *
 * Calibrate and verify the result.
 */
void __init setup_boot_APIC_clock(void)
{
	/*
	 * The local apic timer can be disabled via the kernel
	 * commandline or from the CPU detection code. Register the lapic
	 * timer as a dummy clock event source on SMP systems, so the
	 * broadcast mechanism is used. On UP systems simply ignore it.
	 */
	if (disable_apic_timer) {
		pr_info("Disabling APIC timer\n");
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1) {
			lapic_clockevent.mult = 1;
			setup_APIC_timer();
		}
		return;
	}

	/* Calibration failed: register only as a broadcast dummy on SMP. */
	if (calibrate_APIC_clock()) {
		/* No broadcast on UP ! */
		if (num_possible_cpus() > 1)
			setup_APIC_timer();
		return;
	}

	/*
	 * If nmi_watchdog is set to IO_APIC, we need the
	 * PIT/HPET going.  Otherwise register lapic as a dummy
	 * device.
	 */
	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;

	/* Setup the lapic or request the broadcast */
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

1073
/* Register the (already calibrated) local APIC timer on a secondary CPU. */
void setup_secondary_APIC_clock(void)
{
	setup_APIC_timer();
	amd_e400_c1e_apic_setup();
}

/*
 * The guts of the apic timer interrupt
 */
static void local_apic_timer_interrupt(void)
{
1084
	struct clock_event_device *evt = this_cpu_ptr(&lapic_events);
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097

	/*
	 * Normally we should not be here till LAPIC has been initialized but
	 * in some cases like kdump, its possible that there is a pending LAPIC
	 * timer interrupt from previous kernel's context and is delivered in
	 * new kernel the moment interrupts are enabled.
	 *
	 * Interrupts are enabled early and LAPIC is setup much later, hence
	 * its possible that when we get here evt->event_handler is NULL.
	 * Check for event_handler being NULL and discard the interrupt as
	 * spurious.
	 */
	if (!evt->event_handler) {
1098
1099
		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
			   smp_processor_id());
1100
		/* Switch it off */
1101
		lapic_timer_shutdown(evt);
Thomas Gleixner's avatar