Commit b145b0eb authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "ARM and x86 bugfixes of all kinds.

  The most visible one is that migrating a nested hypervisor has always
  been busted on Broadwell and newer processors, and that has finally
  been fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
  KVM: x86: omit "impossible" pmu MSRs from MSR list
  KVM: nVMX: Fix consistency check on injected exception error code
  KVM: x86: omit absent pmu MSRs from MSR list
  selftests: kvm: Fix libkvm build error
  kvm: vmx: Limit guest PMCs to those supported on the host
  kvm: x86, powerpc: do not allow clearing largepages debugfs entry
  KVM: selftests: x86: clarify what is reported on KVM_GET_MSRS failure
  KVM: VMX: Set VMENTER_L1D_FLUSH_NOT_REQUIRED if !X86_BUG_L1TF
  selftests: kvm: add test for dirty logging inside nested guests
  KVM: x86: fix nested guest live migration with PML
  KVM: x86: assign two bits to track SPTE kinds
  KVM: x86: Expose XSAVEERPTR to the guest
  kvm: x86: Enumerate support for CLZERO instruction
  kvm: x86: Use AMD CPUID semantics for AMD vCPUs
  kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH
  KVM: X86: Fix userspace set invalid CR4
  kvm: x86: Fix a spurious -E2BIG in __do_cpuid_func
  KVM: LAPIC: Loosen filter for adaptive tuning of lapic_timer_advance_ns
  KVM: arm/arm64: vgic: Use the appropriate TRACE_INCLUDE_PATH
  arm64: KVM: Kill hyp_alternate_select()
  ...
parents 50dfd03d cf05a67b
......@@ -47,30 +47,6 @@
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
/**
* hyp_alternate_select - Generates patchable code sequences that are
* used to switch between two implementations of a function, depending
* on the availability of a feature.
*
* @fname: a symbol name that will be defined as a function returning a
* function pointer whose type will match @orig and @alt
* @orig: A pointer to the default function, as returned by @fname when
* @cond doesn't hold
* @alt: A pointer to the alternate function, as returned by @fname
* when @cond holds
* @cond: a CPU feature (as described in asm/cpufeature.h)
*/
#define hyp_alternate_select(fname, orig, alt, cond) \
typeof(orig) * __hyp_text fname(void) \
{ \
typeof(alt) *val = orig; \
asm volatile(ALTERNATIVE("nop \n", \
"mov %0, %1 \n", \
cond) \
: "+r" (val) : "r" (alt)); \
return val; \
}
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);
......
......@@ -229,20 +229,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
}
}
static bool __hyp_text __true_value(void)
{
return true;
}
static bool __hyp_text __false_value(void)
{
return false;
}
static hyp_alternate_select(__check_arm_834220,
__false_value, __true_value,
ARM64_WORKAROUND_834220);
static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
u64 par, tmp;
......@@ -298,7 +284,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
* resolve the IPA using the AT instruction.
*/
if (!(esr & ESR_ELx_S1PTW) &&
(__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
(cpus_have_const_cap(ARM64_WORKAROUND_834220) ||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
if (!__translate_far_to_hpfar(far, &hpfar))
return false;
} else {
......
......@@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
isb();
}
static hyp_alternate_select(__tlb_switch_to_guest,
__tlb_switch_to_guest_nvhe,
__tlb_switch_to_guest_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
if (has_vhe())
__tlb_switch_to_guest_vhe(kvm, cxt);
else
__tlb_switch_to_guest_nvhe(kvm, cxt);
}
static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
struct tlb_inv_context *cxt)
......@@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
write_sysreg(0, vttbr_el2);
}
static hyp_alternate_select(__tlb_switch_to_host,
__tlb_switch_to_host_nvhe,
__tlb_switch_to_host_vhe,
ARM64_HAS_VIRT_HOST_EXTN);
static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
struct tlb_inv_context *cxt)
{
if (has_vhe())
__tlb_switch_to_host_vhe(kvm, cxt);
else
__tlb_switch_to_host_nvhe(kvm, cxt);
}
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
{
......@@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
__tlb_switch_to_guest()(kvm, &cxt);
__tlb_switch_to_guest(kvm, &cxt);
/*
* We could do so much better if we had the VA as well.
......@@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
if (!has_vhe() && icache_is_vpipt())
__flush_icache_all();
__tlb_switch_to_host()(kvm, &cxt);
__tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
......@@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
__tlb_switch_to_guest()(kvm, &cxt);
__tlb_switch_to_guest(kvm, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
__tlb_switch_to_host()(kvm, &cxt);
__tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
......@@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
__tlb_switch_to_guest()(kvm, &cxt);
__tlb_switch_to_guest(kvm, &cxt);
__tlbi(vmalle1);
dsb(nsh);
isb();
__tlb_switch_to_host()(kvm, &cxt);
__tlb_switch_to_host(kvm, &cxt);
}
void __hyp_text __kvm_flush_vm_context(void)
......
......@@ -36,8 +36,8 @@
#include "book3s.h"
#include "trace.h"
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
/* #define EXIT_DEBUG */
......@@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pthru_all", VCPU_STAT(pthru_all) },
{ "pthru_host", VCPU_STAT(pthru_host) },
{ "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
{ "largepages_2M", VM_STAT(num_2M_pages) },
{ "largepages_1G", VM_STAT(num_1G_pages) },
{ "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
{ "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
{ NULL }
};
......
......@@ -219,13 +219,6 @@ enum {
PFERR_WRITE_MASK | \
PFERR_PRESENT_MASK)
/*
* The mask used to denote special SPTEs, which can be either MMIO SPTEs or
* Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
* with the SVE bit in EPT PTEs.
*/
#define SPTE_SPECIAL_MASK (1ULL << 62)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
/*
......
......@@ -485,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(CLZERO) | F(XSAVEERPTR) |
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
......@@ -618,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
*/
case 0x1f:
case 0xb: {
int i, level_type;
int i;
/* read more entries until level_type is zero */
for (i = 1; ; ++i) {
/*
* We filled in entry[0] for CPUID(EAX=<function>,
* ECX=00H) above. If its level type (ECX[15:8]) is
* zero, then the leaf is unimplemented, and we're
* done. Otherwise, continue to populate entries
* until the level type (ECX[15:8]) of the previously
* added entry is zero.
*/
for (i = 1; entry[i - 1].ecx & 0xff00; ++i) {
if (*nent >= maxnent)
goto out;
level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_host_cpuid(&entry[i], function, i);
++*nent;
}
......@@ -969,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
/*
* If no match is found, check whether we exceed the vCPU's limit
* and return the content of the highest valid _standard_ leaf instead.
* This is to satisfy the CPUID specification.
* If the basic or extended CPUID leaf requested is higher than the
* maximum supported basic or extended leaf, respectively, then it is
* out of range.
*/
static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
u32 function, u32 index)
static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
{
struct kvm_cpuid_entry2 *maxlevel;
maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
if (!maxlevel || maxlevel->eax >= function)
return NULL;
if (function & 0x80000000) {
maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
if (!maxlevel)
return NULL;
}
return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
struct kvm_cpuid_entry2 *max;
max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
return max && function <= max->eax;
}
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool check_limit)
{
u32 function = *eax, index = *ecx;
struct kvm_cpuid_entry2 *best;
bool entry_found = true;
best = kvm_find_cpuid_entry(vcpu, function, index);
if (!best) {
entry_found = false;
if (!check_limit)
goto out;
struct kvm_cpuid_entry2 *entry;
struct kvm_cpuid_entry2 *max;
bool found;
best = check_cpuid_limit(vcpu, function, index);
entry = kvm_find_cpuid_entry(vcpu, function, index);
found = entry;
/*
* Intel CPUID semantics treats any query for an out-of-range
* leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
* requested. AMD CPUID semantics returns all zeroes for any
* undefined leaf, whether or not the leaf is in range.
*/
if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
!cpuid_function_in_range(vcpu, function)) {
max = kvm_find_cpuid_entry(vcpu, 0, 0);
if (max) {
function = max->eax;
entry = kvm_find_cpuid_entry(vcpu, function, index);
}
}
out:
if (best) {
*eax = best->eax;
*ebx = best->ebx;
*ecx = best->ecx;
*edx = best->edx;
} else
if (entry) {
*eax = entry->eax;
*ebx = entry->ebx;
*ecx = entry->ecx;
*edx = entry->edx;
} else {
*eax = *ebx = *ecx = *edx = 0;
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
return entry_found;
/*
* When leaf 0BH or 1FH is defined, CL is pass-through
* and EDX is always the x2APIC ID, even for undefined
* subleaves. Index 1 will exist iff the leaf is
* implemented, so we pass through CL iff leaf 1
* exists. EDX can be copied from any existing index.
*/
if (function == 0xb || function == 0x1f) {
entry = kvm_find_cpuid_entry(vcpu, function, 1);
if (entry) {
*ecx = index & 0xff;
*edx = entry->edx;
}
}
}
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
return found;
}
EXPORT_SYMBOL_GPL(kvm_cpuid);
......
......@@ -66,9 +66,10 @@
#define X2APIC_BROADCAST 0xFFFFFFFFul
static bool lapic_timer_advance_dynamic __read_mostly;
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000
#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
......@@ -1504,8 +1505,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
}
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX))
timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
......@@ -2302,7 +2303,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
HRTIMER_MODE_ABS_HARD);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
lapic_timer_advance_dynamic = true;
} else {
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
......
......@@ -83,7 +83,17 @@ module_param(dbg, bool, 0644);
#define PTE_PREFETCH_NUM 8
#define PT_FIRST_AVAIL_BITS_SHIFT 10
#define PT64_SECOND_AVAIL_BITS_SHIFT 52
#define PT64_SECOND_AVAIL_BITS_SHIFT 54
/*
* The mask used to denote special SPTEs, which can be either MMIO SPTEs or
* Access Tracking SPTEs.
*/
#define SPTE_SPECIAL_MASK (3ULL << 52)
#define SPTE_AD_ENABLED_MASK (0ULL << 52)
#define SPTE_AD_DISABLED_MASK (1ULL << 52)
#define SPTE_AD_WRPROT_ONLY_MASK (2ULL << 52)
#define SPTE_MMIO_MASK (3ULL << 52)
#define PT64_LEVEL_BITS 9
......@@ -219,12 +229,11 @@ static u64 __read_mostly shadow_present_mask;
static u64 __read_mostly shadow_me_mask;
/*
* SPTEs used by MMUs without A/D bits are marked with shadow_acc_track_value.
* Non-present SPTEs with shadow_acc_track_value set are in place for access
* tracking.
* SPTEs used by MMUs without A/D bits are marked with SPTE_AD_DISABLED_MASK;
* shadow_acc_track_mask is the set of bits to be cleared in non-accessed
* pages.
*/
static u64 __read_mostly shadow_acc_track_mask;
static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK;
/*
* The mask/shift to use for saving the original R/X bits when marking the PTE
......@@ -304,7 +313,7 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
{
BUG_ON((u64)(unsigned)access_mask != access_mask);
BUG_ON((mmio_mask & mmio_value) != mmio_value);
shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
shadow_mmio_access_mask = access_mask;
}
......@@ -320,10 +329,27 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
return sp->role.ad_disabled;
}
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
* When using the EPT page-modification log, the GPAs in the log
* would come from L2 rather than L1. Therefore, we need to rely
* on write protection to record dirty pages. This also bypasses
* PML, since writes now result in a vmexit.
*/
return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
}
static inline bool spte_ad_enabled(u64 spte)
{
MMU_WARN_ON(is_mmio_spte(spte));
return !(spte & shadow_acc_track_value);
return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_DISABLED_MASK;
}
static inline bool spte_ad_need_write_protect(u64 spte)
{
MMU_WARN_ON(is_mmio_spte(spte));
return (spte & SPTE_SPECIAL_MASK) != SPTE_AD_ENABLED_MASK;
}
static inline u64 spte_shadow_accessed_mask(u64 spte)
......@@ -461,7 +487,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
{
BUG_ON(!dirty_mask != !accessed_mask);
BUG_ON(!accessed_mask && !acc_track_mask);
BUG_ON(acc_track_mask & shadow_acc_track_value);
BUG_ON(acc_track_mask & SPTE_SPECIAL_MASK);
shadow_user_mask = user_mask;
shadow_accessed_mask = accessed_mask;
......@@ -1589,16 +1615,16 @@ static bool spte_clear_dirty(u64 *sptep)
rmap_printk("rmap_clear_dirty: spte %p %llx\n", sptep, *sptep);
MMU_WARN_ON(!spte_ad_enabled(spte));
spte &= ~shadow_dirty_mask;
return mmu_spte_update(sptep, spte);
}
static bool wrprot_ad_disabled_spte(u64 *sptep)
static bool spte_wrprot_for_clear_dirty(u64 *sptep)
{
bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
(unsigned long *)sptep);
if (was_writable)
if (was_writable && !spte_ad_enabled(*sptep))
kvm_set_pfn_dirty(spte_to_pfn(*sptep));
return was_writable;
......@@ -1617,10 +1643,10 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
bool flush = false;
for_each_rmap_spte(rmap_head, &iter, sptep)
if (spte_ad_enabled(*sptep))
flush |= spte_clear_dirty(sptep);
if (spte_ad_need_write_protect(*sptep))
flush |= spte_wrprot_for_clear_dirty(sptep);
else
flush |= wrprot_ad_disabled_spte(sptep);
flush |= spte_clear_dirty(sptep);
return flush;
}
......@@ -1631,6 +1657,11 @@ static bool spte_set_dirty(u64 *sptep)
rmap_printk("rmap_set_dirty: spte %p %llx\n", sptep, *sptep);
/*
* Similar to the !kvm_x86_ops->slot_disable_log_dirty case,
* do not bother adding back write access to pages marked
* SPTE_AD_WRPROT_ONLY_MASK.
*/
spte |= shadow_dirty_mask;
return mmu_spte_update(sptep, spte);
......@@ -2622,7 +2653,7 @@ static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep,
shadow_user_mask | shadow_x_mask | shadow_me_mask;
if (sp_ad_disabled(sp))
spte |= shadow_acc_track_value;
spte |= SPTE_AD_DISABLED_MASK;
else
spte |= shadow_accessed_mask;
......@@ -2968,7 +2999,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
sp = page_header(__pa(sptep));
if (sp_ad_disabled(sp))
spte |= shadow_acc_track_value;
spte |= SPTE_AD_DISABLED_MASK;
else if (kvm_vcpu_ad_need_write_protect(vcpu))
spte |= SPTE_AD_WRPROT_ONLY_MASK;
/*
* For the EPT case, shadow_present_mask is 0 if hardware
......
......@@ -2610,7 +2610,7 @@ static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
/* VM-entry exception error code */
if (CC(has_error_code &&
vmcs12->vm_entry_exception_error_code & GENMASK(31, 15)))
vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
return -EINVAL;
/* VM-entry interruption-info field: reserved bits */
......
......@@ -262,6 +262,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
struct x86_pmu_capability x86_pmu;
struct kvm_cpuid_entry2 *entry;
union cpuid10_eax eax;
union cpuid10_edx edx;
......@@ -283,8 +284,10 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
if (!pmu->version)
return;
perf_get_x86_pmu_capability(&x86_pmu);
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
INTEL_PMC_MAX_GENERIC);
x86_pmu.num_counters_gp);
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
pmu->available_event_types = ~entry->ebx &
((1ull << eax.split.mask_length) - 1);
......@@ -294,7 +297,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
} else {
pmu->nr_arch_fixed_counters =
min_t(int, edx.split.num_counters_fixed,
INTEL_PMC_MAX_FIXED);
x86_pmu.num_counters_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
((u64)1 << edx.split.bit_width_fixed) - 1;
}
......
......@@ -209,6 +209,11 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
struct page *page;
unsigned int i;
if (!boot_cpu_has_bug(X86_BUG_L1TF)) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
return 0;
}
if (!enable_ept) {
l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_EPT_DISABLED;
return 0;
......@@ -7995,12 +8000,10 @@ static int __init vmx_init(void)
* contain 'auto' which will be turned into the default 'cond'
* mitigation mode.
*/
if (boot_cpu_has(X86_BUG_L1TF)) {
r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
if (r) {
vmx_exit();
return r;
}
r = vmx_setup_l1d_flush(vmentry_l1d_flush_param);
if (r) {
vmx_exit();
return r;
}
#ifdef CONFIG_KEXEC_CORE
......
......@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA));
static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE);
#endif
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
#define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \
KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK)
......@@ -212,7 +212,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
{ "mmu_unsync", VM_STAT(mmu_unsync) },
{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
{ "largepages", VM_STAT(lpages) },
{ "largepages", VM_STAT(lpages, .mode = 0444) },
{ "max_mmu_page_hash_collisions",
VM_STAT(max_mmu_page_hash_collisions) },
{ NULL }
......@@ -885,34 +885,42 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_PKE;
if (cr4 & CR4_RESERVED_BITS)
return 1;
return -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) && (cr4 & X86_CR4_OSXSAVE))
return 1;
return -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SMEP) && (cr4 & X86_CR4_SMEP))
return 1;
return -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_SMAP) && (cr4 & X86_CR4_SMAP))
return 1;
return -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_FSGSBASE) && (cr4 & X86_CR4_FSGSBASE))
return 1;
return -EINVAL;
if (!guest_cpuid_has(vcpu, X86_FEATURE_PKU) && (cr4 & X86_CR4_PKE))