Commit 643ad15d authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 protection key support from Ingo Molnar:
 "This tree adds support for a new memory protection hardware feature
  that is available in upcoming Intel CPUs: 'protection keys' (pkeys).

  There's a background article at LWN.net:

      https://lwn.net/Articles/643797/

  The gist is that protection keys allow the encoding of
  user-controllable permission masks in the pte.  So instead of having a
  fixed protection mask in the pte (which needs a system call to change
  and works on a per page basis), the user can map a (handful of)
  protection mask variants and can change the masks runtime relatively
  cheaply, without having to change every single page in the affected
  virtual memory range.

  This allows the dynamic switching of the protection bits of large
  amounts of virtual memory, via user-space instructions.  It also
  allows more precise control of MMU permission bits: for example the
  executable bit is separate from the read bit (see more about that
  below).

  This tree adds the MM infrastructure and low level x86 glue needed for
  that, plus it adds a high level API to make use of protection keys -
  if a user-space application calls:

        mmap(..., PROT_EXEC);

  or

        mprotect(ptr, sz, PROT_EXEC);

  (note PROT_EXEC-only, without PROT_READ/WRITE), the kernel will notice
  this special case, and will set a special protection key on this
  memory range.  It also sets the appropriate bits in the Protection
  Keys User Rights (PKRU) register so that the memory becomes unreadable
  and unwritable.

  So using protection keys the kernel is able to implement 'true'
  PROT_EXEC on x86 CPUs: without protection keys PROT_EXEC implies
  PROT_READ as well.  Unreadable executable mappings have security
  advantages: they cannot be read via information leaks to figure out
  ASLR details, nor can they be scanned for ROP gadgets - and they
  cannot be used by exploits for data purposes either.

  We know about no user-space code that relies on pure PROT_EXEC
  mappings today, but binary loaders could start making use of this new
  feature to map binaries and libraries in a more secure fashion.

  There is other pending pkeys work that offers more high level system
  call APIs to manage protection keys - but those are not part of this
  pull request.

  Right now there's a Kconfig that controls this feature
  (CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) that is default enabled
  (like most x86 CPU feature enablement code that has no runtime
  overhead), but it's not user-configurable at the moment.  If there's
  any serious problem with this then we can make it configurable and/or
  flip the default"

* 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
  x86/mm/pkeys: Fix mismerge of protection keys CPUID bits
  mm/pkeys: Fix siginfo ABI breakage caused by new u64 field
  x86/mm/pkeys: Fix access_error() denial of writes to write-only VMA
  mm/core, x86/mm/pkeys: Add execute-only protection keys support
  x86/mm/pkeys: Create an x86 arch_calc_vm_prot_bits() for VMA flags
  x86/mm/pkeys: Allow kernel to modify user pkey rights register
  x86/fpu: Allow setting of XSAVE state
  x86/mm: Factor out LDT init from context init
  mm/core, x86/mm/pkeys: Add arch_validate_pkey()
  mm/core, arch, powerpc: Pass a protection key in to calc_vm_flag_bits()
  x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU
  x86/mm/pkeys: Add Kconfig prompt to existing config option
  x86/mm/pkeys: Dump pkey from VMA in /proc/pid/smaps
  x86/mm/pkeys: Dump PKRU with other kernel registers
  mm/core, x86/mm/pkeys: Differentiate instruction fetches
  x86/mm/pkeys: Optimize fault handling in access_error()
  mm/core: Do not enforce PKEY permissions on remote mm access
  um, pkeys: Add UML arch_*_access_permitted() methods
  mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys
  x86/mm/gup: Simplify get_user_pages() PTE bit handling
  ...
parents 24b5e20f 0d47638f
......@@ -987,6 +987,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See Documentation/x86/intel_mpx.txt for more
information about the feature.
nopku [X86] Disable Memory Protection Keys CPU feature found
in some Intel CPUs.
eagerfpu= [X86]
on enable eager fpu restore
off disable eager fpu restore
......
......@@ -2719,9 +2719,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
/* Acquire the mm page semaphore. */
down_read(&current->mm->mmap_sem);
err = get_user_pages(current,
current->mm,
(unsigned long int)(oper.indata + prev_ix),
err = get_user_pages((unsigned long int)(oper.indata + prev_ix),
noinpages,
0, /* read access only for in data */
0, /* no force */
......@@ -2736,9 +2734,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
}
noinpages = err;
if (oper.do_cipher){
err = get_user_pages(current,
current->mm,
(unsigned long int)oper.cipher_outdata,
err = get_user_pages((unsigned long int)oper.cipher_outdata,
nooutpages,
1, /* write access for out data */
0, /* no force */
......
......@@ -63,10 +63,15 @@ typedef struct siginfo {
unsigned int _flags; /* see below */
unsigned long _isr; /* isr */
short _addr_lsb; /* lsb of faulting address */
struct {
void __user *_lower;
void __user *_upper;
} _addr_bnd;
union {
/* used when si_code=SEGV_BNDERR */
struct {
void __user *_lower;
void __user *_upper;
} _addr_bnd;
/* used when si_code=SEGV_PKUERR */
__u32 _pkey;
};
} _sigfault;
/* SIGPOLL */
......
......@@ -142,8 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
u64 virt_addr=simple_strtoull(buf, NULL, 16);
int ret;
ret = get_user_pages(current, current->mm, virt_addr,
1, VM_READ, 0, NULL, NULL);
ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL);
if (ret<=0) {
#ifdef ERR_INJ_DEBUG
printk("Virtual address %lx is not existing.\n",virt_addr);
......
......@@ -86,10 +86,15 @@ typedef struct siginfo {
int _trapno; /* TRAP # which caused the signal */
#endif
short _addr_lsb;
struct {
void __user *_lower;
void __user *_upper;
} _addr_bnd;
union {
/* used when si_code=SEGV_BNDERR */
struct {
void __user *_lower;
void __user *_upper;
} _addr_bnd;
/* used when si_code=SEGV_PKUERR */
__u32 _pkey;
};
} _sigfault;
/* SIGPOLL, SIGXFSZ (To do ...) */
......
......@@ -286,8 +286,7 @@ slow_irqon:
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(current, mm, start,
(end - start) >> PAGE_SHIFT,
ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT,
write, 0, pages);
/* Have to be a bit careful with return values */
......
......@@ -18,11 +18,12 @@
* This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
* here. How important is the optimization?
*/
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot)
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
unsigned long pkey)
{
return (prot & PROT_SAO) ? VM_SAO : 0;
}
#define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot)
#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
{
......
......@@ -148,5 +148,17 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
{
/* by default, allow everything */
return true;
}
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
......@@ -136,4 +136,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
{
/* by default, allow everything */
return true;
}
#endif /* __S390_MMU_CONTEXT_H */
......@@ -210,7 +210,6 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
struct mm_struct *mm = current->mm;
int nr, ret;
might_sleep();
......@@ -222,8 +221,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
/* Try to get the remaining pages with get_user_pages */
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(current, mm, start,
nr_pages - nr, write, 0, pages);
ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
/* Have to be a bit careful with return values */
if (nr > 0)
ret = (ret < 0) ? nr : ret + nr;
......
......@@ -257,7 +257,7 @@ slow_irqon:
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(current, mm, start,
ret = get_user_pages_unlocked(start,
(end - start) >> PAGE_SHIFT, write, 0, pages);
/* Have to be a bit careful with return values */
......
......@@ -237,7 +237,7 @@ slow:
start += nr << PAGE_SHIFT;
pages += nr;
ret = get_user_pages_unlocked(current, mm, start,
ret = get_user_pages_unlocked(start,
(end - start) >> PAGE_SHIFT, write, 0, pages);
/* Have to be a bit careful with return values */
......
......@@ -27,6 +27,20 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
struct vm_area_struct *vma)
{
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
/* by default, allow everything */
return true;
}
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
{
/* by default, allow everything */
return true;
}
/*
* end asm-generic/mm_hooks.h functions
*/
......
......@@ -97,4 +97,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm,
{
}
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool foreign)
{
/* by default, allow everything */
return true;
}
static inline bool arch_pte_access_permitted(pte_t pte, bool write)
{
/* by default, allow everything */
return true;
}
#endif
......@@ -156,6 +156,8 @@ config X86
select X86_DEV_DMA_OPS if X86_64
select X86_FEATURE_NAMES if PROC_FS
select HAVE_STACK_VALIDATION if X86_64
select ARCH_USES_HIGH_VMA_FLAGS if X86_INTEL_MEMORY_PROTECTION_KEYS
select ARCH_HAS_PKEYS if X86_INTEL_MEMORY_PROTECTION_KEYS
config INSTRUCTION_DECODER
def_bool y
......@@ -1719,6 +1721,20 @@ config X86_INTEL_MPX
If unsure, say N.
config X86_INTEL_MEMORY_PROTECTION_KEYS
prompt "Intel Memory Protection Keys"
def_bool y
# Note: only available in 64-bit mode
depends on CPU_SUP_INTEL && X86_64
---help---
Memory Protection Keys provides a mechanism for enforcing
page-based protections, but without requiring modification of the
page tables when an application changes protection domains.
For details, see Documentation/x86/protection-keys.txt
If unsure, say y.
config EFI
bool "EFI runtime service support"
depends on ACPI
......
......@@ -26,6 +26,7 @@ enum cpuid_leafs
CPUID_8000_0008_EBX,
CPUID_6_EAX,
CPUID_8000_000A_EDX,
CPUID_7_ECX,
};
#ifdef CONFIG_X86_FEATURE_NAMES
......@@ -48,28 +49,42 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
test_bit(bit, (unsigned long *)((c)->x86_capability))
#define REQUIRED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
(((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3)) || \
(((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4)) || \
(((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5)) || \
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0 )) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1 )) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2 )) || \
(((bit)>>5)==3 && (1UL<<((bit)&31) & REQUIRED_MASK3 )) || \
(((bit)>>5)==4 && (1UL<<((bit)&31) & REQUIRED_MASK4 )) || \
(((bit)>>5)==5 && (1UL<<((bit)&31) & REQUIRED_MASK5 )) || \
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6 )) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7 )) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8 )) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9 )) || \
(((bit)>>5)==10 && (1UL<<((bit)&31) & REQUIRED_MASK10)) || \
(((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \
(((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \
(((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) )
#define DISABLED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2)) || \
(((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3)) || \
(((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4)) || \
(((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5)) || \
(((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8)) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9)) )
( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & DISABLED_MASK1 )) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & DISABLED_MASK2 )) || \
(((bit)>>5)==3 && (1UL<<((bit)&31) & DISABLED_MASK3 )) || \
(((bit)>>5)==4 && (1UL<<((bit)&31) & DISABLED_MASK4 )) || \
(((bit)>>5)==5 && (1UL<<((bit)&31) & DISABLED_MASK5 )) || \
(((bit)>>5)==6 && (1UL<<((bit)&31) & DISABLED_MASK6 )) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & DISABLED_MASK7 )) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & DISABLED_MASK8 )) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & DISABLED_MASK9 )) || \
(((bit)>>5)==10 && (1UL<<((bit)&31) & DISABLED_MASK10)) || \
(((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \
(((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \
(((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \
(((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) )
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
......
......@@ -12,7 +12,7 @@
/*
* Defines x86 CPU feature bits
*/
#define NCAPINTS 16 /* N 32-bit words worth of info */
#define NCAPINTS 17 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
......@@ -274,6 +274,10 @@
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
/*
* BUG word(s)
*/
......
......@@ -28,6 +28,14 @@
# define DISABLE_CENTAUR_MCR 0
#endif /* CONFIG_X86_64 */
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
# define DISABLE_PKU (1<<(X86_FEATURE_PKU))
# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE))
#else
# define DISABLE_PKU 0
# define DISABLE_OSPKE 0
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
/*
* Make sure to add features to the correct mask
*/
......@@ -41,5 +49,12 @@
#define DISABLED_MASK7 0
#define DISABLED_MASK8 0
#define DISABLED_MASK9 (DISABLE_MPX)
#define DISABLED_MASK10 0
#define DISABLED_MASK11 0
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
......@@ -25,6 +25,8 @@
extern void fpu__activate_curr(struct fpu *fpu);
extern void fpu__activate_fpstate_read(struct fpu *fpu);
extern void fpu__activate_fpstate_write(struct fpu *fpu);
extern void fpu__current_fpstate_write_begin(void);
extern void fpu__current_fpstate_write_end(void);
extern void fpu__save(struct fpu *fpu);
extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
......
......@@ -108,6 +108,8 @@ enum xfeature {
XFEATURE_OPMASK,
XFEATURE_ZMM_Hi256,
XFEATURE_Hi16_ZMM,
XFEATURE_PT_UNIMPLEMENTED_SO_FAR,
XFEATURE_PKRU,
XFEATURE_MAX,
};
......@@ -120,6 +122,7 @@ enum xfeature {
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU)
#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
......@@ -212,6 +215,15 @@ struct avx_512_hi16_state {
struct reg_512_bit hi16_zmm[16];
} __packed;
/*
* State component 9: 32-bit PKRU register. The state is
* 8 bytes long but only 4 bytes is used currently.
*/
struct pkru_state {
u32 pkru;
u32 pad;
} __packed;
struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment