diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h
index 594ad2f57d23200bcaa4c1eb9586ef504f6f9280..4ec3613551e3b5b918840ade8bb9bca986461075 100644
--- a/arch/x86/include/asm/insn-eval.h
+++ b/arch/x86/include/asm/insn-eval.h
@@ -21,7 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs);
 int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs);
 unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
-unsigned long insn_get_effective_ip(struct pt_regs *regs);
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip);
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]);
 int insn_fetch_from_user_inatomic(struct pt_regs *regs,
 				  unsigned char buf[MAX_INSN_SIZE]);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ce1792de39ca7556613895a553f06b806b7e1048..3a910f21a3b880c583b53e10f156683c1e8a8777 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1301,6 +1301,7 @@
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	bool (*get_if_flag)(struct kvm_vcpu *vcpu);
 
 	void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
 	void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 64bf07eb9675f996896f397e8a36a46877c8f3ab..849d65a52281adfcdfbe9ef2cd46d5a4a65b902d 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -105,29 +105,41 @@ static __always_inline bool on_vc_stack(struct pt_regs *regs)
 }
 
 /*
- * This function handles the case when an NMI is raised in the #VC exception
- * handler entry code. In this case, the IST entry for #VC must be adjusted, so
- * that any subsequent #VC exception will not overwrite the stack contents of the
- * interrupted #VC handler.
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
  *
  * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in sev_es_ist_exit(). Otherwise a nested
- * sev_es_ist_exit() call may adjust back the IST entry too early.
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
  */
 void noinstr __sev_es_ist_enter(struct pt_regs *regs)
 {
 	unsigned long old_ist, new_ist;
 
 	/* Read old IST entry */
-	old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
-	/* Make room on the IST stack */
+	/*
+	 * If NMI happened while on the #VC IST stack, set the new IST
+	 * value below regs->sp, so that the interrupted stack frame is
+	 * not overwritten by subsequent #VC exceptions.
+	 */
 	if (on_vc_stack(regs))
-		new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
-	else
-		new_ist = old_ist - sizeof(old_ist);
+		new_ist = regs->sp;
 
-	/* Store old IST entry */
+	/*
+	 * Reserve additional 8 bytes and store old IST value so this
+	 * adjustment can be unrolled in __sev_es_ist_exit().
+	 */
+	new_ist -= sizeof(old_ist);
 	*(unsigned long *)new_ist = old_ist;
 
 	/* Set new IST entry */
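A note on the hunk above: the save/restore dance is easier to see in
miniature. What follows is a user-space model of the enter/exit pairing,
a sketch only -- the array stands in for the #VC IST stack, "ist" for the
TSS slot, and ist_exit() mirrors a __sev_es_ist_exit() that this hunk
does not show. All names are invented for illustration.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t stack[64];          /* simulated #VC IST stack */
static uint64_t *ist = &stack[63];  /* simulated TSS IST slot  */

static void ist_enter(uint64_t *sp)
{
    uint64_t *new_ist = ist;

    /* NMI hit while on the #VC stack: start below the live frame. */
    if (sp && sp >= stack && sp < stack + 64)
        new_ist = sp;

    /* Reserve one slot and save the old value for ist_exit(). */
    new_ist -= 1;
    *new_ist = (uint64_t)(uintptr_t)ist;
    ist = new_ist;
}

static void ist_exit(void)
{
    /* Unconditionally unroll the adjustment made in ist_enter(). */
    ist = (uint64_t *)(uintptr_t)*ist;
}

int main(void)
{
    uint64_t *orig = ist;

    ist_enter(NULL);        /* NMI while off the #VC stack   */
    ist_enter(&stack[40]);  /* nested NMI while on the stack */
    ist_exit();
    ist_exit();
    assert(ist == orig);
    puts("IST save/restore balanced");
    return 0;
}

Because both the enter and exit sides adjust unconditionally, nested
invocations unwind in order, whatever the nesting depth.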
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 88ed44e01eaaae69624a9c250f9ebab68ecf9adc..20c632510dab107ea8be3668f1e4ebfe63d10381 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -541,8 +541,7 @@ static bool fixup_iopl_exception(struct pt_regs *regs)
 	if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3)
 		return false;
 
-	ip = insn_get_effective_ip(regs);
-	if (!ip)
+	if (insn_get_effective_ip(regs, &ip))
 		return false;
 
 	if (get_user(byte, (const char __user *)ip))
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index babb900e49d8cb2f0567963ad7233fafc9d1b348..5ae6ad049b49308a00c9f4534d9b7c9e45dcc5ef 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -99,7 +99,7 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
 	return true;
 }
 
-static int sev_asid_new(struct kvm_sev_info *sev)
+static int sev_asid_new(bool es_active)
 {
 	int pos, min_asid, max_asid;
 	bool retry = true;
@@ -110,8 +110,8 @@ static int sev_asid_new(struct kvm_sev_info *sev)
 	 * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
 	 * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
 	 */
-	min_asid = sev->es_active ? 0 : min_sev_asid - 1;
-	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+	min_asid = es_active ? 0 : min_sev_asid - 1;
+	max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
 	pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
 	if (pos >= max_asid) {
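On the ASID hunks above: the allocatable space is carved into an SEV-ES
range (1 to min_sev_asid - 1) and a plain-SEV range (min_sev_asid to
max_sev_asid). The kernel scans a bitmap of 0-based bit positions, which
is where the "min_sev_asid - 1" above comes from; the runnable sketch
below uses 1-based ASIDs directly, and its constants are assumed values,
not the real limits reported by firmware.

#include <stdbool.h>
#include <stdio.h>

#define MAX_ASID 64
static bool asid_used[MAX_ASID + 1];
static const int min_sev_asid = 16;   /* first plain-SEV ASID (assumed) */
static const int max_sev_asid = MAX_ASID;

static int asid_new(bool es_active)
{
    /* SEV-ES: 1 .. min_sev_asid-1; plain SEV: min_sev_asid .. max. */
    int min = es_active ? 1 : min_sev_asid;
    int max = es_active ? min_sev_asid - 1 : max_sev_asid;

    for (int asid = min; asid <= max; asid++) {
        if (!asid_used[asid]) {
            asid_used[asid] = true;
            return asid;
        }
    }
    return -1;  /* no free ASID in this class's range */
}

int main(void)
{
    printf("SEV-ES guest got ASID %d\n", asid_new(true));   /* 1  */
    printf("SEV guest got ASID %d\n", asid_new(false));     /* 16 */
    return 0;
}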
@@ -199,6 +199,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+	bool es_active = argp->id == KVM_SEV_ES_INIT;
 	int asid, ret;
 
 	if (kvm->created_vcpus)
@@ -208,7 +209,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	if (unlikely(sev->active))
 		return ret;
 
-	asid = sev_asid_new(sev);
+	asid = sev_asid_new(es_active);
 	if (asid < 0)
 		return ret;
 
@@ -217,6 +218,7 @@
 		goto e_free;
 
 	sev->active = true;
+	sev->es_active = es_active;
 	sev->asid = asid;
 	INIT_LIST_HEAD(&sev->regions_list);
 
@@ -227,16 +229,6 @@
 	return ret;
 }
 
-static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
-{
-	if (!sev_es_enabled)
-		return -ENOTTY;
-
-	to_kvm_svm(kvm)->sev_info.es_active = true;
-
-	return sev_guest_init(kvm, argp);
-}
-
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
 	struct sev_data_activate *data;
@@ -442,6 +434,7 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 		page_virtual = kmap_atomic(pages[i]);
 		clflush_cache_range(page_virtual, PAGE_SIZE);
 		kunmap_atomic(page_virtual);
+		cond_resched();
 	}
 }
 
@@ -585,42 +578,61 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	return 0;
 }
 
-static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
+				    int *error)
 {
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 	struct sev_data_launch_update_vmsa *vmsa;
-	int i, ret;
-
-	if (!sev_es_guest(kvm))
-		return -ENOTTY;
+	struct vcpu_svm *svm = to_svm(vcpu);
+	int ret;
 
 	vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
 	if (!vmsa)
 		return -ENOMEM;
 
-	for (i = 0; i < kvm->created_vcpus; i++) {
-		struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+	/* Perform some pre-encryption checks against the VMSA */
+	ret = sev_es_sync_vmsa(svm);
+	if (ret)
+		goto e_free;
 
-		/* Perform some pre-encryption checks against the VMSA */
-		ret = sev_es_sync_vmsa(svm);
-		if (ret)
-			goto e_free;
+	/*
+	 * The LAUNCH_UPDATE_VMSA command will perform in-place
+	 * encryption of the VMSA memory content (i.e. it will write
+	 * the same memory region with the guest's key), so invalidate
+	 * it first.
+	 */
+	clflush_cache_range(svm->vmsa, PAGE_SIZE);
 
-		/*
-		 * The LAUNCH_UPDATE_VMSA command will perform in-place
-		 * encryption of the VMSA memory content (i.e it will write
-		 * the same memory region with the guest's key), so invalidate
-		 * it first.
-		 */
-		clflush_cache_range(svm->vmsa, PAGE_SIZE);
+	vmsa->handle = sev->handle;
+	vmsa->address = __sme_pa(svm->vmsa);
+	vmsa->len = PAGE_SIZE;
+	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa, error);
+
+e_free:
+	kfree(vmsa);
+	return ret;
+}
+
+static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
+{
+
+	struct kvm_vcpu *vcpu;
+	int i, ret;
+
+	if (!sev_es_guest(kvm))
+		return -ENOTTY;
 
-		vmsa->handle = sev->handle;
-		vmsa->address = __sme_pa(svm->vmsa);
-		vmsa->len = PAGE_SIZE;
-		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
-				    &argp->error);
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct vcpu_svm *svm = to_svm(vcpu);
+
+		ret = mutex_lock_killable(&vcpu->mutex);
 		if (ret)
-			goto e_free;
+			return ret;
+
+		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);
+		mutex_unlock(&vcpu->mutex);
+		if (ret)
+			return ret;
 
 		/*
 		 * SEV-ES guests maintain an encrypted version of their FPU
@@ -629,11 +641,9 @@
 		 * do xsave/xrstor on it.
 		 */
 		fpstate_set_confidential(&svm->vcpu.arch.guest_fpu);
-		svm->vcpu.arch.guest_state_protected = true;
+		vcpu->arch.guest_state_protected = true;
 	}
 
-e_free:
-	kfree(vmsa);
 	return ret;
 }
 
@@ -1091,12 +1101,15 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 	mutex_lock(&kvm->lock);
 
 	switch (sev_cmd.id) {
+	case KVM_SEV_ES_INIT:
+		if (!sev_es_enabled) {
+			r = -ENOTTY;
+			goto out;
+		}
+		fallthrough;
 	case KVM_SEV_INIT:
 		r = sev_guest_init(kvm, &sev_cmd);
 		break;
-	case KVM_SEV_ES_INIT:
-		r = sev_es_guest_init(kvm, &sev_cmd);
-		break;
 	case KVM_SEV_LAUNCH_START:
 		r = sev_launch_start(kvm, &sev_cmd);
 		break;
@@ -1335,8 +1348,11 @@ void __init sev_hardware_setup(void)
 		goto out;
 
 	sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
-	if (!sev_reclaim_asid_bitmap)
+	if (!sev_reclaim_asid_bitmap) {
+		bitmap_free(sev_asid_bitmap);
+		sev_asid_bitmap = NULL;
 		goto out;
+	}
 
 	pr_info("%s supported: %u ASIDs\n",
 		is_x86_vendor_hygon() ? "CSV" : "SEV", max_sev_asid - min_sev_asid + 1);
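The sev_launch_update_vmsa() rework above follows a common kernel
pattern: take one vCPU's mutex at a time, do the per-vCPU work, drop the
mutex, and bail out on the first failure with no lock held. Below is a
stand-alone model of that loop, with pthread mutexes standing in for
vcpu->mutex (mutex_lock_killable() can additionally fail if the caller
is killed, which the model glosses over); all names are illustrative.

#include <pthread.h>
#include <stdio.h>

#define NR_VCPUS 4

struct vcpu {
    pthread_mutex_t mutex;
    int state_protected;
};

static struct vcpu vcpus[NR_VCPUS];

static int update_one_vmsa(struct vcpu *v)
{
    v->state_protected = 1;  /* stands in for the LAUNCH_UPDATE_VMSA work */
    return 0;
}

static int update_all_vmsas(void)
{
    for (int i = 0; i < NR_VCPUS; i++) {
        struct vcpu *v = &vcpus[i];
        int ret;

        /* mutex_lock_killable() in the kernel; a plain lock here. */
        pthread_mutex_lock(&v->mutex);
        ret = update_one_vmsa(v);
        pthread_mutex_unlock(&v->mutex);
        if (ret)
            return ret;  /* lock already dropped, safe to bail out */
    }
    return 0;
}

int main(void)
{
    for (int i = 0; i < NR_VCPUS; i++)
        pthread_mutex_init(&vcpus[i].mutex, NULL);
    printf("update_all_vmsas() = %d\n", update_all_vmsas());
    return 0;
}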
"CSV" : "SEV", max_sev_asid - min_sev_asid + 1); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9e2e7bc1a6629b17ee76a32fe888f70b3628a99..6a152ae6b46f8422601f43b7bba19e64f052f9e3 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1526,6 +1526,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) to_svm(vcpu)->vmcb->save.rflags = rflags; } +static bool svm_get_if_flag(struct kvm_vcpu *vcpu) +{ + struct vmcb *vmcb = to_svm(vcpu)->vmcb; + + return sev_es_guest(vcpu->kvm) + ? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK + : kvm_get_rflags(vcpu) & X86_EFLAGS_IF; +} + static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { switch (reg) { @@ -1746,15 +1755,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_svm *svm = to_svm(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif @@ -3482,14 +3493,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) if (!gif_set(svm)) return true; - if (sev_es_guest(svm->vcpu.kvm)) { - /* - * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask - * bit to determine the state of the IF flag. - */ - if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK)) - return true; - } else if (is_guest_mode(vcpu)) { + if (is_guest_mode(vcpu)) { /* As long as interrupts are being delivered... */ if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) ? 
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 46352c968ac862fc1d5148c837b7789cf070297d..9fc665b7003fdc66f9e5b686656de073ca400e44 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -237,7 +237,7 @@ static inline bool sev_es_guest(struct kvm *kvm)
 #ifdef CONFIG_KVM_AMD_SEV
 	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
-	return sev_guest(kvm) && sev->es_active;
+	return sev->es_active && !WARN_ON_ONCE(!sev->active);
 #else
 	return false;
 #endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b11a96349cd3e397ecc6961c6c12400e9d98cd1e..843f1d30f95664d480e634cb9238785390e11b31 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1565,6 +1565,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	vmx->emulation_required = emulation_required(vcpu);
 }
 
+static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
+{
+	return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -8193,6 +8198,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.get_if_flag = vmx_get_if_flag,
 
 	.tlb_flush_all = vmx_flush_tlb_all,
 	.tlb_flush_current = vmx_flush_tlb_current,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index af6c5d98bdc51ed927ded093ec895b0cd733bd1e..fc8d00fd3a26c7e8cfa8556e25e18989790d37a6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8867,13 +8867,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * if_flag is obsolete and useless, so do not bother
-	 * setting it for SEV-ES guests. Userspace can just
-	 * use kvm_run->ready_for_interrupt_injection.
-	 */
-	kvm_run->if_flag = !vcpu->arch.guest_state_protected
-		&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+	kvm_run->if_flag = kvm_x86_ops.get_if_flag(vcpu);
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
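With get_if_flag in the ops table, post_kvm_run_save() above needs no
vendor- or SEV-specific checks. Below is a toy version of that dispatch
pattern, with stand-in structs rather than the real kvm_x86_ops layout:

#include <stdbool.h>
#include <stdio.h>

struct vcpu;

struct x86_ops {
    bool (*get_if_flag)(struct vcpu *v);
};

struct vcpu {
    const struct x86_ops *ops;
    bool if_flag;
};

/* Two backends; each would derive IF in its own vendor-specific way. */
static bool vmx_like_get_if_flag(struct vcpu *v) { return v->if_flag; }
static bool svm_like_get_if_flag(struct vcpu *v) { return v->if_flag; }

static const struct x86_ops vmx_ops = { .get_if_flag = vmx_like_get_if_flag };
static const struct x86_ops svm_ops = { .get_if_flag = svm_like_get_if_flag };

/* The common code never looks at vendor details, only the op. */
static void run_save(struct vcpu *v)
{
    printf("if_flag = %d\n", v->ops->get_if_flag(v));
}

int main(void)
{
    struct vcpu a = { &vmx_ops, true }, b = { &svm_ops, false };
    run_save(&a);
    run_save(&b);
    return 0;
}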
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 6ed542e310adcf63ce7ff6e2ecabaa3b1c14a8c3..ac0c3b10142f7c1fc34ee4adb966f208fe7231a0 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }
 
-unsigned long insn_get_effective_ip(struct pt_regs *regs)
+int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;
 
@@ -1430,10 +1430,12 @@
 	if (!user_64bit_mode(regs)) {
 		seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
 		if (seg_base == -1L)
-			return 0;
+			return -EINVAL;
 	}
 
-	return seg_base + regs->ip;
+	*ip = seg_base + regs->ip;
+
+	return 0;
 }
 
 /**
@@ -1455,8 +1457,7 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 	unsigned long ip;
 	int not_copied;
 
-	ip = insn_get_effective_ip(regs);
-	if (!ip)
+	if (insn_get_effective_ip(regs, &ip))
 		return 0;
 
 	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
@@ -1484,8 +1485,7 @@ int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_IN
 	unsigned long ip;
 	int not_copied;
 
-	ip = insn_get_effective_ip(regs);
-	if (!ip)
+	if (insn_get_effective_ip(regs, &ip))
 		return 0;
 
 	not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
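Finally, the insn_get_effective_ip() change above replaces the old
"return 0 on error" convention, which could not distinguish failure from
a legitimate effective IP of 0, with an error code plus out-parameter.
A small user-space illustration of the new convention, with invented
function names (the real function derives seg_base itself):

#include <errno.h>
#include <stdio.h>

static int effective_ip(unsigned long ip_reg, long seg_base,
                        unsigned long *ip)
{
    if (seg_base == -1L)
        return -EINVAL;      /* failure is now unambiguous */

    *ip = seg_base + ip_reg; /* even 0 is a valid result   */
    return 0;
}

static int fetch(unsigned long ip_reg, long seg_base)
{
    unsigned long ip;

    if (effective_ip(ip_reg, seg_base, &ip))
        return 0;            /* mirrors the callers above */

    printf("fetching from %#lx\n", ip);
    return 1;
}

int main(void)
{
    fetch(0x1000, 0x2000);   /* succeeds */
    fetch(0x1000, -1L);      /* fails    */
    return 0;
}

Callers only consume *ip when the return value is 0, which is exactly
the shape the traps.c and insn-eval.c call sites take in the hunks
above.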