From 44fe2b2f023ba1e377ed9d89c2dca0290a7681c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Oct 2021 13:18:34 +0200 Subject: [PATCH 01/10] dma-direct: factor out dma_set_{de,en}crypted helpers mainline inclusion from mainline-v5.15 commit 4d0564785bb03841e4b5c5b31aa4ecd1eb0d01bb category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d0564785bb03841e4b5c5b31aa4ecd1eb0d01bb --------------------------- Factor out helpers the make dealing with memory encryption a little less cumbersome. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Signed-off-by: Zhiguang Ni --- kernel/dma/direct.c | 55 ++++++++++++++++++++++----------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 9f08d81ce1f8..c24ecbd41654 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -82,6 +82,20 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } +static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) +{ + if (!force_dma_unencrypted(dev)) + return 0; + return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size)); +} + +static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) +{ + if (!force_dma_unencrypted(dev)) + return 0; + return set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); +} + static void __dma_direct_free_pages(struct device *dev, struct page *page, size_t size) { @@ -161,7 +175,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, { struct page *page; void *ret; - int err; size = PAGE_ALIGN(size); if (attrs & DMA_ATTR_NO_WARN) @@ -217,12 +230,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, __builtin_return_address(0)); if (!ret) goto out_free_pages; - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - PFN_UP(size)); - if (err) - goto out_free_pages; - } + if (dma_set_decrypted(dev, ret, size)) + goto out_free_pages; memset(ret, 0, size); goto done; } @@ -239,12 +248,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, } ret = page_address(page); - if (force_dma_unencrypted(dev)) { - err = set_memory_decrypted((unsigned long)ret, - PFN_UP(size)); - if (err) - goto out_leak_pages; - } + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; memset(ret, 0, size); @@ -260,13 +265,10 @@ void *dma_direct_alloc(struct device *dev, size_t size, return ret; out_encrypt_pages: - if (force_dma_unencrypted(dev)) { - err = set_memory_encrypted((unsigned long)page_address(page), - PFN_UP(size)); - /* If memory cannot be re-encrypted, it must be leaked */ - if (err) - return NULL; - } + /* If memory cannot be re-encrypted, it must be leaked */ + if (dma_set_encrypted(dev, page_address(page), size)) + return NULL; + out_free_pages: __dma_direct_free_pages(dev, page, size); return NULL; @@ -296,8 +298,7 @@ void dma_direct_free(struct device *dev, size_t size, dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; - if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)cpu_addr, PFN_UP(size)); + dma_set_encrypted(dev, cpu_addr, size); if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) vunmap(cpu_addr); @@ -333,10 +334,9 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, } ret = page_address(page); - if (force_dma_unencrypted(dev)) { - if (set_memory_decrypted((unsigned long)ret, PFN_UP(size))) - goto out_leak_pages; - } + if (dma_set_decrypted(dev, ret, size)) + goto out_leak_pages; + memset(ret, 0, size); *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; @@ -358,8 +358,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - if (force_dma_unencrypted(dev)) - set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); + dma_set_encrypted(dev, vaddr, size); __dma_direct_free_pages(dev, page, size); } -- Gitee From 400c6be49746855e238be37397f27c2962ea0a11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 9 Nov 2021 15:41:01 +0100 Subject: [PATCH 02/10] dma-direct: always leak memory that can't be re-encrypted mainline inclusion from mainline-v5.15 commit a90cf30437489343b8386ae87b4827b6d6c3ed50 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a90cf30437489343b8386ae87b4827b6d6c3ed50 --------------------------- We must never let unencrypted memory go back into the general page pool. So if we fail to set it back to encrypted when freeing DMA memory, leak the memory instead and warn the user. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Signed-off-by: Zhiguang Ni --- kernel/dma/direct.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index c24ecbd41654..a8e92bd4242f 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -91,9 +91,15 @@ static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size) static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size) { + int ret; + if (!force_dma_unencrypted(dev)) return 0; - return set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); + + ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size)); + if (ret) + pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n"); + return ret; } static void __dma_direct_free_pages(struct device *dev, struct page *page, @@ -265,7 +271,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, return ret; out_encrypt_pages: - /* If memory cannot be re-encrypted, it must be leaked */ if (dma_set_encrypted(dev, page_address(page), size)) return NULL; @@ -298,7 +303,8 @@ void dma_direct_free(struct device *dev, size_t size, dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) return; - dma_set_encrypted(dev, cpu_addr, size); + if (dma_set_encrypted(dev, cpu_addr, size)) + return; if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr)) vunmap(cpu_addr); @@ -358,7 +364,8 @@ void dma_direct_free_pages(struct device *dev, size_t size, dma_free_from_pool(dev, vaddr, size)) return; - dma_set_encrypted(dev, vaddr, size); + if (dma_set_encrypted(dev, vaddr, size)) + return; __dma_direct_free_pages(dev, page, size); } -- Gitee From ea6b8bf4d8a578cfe404d8b20fec8785741249ed Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 7 Dec 2021 15:33:02 -0800 Subject: [PATCH 03/10] crypto: ccp - Add SEV_INIT rc error logging on init mainline inclusion from mainline-v5.17 commit c8341ac62bed9258746a5c7fb8a76d88809ecd1f category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c8341ac62bed9258746a5c7fb8a76d88809ecd1f --------------------------- Currently only the firmware error code is printed. This is incomplete and also incorrect as error cases exists where the firmware is never called and therefore does not set an error code. Signed-off-by: Peter Gonda Reviewed-by: Marc Orr Acked-by: David Rientjes Acked-by: Tom Lendacky Acked-by: Brijesh Singh Cc: Tom Lendacky Cc: Brijesh Singh Cc: Marc Orr Cc: Joerg Roedel Cc: Herbert Xu Cc: David Rientjes Cc: John Allen Cc: "David S. Miller" Cc: Paolo Bonzini Cc: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu Signed-off-by: Zhiguang Ni --- drivers/crypto/ccp/sev-dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e1908c99000a..a70c93079772 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -1176,7 +1176,8 @@ void sev_pci_init(void) } if (rc) { - dev_err(sev->dev, "SEV: failed to INIT error %#x\n", error); + dev_err(sev->dev, "SEV: failed to INIT error %#x, rc %d\n", + error, rc); return; } -- Gitee From acb0b3f3a84aa7a9a7679ff262f8180692c78364 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 25 Jan 2022 16:20:01 +0300 Subject: [PATCH 04/10] swiotlb: do not zero buffer in set_memory_decrypted() mainline inclusion from mainline-v5.18 commit dfcf2e017f5bb928094952d5d56d3566d3d07ba7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dfcf2e017f5bb928094952d5d56d3566d3d07ba7 --------------------------- For larger TDX VM, memset() after set_memory_decrypted() in swiotlb_update_mem_attributes() takes substantial portion of boot time. Zeroing doesn't serve any functional purpose. Malicious VMM can mess with decrypted/shared buffer at any point. Remove the memset(). Signed-off-by: Kirill A. Shutemov Acked-by: Tom Lendacky Signed-off-by: Christoph Hellwig Signed-off-by: Zhiguang Ni --- kernel/dma/swiotlb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index e321c023ddc2..12fd8a316493 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -200,7 +200,6 @@ void __init swiotlb_update_mem_attributes(void) vaddr = phys_to_virt(io_tlb_start); bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); } int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -- Gitee From 89bb9ae2f8d54d23b1e165fae17cca2561e538cb Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:13 +0000 Subject: [PATCH 05/10] KVM: SVM: Don't intercept #GP for SEV guests mainline inclusion from mainline-v5.17 commit 0b0be065b7563ac708aaa9f69dd4941c80b3446d category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b0be065b7563ac708aaa9f69dd4941c80b3446d --------------------------- Never intercept #GP for SEV guests as reading SEV guest private memory will return cyphertext, i.e. emulating on #GP can't work as intended. Cc: stable@vger.kernel.org Cc: Tom Lendacky Cc: Brijesh Singh Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-4-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/kvm/svm/svm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d9e2e7bc1a66..32b3e3339816 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1158,9 +1158,10 @@ static void init_vmcb(struct vcpu_svm *svm) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. + * as VMware does. Don't intercept #GP for SEV guests as KVM can't + * decrypt guest memory to decode the faulting instruction. */ - if (enable_vmware_backdoor) + if (enable_vmware_backdoor && !sev_guest(svm->vcpu.kvm)) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); -- Gitee From 2315ed768487b2dfc8609668f9d520bc50896a46 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:14 +0000 Subject: [PATCH 06/10] KVM: SVM: Explicitly require DECODEASSISTS to enable SEV support mainline inclusion from mainline-v5.16 commit c532f2903b69b775d27016511fbe29a14a098f95 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=c532f2903b69b775d27016511fbe29a14a098f95 --------------------------- Add a sanity check on DECODEASSIST being support if SEV is supported, as KVM cannot read guest private memory and thus relies on the CPU to provide the instruction byte stream on #NPF for emulation. The intent of the check is to document the dependency, it should never fail in practice as producing hardware that supports SEV but not DECODEASSISTS would be non-sensical. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-5-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/kvm/svm/sev.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index babb900e49d8..edf316adde13 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1303,8 +1303,13 @@ void __init sev_hardware_setup(void) if (!IS_ENABLED(CONFIG_KVM_AMD_SEV) || !sev_enabled || !npt_enabled) goto out; - /* Does the CPU support SEV? */ - if (!boot_cpu_has(X86_FEATURE_SEV)) + /* + * SEV must obviously be supported in hardware. Sanity check that the + * CPU supports decode assists, which is mandatory for SEV guests to + * support instruction emulation. + */ + if (!boot_cpu_has(X86_FEATURE_SEV) || + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS))) goto out; /* Retrieve SEV CPUID information */ -- Gitee From 52752c776a091709cc2af5edd3b4ad3eed0d723b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:15 +0000 Subject: [PATCH 07/10] KVM: x86: Pass emulation type to can_emulate_instruction() mainline inclusion from mainline-v5.17 commit 4d31d9eff244e2631f028d658979ccbbdbcb423b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d31d9eff244e2631f028d658979ccbbdbcb423b --------------------------- Pass the emulation type to kvm_x86_ops.can_emulate_insutrction() so that a future commit can harden KVM's SEV support to WARN on emulation scenarios that should never happen. No functional change intended. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-6-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/svm/svm.c | 3 ++- arch/x86/kvm/vmx/vmx.c | 7 ++++--- arch/x86/kvm/x86.c | 11 +++++++++-- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ce1792de39ca..c32200f2e70f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1431,7 +1431,8 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 32b3e3339816..4e548ca2f002 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4348,7 +4348,8 @@ static void enable_smi_window(struct kvm_vcpu *vcpu) } } -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { bool smep, smap, is_user; unsigned long cr4; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b11a96349cd3..15495652e274 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1666,11 +1666,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does - * not point tthe failing instruction, and even if it did, the code + * not point at the failing instruction, and even if it did, the code * stream is inaccessible. Inject #UD instead of exiting to userspace * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). @@ -5760,7 +5761,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; - if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) return 1; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af6c5d98bdc5..53a954c9c3b3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6687,6 +6687,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) +{ + return kvm_x86_ops.can_emulate_instruction(vcpu, emul_type, + insn, insn_len); +} + int handle_ud(struct kvm_vcpu *vcpu) { static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; @@ -6694,7 +6701,7 @@ int handle_ud(struct kvm_vcpu *vcpu) char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; - if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, NULL, 0))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) return 1; if (force_emulation_prefix && @@ -7968,7 +7975,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool writeback = true; bool write_fault_to_spt; - if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, insn, insn_len))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len))) return 1; vcpu->arch.l1tf_flush_l1d = true; -- Gitee From 2dccff07e5513d2f986e3126d8f0ff18ca1ed130 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:16 +0000 Subject: [PATCH 08/10] KVM: SVM: WARN if KVM attempts emulation on #UD or #GP for SEV guests mainline inclusion from mainline-v5.17 commit 132627c64d94b1561ba5a444824e46c9f84c3d5b category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=132627c64d94b1561ba5a444824e46c9f84c3d5b --------------------------- WARN if KVM attempts to emulate in response to #UD or #GP for SEV guests, i.e. if KVM intercepts #UD or #GP, as emulation on any fault except #NPF is impossible since KVM cannot read guest private memory to get the code stream, and the CPU's DecodeAssists feature only provides the instruction bytes on #NPF. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-7-seanjc@google.com> [Warn on EMULTYPE_TRAP_UD_FORCED according to Liam Merwick's review. - Paolo] Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/kvm/svm/svm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 4e548ca2f002..e2179f62cc30 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4358,6 +4358,11 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, if (!sev_guest(vcpu->kvm)) return true; + /* #UD and #GP should never be intercepted for SEV guests. */ + WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD | + EMULTYPE_TRAP_UD_FORCED | + EMULTYPE_VMWARE_GP)); + /* * When the guest is an SEV-ES guest, emulation is not possible. */ -- Gitee From a4ac83d6d27e553a77138c12ca9d3516ca8131ea Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:17 +0000 Subject: [PATCH 09/10] KVM: SVM: Inject #UD on attempted emulation for SEV guest w/o insn buffer mainline inclusion from mainline-v5.17 commit 04c40f344defdbd842d8a64fcfb47ef74b39ef4e category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=04c40f344defdbd842d8a64fcfb47ef74b39ef4e --------------------------- Inject #UD if KVM attempts emulation for an SEV guests without an insn buffer and instruction decoding is required. The previous behavior of allowing emulation if there is no insn buffer is undesirable as doing so means KVM is reading guest private memory and thus decoding cyphertext, i.e. is emulating garbage. The check was previously necessary as the emulation type was not provided, i.e. SVM needed to allow emulation to handle completion of emulation after exiting to userspace to handle I/O. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-8-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/kvm/svm/svm.c | 88 ++++++++++++++++++++++++++---------------- 1 file changed, 55 insertions(+), 33 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e2179f62cc30..dc7bda454a2a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4369,48 +4369,70 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, if (sev_es_guest(vcpu->kvm)) return false; + /* + * Emulation is possible if the instruction is already decoded, e.g. + * when completing I/O after returning from userspace. + */ + if (emul_type & EMULTYPE_NO_DECODE) + return true; + + /* + * Emulation is possible for SEV guests if and only if a prefilled + * buffer containing the bytes of the intercepted instruction is + * available. SEV guest memory is encrypted with a guest specific key + * and cannot be decrypted by KVM, i.e. KVM would read cyphertext and + * decode garbage. + * + * Inject #UD if KVM reached this point without an instruction buffer. + * In practice, this path should never be hit by a well-behaved guest, + * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path + * is still theoretically reachable, e.g. via unaccelerated fault-like + * AVIC access, and needs to be handled by KVM to avoid putting the + * guest into an infinite loop. Injecting #UD is somewhat arbitrary, + * but its the least awful option given lack of insight into the guest. + */ + if (unlikely(!insn)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } + + /* + * Emulate for SEV guests if the insn buffer is not empty. The buffer + * will be empty if the DecodeAssist microcode cannot fetch bytes for + * the faulting instruction because the code fetch itself faulted, e.g. + * the guest attempted to fetch from emulated MMIO or a guest page + * table used to translate CS:RIP resides in emulated MMIO. + */ + if (likely(insn_len)) + return true; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: - * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is - * possible that CPU microcode implementing DecodeAssist will fail - * to read bytes of instruction which caused #NPF. In this case, - * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly - * return 0 instead of the correct guest instruction bytes. - * - * This happens because CPU microcode reading instruction bytes - * uses a special opcode which attempts to read data using CPL=0 - * priviledges. The microcode reads CS:RIP and if it hits a SMAP - * fault, it gives up and returns no instruction bytes. + * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail to + * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly + * be '0'. This happens because microcode reads CS:RIP using a _data_ + * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode + * gives up and does not fill the instruction bytes buffer. * * Detection: - * We reach here in case CPU supports DecodeAssist, raised #NPF and - * returned 0 in GuestIntrBytes field of the VMCB. - * First, errata can only be triggered in case vCPU CR4.SMAP=1. - * Second, if vCPU CR4.SMEP=1, errata could only be triggered - * in case vCPU CPL==3 (Because otherwise guest would have triggered - * a SMEP fault instead of #NPF). - * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. - * As most guests enable SMAP if they have also enabled SMEP, use above - * logic in order to attempt minimize false-positive of detecting errata - * while still preserving all cases semantic correctness. + * KVM reaches this point if the VM is an SEV guest, the CPU supports + * DecodeAssist, a #NPF was raised, KVM's page fault handler triggered + * emulation (e.g. for MMIO), and the CPU returned 0 in GuestIntrBytes + * field of the VMCB. * - * Workaround: - * To determine what instruction the guest was executing, the hypervisor - * will have to decode the instruction at the instruction pointer. + * This does _not_ mean that the erratum has been encountered, as the + * DecodeAssist will also fail if the load for CS:RIP hits a legitimate + * #PF, e.g. if the guest attempt to execute from emulated MMIO and + * encountered a reserved/not-present #PF. * - * In non SEV guest, hypervisor will be able to read the guest - * memory to decode the instruction pointer when insn_len is zero - * so we return true to indicate that decoding is possible. - * - * But in the SEV guest, the guest memory is encrypted with the - * guest specific key and hypervisor will not be able to decode the - * instruction pointer so we will not able to workaround it. Lets - * print the error and request to kill the guest. + * To reduce the likelihood of false positives, take action if and only + * if CR4.SMAP=1 (obviously required to hit the erratum) and CR4.SMEP=0 + * or CPL=3. If SMEP=1 and CPL!=3, the erratum cannot have been hit as + * the guest would have encountered a SMEP violation #PF, not a #NPF. */ - if (likely(!insn || insn_len)) - return true; cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; -- Gitee From 3a743004dc447ad4a40b7ff615ce671db5fc0da2 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 20 Jan 2022 01:07:18 +0000 Subject: [PATCH 10/10] KVM: SVM: Don't apply SEV+SMAP workaround on code fetch or PT access mainline inclusion from mainline-v5.17 commit 3280cc22aea74d78ebbea277ff8bc8d593582de3 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ID7GTW CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=3280cc22aea74d78ebbea277ff8bc8d593582de3 --------------------------- Resume the guest instead of synthesizing a triple fault shutdown if the instruction bytes buffer is empty due to the #NPF being on the code fetch itself or on a page table access. The SMAP errata applies if and only if the code fetch was successful and ucode's subsequent data read from the code page encountered a SMAP violation. In practice, the guest is likely hosed either way, but crashing the guest on a code fetch to emulated MMIO is technically wrong according to the behavior described in the APM. Signed-off-by: Sean Christopherson Reviewed-by: Liam Merwick Message-Id: <20220120010719.711476-9-seanjc@google.com> Signed-off-by: Paolo Bonzini Signed-off-by: Zhiguang Ni --- arch/x86/kvm/svm/svm.c | 43 +++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index dc7bda454a2a..ad8e3e84cf27 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4353,6 +4353,7 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, { bool smep, smap, is_user; unsigned long cr4; + u64 error_code; /* Emulation is always possible when KVM has access to all guest state. */ if (!sev_guest(vcpu->kvm)) @@ -4417,23 +4418,32 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, * loap uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode * gives up and does not fill the instruction bytes buffer. * - * Detection: - * KVM reaches this point if the VM is an SEV guest, the CPU supports - * DecodeAssist, a #NPF was raised, KVM's page fault handler triggered - * emulation (e.g. for MMIO), and the CPU returned 0 in GuestIntrBytes - * field of the VMCB. + * As above, KVM reaches this point iff the VM is an SEV guest, the CPU + * supports DecodeAssist, a #NPF was raised, KVM's page fault handler + * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the + * GuestIntrBytes field of the VMCB. * * This does _not_ mean that the erratum has been encountered, as the * DecodeAssist will also fail if the load for CS:RIP hits a legitimate * #PF, e.g. if the guest attempt to execute from emulated MMIO and * encountered a reserved/not-present #PF. * - * To reduce the likelihood of false positives, take action if and only - * if CR4.SMAP=1 (obviously required to hit the erratum) and CR4.SMEP=0 - * or CPL=3. If SMEP=1 and CPL!=3, the erratum cannot have been hit as - * the guest would have encountered a SMEP violation #PF, not a #NPF. + * To hit the erratum, the following conditions must be true: + * 1. CR4.SMAP=1 (obviously). + * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot + * have been hit as the guest would have encountered a SMEP + * violation #PF, not a #NPF. + * 3. The #NPF is not due to a code fetch, in which case failure to + * retrieve the instruction bytes is legitimate (see abvoe). + * + * In addition, don't apply the erratum workaround if the #NPF occurred + * while translating guest page tables (see below). */ + error_code = to_svm(vcpu)->vmcb->control.exit_info_1; + if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) + goto resume_guest; + cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; @@ -4457,6 +4467,21 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } +resume_guest: + /* + * If the erratum was not hit, simply resume the guest and let it fault + * again. While awful, e.g. the vCPU may get stuck in an infinite loop + * if the fault is at CPL=0, it's the lesser of all evils. Exiting to + * userspace will kill the guest, and letting the emulator read garbage + * will yield random behavior and potentially corrupt the guest. + * + * Simply resuming the guest is technically not a violation of the SEV + * architecture. AMD's APM states that all code fetches and page table + * accesses for SEV guest are encrypted, regardless of the C-Bit. The + * APM also states that encrypted accesses to MMIO are "ignored", but + * doesn't explicitly define "ignored", i.e. doing nothing and letting + * the guest spin is technically "ignoring" the access. + */ return false; } -- Gitee