p***@xen.org
2018-11-04 04:59:08 UTC
commit 2442519adf57344e4d0dc5fdc3bf1d4feae50824
Author: Andrew Cooper <***@citrix.com>
AuthorDate: Thu Oct 4 16:36:35 2018 +0000
Commit: Andrew Cooper <***@citrix.com>
CommitDate: Wed Oct 17 17:50:14 2018 +0100
x86/svm: Fix svm_update_guest_efer() for domains using shadow paging
When using shadow paging, EFER.NX is a Xen controlled bit, and is required by
the shadow pagefault handler to distinguish instruction fetches from data
accesses.
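Architecturally, the I/D bit (bit 4, PFEC_insn_fetch) of a #PF error code is only reported when NX or SMEP is enabled; with both clear, an instruction fetch fault looks identical to a data read. A minimal standalone sketch of this behaviour (not Xen's shadow walker; the helper name is illustrative):

#include <stdbool.h>
#include <stdio.h>

#define PFEC_insn_fetch (1u << 4)  /* I/D bit of the #PF error code */

/* Illustrative model of the error code hardware delivers. */
static unsigned int hw_pfec(bool insn_fetch, bool efer_nx, bool cr4_smep)
{
    /* The I/D bit is only reported when NX or SMEP is enabled. */
    if ( insn_fetch && (efer_nx || cr4_smep) )
        return PFEC_insn_fetch;

    return 0;
}

int main(void)
{
    /* NX and SMEP both clear: the fetch is reported as a data access. */
    printf("pfec = %#x\n", hw_pfec(true, false, false)); /* 0 */

    /* With Xen's EFER.NX forced set, the fetch is correctly flagged. */
    printf("pfec = %#x\n", hw_pfec(true, true, false));  /* 0x10 */

    return 0;
}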
This can be observed in a guest which has NX and SMEP clear but SMAP active,
by attempting to execute code on a user mapping. The first attempt to build
the target shadow will #PF, and so is handled by the shadow code, but when
walking the guest pagetables, the lack of PFEC_insn_fetch being signalled
causes the
shadow code to mistake the instruction fetch for a data fetch, and believe
that it is a real guest fault. As a result, the guest receives #PF[-d-srP]
for an action which should complete successfully.
The suspicious-looking gymnastics with LME is actually a subtle corner case
with shadow paging. When dropping out of Long Mode, a guest's choice of LME
and Xen's choice of CR0.PG cause hardware to operate in Long Mode, but the
shadow code to operate in 2-on-3 mode.
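The architectural rule underneath this is that long mode is active only while EFER.LME, CR0.PG and CR4.PAE are all set. A toy model (illustrative names, not Xen code) shows how the guest's view and the hardware's view diverge, and how clearing LME reconciles them:

#include <stdbool.h>
#include <stdio.h>

/* Architectural rule: long mode is active iff LME, CR0.PG and CR4.PAE
 * are all set. */
static bool long_mode_active(bool lme, bool cr0_pg, bool cr4_pae)
{
    return lme && cr0_pg && cr4_pae;
}

int main(void)
{
    bool guest_lme = true;  /* guest dropping out of Long Mode */

    /* Guest's view: its own CR0.PG is clear, so no long mode. */
    printf("guest:    %d\n", long_mode_active(guest_lme, false, true)); /* 0 */

    /* Hardware's view: Xen's CR0.PG stays set under shadow paging. */
    printf("hardware: %d\n", long_mode_active(guest_lme, true, true));  /* 1 */

    /* Clearing LME while LMA is clear, as the patch does, brings the
     * two views back into agreement. */
    printf("fixed:    %d\n", long_mode_active(false, true, true));      /* 0 */

    return 0;
}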
In addition to describing this corner case on the SVM side, extend the comment
for the same fix on the VT-x side. (I have a suspicion that I've just worked
out why VT-x doesn't tolerate LMA != LME when Unrestricted Guest is clear.)
Signed-off-by: Andrew Cooper <***@citrix.com>
Reviewed-by: Kevin Tian <***@intel.com>
Reviewed-by: Boris Ostrovsky <***@oracle.com>
---
xen/arch/x86/hvm/svm/svm.c | 31 +++++++++++++++++++++++++------
xen/arch/x86/hvm/vmx/vmx.c | 6 ++++++
2 files changed, 31 insertions(+), 6 deletions(-)
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index fa18cc07fd..dd0aca4f53 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -638,13 +638,32 @@ void svm_update_guest_cr(struct vcpu *v, unsigned int cr, unsigned int flags)
 static void svm_update_guest_efer(struct vcpu *v)
 {
     struct vmcb_struct *vmcb = v->arch.hvm.svm.vmcb;
-    bool lma = v->arch.hvm.guest_efer & EFER_LMA;
-    uint64_t new_efer;
+    unsigned long guest_efer = v->arch.hvm.guest_efer,
+        xen_efer = read_efer();
 
-    new_efer = (v->arch.hvm.guest_efer | EFER_SVME) & ~EFER_LME;
-    if ( lma )
-        new_efer |= EFER_LME;
-    vmcb_set_efer(vmcb, new_efer);
+    if ( paging_mode_shadow(v->domain) )
+    {
+        /* EFER.NX is a Xen-owned bit and is not under guest control. */
+        guest_efer &= ~EFER_NX;
+        guest_efer |= xen_efer & EFER_NX;
+
+        /*
+         * CR0.PG is a Xen-owned bit, and remains set even when the guest has
+         * logically disabled paging.
+         *
+         * LMA was calculated using the guest CR0.PG setting, but LME needs
+         * clearing to avoid interacting with Xen's CR0.PG setting.  As writes
+         * to CR0 are intercepted, it is safe to leave LME clear at this
+         * point, and fix up both LME and LMA when CR0.PG is set.
+         */
+        if ( !(guest_efer & EFER_LMA) )
+            guest_efer &= ~EFER_LME;
+    }
+
+    /* SVME must remain set in non-root mode. */
+    guest_efer |= EFER_SVME;
+
+    vmcb_set_efer(vmcb, guest_efer);
 
     ASSERT(nestedhvm_enabled(v->domain) ||
            !(v->arch.hvm.guest_efer & EFER_SVME));
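For reference, the shadow-paging composition above can be restated as a self-contained harness (hypothetical names; the EFER bit positions are architectural) checking the three invariants the hunk establishes:

#include <assert.h>

#define EFER_LME  (1ul <<  8)
#define EFER_LMA  (1ul << 10)
#define EFER_NX   (1ul << 11)
#define EFER_SVME (1ul << 12)

/* Mirror of the hunk above, lifted out for illustration. */
static unsigned long compose_efer(unsigned long guest_efer,
                                  unsigned long xen_efer, int shadow)
{
    if ( shadow )
    {
        /* NX tracks Xen's value, not the guest's. */
        guest_efer = (guest_efer & ~EFER_NX) | (xen_efer & EFER_NX);

        /* Hide LME while the guest's paging is logically off. */
        if ( !(guest_efer & EFER_LMA) )
            guest_efer &= ~EFER_LME;
    }

    /* SVME must remain set in non-root mode. */
    return guest_efer | EFER_SVME;
}

int main(void)
{
    /* Guest with NX clear and out of long mode; Xen with NX set. */
    unsigned long efer = compose_efer(EFER_LME, EFER_NX, 1);

    assert(efer & EFER_NX);      /* Xen's NX wins under shadow paging */
    assert(!(efer & EFER_LME));  /* LME hidden while LMA is clear */
    assert(efer & EFER_SVME);    /* SVME always forced */

    return 0;
}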
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index c85aa62ce7..d16129fb59 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -1654,6 +1654,12 @@ static void vmx_update_guest_efer(struct vcpu *v)
          * not tolerate the LME and LMA settings being different.  As writes
          * to CR0 are intercepted, it is safe to leave LME clear at this
          * point, and fix up both LME and LMA when CR0.PG is set.
+         *
+         * Furthermore, when using shadow pagetables (subsumed by the
+         * Unrestricted Guest check), CR0.PG is a Xen-owned bit, and remains
+         * set even when the guest has logically disabled paging.  LMA was
+         * calculated using the guest CR0.PG setting, but LME needs clearing
+         * to avoid interacting with Xen's CR0.PG setting.
          */
         if ( !(guest_efer & EFER_LMA) )
             guest_efer &= ~EFER_LME;
--
generated by git-patchbot for /home/xen/git/xen.git#master