vmm(4): improve {rd,wr}msr exit handling for both amd & intel


Dave Voutila-2
The following diff cleans up and improves MSR-related event handling in
vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
mentioned in a previous email to tech@ about fixing support for 9front
guests [1], we found some discrepancies between vmm(4)'s handling on
Intel hosts and AMD hosts.

While the diff has been tested already by abieber@ and brynet@ with
additional review by mlarkin@, I'm looking for additional testers
willing to look for regressions.

This diff specifically improves and standardizes MSR-based exit handling
between Intel and AMD hosts as follows:

1. All RDMSR instructions that cause a vm-exit must be explicitly
   handled (e.g. via emulation) or they result in injecting a #GP
   exception into the guest.

2. All WRMSR instructions that cause a vm-exit and are not explicitly
   handled are ignored (i.e. %rax and %rdx values are not inspected or
   used).
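
Taken together, the two rules boil down to the shape below (a
simplified sketch, not the diff itself: vmm_inject_gp() and the
vc_gueststate fields are real, but the handle_msr_exit() wrapper and
its is_wrmsr flag are purely illustrative):

int
handle_msr_exit(struct vcpu *vcpu, int is_wrmsr, uint64_t insn_length)
{
	/* %rcx holds the MSR number on both rdmsr and wrmsr exits */
	uint64_t *rcx = &vcpu->vc_gueststate.vg_rcx;

	switch (*rcx) {
	/* ... explicitly handled MSRs elided ... */
	default:
		if (!is_wrmsr) {
			/* (1) unhandled RDMSR: inject #GP, %rip unchanged */
			return (vmm_inject_gp(vcpu));
		}
		/* (2) unhandled WRMSR: %rax/%rdx silently discarded */
		break;
	}

	/* otherwise advance %rip past the instruction */
	vcpu->vc_gueststate.vg_rip += insn_length;
	return (0);
}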

As a consequence of change (1) above, the diff adds explicit
handling for the following MSRs:

1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
   host state is not touched. The shadow state is initialized on vcpu
   reset to the same value as the host.

2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
   ignored. This means reads result in vmm(4) setting the guest vcpu's
   %rax and %rdx to 0. (These MSRs are ignored in the same manner by
   other hypervisors like kvm and nvmm.)
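
Since rdmsr/wrmsr move 64-bit MSR values through the %edx:%eax pair,
the PAT shadow amounts to a split on the read side and a reassembly
plus validity check on the write side. Condensed from the hunks below:

/* RDMSR of MSR_CR_PAT: return the shadow, low half in %rax, high in %rdx */
*rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
*rdx = (vcpu->vc_shadow_pat >> 32);

/* WRMSR of MSR_CR_PAT: reassemble, validate, update only the shadow */
val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
if (!vmm_pat_is_valid(val))
	return (vmm_inject_gp(vcpu));	/* invalid PAT entry type => #GP */
vcpu->vc_shadow_pat = val;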

The average user should not see a change in behavior of vmm(4) or
vmd(8). The biggest change is for *Intel* users as this diff changes the
current vmx logic, which was not injecting #GP for unsupported
MSRs. (Your guests were potentially getting garbage results from rdmsr
instructions.)

The folks attempting to host the latest release of 9front as a guest on
AMD hosts should see their guest boot successfully with this diff :-)

-dv

[1] https://marc.info/?l=openbsd-tech&m=161693517121814&w=2


Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.70
diff -u -p -r1.70 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h 8 Apr 2020 07:39:48 -0000 1.70
+++ sys/arch/amd64/include/vmmvar.h 31 Mar 2021 00:15:43 -0000
@@ -936,6 +936,9 @@ struct vcpu {
  paddr_t vc_pvclock_system_gpa;
  uint32_t vc_pvclock_system_tsc_mul;

+ /* Shadowed MSRs */
+ uint64_t vc_shadow_pat;
+
  /* VMX only */
  uint64_t vc_vmx_basic;
  uint64_t vc_vmx_entry_ctls;
Index: sys/arch/amd64/amd64/vmm.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.278
diff -u -p -r1.278 vmm.c
--- sys/arch/amd64/amd64/vmm.c 11 Mar 2021 11:16:55 -0000 1.278
+++ sys/arch/amd64/amd64/vmm.c 31 Mar 2021 00:15:43 -0000
@@ -207,6 +207,7 @@ void svm_set_dirty(struct vcpu *, uint32
 int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
 void vmm_init_pvclock(struct vcpu *, paddr_t);
 int vmm_update_pvclock(struct vcpu *);
+int vmm_pat_is_valid(uint64_t);

 #ifdef VMM_DEBUG
 void dump_vcpu(struct vcpu *);
@@ -3193,6 +3194,9 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
  /* xcr0 power on default sets bit 0 (x87 state) */
  vcpu->vc_gueststate.vg_xcr0 = XCR0_X87 & xsave_mask;

+ /* XXX PAT shadow */
+ vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
 exit:
  /* Flush the VMCS */
  if (vmclear(&vcpu->vc_control_pa)) {
@@ -3584,6 +3588,10 @@ vcpu_init(struct vcpu *vcpu)
  vcpu->vc_state = VCPU_STATE_STOPPED;
  vcpu->vc_vpid = 0;
  vcpu->vc_pvclock_system_gpa = 0;
+
+ /* Shadow PAT MSR, starting with host's value. */
+ vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT);
+
  if (vmm_softc->mode == VMM_MODE_VMX ||
     vmm_softc->mode == VMM_MODE_EPT)
  ret = vcpu_init_vmx(vcpu);
@@ -6257,26 +6265,24 @@ vmx_handle_rdmsr(struct vcpu *vcpu)
  rdx = &vcpu->vc_gueststate.vg_rdx;

  switch (*rcx) {
- case MSR_SMBASE:
- /*
- * 34.15.6.3 - Saving Guest State (SMM)
- *
- * Unsupported, so inject #GP and return without
- * advancing %rip.
- */
+ case MSR_BIOS_SIGN:
+ case MSR_PLATFORM_ID:
+ /* Ignored */
+ *rax = 0;
+ *rdx = 0;
+ break;
+ case MSR_CR_PAT:
+ *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
+ *rdx = (vcpu->vc_shadow_pat >> 32);
+ break;
+ default:
+ /* Unsupported MSRs causes #GP exception, don't advance %rip */
+ DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
+    __func__, *rcx);
  ret = vmm_inject_gp(vcpu);
  return (ret);
  }

- *rax = 0;
- *rdx = 0;
-
-#ifdef VMM_DEBUG
- /* Log the access, to be able to identify unknown MSRs */
- DPRINTF("%s: rdmsr exit, msr=0x%llx, data returned to "
-    "guest=0x%llx:0x%llx\n", __func__, *rcx, *rdx, *rax);
-#endif /* VMM_DEBUG */
-
  vcpu->vc_gueststate.vg_rip += insn_length;

  return (0);
@@ -6442,7 +6448,7 @@ vmx_handle_misc_enable_msr(struct vcpu *
 int
 vmx_handle_wrmsr(struct vcpu *vcpu)
 {
- uint64_t insn_length;
+ uint64_t insn_length, val;
  uint64_t *rax, *rdx, *rcx;
  int ret;

@@ -6460,8 +6466,16 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
  rax = &vcpu->vc_gueststate.vg_rax;
  rcx = &vcpu->vc_gueststate.vg_rcx;
  rdx = &vcpu->vc_gueststate.vg_rdx;
+ val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);

  switch (*rcx) {
+ case MSR_CR_PAT:
+ if (!vmm_pat_is_valid(val)) {
+ ret = vmm_inject_gp(vcpu);
+ return (ret);
+ }
+ vcpu->vc_shadow_pat = val;
+ break;
  case MSR_MISC_ENABLE:
  vmx_handle_misc_enable_msr(vcpu);
  break;
@@ -6508,7 +6522,7 @@ vmx_handle_wrmsr(struct vcpu *vcpu)
 int
 svm_handle_msr(struct vcpu *vcpu)
 {
- uint64_t insn_length;
+ uint64_t insn_length, val;
  uint64_t *rax, *rcx, *rdx;
  struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
  int ret;
@@ -6521,35 +6535,58 @@ svm_handle_msr(struct vcpu *vcpu)
  rdx = &vcpu->vc_gueststate.vg_rdx;

  if (vmcb->v_exitinfo1 == 1) {
- if (*rcx == MSR_EFER) {
+ /* WRMSR */
+ val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
+
+ switch (*rcx) {
+ case MSR_CR_PAT:
+ if (!vmm_pat_is_valid(val)) {
+ ret = vmm_inject_gp(vcpu);
+ return (ret);
+ }
+ vcpu->vc_shadow_pat = val;
+ break;
+ case MSR_EFER:
  vmcb->v_efer = *rax | EFER_SVME;
- } else if (*rcx == KVM_MSR_SYSTEM_TIME) {
+ break;
+ case KVM_MSR_SYSTEM_TIME:
  vmm_init_pvclock(vcpu,
     (*rax & 0xFFFFFFFFULL) | (*rdx  << 32));
- } else {
-#ifdef VMM_DEBUG
+ break;
+ default:
  /* Log the access, to be able to identify unknown MSRs */
  DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
     "written from guest=0x%llx:0x%llx\n", __func__,
     *rcx, *rdx, *rax);
-#endif /* VMM_DEBUG */
  }
  } else {
+ /* RDMSR */
  switch (*rcx) {
- case MSR_DE_CFG:
- /* LFENCE seralizing bit is set by host */
- *rax = DE_CFG_SERIALIZE_LFENCE;
- *rdx = 0;
- break;
- case MSR_INT_PEN_MSG:
- *rax = 0;
- *rdx = 0;
- break;
- default:
- DPRINTF("%s: guest read msr 0x%llx, injecting "
-    "#GP\n", __func__, *rcx);
- ret = vmm_inject_gp(vcpu);
- return (ret);
+ case MSR_BIOS_SIGN:
+ case MSR_INT_PEN_MSG:
+ case MSR_PLATFORM_ID:
+ /* Ignored */
+ *rax = 0;
+ *rdx = 0;
+ break;
+ case MSR_CR_PAT:
+ *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
+ *rdx = (vcpu->vc_shadow_pat >> 32);
+ break;
+ case MSR_DE_CFG:
+ /* LFENCE seralizing bit is set by host */
+ *rax = DE_CFG_SERIALIZE_LFENCE;
+ *rdx = 0;
+ break;
+ default:
+ /*
+ * Unsupported MSRs causes #GP exception, don't advance
+ * %rip
+ */
+ DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
+    "injecting #GP\n", __func__, *rcx);
+ ret = vmm_inject_gp(vcpu);
+ return (ret);
  }
  }

@@ -7271,6 +7308,23 @@ vmm_update_pvclock(struct vcpu *vcpu)
  pvclock_ti->ti_version &= ~0x1;
  }
  return (0);
+}
+
+int
+vmm_pat_is_valid(uint64_t pat)
+{
+ int i;
+ uint8_t *byte = (uint8_t *)&pat;
+
+ /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
+ for (i = 0; i < 8; i++) {
+ if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
+ DPRINTF("%s: invalid pat %llx\n", __func__, pat);
+ return 0;
+ }
+ }
+
+ return 1;
 }

 /*


Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

Dave Voutila-2

Dave Voutila writes:

> The following diff cleans up and improves MSR-related event handling in
> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> mentioned in a previous email to tech@ about fixing support for 9front
> guests [1], we found some discrepancies between vmm(4)'s handling on
> Intel hosts and AMD hosts.
>
> While the diff has been tested already by abieber@ and brynet@ with
> additional review by mlarkin@, I'm looking for additional testers
> willing to look for regressions.

Last call for additional testers. Plan is to commit the diff later today
as I have an OK from mlarkin@.

>
> This diff specifically improves and standardizes MSR-based exit handling
> between Intel and AMD hosts as follows:
>
> 1. All RDMSR instructions that cause a vm-exit must be explicitly
>    handled (e.g. via emulation) or they result in injecting a #GP
>    exception into the guest.
>
> 2. All WRMSR instructions that cause a vm-exit and are not explicitly
>    handled are ignored (i.e. %rax and %rdx values are not inspected or
>    used).
>
> As a consequence of change (1) above, the diff adds explicit
> handling for the following MSRs:
>
> 1. MSR_CR_PAT: for now reads/writes are shadowed for the guest vcpu and
>    host state is not touched. The shadow state is initialized on vcpu
>    reset to the same value as the host.
>
> 2. MSR_BIOS_SIGN, MSR_INT_PEN_MSG (on AMD), and MSR_PLATFORM_ID are all
>    ignored. This means reads result in vmm(4) setting the guest vcpu's
> %rax and %rdx to 0. (These MSRs are ignored in the same manner by
>    other hypervisors like kvm and nvmm.)
>
> The average user should not see a change in behavior of vmm(4) or
> vmd(8). The biggest change is for *Intel* users as this diff changes the
> current vmx logic, which was not injecting #GP for unsupported
> MSRs. (Your guests were potentially getting garbage results from rdmsr
> instructions.)
>

If you do test the diff and run into issues, I forgot to mention:
please build the kernel with VMM_DEBUG. The output in the kernel
message buffer will help diagnose any problematic MSR access.

--
-Dave Voutila


Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

Aaron Bieber-2

Dave Voutila writes:

> Dave Voutila writes:
>
>> The following diff cleans up and improves MSR-related event handling in
>> vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
>> mentioned in a previous email to tech@ about fixing support for 9front
>> guests [1], we found some discrepancies between vmm(4)'s handling on
>> Intel hosts and AMD hosts.
>>
>> While the diff has been tested already by abieber@ and brynet@ with
>> additional review by mlarkin@, I'm looking for additional testers
>> willing to look for regressions.

Confirmed it's working for me - the very latest 9front and NixOS work
fine on my Ryzen!


Re: vmm(4): improve {rd,wr}msr exit handling for both amd & intel

Bryan Steele-2
On Mon, Apr 05, 2021 at 09:54:14AM -0400, Dave Voutila wrote:

>
> Dave Voutila writes:
>
> > The following diff cleans up and improves MSR-related event handling in
> > vmm(4) for when the guest attempts a rdmsr/wrmsr instruction. As
> > mentioned in a previous email to tech@ about fixing support for 9front
> > guests [1], we found some discrepancies between vmm(4)'s handling on
> > Intel hosts and AMD hosts.
> >
> > While the diff has been tested already by abieber@ and brynet@ with
> > additional review by mlarkin@, I'm looking for additional testers
> > willing to look for regressions.
>
> Last call for additional testers. Plan is to commit the diff later today
> as I have an OK from mlarkin@.

This is ok brynet@ too!
