p***@xen.org
2018-11-20 14:24:27 UTC
commit 64f3090d9c49158149293af6ad5564c22bfb7344
Author: Jan Beulich <***@suse.com>
AuthorDate: Tue Nov 20 15:13:54 2018 +0100
Commit: Jan Beulich <***@suse.com>
CommitDate: Tue Nov 20 15:13:54 2018 +0100
x86emul: support AVX512{F,BW} packed integer arithmetic insns
Note: vpadd* / vpsub* et al are put at seemingly the wrong slot of the
big switch(). This is in anticipation of adding e.g. vpunpck* to those
groups (see the legacy/VEX encoded case labels nearby to support this).
Signed-off-by: Jan Beulich <***@suse.com>
Acked-by: Andrew Cooper <***@citrix.com>
---
tools/tests/x86_emulator/evex-disp8.c | 39 +++++++++++++++++
xen/arch/x86/x86_emulate/x86_emulate.c | 77 +++++++++++++++++++++++++++++++---
2 files changed, 110 insertions(+), 6 deletions(-)
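
A side note on the element-size handling in the AVX512BW hunks below: several of the case groups derive the element size from the opcode byte itself, e.g. elem_bytes = 1 << (b & 1) works because the byte forms of these insn pairs sit at even opcode bytes and the word forms at odd ones (the 0f38 min/max group uses b & 2 and the 0xda/0xde/0xe4/0xea/0xee group uses b & 0x10 in the same spirit). The following is only a minimal stand-alone sketch of that parity trick, not emulator code; the opcode values are taken from the case labels in the patch:

/* Toy demo of deriving element size from the low opcode bit, as done
 * by "elem_bytes = 1 << (b & 1)" in the AVX512BW case group below. */
#include <stdio.h>
#include <stdint.h>

static unsigned int elem_bytes_for(uint8_t b)
{
    /* Even opcode byte => byte-element form, odd => word-element form. */
    return 1u << (b & 1);
}

int main(void)
{
    static const struct { uint8_t op; const char *name; } insns[] = {
        { 0xfc, "vpaddb" }, { 0xfd, "vpaddw" },
        { 0xf8, "vpsubb" }, { 0xf9, "vpsubw" },
        { 0xe0, "vpavgb" }, { 0xe3, "vpavgw" },
    };
    unsigned int i;

    for ( i = 0; i < sizeof(insns) / sizeof(insns[0]); ++i )
        printf("%s (0x%02x): %u-byte elements\n",
               insns[i].name, insns[i].op, elem_bytes_for(insns[i].op));

    return 0;
}
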
diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 0061bbf6ee..1c0f6c4040 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -160,6 +160,8 @@ static const struct test avx512f_all[] = {
INSN_PFP_NB(movu, 0f, 10),
INSN_PFP_NB(movu, 0f, 11),
INSN_FP(mul, 0f, 59),
+ INSN(paddd, 66, 0f, fe, vl, d, vl),
+ INSN(paddq, 66, 0f, d4, vl, q, vl),
INSN(pand, 66, 0f, db, vl, dq, vl),
INSN(pandn, 66, 0f, df, vl, dq, vl),
INSN(pcmp, 66, 0f3a, 1f, vl, dq, vl),
@@ -168,7 +170,16 @@ static const struct test avx512f_all[] = {
INSN(pcmpgtd, 66, 0f, 66, vl, d, vl),
INSN(pcmpgtq, 66, 0f38, 37, vl, q, vl),
INSN(pcmpu, 66, 0f3a, 1e, vl, dq, vl),
+ INSN(pmaxs, 66, 0f38, 3d, vl, dq, vl),
+ INSN(pmaxu, 66, 0f38, 3f, vl, dq, vl),
+ INSN(pmins, 66, 0f38, 39, vl, dq, vl),
+ INSN(pminu, 66, 0f38, 3b, vl, dq, vl),
+ INSN(pmuldq, 66, 0f38, 28, vl, q, vl),
+ INSN(pmulld, 66, 0f38, 40, vl, d, vl),
+ INSN(pmuludq, 66, 0f, f4, vl, q, vl),
INSN(por, 66, 0f, eb, vl, dq, vl),
+ INSN(psubd, 66, 0f, fa, vl, d, vl),
+ INSN(psubq, 66, 0f, fb, vl, q, vl),
INSN(pternlog, 66, 0f3a, 25, vl, dq, vl),
INSN(ptestm, 66, 0f38, 27, vl, dq, vl),
INSN(ptestnm, f3, 0f38, 27, vl, dq, vl),
@@ -203,12 +214,39 @@ static const struct test avx512bw_all[] = {
INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
INSN(movdqu16, f2, 0f, 6f, vl, w, vl),
INSN(movdqu16, f2, 0f, 7f, vl, w, vl),
+ INSN(paddb, 66, 0f, fc, vl, b, vl),
+ INSN(paddsb, 66, 0f, ec, vl, b, vl),
+ INSN(paddsw, 66, 0f, ed, vl, w, vl),
+ INSN(paddusb, 66, 0f, dc, vl, b, vl),
+ INSN(paddusw, 66, 0f, dd, vl, w, vl),
+ INSN(paddw, 66, 0f, fd, vl, w, vl),
+ INSN(pavgb, 66, 0f, e0, vl, b, vl),
+ INSN(pavgw, 66, 0f, e3, vl, w, vl),
INSN(pcmp, 66, 0f3a, 3f, vl, bw, vl),
INSN(pcmpeqb, 66, 0f, 74, vl, b, vl),
INSN(pcmpeqw, 66, 0f, 75, vl, w, vl),
INSN(pcmpgtb, 66, 0f, 64, vl, b, vl),
INSN(pcmpgtw, 66, 0f, 65, vl, w, vl),
INSN(pcmpu, 66, 0f3a, 3e, vl, bw, vl),
+ INSN(pmaddwd, 66, 0f, f5, vl, w, vl),
+ INSN(pmaxsb, 66, 0f38, 3c, vl, b, vl),
+ INSN(pmaxsw, 66, 0f, ee, vl, w, vl),
+ INSN(pmaxub, 66, 0f, de, vl, b, vl),
+ INSN(pmaxuw, 66, 0f38, 3e, vl, w, vl),
+ INSN(pminsb, 66, 0f38, 38, vl, b, vl),
+ INSN(pminsw, 66, 0f, ea, vl, w, vl),
+ INSN(pminub, 66, 0f, da, vl, b, vl),
+ INSN(pminuw, 66, 0f38, 3a, vl, w, vl),
+ INSN(pmulhuw, 66, 0f, e4, vl, w, vl),
+ INSN(pmulhw, 66, 0f, e5, vl, w, vl),
+ INSN(pmullw, 66, 0f, d5, vl, w, vl),
+ INSN(psadbw, 66, 0f, f6, vl, b, vl),
+ INSN(psubb, 66, 0f, f8, vl, b, vl),
+ INSN(psubsb, 66, 0f, e8, vl, b, vl),
+ INSN(psubsw, 66, 0f, e9, vl, w, vl),
+ INSN(psubusb, 66, 0f, d8, vl, b, vl),
+ INSN(psubusw, 66, 0f, d9, vl, w, vl),
+ INSN(psubw, 66, 0f, f9, vl, w, vl),
INSN(ptestm, 66, 0f38, 26, vl, bw, vl),
INSN(ptestnm, f3, 0f38, 26, vl, bw, vl),
};
@@ -217,6 +255,7 @@ static const struct test avx512dq_all[] = {
INSN_PFP(and, 0f, 54),
INSN_PFP(andn, 0f, 55),
INSN_PFP(or, 0f, 56),
+ INSN(pmullq, 66, 0f38, 40, vl, q, vl),
INSN_PFP(xor, 0f, 57),
};
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 2c559740b8..ec5892fb01 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -367,21 +367,21 @@ static const struct twobyte_table {
[0xc8 ... 0xcf] = { ImplicitOps },
[0xd0] = { DstImplicit|SrcMem|ModRM, simd_other },
[0xd1 ... 0xd3] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xd4 ... 0xd5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xd6] = { DstMem|SrcImplicit|ModRM|Mov, simd_other, 3 },
[0xd7] = { DstReg|SrcImplicit|ModRM|Mov },
[0xd8 ... 0xdf] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
- [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xe0] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xe1 ... 0xe2] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xe3 ... 0xe5] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xe6] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0xe7] = { DstMem|SrcImplicit|ModRM|Mov, simd_packed_int, d8s_vl },
[0xe8 ... 0xef] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xf0] = { DstImplicit|SrcMem|ModRM|Mov, simd_other },
[0xf1 ... 0xf3] = { DstImplicit|SrcMem|ModRM, simd_other },
- [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xf4 ... 0xf6] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xf7] = { DstMem|SrcMem|ModRM|Mov, simd_packed_int },
- [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int },
+ [0xf8 ... 0xfe] = { DstImplicit|SrcMem|ModRM, simd_packed_int, d8s_vl },
[0xff] = { ModRM }
};
@@ -451,7 +451,7 @@ static const struct ext0f38_table {
[0x2e ... 0x2f] = { .simd_size = simd_packed_fp, .to_mem = 1 },
[0x30 ... 0x35] = { .simd_size = simd_other, .two_op = 1 },
[0x36 ... 0x3f] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
- [0x40] = { .simd_size = simd_packed_int },
+ [0x40] = { .simd_size = simd_packed_int, .d8s = d8s_vl },
[0x41] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x45 ... 0x47] = { .simd_size = simd_packed_int },
[0x58 ... 0x59] = { .simd_size = simd_other, .two_op = 1 },
@@ -5984,6 +5984,10 @@ x86_emulate(
case X86EMUL_OPC_EVEX_66(0x0f, 0xdf): /* vpandn{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0xeb): /* vpor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
case X86EMUL_OPC_EVEX_66(0x0f, 0xef): /* vpxor{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x39): /* vpmins{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3b): /* vpminu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3d): /* vpmaxs{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3f): /* vpmaxu{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
avx512f_no_sae:
host_and_vcpu_must_have(avx512f);
generate_exception_if(ea.type != OP_MEM && evex.br, EXC_UD);
@@ -6585,6 +6589,31 @@ x86_emulate(
get_fpu(X86EMUL_FPU_mmx);
goto simd_0f_common;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf5): /* vpmaddwd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf6): /* vpsadbw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ fault_suppression = false;
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd5): /* vpmullw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd8): /* vpsubusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd9): /* vpsubusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xdc): /* vpaddusb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xdd): /* vpaddusw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe0): /* vpavgb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe3): /* vpavgw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe5): /* vpmulhw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe8): /* vpsubsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe9): /* vpsubsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xec): /* vpaddsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xed): /* vpaddsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf8): /* vpsubb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf9): /* vpsubw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfc): /* vpaddb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfd): /* vpaddw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = 1 << (b & 1);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_EVEX_F3(0x0f38, 0x26): /* vptestnm{b,w} [xyz]mm/mem,[xyz]mm,k{k} */
case X86EMUL_OPC_EVEX_F3(0x0f38, 0x27): /* vptestnm{d,q} [xyz]mm/mem,[xyz]mm,k{k} */
op_bytes = 16 << evex.lr;
@@ -6611,6 +6640,12 @@ x86_emulate(
avx512_vlen_check(false);
goto simd_zmm;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xd4): /* vpaddq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xf4): /* vpmuludq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x28): /* vpmuldq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ generate_exception_if(!evex.w, EXC_UD);
+ goto avx512f_no_sae;
+
CASE_SIMD_PACKED_INT(0x0f, 0x6e): /* mov{d,q} r/m,{,x}mm */
case X86EMUL_OPC_VEX_66(0x0f, 0x6e): /* vmov{d,q} r/m,xmm */
CASE_SIMD_PACKED_INT(0x0f, 0x7e): /* mov{d,q} {,x}mm,r/m */
@@ -7834,6 +7869,12 @@ x86_emulate(
op_bytes = vex.pfx ? 16 : 8;
goto simd_0f_int;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfa): /* vpsubd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfb): /* vpsubq [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xfe): /* vpaddd [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ generate_exception_if(evex.w != (b & 1), EXC_UD);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC(0x0f, 0xd4): /* paddq mm/m64,mm */
case X86EMUL_OPC(0x0f, 0xf4): /* pmuludq mm/m64,mm */
case X86EMUL_OPC(0x0f, 0xfb): /* psubq mm/m64,mm */
@@ -7862,6 +7903,16 @@ x86_emulate(
vcpu_must_have(mmxext);
goto simd_0f_mmx;
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xda): /* vpminub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xde): /* vpmaxub [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xe4): /* vpmulhuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xea): /* vpminsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f, 0xee): /* vpmaxsw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = b & 0x10 ? 1 : 2;
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_66(0x0f, 0xe6): /* cvttpd2dq xmm/mem,xmm */
case X86EMUL_OPC_VEX_66(0x0f, 0xe6): /* vcvttpd2dq {x,y}mm/mem,xmm */
case X86EMUL_OPC_F3(0x0f, 0xe6): /* cvtdq2pd xmm/mem,xmm */
@@ -8236,6 +8287,20 @@ x86_emulate(
host_and_vcpu_must_have(sse4_2);
goto simd_0f38_common;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x38): /* vpminsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3a): /* vpminuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3c): /* vpmaxsb [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x3e): /* vpmaxuw [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ host_and_vcpu_must_have(avx512bw);
+ generate_exception_if(evex.br, EXC_UD);
+ elem_bytes = b & 2 ?: 1;
+ goto avx512f_no_sae;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x40): /* vpmull{d,q} [xyz]mm/mem,[xyz]mm,[xyz]mm{k} */
+ if ( evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512f_no_sae;
+
case X86EMUL_OPC_66(0x0f38, 0xdb): /* aesimc xmm/m128,xmm */
case X86EMUL_OPC_VEX_66(0x0f38, 0xdb): /* vaesimc xmm/m128,xmm */
case X86EMUL_OPC_66(0x0f38, 0xdc): /* aesenc xmm/m128,xmm,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#staging