p***@xen.org
2018-11-20 14:23:57 UTC
commit 88855b727bd666754fd0b8ed42aa63c480fb0179
Author: Jan Beulich <***@suse.com>
AuthorDate: Tue Nov 20 15:11:50 2018 +0100
Commit: Jan Beulich <***@suse.com>
CommitDate: Tue Nov 20 15:11:50 2018 +0100
x86emul: support AVX512{F,DQ} FP broadcast insns
Signed-off-by: Jan Beulich <***@suse.com>
Acked-by: Andrew Cooper <***@citrix.com>
---
tools/tests/x86_emulator/evex-disp8.c | 25 ++++++++++++++++
xen/arch/x86/x86_emulate/x86_emulate.c | 55 ++++++++++++++++++++++++++++++++--
2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 41f0faac81..43c4a9f992 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -105,6 +105,7 @@ enum esz {
static const struct test avx512f_all[] = {
INSN_FP(add, 0f, 58),
+ INSN(broadcastss, 66, 0f38, 18, el, d, el),
INSN_FP(cmp, 0f, c2),
INSN_FP(div, 0f, 5e),
INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
@@ -176,6 +177,15 @@ static const struct test avx512f_128[] = {
INSN(movq, 66, 0f, d6, el, q, el),
};
+static const struct test avx512f_no128[] = {
+ INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl),
+ INSN(broadcastsd, 66, 0f38, 19, el, q, el),
+};
+
+static const struct test avx512f_512[] = {
+ INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
+};
+
static const struct test avx512bw_all[] = {
INSN(movdqu8, f2, 0f, 6f, vl, b, vl),
INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
@@ -190,8 +200,19 @@ static const struct test avx512dq_all[] = {
INSN_PFP(xor, 0f, 57),
};
+static const struct test avx512dq_no128[] = {
+ INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
+ INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
+};
+
+static const struct test avx512dq_512[] = {
+ INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
+};
+
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
+static const unsigned char vl_no128[] = { VL_512, VL_256 };
+static const unsigned char vl_512[] = { VL_512 };
/*
* This table, indicating the presence of an immediate (byte) for an opcode
@@ -520,6 +541,10 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
RUN(avx512f, all);
RUN(avx512f, 128);
+ RUN(avx512f, no128);
+ RUN(avx512f, 512);
RUN(avx512bw, all);
RUN(avx512dq, all);
+ RUN(avx512dq, no128);
+ RUN(avx512dq, 512);
}
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 9bd5d35a44..73ce8ddd99 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -234,10 +234,16 @@ enum simd_opsize {
/*
* 128 bits of integer or floating point data, with no further
- * formatting information.
+ * formatting information, or with it encoded by EVEX.W.
*/
simd_128,
+ /*
+ * 256 bits of integer or floating point data, with formatting
+ * encoded by EVEX.W.
+ */
+ simd_256,
+
/* Operand size encoded in non-standard way. */
simd_other
};
@@ -432,8 +438,10 @@ static const struct ext0f38_table {
[0x13] = { .simd_size = simd_other, .two_op = 1 },
[0x14 ... 0x16] = { .simd_size = simd_packed_fp },
[0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
- [0x18 ... 0x19] = { .simd_size = simd_scalar_opc, .two_op = 1 },
- [0x1a] = { .simd_size = simd_128, .two_op = 1 },
+ [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
+ [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 },
+ [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
+ [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
[0x28 ... 0x29] = { .simd_size = simd_packed_int },
@@ -3339,6 +3347,10 @@ x86_decode(
op_bytes = 16;
break;
+ case simd_256:
+ op_bytes = 32;
+ break;
+
default:
op_bytes = 0;
break;
@@ -7993,6 +8005,43 @@ x86_emulate(
dst.type = OP_NONE;
break;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */
+ generate_exception_if(evex.w || evex.br, EXC_UD);
+ avx512_broadcast:
+ /*
+ * For the respective code below the main switch() to work we need to
+ * fold op_mask here: A source element gets read whenever any of its
+ * respective destination elements' mask bits is set.
+ */
+ if ( fault_suppression )
+ {
+ n = 1 << ((b & 3) - evex.w);
+ EXPECT(elem_bytes > 0);
+ ASSERT(op_bytes == n * elem_bytes);
+ for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n )
+ op_mask |= (op_mask >> i) & ((1 << n) - 1);
+ }
+ goto avx512f_no_sae;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */
+ /* vbroadcastf64x4 m256,zmm{k} */
+ generate_exception_if(ea.type != OP_MEM || evex.lr != 2, EXC_UD);
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */
+ /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */
+ generate_exception_if(!evex.lr || evex.br, EXC_UD);
+ if ( !evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512_broadcast;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */
+ /* vbroadcastf64x2 m128,{y,z}mm{k} */
+ generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.br,
+ EXC_UD);
+ if ( evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512_broadcast;
+
case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#staging
Author: Jan Beulich <***@suse.com>
AuthorDate: Tue Nov 20 15:11:50 2018 +0100
Commit: Jan Beulich <***@suse.com>
CommitDate: Tue Nov 20 15:11:50 2018 +0100
x86emul: support AVX512{F,DQ} FP broadcast insns
Signed-off-by: Jan Beulich <***@suse.com>
Acked-by: Andrew Cooper <***@citrix.com>
---
tools/tests/x86_emulator/evex-disp8.c | 25 ++++++++++++++++
xen/arch/x86/x86_emulate/x86_emulate.c | 55 ++++++++++++++++++++++++++++++++--
2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/tools/tests/x86_emulator/evex-disp8.c b/tools/tests/x86_emulator/evex-disp8.c
index 41f0faac81..43c4a9f992 100644
--- a/tools/tests/x86_emulator/evex-disp8.c
+++ b/tools/tests/x86_emulator/evex-disp8.c
@@ -105,6 +105,7 @@ enum esz {
static const struct test avx512f_all[] = {
INSN_FP(add, 0f, 58),
+ INSN(broadcastss, 66, 0f38, 18, el, d, el),
INSN_FP(cmp, 0f, c2),
INSN_FP(div, 0f, 5e),
INSN(fmadd132, 66, 0f38, 98, vl, sd, vl),
@@ -176,6 +177,15 @@ static const struct test avx512f_128[] = {
INSN(movq, 66, 0f, d6, el, q, el),
};
+static const struct test avx512f_no128[] = {
+ INSN(broadcastf32x4, 66, 0f38, 1a, el_4, d, vl),
+ INSN(broadcastsd, 66, 0f38, 19, el, q, el),
+};
+
+static const struct test avx512f_512[] = {
+ INSN(broadcastf64x4, 66, 0f38, 1b, el_4, q, vl),
+};
+
static const struct test avx512bw_all[] = {
INSN(movdqu8, f2, 0f, 6f, vl, b, vl),
INSN(movdqu8, f2, 0f, 7f, vl, b, vl),
@@ -190,8 +200,19 @@ static const struct test avx512dq_all[] = {
INSN_PFP(xor, 0f, 57),
};
+static const struct test avx512dq_no128[] = {
+ INSN(broadcastf32x2, 66, 0f38, 19, el_2, d, vl),
+ INSN(broadcastf64x2, 66, 0f38, 1a, el_2, q, vl),
+};
+
+static const struct test avx512dq_512[] = {
+ INSN(broadcastf32x8, 66, 0f38, 1b, el_8, d, vl),
+};
+
static const unsigned char vl_all[] = { VL_512, VL_128, VL_256 };
static const unsigned char vl_128[] = { VL_128 };
+static const unsigned char vl_no128[] = { VL_512, VL_256 };
+static const unsigned char vl_512[] = { VL_512 };
/*
* This table, indicating the presence of an immediate (byte) for an opcode
@@ -520,6 +541,10 @@ void evex_disp8_test(void *instr, struct x86_emulate_ctxt *ctxt,
RUN(avx512f, all);
RUN(avx512f, 128);
+ RUN(avx512f, no128);
+ RUN(avx512f, 512);
RUN(avx512bw, all);
RUN(avx512dq, all);
+ RUN(avx512dq, no128);
+ RUN(avx512dq, 512);
}
diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c
index 9bd5d35a44..73ce8ddd99 100644
--- a/xen/arch/x86/x86_emulate/x86_emulate.c
+++ b/xen/arch/x86/x86_emulate/x86_emulate.c
@@ -234,10 +234,16 @@ enum simd_opsize {
/*
* 128 bits of integer or floating point data, with no further
- * formatting information.
+ * formatting information, or with it encoded by EVEX.W.
*/
simd_128,
+ /*
+ * 256 bits of integer or floating point data, with formatting
+ * encoded by EVEX.W.
+ */
+ simd_256,
+
/* Operand size encoded in non-standard way. */
simd_other
};
@@ -432,8 +438,10 @@ static const struct ext0f38_table {
[0x13] = { .simd_size = simd_other, .two_op = 1 },
[0x14 ... 0x16] = { .simd_size = simd_packed_fp },
[0x17] = { .simd_size = simd_packed_int, .two_op = 1 },
- [0x18 ... 0x19] = { .simd_size = simd_scalar_opc, .two_op = 1 },
- [0x1a] = { .simd_size = simd_128, .two_op = 1 },
+ [0x18] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 2 },
+ [0x19] = { .simd_size = simd_scalar_opc, .two_op = 1, .d8s = 3 },
+ [0x1a] = { .simd_size = simd_128, .two_op = 1, .d8s = 4 },
+ [0x1b] = { .simd_size = simd_256, .two_op = 1, .d8s = d8s_vl_by_2 },
[0x1c ... 0x1e] = { .simd_size = simd_packed_int, .two_op = 1 },
[0x20 ... 0x25] = { .simd_size = simd_other, .two_op = 1 },
[0x28 ... 0x29] = { .simd_size = simd_packed_int },
@@ -3339,6 +3347,10 @@ x86_decode(
op_bytes = 16;
break;
+ case simd_256:
+ op_bytes = 32;
+ break;
+
default:
op_bytes = 0;
break;
@@ -7993,6 +8005,43 @@ x86_emulate(
dst.type = OP_NONE;
break;
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x18): /* vbroadcastss xmm/m32,[xyz]mm{k} */
+ generate_exception_if(evex.w || evex.br, EXC_UD);
+ avx512_broadcast:
+ /*
+ * For the respective code below the main switch() to work we need to
+ * fold op_mask here: A source element gets read whenever any of its
+ * respective destination elements' mask bits is set.
+ */
+ if ( fault_suppression )
+ {
+ n = 1 << ((b & 3) - evex.w);
+ EXPECT(elem_bytes > 0);
+ ASSERT(op_bytes == n * elem_bytes);
+ for ( i = n; i < (16 << evex.lr) / elem_bytes; i += n )
+ op_mask |= (op_mask >> i) & ((1 << n) - 1);
+ }
+ goto avx512f_no_sae;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x1b): /* vbroadcastf32x8 m256,zmm{k} */
+ /* vbroadcastf64x4 m256,zmm{k} */
+ generate_exception_if(ea.type != OP_MEM || evex.lr != 2, EXC_UD);
+ /* fall through */
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x19): /* vbroadcastsd xmm/m64,{y,z}mm{k} */
+ /* vbroadcastf32x2 xmm/m64,{y,z}mm{k} */
+ generate_exception_if(!evex.lr || evex.br, EXC_UD);
+ if ( !evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512_broadcast;
+
+ case X86EMUL_OPC_EVEX_66(0x0f38, 0x1a): /* vbroadcastf32x4 m128,{y,z}mm{k} */
+ /* vbroadcastf64x2 m128,{y,z}mm{k} */
+ generate_exception_if(ea.type != OP_MEM || !evex.lr || evex.br,
+ EXC_UD);
+ if ( evex.w )
+ host_and_vcpu_must_have(avx512dq);
+ goto avx512_broadcast;
+
case X86EMUL_OPC_66(0x0f38, 0x20): /* pmovsxbw xmm/m64,xmm */
case X86EMUL_OPC_66(0x0f38, 0x21): /* pmovsxbd xmm/m32,xmm */
case X86EMUL_OPC_66(0x0f38, 0x22): /* pmovsxbq xmm/m16,xmm */
--
generated by git-patchbot for /home/xen/git/xen.git#staging