[RV64_DYNAREC] Fixed more issues for vector (#1928)
* [RV64_DYNAREC] Fixed emitter for xtheadvector

* MOVSD can be unaligned

* fixed unaligned issues
ksco authored Oct 12, 2024
1 parent efd1030 commit 6a3a19d
Showing 3 changed files with 55 additions and 32 deletions.
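
The commit message above covers two separate fixes: the VFMV_S_F / VFMV_F_S emitter encodings now account for xtheadvector, and the scalar SSE ops touched here (MOVSD, CVTTSD2SI, CVTSD2SI, MULSD, DIVSD, CMPSD, MULSS, CVTSS2SD) no longer assume their memory operand is naturally aligned. The unaligned fix follows one pattern in every hunk below; a minimal C sketch of the 64-bit case, assuming the dynarec macros shown in the diff (the wrapper name emit_unaligned_load64 and its signature are illustrative only, not part of box64; the 32-bit ops restore VECTOR_SEW32 instead):

static void emit_unaligned_load64(dynarec_rv64_t* dyn, int ninst, int vreg, int ed)
{
    // Drop to 8-bit elements so the vector load carries no alignment requirement.
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
    // Enable the low 8 byte elements (one double) and load them byte by byte.
    vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
    VLE8_V(vreg, ed, VECTOR_MASKED, VECTOR_NFIELD1);
    // Restore the 64-bit element width expected by the FP ops that follow.
    SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
}

This replaces the previous VLE64_V under a 0b01 mask, which loads a single 64-bit element and can trap on hardware that does not handle misaligned vector element accesses.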
58 changes: 37 additions & 21 deletions src/dynarec/rv64/dynarec_rv64_f20f_vector.c
@@ -50,8 +50,8 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("MOVSD Gx, Ex");
nextop = F8;
GETG;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
ed = (nextop & 7) + (rex.b << 3);
v0 = sse_get_reg_vector(dyn, ninst, x1, gd, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, ed, 0, VECTOR_SEW64);
@@ -64,11 +64,12 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
}
} else {
SMREAD();
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1); // unaligned
v0 = sse_get_reg_empty_vector(dyn, ninst, x1, gd);
d0 = fpu_get_scratch(dyn);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VLE64_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(d0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
VXOR_VV(v0, v0, v0, VECTOR_UNMASKED);
VMERGE_VVM(v0, v0, d0); // implies VMASK
}
@@ -124,15 +125,17 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("CVTTSD2SI Gd, Ex");
nextop = F8;
GETGD;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
} else {
SMREAD();
v0 = fpu_get_scratch(dyn);
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE_V(v0, ed, dyn->vector_eew, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
}
if (box64_dynarec_fastround) {
VFMV_F_S(v0, v0);
@@ -157,15 +160,17 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
INST_NAME("CVTSD2SI Gd, Ex");
nextop = F8;
GETGD;
SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
v0 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, dyn->vector_eew);
} else {
SMREAD();
v0 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE_V(v0, ed, dyn->vector_eew, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v0, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, (rex.w ? VECTOR_SEW64 : VECTOR_SEW32), 1);
}
if (box64_dynarec_fastround) {
VFMV_F_S(v0, v0);
@@ -193,18 +198,22 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x59:
INST_NAME("MULSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE64_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (box64_dynarec_fastnan) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
} else {
VFMV_F_S(v0, v0);
@@ -230,16 +239,19 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x5E:
INST_NAME("DIVSD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE64_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(v0, 1, VECTOR_SEW64);
}
if (!box64_dynarec_fastnan) {
VFMV_F_S(v0, v0);
@@ -262,22 +274,26 @@ uintptr_t dynarec64_F20F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VFMV_S_F(v0, v0);
}
} else {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VFDIV_VV(v0, v0, v1, VECTOR_MASKED);
}
break;
case 0xC2:
INST_NAME("CMPSD Gx, Ex, Ib");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(d0, 1, VECTOR_SEW64);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(d0, 1, VECTOR_SEW64);
d1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW64);
} else {
SMREAD();
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
d1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
VLE64_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(d1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
GETGX_vector(d0, 1, VECTOR_SEW64);
}
u8 = F8;
VFMV_F_S(d0, d0);
25 changes: 16 additions & 9 deletions src/dynarec/rv64/dynarec_rv64_f30f_vector.c
@@ -130,18 +130,22 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x59:
INST_NAME("MULSS Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE32_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
if (box64_dynarec_fastnan) {
vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
VFMUL_VV(v0, v0, v1, VECTOR_MASKED);
} else {
VFMV_F_S(v0, v0);
@@ -167,22 +171,25 @@ uintptr_t dynarec64_F30F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
case 0x5A:
INST_NAME("CVTSS2SD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
if (MODREG) {
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
v1 = sse_get_reg_vector(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 0, VECTOR_SEW32);
} else {
SMREAD();
v1 = fpu_get_scratch(dyn);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW8, 1);
addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
VLE32_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
vector_loadmask(dyn, ninst, VMASK, 0xFF, x4, 1);
VLE8_V(v1, ed, VECTOR_MASKED, VECTOR_NFIELD1);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
GETGX_vector(v0, 1, VECTOR_SEW32);
}
d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
vector_loadmask(dyn, ninst, VMASK, 0b0001, x4, 1);
VFWCVT_F_F_V(d0, v1, VECTOR_MASKED);
SET_ELEMENT_WIDTH(x1, VECTOR_SEW64, 1);
if (rv64_xtheadvector) {
vector_loadmask(dyn, ninst, VMASK, 0b01, x4, 1);
VMERGE_VVM(v0, v0, d0); // implies VMASK
} else {
VMV_X_S(x4, d0);
4 changes: 2 additions & 2 deletions src/dynarec/rv64/rv64_emitter.h
@@ -1378,7 +1378,7 @@ f28–31 ft8–11 FP temporaries Caller
#define VFSLIDE1UP_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011100 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001110...........101.....1010111
#define VFSLIDE1DOWN_VF(vd, vs2, rs1, vm) EMIT(R_type(0b0011110 | (vm), vs2, rs1, 0b101, vd, 0b1010111)) // 001111...........101.....1010111

#define VFMV_S_F(vd, rs1) EMIT(I_type(0b010000100000, rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111
#define VFMV_S_F(vd, rs1) EMIT(I_type((rv64_xtheadvector ? 0b001101100000 : 0b010000100000), rs1, 0b101, vd, 0b1010111)) // 010000100000.....101.....1010111
#define VFMV_V_F(vd, rs1) EMIT(I_type(0b010111100000, rs1, 0b101, vd, 0b1010111)) // 010111100000.....101.....1010111

#define VFMERGE_VFM(vd, vs2, rs1) EMIT(R_type(0b0101110, vs2, rs1, 0b101, vd, 0b1010111)) // 0101110..........101.....1010111
@@ -1424,7 +1424,7 @@ f28–31 ft8–11 FP temporaries Caller
#define VFSGNJN_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001001...........001.....1010111
#define VFSGNJX_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0010100 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 001010...........001.....1010111

#define VFMV_F_S(rd, vs2) EMIT(R_type(0b0100001, vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111
#define VFMV_F_S(rd, vs2) EMIT(R_type((rv64_xtheadvector ? 0b0011001 : 0b0100001), vs2, 0b00000, 0b001, rd, 0b1010111)) // 0100001.....00000001.....1010111

#define VMFEQ_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110000 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011000...........001.....1010111
#define VMFLE_VV(vd, vs2, vs1, vm) EMIT(R_type(0b0110010 | (vm), vs2, vs1, 0b001, vd, 0b1010111)) // 011001...........001.....1010111
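
On the first bullet ("Fixed emitter for xtheadvector"): the two rv64_emitter.h hunks above make the VFMV_S_F and VFMV_F_S encodings conditional on rv64_xtheadvector, because XTheadVector (T-Head's implementation of the pre-ratification 0.7.1 vector draft) encodes these scalar/FP moves with different funct fields than ratified RVV 1.0. A minimal sketch of the selection, with the field values taken from the diff (the helper below is illustrative only; the real macros inline the ternary directly into EMIT()):

#include <stdint.h>

extern int rv64_xtheadvector;  // set by box64's CPU detection; assumed int here

// Returns the funct7 field used when emitting vfmv.f.s on the running core.
static inline uint32_t vfmv_f_s_funct7(void)
{
    return rv64_xtheadvector ? 0b0011001 : 0b0100001;
}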
