Skip to content

Commit

Permalink
Added 1 more opcode
Browse files Browse the repository at this point in the history
  • Loading branch information
ksco committed Oct 3, 2024
1 parent c16d512 commit 5ebdd07
Showing 1 changed file with 29 additions and 13 deletions.
42 changes: 29 additions & 13 deletions src/dynarec/rv64/dynarec_rv64_660f_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -1767,27 +1767,43 @@ uintptr_t dynarec64_660F_vector(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t i
VSLL_VX(q0, q0, x4, VECTOR_UNMASKED);
break;
case 0xF5:
if (rv64_xtheadvector) { DEFAULT_VECTOR; } // lack of vrgatherei16.vv

INST_NAME("PMADDWD Gx, Ex");
nextop = F8;
SET_ELEMENT_WIDTH(x1, VECTOR_SEW16, 1);
GETGX_vector(q0, 1, VECTOR_SEW16);
GETEX_vector(q1, 0, 0, VECTOR_SEW16);
v0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
v1 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2);
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
d1 = fpu_get_scratch(dyn); // use this with caution!
d0 = fpu_get_scratch_lmul(dyn, VECTOR_LMUL2); // warning, no more scratches!
ADDI(x4, xZR, 6);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 2);
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
VWMUL_VV(v0, q1, q0, VECTOR_UNMASKED);
if (rv64_xtheadvector) { // lack of vrgatherei16.vv
ADDI(x4, xZR, 6);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 1);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
// out of scratch registers: switch back to LMUL1 and slide v1 up into d1
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL1, 1);
VSLIDEUP_VX(d1, v1, 0, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 1);
VADD_VI(d0, d0, 1, VECTOR_UNMASKED);
VRGATHER_VV(v1, v0, d0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d1, VECTOR_UNMASKED);
} else {
ADDI(x4, xZR, 6);
VID_V(d0, VECTOR_UNMASKED);
VSLL_VI(d0, d0, 1, VECTOR_UNMASKED); // times 2
VMIN_VX(d0, d0, x4, VECTOR_UNMASKED);
VADD_VI(q0, d0, 1, VECTOR_UNMASKED);
vector_vsetvli(dyn, ninst, x1, VECTOR_SEW32, VECTOR_LMUL2, 1);
VRGATHEREI16_VV(v1, v0, d0, VECTOR_UNMASKED); // 6 4 2 0
VRGATHEREI16_VV(d0, v0, q0, VECTOR_UNMASKED); // 7 5 3 1
SET_ELEMENT_WIDTH(x1, VECTOR_SEW32, 1);
VADD_VV(q0, v1, d0, VECTOR_UNMASKED);
}
break;
case 0xF6:
INST_NAME("PSADBW Gx, Ex");
Expand Down

0 comments on commit 5ebdd07

Please sign in to comment.