Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpcodeDispatcher: Fixes segment prefixing on vector element loadstores #4102

Merged
merged 2 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ void OpDispatchBuilder::AVX128_VMOVLP(OpcodeArgs) {
///< VMOVLPS/PD xmm1, xmm2, mem64
// Bits[63:0] come from Src2[63:0]
// Bits[127:64] come from Src1[127:64]
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], OpSize::i64Bit, Op->Flags, {.LoadData = false});
auto Src2 = MakeSegmentAddress(Op, Op->Src[1]);
Ref Result_Low = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, Src1.Low, 0, Src2);
Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);

Expand All @@ -879,11 +879,11 @@ void OpDispatchBuilder::AVX128_VMOVHP(OpcodeArgs) {
if (!Op->Dest.IsGPR()) {
///< VMOVHPS/PD mem64, xmm1
// Need to store Bits[127:64]. Use a vector element store.
auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, OpSize::i64Bit, Op->Flags, {.LoadData = false});
auto Dest = MakeSegmentAddress(Op, Op->Dest);
_VStoreVectorElement(OpSize::i128Bit, OpSize::i64Bit, Src1.Low, 1, Dest);
} else if (!Op->Src[1].IsGPR()) {
///< VMOVHPS/PD xmm2, xmm1, mem64
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], OpSize::i64Bit, Op->Flags, {.LoadData = false});
auto Src2 = MakeSegmentAddress(Op, Op->Src[1]);

// Bits[63:0] come from Src1[63:0]
// Bits[127:64] come from Src2[63:0]
Expand Down Expand Up @@ -1236,7 +1236,7 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
}

// If we are storing to memory then we store the size of the element extracted
Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
Ref Dest = MakeSegmentAddress(Op, Op->Dest);
_VStoreVectorElement(OpSize::i128Bit, OverridenElementSize, Src.Low, Index, Dest);
}

Expand Down Expand Up @@ -1386,7 +1386,7 @@ void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, size_t ElementSize, const X
Result.Low = _VInsGPR(OpSize::i128Bit, ElementSize, Index, Src1.Low, Src2);
} else {
// If loading from memory then we only load the element size
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, ElementSize, Op->Flags, {.LoadData = false});
auto Src2 = MakeSegmentAddress(Op, Src2Op);
Result.Low = _VLoadVectorElement(OpSize::i128Bit, ElementSize, Src1.Low, Index, Src2);
}

Expand Down
10 changes: 5 additions & 5 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) {
} else {
// If the destination is a GPR then the source is memory
// xmm1[127:64] = src
Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false});
Ref Src = MakeSegmentAddress(Op, Op->Src[0]);
Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, 16, Op->Flags);
auto Result = _VLoadVectorElement(16, 8, Dest, 1, Src);
StoreResult(FPRClass, Op, Result, -1);
Expand All @@ -115,7 +115,7 @@ void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) {
// In this case memory is the destination and the high bits of the XMM are source
// Mem64 = xmm1[127:64]
Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, 8, Op->Flags, {.LoadData = false});
Ref Dest = MakeSegmentAddress(Op, Op->Dest);
_VStoreVectorElement(16, 8, Src, 1, Dest);
}
}
Expand Down Expand Up @@ -144,7 +144,7 @@ void OpDispatchBuilder::MOVLPOp(OpcodeArgs) {
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, 16, 16);
} else {
auto DstSize = GetDstSize(Op);
Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 8, .LoadData = false});
Ref Src = MakeSegmentAddress(Op, Op->Src[0]);
Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags);
auto Result = _VLoadVectorElement(16, 8, Dest, 0, Src);
StoreResult(FPRClass, Op, Result, -1);
Expand Down Expand Up @@ -1515,7 +1515,7 @@ Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tabl
}

// If loading from memory then we only load the element size
auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, ElementSize, Op->Flags, {.LoadData = false});
Ref Src2 = MakeSegmentAddress(Op, Src2Op);
return _VLoadVectorElement(Size, ElementSize, Src1, Index, Src2);
}

Expand Down Expand Up @@ -1639,7 +1639,7 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs, size_t ElementSize) {
}

// If we are storing to memory then we store the size of the element extracted
Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
Ref Dest = MakeSegmentAddress(Op, Op->Dest);
_VStoreVectorElement(16, OverridenElementSize, Src, Index, Dest);
}

Expand Down
138 changes: 138 additions & 0 deletions unittests/ASM/FEX_bugs/tls_vector_element.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
%ifdef CONFIG
{
"RegData": {
"RAX": "0x4142434445464748",
"RBX": "0x4142434445464748",
"RCX": "0x0000000000000056",
"RDX": "0x4142434445464748",
"RSI": "0x0000000000004748",
"RDI": "0x4142434445464748",
"RSP": "0x0000000045464748",
"RBP": "0x6162636465666768",
"R8": "0x4142434445464748",
"R9": "0x4142434445464748",
"R10": "0x0000000000000056",
"R11": "0x4142434445464748",
"R12": "0x4142434445464748",
"R13": "0x0000000000004748",
"R14": "0x0000000045464748",
"R15": "0x6162636465666768",
"XMM0": ["0x5152535455565758", "0x4142434445464748"],
"XMM1": ["0x4142434445464748", "0x6162636465666768"],
"XMM2": ["0x5152535455564858", "0x6162636465666768"],
"XMM3": ["0x5152535455565758", "0x4142434445464748"],
"XMM4": ["0x4142434445464748", "0x6162636465666768"],
"XMM5": ["0x4546474855565758", "0x6162636465666768"],
"XMM6": ["0x5152535455565758", "0x4142434445464748"],
"XMM7": ["0x5152535447485758", "0x6162636465666768"],
"XMM8": ["0x5152535455565758", "0x4142434445464748"],
"XMM9": ["0x4142434445464748", "0x4142434445464748"],
"XMM10": ["0x5152535455564858", "0x6162636465666768"],
"XMM11": ["0x5152535455565758", "0x4142434445464748"],
"XMM12": ["0x4142434445464748", "0x4142434445464748"],
"XMM13": ["0x4546474855565758", "0x6162636465666768"],
"XMM14": ["0x5152535455565758", "0x4142434445464748"],
"XMM15": ["0x5152535447485758", "0x6162636465666768"]
}
}
%endif

; FEX-Emu had a bug where TLS vector element loadstores weren't correctly prefixing the segment on the address.
; This caused a crash in the game Halls of Torment (steamid 2218750) where it had some TLS vector data loaded with movhps.
; This tests all the vector element loadstores that FEX had missed.

; Setup TLS segment
mov rax, 0xe000_0000
wrgsbase rax

movups xmm0, [rel .data_setup]
movups xmm1, [rel .data_setup]
movups xmm2, [rel .data_setup]
movups xmm3, [rel .data_setup]
movups xmm4, [rel .data_setup]
movups xmm5, [rel .data_setup]
movups xmm6, [rel .data_setup]
movups xmm7, [rel .data_setup]
movups xmm8, [rel .data_setup]
movups xmm9, [rel .data_setup]
movups xmm10, [rel .data_setup]
movups xmm11, [rel .data_setup]
movups xmm12, [rel .data_setup]
movups xmm13, [rel .data_setup]
movups xmm14, [rel .data_setup]
movups xmm15, [rel .data_setup]

mov rax, [rel .data]
mov [gs:0], rax

jmp .test
.test:

; SSE loads
movhps xmm0, [gs:0]
movlps xmm1, [gs:0]
pinsrb xmm2, [gs:0], 1
movhpd xmm3, [gs:0]
movlpd xmm4, [gs:0]
pinsrd xmm5, [gs:0], 1
pinsrq xmm6, [gs:0], 1
pinsrw xmm7, [gs:0], 1

; AVX loads
vmovhps xmm8, xmm0, [gs:0]
vmovlps xmm9, xmm0, [gs:0]
vpinsrb xmm10, [gs:0], 1
vmovhpd xmm11, xmm0, [gs:0]
vmovlpd xmm12, xmm0, [gs:0]
vpinsrd xmm13, [gs:0], 1
vpinsrq xmm14, [gs:0], 1
vpinsrw xmm15, [gs:0], 1

; SSE stores
movhps [gs:(0 * 8)], xmm0
movlps [gs:(1 * 8)], xmm1
pextrb [gs:(2 * 8)], xmm2, 2
movhpd [gs:(3 * 8)], xmm3
movlpd [gs:(4 * 8)], xmm4
pextrw [gs:(5 * 8)], xmm5, 2
pextrd [gs:(6 * 8)], xmm6, 2
pextrq [gs:(7 * 8)], xmm7, 1

; AVX stores
vmovhps [gs:(8 * 8)], xmm0
vmovlps [gs:(9 * 8)], xmm1
vpextrb [gs:(10 * 8)], xmm2, 2
vmovhpd [gs:(11 * 8)], xmm3
vmovlpd [gs:(12 * 8)], xmm4
vpextrw [gs:(13 * 8)], xmm5, 2
vpextrd [gs:(14 * 8)], xmm6, 2
vpextrq [gs:(15 * 8)], xmm7, 1

; Load the results back in to GPRs
mov rax, [gs:(0 * 8)]
mov rbx, [gs:(1 * 8)]
mov rcx, [gs:(2 * 8)]
mov rdx, [gs:(3 * 8)]
mov rdi, [gs:(4 * 8)]
mov rsi, [gs:(5 * 8)]
mov rsp, [gs:(6 * 8)]
mov rbp, [gs:(7 * 8)]
mov r8, [gs:(8 * 8)]
mov r9, [gs:(9 * 8)]
mov r10, [gs:(10 * 8)]
mov r11, [gs:(11 * 8)]
mov r12, [gs:(12 * 8)]
mov r13, [gs:(13 * 8)]
mov r14, [gs:(14 * 8)]
mov r15, [gs:(15 * 8)]

hlt

.data:
dq 0x4142434445464748

.data_setup:
dq 0x5152535455565758, 0x6162636465666768

.data_result:
dq 0, 0, 0, 0, 0, 0, 0, 0
Loading