Skip to content

Commit

Permalink
Merge pull request FEX-Emu#3397 from pmatos/XCHGOp
Browse files: browse the repository at this point in the history
Improve XCHG operations
  • Loading branch information
lioncash authored Feb 2, 2024
2 parents 9c37c0f + 4623544 commit 920a8db
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 40 deletions.
12 changes: 8 additions & 4 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1367,14 +1367,17 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) {
// So x86-64 spec mandates this special case that even though it is a 32bit instruction and
// is supposed to zext the result, it is a true no-op
if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX) {
// If this instruction has a REP prefix then this is architectually defined to be a `PAUSE` instruction.
// On older processors this ends up being a true `REP NOP` which is why they stuck this here.
// If this instruction has a REP prefix then this is architecturally
// defined to be a `PAUSE` instruction. On older processors this ends up
// being a true `REP NOP` which is why they stuck this here.
_Yield();
}
return;
}

OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
// AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult.
OrderedNode *Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags,
{.AllowUpperGarbage = true});
if (DestIsMem(Op)) {
HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
Expand All @@ -1385,7 +1388,8 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) {
StoreResult(GPRClass, Op, Op->Src[0], Result, -1);
}
else {
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
// AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult.
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

// Swap the contents
// Order matters here since we don't want to swap context contents for one that affects the other
Expand Down
15 changes: 6 additions & 9 deletions unittests/InstructionCountCI/Atomics.json
Original file line number Diff line number Diff line change
Expand Up @@ -358,31 +358,28 @@
]
},
"xchg byte [rax], cl": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "0x86",
"ExpectedArm64ASM": [
"uxtb w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpalb w1, w20, [x4]",
"bfxil x5, x20, #0, #8"
]
},
"xchg word [rax], cx": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "0x87",
"ExpectedArm64ASM": [
"uxth w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpalh w1, w20, [x4]",
"bfxil x5, x20, #0, #16"
]
},
"xchg dword [rax], ecx": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"Comment": "0x87",
"ExpectedArm64ASM": [
"mov w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpal w1, w5, [x4]"
]
},
Expand Down
51 changes: 24 additions & 27 deletions unittests/InstructionCountCI/Primary.json
Original file line number Diff line number Diff line change
Expand Up @@ -2210,18 +2210,17 @@
"ExpectedInstructionCount": 4,
"Comment": "0x86",
"ExpectedArm64ASM": [
"uxtb w20, w7",
"uxtb w21, w5",
"bfxil x5, x20, #0, #8",
"bfxil x7, x21, #0, #8"
"mov x20, x5",
"mov x5, x20",
"bfxil x5, x7, #0, #8",
"bfxil x7, x20, #0, #8"
]
},
"xchg [rax], cl": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "0x86",
"ExpectedArm64ASM": [
"uxtb w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpalb w1, w20, [x4]",
"bfxil x5, x20, #0, #8"
]
Expand All @@ -2230,18 +2229,17 @@
"ExpectedInstructionCount": 4,
"Comment": "0x87",
"ExpectedArm64ASM": [
"uxth w20, w7",
"uxth w21, w5",
"bfxil x5, x20, #0, #16",
"bfxil x7, x21, #0, #16"
"mov x20, x5",
"mov x5, x20",
"bfxil x5, x7, #0, #16",
"bfxil x7, x20, #0, #16"
]
},
"xchg [rax], cx": {
"ExpectedInstructionCount": 4,
"ExpectedInstructionCount": 3,
"Comment": "0x87",
"ExpectedArm64ASM": [
"uxth w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpalh w1, w20, [x4]",
"bfxil x5, x20, #0, #16"
]
Expand All @@ -2250,17 +2248,16 @@
"ExpectedInstructionCount": 3,
"Comment": "0x87",
"ExpectedArm64ASM": [
"mov w20, w7",
"mov w7, w5",
"mov x5, x20"
"mov x20, x5",
"mov w5, w7",
"mov w7, w20"
]
},
"xchg [rax], ecx": {
"ExpectedInstructionCount": 3,
"ExpectedInstructionCount": 2,
"Comment": "0x87",
"ExpectedArm64ASM": [
"mov w20, w5",
"mov w1, w20",
"mov w1, w5",
"swpal w1, w5, [x4]"
]
},
Expand Down Expand Up @@ -2753,19 +2750,19 @@
"ExpectedInstructionCount": 4,
"Comment": "0x90",
"ExpectedArm64ASM": [
"uxth w20, w4",
"uxth w21, w7",
"bfxil x7, x20, #0, #16",
"bfxil x4, x21, #0, #16"
"mov x20, x7",
"mov x7, x20",
"bfxil x7, x4, #0, #16",
"bfxil x4, x20, #0, #16"
]
},
"xchg eax, ebx": {
"ExpectedInstructionCount": 3,
"Comment": "0x90",
"ExpectedArm64ASM": [
"mov w20, w4",
"mov w4, w7",
"mov x7, x20"
"mov x20, x7",
"mov w7, w4",
"mov w4, w20"
]
},
"xchg rax, rbx": {
Expand Down

0 comments on commit 920a8db

Please sign in to comment.