From 5d613e87160b10db105314bdef10799486e97844 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 20 Aug 2024 20:59:32 -0400 Subject: [PATCH 1/2] OpcodeDispatcher: optimize `test x, x` fewer uops now that we invert carry Signed-off-by: Alyssa Rosenzweig --- .../Source/Interface/Core/OpcodeDispatcher.cpp | 10 ++++++++-- .../Source/Interface/Core/OpcodeDispatcher.h | 17 +++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index f870adbff7..bafb2a9894 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1006,8 +1006,14 @@ void OpDispatchBuilder::TESTOp(OpcodeArgs) { } } - HandleNZ00Write(); - CalculatePF(_AndWithFlags(IR::SizeToOpSize(Size), Dest, Src)); + // Try to optimize out the AND. + if (Dest == Src) { + SetNZP_ZeroCV(Size, Src); + } else { + HandleNZ00Write(); + CalculatePF(_AndWithFlags(IR::SizeToOpSize(Size), Dest, Src)); + } + InvalidateAF(); } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index b39b786bcf..3b894492bb 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1666,7 +1666,7 @@ class OpDispatchBuilder final : public IREmitter { NZCVDirty = true; } - void SetNZ_ZeroCV(unsigned SrcSize, Ref Res) { + void SetNZ_ZeroCV(unsigned SrcSize, Ref Res, bool SetPF = false) { HandleNZ00Write(); // x - 0 = x. NZ set according to Res. C always set. V always unset. This @@ -1674,15 +1674,28 @@ class OpDispatchBuilder final : public IREmitter { // // This is currently worse for 8/16-bit, but that should be optimized. TODO if (SrcSize >= 4) { - _SubNZCV(IR::SizeToOpSize(SrcSize), Res, _Constant(0)); + if (SetPF) { + CalculatePF(_SubWithFlags(IR::SizeToOpSize(SrcSize), Res, _Constant(0))); + } else { + _SubNZCV(IR::SizeToOpSize(SrcSize), Res, _Constant(0)); + } + PossiblySetNZCVBits |= 1u << IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC); CFInverted = true; } else { _TestNZ(IR::SizeToOpSize(SrcSize), Res, Res); CFInverted = false; + + if (SetPF) { + CalculatePF(Res); + } } } + void SetNZP_ZeroCV(unsigned SrcSize, Ref Res) { + SetNZ_ZeroCV(SrcSize, Res, true); + } + void InsertNZCV(unsigned BitOffset, Ref Value, signed FlagOffset, bool MustMask) { signed Bit = IndexNZCV(BitOffset); From 19010491da33070fbd014a081ad4d0477b8c3a47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 20 Aug 2024 21:01:08 -0400 Subject: [PATCH 2/2] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/FlagOpts.json | 40 +++++++------- .../InstructionCountCI/FlagM/Primary.json | 22 ++++---- .../FlagM/PrimaryGroup.json | 16 +++--- unittests/InstructionCountCI/Primary.json | 52 +++++++------------ .../InstructionCountCI/PrimaryGroup.json | 20 +++---- 5 files changed, 63 insertions(+), 87 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/FlagOpts.json b/unittests/InstructionCountCI/FlagM/FlagOpts.json index fb7e41684d..45b2f936df 100644 --- a/unittests/InstructionCountCI/FlagM/FlagOpts.json +++ b/unittests/InstructionCountCI/FlagM/FlagOpts.json @@ -68,7 +68,7 @@ ] }, "ADC dead": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 3, "x86Insts": [ "add rax, rbx", "adc rcx, rcx", @@ -77,8 +77,7 @@ "ExpectedArm64ASM": [ "adds x4, x4, x6", "adc x7, x7, x7", - "ands x26, x7, x7", - "cfinv" + "subs x26, x7, #0x0 (0)" ] }, "INC consumed": { @@ -173,9 +172,9 @@ "uxtb w20, w4", "sub w20, w20, #0x1 (1)", "bfxil x4, x20, #0, #8", - "mov x26, x4", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w4, lsl #24", + "cfinv", + "mov x26, x4" ] }, "Variable shift dead": { @@ -227,9 +226,9 @@ "cmp x4, x6", "cset x20, eq", "bfxil x7, x20, #0, #8", - "mov x26, x7", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w7, lsl #24", + "cfinv", + "mov x26, x7" ] }, "Partial NZCV select (add)": { @@ -243,9 +242,9 @@ "adds x4, x4, x6", "cset x20, eq", "bfxil x7, x20, #0, #8", - "mov x26, x7", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w7, lsl #24", + "cfinv", + "mov x26, x7" ] }, "AND use only ZF": { @@ -259,9 +258,9 @@ "ands w4, w4, w6", "cset x20, eq", "bfxil x7, x20, #0, #8", - "mov x26, x7", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w7, lsl #24", + "cfinv", + "mov x26, x7" ] }, "AND use only PF": { @@ -279,13 +278,13 @@ "eon w20, w20, w20, lsr #1", "and x20, x20, #0x1", "bfxil x7, x20, #0, #8", - "mov x26, x7", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w7, lsl #24", + "cfinv", + "mov x26, x7" ] }, "Dead cmpxchg flags": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 10, "x86Insts": [ "cmpxchg8b [rbp]", "test rax, rax" @@ -300,8 +299,7 @@ "rmif x0, #0, #NzCV", "csel x4, x20, x4, ne", "csel x5, x21, x5, ne", - "ands x26, x4, x4", - "cfinv" + "subs x26, x4, #0x0 (0)" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Primary.json b/unittests/InstructionCountCI/FlagM/Primary.json index f8f89f3edf..33f818a046 100644 --- a/unittests/InstructionCountCI/FlagM/Primary.json +++ b/unittests/InstructionCountCI/FlagM/Primary.json @@ -2204,34 +2204,32 @@ "ExpectedInstructionCount": 3, "Comment": "0xa8", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24", - "cfinv" + "cmn wzr, w4, lsl #24", + "cfinv", + "mov x26, x4" ] }, "test ax, -1": { "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16", - "cfinv" + "cmn wzr, w4, lsl #16", + "cfinv", + "mov x26, x4" ] }, "test eax, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, w4", - "cfinv" + "subs w26, w4, #0x0 (0)" ] }, "test rax, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, x4", - "cfinv" + "subs x26, x4, #0x0 (0)" ] }, "scasb": { diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index bdb2f02c4e..7d1d1eeb79 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -2240,25 +2240,23 @@ "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "mov x26, x6", - "cmn wzr, w26, lsl #16", - "cfinv" + "cmn wzr, w6, lsl #16", + "cfinv", + "mov x26, x6" ] }, "test ebx, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w6, w6", - "cfinv" + "subs w26, w6, #0x0 (0)" ] }, "test rbx, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 1, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x6, x6", - "cfinv" + "subs x26, x6, #0x0 (0)" ] }, "neg bx": { diff --git a/unittests/InstructionCountCI/Primary.json b/unittests/InstructionCountCI/Primary.json index f4c5f260bf..00bc905abe 100644 --- a/unittests/InstructionCountCI/Primary.json +++ b/unittests/InstructionCountCI/Primary.json @@ -1960,42 +1960,36 @@ "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24", + "cmn wzr, w4, lsl #24", "mrs x20, nzcv", "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "msr nzcv, x20", + "mov x26, x4" ] }, "test ax, ax": { "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16", + "cmn wzr, w4, lsl #16", "mrs x20, nzcv", "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "msr nzcv, x20", + "mov x26, x4" ] }, "test eax, eax": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands w26, w4, w4", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs w26, w4, #0x0 (0)" ] }, "test rax, rax": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands x26, x4, x4", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs x26, x4, #0x0 (0)" ] }, "xchg bl, cl": { @@ -3588,42 +3582,36 @@ "ExpectedInstructionCount": 5, "Comment": "0xa8", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24", + "cmn wzr, w4, lsl #24", "mrs x20, nzcv", "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "msr nzcv, x20", + "mov x26, x4" ] }, "test ax, -1": { "ExpectedInstructionCount": 5, "Comment": "0xa9", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16", + "cmn wzr, w4, lsl #16", "mrs x20, nzcv", "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "msr nzcv, x20", + "mov x26, x4" ] }, "test eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, w4", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs w26, w4, #0x0 (0)" ] }, "test rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, x4", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs x26, x4, #0x0 (0)" ] }, "stosb": { diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index ed1f9d8ae7..ae6f3b8fe2 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -2636,31 +2636,25 @@ "ExpectedInstructionCount": 5, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "mov x26, x6", - "cmn wzr, w26, lsl #16", + "cmn wzr, w6, lsl #16", "mrs x20, nzcv", "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "msr nzcv, x20", + "mov x26, x6" ] }, "test ebx, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w6, w6", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs w26, w6, #0x0 (0)" ] }, "test rbx, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 1, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x6, x6", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "subs x26, x6, #0x0 (0)" ] }, "not bx": {