diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h
index 429a33e8d7..b2b154d04c 100644
--- a/FEXCore/Source/Interface/Context/Context.h
+++ b/FEXCore/Source/Interface/Context/Context.h
@@ -316,6 +316,11 @@ class ContextImpl final : public FEXCore::Context::Context {
     return Config.Is64BitMode ? 8 : 4;
   }

+  // TODO: Temporary while OpcodeDispatcher shifts over
+  IR::OpSize GetGPROpSize() const {
+    return Config.Is64BitMode ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
+  }
+
   FEXCore::JITSymbols Symbols;

   FEXCore::Utils::PooledAllocatorVirtual OpDispatcherAllocator;
diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp
index c261ad0ef0..34f9c79098 100644
--- a/FEXCore/Source/Interface/Core/Core.cpp
+++ b/FEXCore/Source/Interface/Core/Core.cpp
@@ -1017,12 +1017,13 @@ void ContextImpl::AddThunkTrampolineIRHandler(uintptr_t Entrypoint, uintptr_t Gu
       IRHeader.first->Blocks = emit->WrapNode(Block);
       emit->SetCurrentCodeBlock(Block);

-      const uint8_t GPRSize = GetGPRSize();
+      const auto GPRSize = GetGPROpSize();

-      if (GPRSize == 8) {
+      if (GPRSize == IR::OpSize::i64Bit) {
         emit->_StoreRegister(emit->_Constant(Entrypoint), X86State::REG_R11, IR::GPRClass, GPRSize);
       } else {
-        emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(8, 8, emit->_Constant(Entrypoint)), offsetof(Core::CPUState, mm[0][0]));
+        emit->_StoreContext(GPRSize, IR::FPRClass, emit->_VCastFromGPR(IR::OpSize::i64Bit, IR::OpSize::i64Bit, emit->_Constant(Entrypoint)),
+                            offsetof(Core::CPUState, mm[0][0]));
       }
       emit->_ExitFunction(emit->_Constant(GuestThunkEntrypoint));
     },
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 9f81dffad4..c457f80186 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -72,7 +72,7 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs, bool IsSyscallInst) {
   // Calculate flags early.
   CalculateDeferredFlags();

-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   auto NewRIP = GetRelocatedPC(Op, -Op->InstSize);
   _StoreContext(GPRSize, GPRClass, NewRIP, offsetof(FEXCore::Core::CPUState, rip));
@@ -112,7 +112,7 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs, bool IsSyscallInst) {
 }

 void OpDispatchBuilder::ThunkOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();
   uint8_t* sha256 = (uint8_t*)(Op->PC + 2);

   if (CTX->Config.Is64BitMode) {
@@ -132,29 +132,28 @@ void OpDispatchBuilder::ThunkOp(OpcodeArgs) {

 void OpDispatchBuilder::LEAOp(OpcodeArgs) {
   // LEA specifically ignores segment prefixes
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);

   if (CTX->Config.Is64BitMode) {
-    const uint32_t DstSize =
-      X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? OpSize::i16Bit :
-      X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? OpSize::i64Bit :
-      OpSize::i32Bit;
+    const auto DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? OpSize::i16Bit :
+                         X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? OpSize::i64Bit :
+                         OpSize::i32Bit;

     auto Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.LoadData = false, .AllowUpperGarbage = SrcSize > DstSize});
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, OpSize::iInvalid);
   } else {
-    uint32_t DstSize =
+    const auto DstSize =
       X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? OpSize::i16Bit : OpSize::i32Bit;

     auto Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.LoadData = false, .AllowUpperGarbage = SrcSize > DstSize});
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, OpSize::iInvalid);
   }
 }

 void OpDispatchBuilder::NOPOp(OpcodeArgs) {}

 void OpDispatchBuilder::RETOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   // ABI Optimization: Flags don't survive calls or rets
   if (CTX->Config.ABILocalFlags) {
@@ -197,7 +196,7 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) {
     return;
   }

-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));

@@ -230,7 +229,7 @@ void OpDispatchBuilder::IRETOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::CallbackReturnOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();
   // Store the new RIP
   _CallbackReturn();
   auto NewRIP = _LoadContext(GPRSize, GPRClass, offsetof(FEXCore::Core::CPUState, rip));
@@ -316,7 +315,7 @@ void OpDispatchBuilder::ADCOp(OpcodeArgs, uint32_t SrcIndex) {
   }

   if (!DestIsLockedMem(Op)) {
-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   }
 }

@@ -343,7 +342,7 @@ void OpDispatchBuilder::SBBOp(OpcodeArgs, uint32_t SrcIndex) {
   Result = CalculateFlags_SBB(Size, Before, Src);

   if (!DestIsLockedMem(Op)) {
-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   }
 }

@@ -352,11 +351,11 @@ void OpDispatchBuilder::SALCOp(OpcodeArgs) {
   auto Result = NZCVSelect(OpSize::i32Bit, {COND_UGE} /* CF = 1 */, _InlineConstant(0xffffffff), _InlineConstant(0));

-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::PUSHOp(OpcodeArgs) {
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);

   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);

   Push(Size, Src);
@@ -364,7 +363,7 @@ void OpDispatchBuilder::PUSHOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) {
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);

   Ref Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

@@ -374,7 +373,7 @@ void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) {

 void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {
   // 32bit only
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);

   auto OldSP = LoadGPRRegister(X86State::REG_RSP);

@@ -391,7 +390,7 @@ void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {
   Ref Src {};
   Ref NewSP = OldSP;

-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   Src = LoadGPRRegister(X86State::REG_RAX);
   NewSP = _Push(GPRSize, Size, Src, NewSP);
@@ -423,8 +422,8 @@ void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::PUSHSegmentOp(OpcodeArgs, uint32_t SegmentReg) {
-  const uint8_t SrcSize = GetSrcSize(Op);
-  const uint8_t DstSize = GetDstSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
+  const auto DstSize = OpSizeFromDst(Op);

   Ref Src {};
   if (!CTX->Config.Is64BitMode()) {
@@ -480,13 +479,13 @@ void OpDispatchBuilder::PUSHSegmentOp(OpcodeArgs, uint32_t SegmentReg) {
 }

 void OpDispatchBuilder::POPOp(OpcodeArgs) {
-  Ref Value = Pop(GetSrcSize(Op));
-  StoreResult(GPRClass, Op, Value, -1);
+  Ref Value = Pop(OpSizeFromSrc(Op));
+  StoreResult(GPRClass, Op, Value, OpSize::iInvalid);
 }

 void OpDispatchBuilder::POPAOp(OpcodeArgs) {
   // 32bit only
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);

   Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));

@@ -507,8 +506,8 @@ void OpDispatchBuilder::POPAOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::POPSegmentOp(OpcodeArgs, uint32_t SegmentReg) {
-  const uint8_t SrcSize = GetSrcSize(Op);
-  const uint8_t DstSize = GetDstSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
+  const auto DstSize = OpSizeFromDst(Op);

   auto NewSegment = Pop(SrcSize);

@@ -540,7 +539,7 @@ void OpDispatchBuilder::LEAVEOp(OpcodeArgs) {
   // First we move RBP in to RSP and then behave effectively like a pop
   auto SP = _RMWHandle(LoadGPRRegister(X86State::REG_RBP));

-  auto NewGPR = Pop(GetSrcSize(Op), SP);
+  auto NewGPR = Pop(OpSizeFromSrc(Op), SP);

   // Store the new stack pointer
   StoreGPRRegister(X86State::REG_RSP, SP);
@@ -550,7 +549,7 @@ void OpDispatchBuilder::LEAVEOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::CALLOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   BlockSetRIP = true;

@@ -586,7 +585,7 @@ void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) {
   BlockSetRIP = true;

-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref JMPPCOffset = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);

   // Push the return address.
@@ -698,11 +697,11 @@ void OpDispatchBuilder::SETccOp(OpcodeArgs) {

   auto SrcCond = SelectCC(Op->OP & 0xF, OpSize::i64Bit, OneConst, ZeroConst);

-  StoreResult(GPRClass, Op, SrcCond, -1);
+  StoreResult(GPRClass, Op, SrcCond, OpSize::iInvalid);
 }

 void OpDispatchBuilder::CMOVOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   // Calculate flags early.
   CalculateDeferredFlags();
@@ -718,7 +717,7 @@ void OpDispatchBuilder::CMOVOp(OpcodeArgs) {

   auto SrcCond = SelectCC(Op->OP & 0xF, IR::SizeToOpSize(std::max(OpSize::i32Bit, GetSrcSize(Op))), Src, Dest);

-  StoreResult(GPRClass, Op, SrcCond, -1);
+  StoreResult(GPRClass, Op, SrcCond, OpSize::iInvalid);
 }

 void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) {
@@ -861,12 +860,12 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) {
   bool ZFTrue = Op->OP == 0xE1;

   BlockSetRIP = true;
-  uint32_t SrcSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? OpSize::i32Bit : OpSize::i64Bit;
+  auto SrcSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? OpSize::i32Bit : OpSize::i64Bit;
   auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;

   if (!CTX->Config.Is64BitMode) {
     // RCX size is 32-bit or 16-bit when executing in 32-bit mode.
-    SrcSize >>= 1;
+    SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) >> 1);
     OpSize = OpSize::i32Bit;
   }

@@ -874,7 +873,7 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) {
   Ref CondReg = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
   CondReg = _Sub(OpSize, CondReg, _InlineConstant(1));
-  StoreResult(GPRClass, Op, Op->Src[0], CondReg, -1);
+  StoreResult(GPRClass, Op, Op->Src[0], CondReg, OpSize::iInvalid);

   // If LOOPE then jumps to target if RCX != 0 && ZF == 1
   // If LOOPNE then jumps to target if RCX != 0 && ZF == 0
@@ -1041,20 +1040,20 @@ void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) {
   // else
   //   Zext(32, Src)
   //
-  uint8_t Size = std::min(OpSize::i32Bit, GetSrcSize(Op));
+  auto Size = std::min(OpSize::i32Bit, OpSizeFromSrc(Op));
   bool Sext = (Size != OpSize::i16Bit) && Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING;

   Ref Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], Size, Op->Flags, {.AllowUpperGarbage = Sext});
   if (Size == OpSize::i16Bit) {
     // This'll make sure to insert in to the lower 16bits without modifying upper bits
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, Size, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid);
   } else if (Sext) {
     // With REX.W then Sext
     Src = _Sbfe(OpSize::i64Bit, Size * 8, 0, Src);
-    StoreResult(GPRClass, Op, Src, -1);
+    StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
   } else {
     // Without REX.W then Zext (store result implicitly zero extends)
-    StoreResult(GPRClass, Op, Src, -1);
+    StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
   }
 }

@@ -1067,13 +1066,13 @@ void OpDispatchBuilder::MOVSXOp(OpcodeArgs) {
   // path for 32-bit dests where the native 32-bit Sbfe zero extends the top.
   uint8_t DstSize = GetDstSize(Op);
   Src = _Sbfe(DstSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, Size * 8, 0, Src);
-  StoreResult(GPRClass, Op, Op->Dest, Src, -1);
+  StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid);
 }

 void OpDispatchBuilder::MOVZXOp(OpcodeArgs) {
   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
   // Store result implicitly zero extends
-  StoreResult(GPRClass, Op, Src, -1);
+  StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
 }

 void OpDispatchBuilder::CMPOp(OpcodeArgs, uint32_t SrcIndex) {
@@ -1089,7 +1088,7 @@ void OpDispatchBuilder::CQOOp(OpcodeArgs) {
   auto Size = GetSrcSize(Op);
   Ref Upper = _Sbfe(OpSize::i64Bit, 1, Size * 8 - 1, Src);

-  StoreResult(GPRClass, Op, Upper, -1);
+  StoreResult(GPRClass, Op, Upper, OpSize::iInvalid);
 }

 void OpDispatchBuilder::XCHGOp(OpcodeArgs) {
@@ -1121,26 +1120,26 @@ void OpDispatchBuilder::XCHGOp(OpcodeArgs) {
     Ref Dest = MakeSegmentAddress(Op, Op->Dest);
     auto Result = _AtomicSwap(OpSizeFromSrc(Op), Src, Dest);
-    StoreResult(GPRClass, Op, Op->Src[0], Result, -1);
+    StoreResult(GPRClass, Op, Op->Src[0], Result, OpSize::iInvalid);
   } else {
     // AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult.
     Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

     // Swap the contents
     // Order matters here since we don't want to swap context contents for one that effects the other
-    StoreResult(GPRClass, Op, Op->Dest, Src, -1);
-    StoreResult(GPRClass, Op, Op->Src[0], Dest, -1);
+    StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid);
+    StoreResult(GPRClass, Op, Op->Src[0], Dest, OpSize::iInvalid);
   }
 }

 void OpDispatchBuilder::CDQOp(OpcodeArgs) {
-  uint8_t DstSize = GetDstSize(Op);
-  uint8_t SrcSize = DstSize >> 1;
+  const auto DstSize = OpSizeFromDst(Op);
+  const auto SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) >> 1);
   Ref Src = LoadGPRRegister(X86State::REG_RAX, SrcSize, 0, true);

   Src = _Sbfe(DstSize <= OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, SrcSize * 8, 0, Src);

-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, DstSize, OpSize::iInvalid);
 }

 void OpDispatchBuilder::SAHFOp(OpcodeArgs) {
@@ -1301,10 +1300,10 @@ void OpDispatchBuilder::MOVSegOp(OpcodeArgs, bool ToSeg) {
     }
     if (DestIsMem(Op)) {
       // If the destination is memory then we always store 16-bits only
-      StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Segment, 2, -1);
+      StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Segment, OpSize::i16Bit, OpSize::iInvalid);
     } else {
       // If the destination is a GPR then we follow register storing rules
-      StoreResult(GPRClass, Op, Segment, -1);
+      StoreResult(GPRClass, Op, Segment, OpSize::iInvalid);
     }
   }
 }
@@ -1318,7 +1317,7 @@ void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) {
     // Source is memory(literal)
     // Dest is GPR
     Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.ForceLoad = true});
-    StoreResult(GPRClass, Op, Op->Dest, Src, -1);
+    StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid);
     break;
   case 0xA2:
   case 0xA3:
@@ -1327,13 +1326,13 @@ void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) {
     Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
     // This one is a bit special since the destination is a literal
     // So the destination gets stored in Src[1]
-    StoreResult(GPRClass, Op, Op->Src[1], Src, -1);
+    StoreResult(GPRClass, Op, Op->Src[1], Src, OpSize::iInvalid);
     break;
   }
 }

 void OpDispatchBuilder::CPUIDOp(OpcodeArgs) {
-  const auto GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   Ref Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags);
   Ref Leaf = LoadGPRRegister(X86State::REG_RCX);
@@ -1394,7 +1393,7 @@ void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs, bool SHL1Bit) {
   CalculateFlags_ShiftLeftImmediate(GetSrcSize(Op), Result, Dest, Shift);
   CalculateDeferredFlags();

-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::SHROp(OpcodeArgs) {
@@ -1417,7 +1416,7 @@ void OpDispatchBuilder::SHRImmediateOp(OpcodeArgs, bool SHR1Bit) {
   CalculateFlags_ShiftRightImmediate(GetSrcSize(Op), ALUOp, Dest, Shift);
   CalculateDeferredFlags();

-  StoreResult(GPRClass, Op, ALUOp, -1);
+  StoreResult(GPRClass, Op, ALUOp, OpSize::iInvalid);
 }

 void OpDispatchBuilder::SHLDOp(OpcodeArgs) {
@@ -1486,10 +1485,10 @@ void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) {
     CalculateFlags_ShiftLeftImmediate(GetSrcSize(Op), Res, Dest, Shift);
     CalculateDeferredFlags();

-    StoreResult(GPRClass, Op, Res, -1);
+    StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
   } else if (Shift == 0 && Size == 32) {
     // Ensure Zext still occurs
-    StoreResult(GPRClass, Op, Dest, -1);
+    StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
   }
 }

@@ -1549,11 +1548,11 @@ void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) {
       Res = _Extr(OpSizeFromSrc(Op), Src, Dest, Shift);
     }

-    StoreResult(GPRClass, Op, Res, -1);
+    StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
     CalculateFlags_ShiftRightDoubleImmediate(GetSrcSize(Op), Res, Dest, Shift);
   } else if (Shift == 0 && Size == 32) {
     // Ensure Zext still occurs
-    StoreResult(GPRClass, Op, Dest, -1);
+    StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
   }
 }

@@ -1577,7 +1576,7 @@ void OpDispatchBuilder::ASHROp(OpcodeArgs, bool Immediate, bool SHR1Bit) {
     CalculateFlags_SignShiftRightImmediate(GetSrcSize(Op), Result, Dest, Shift);
     CalculateDeferredFlags();

-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   } else {
     auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
     Ref Result = _Ashr(IR::SizeToOpSize(OpSize), Dest, Src);
@@ -1619,7 +1618,7 @@ void OpDispatchBuilder::RotateOp(OpcodeArgs, bool Left, bool IsImmediate, bool I
   // To rotate 64-bits left, right-rotate by (64 - Shift) = -Shift mod 64.
   auto Res = _Ror(OpSize, Dest, Left ? _Neg(OpSize, Src) : Src);

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   if (Is1Bit || IsImmediate) {
     if (UnmaskedConst) {
@@ -1653,7 +1652,7 @@ void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) {

   auto Dest = _Andn(OpSizeFromSrc(Op), Src2, Src1);

-  StoreResult(GPRClass, Op, Dest, -1);
+  StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
   CalculateFlags_Logical(GetSrcSize(Op), Dest, Src1, Src2);
 }

@@ -1692,7 +1691,7 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {
   auto Dest = _Select(IR::COND_ULE, Length, MaxSrcBitOp, Masked, SanitizedShifted);

   // Finally store the result.
-  StoreResult(GPRClass, Op, Dest, -1);
+  StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);

   // ZF is set properly. CF and OF are defined as being set to zero. SF, PF, and
   // AF are undefined.
@@ -1709,7 +1708,7 @@ void OpDispatchBuilder::BLSIBMIOp(OpcodeArgs) {
   auto NegatedSrc = _Neg(Size, Src);
   auto Result = _And(Size, Src, NegatedSrc);

-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

   // CF is cleared if Src is zero, otherwise it's set. However, Src is zero iff
   // Result is zero, so we can test the result instead. So, CF is just the
@@ -1729,7 +1728,7 @@ void OpDispatchBuilder::BLSMSKBMIOp(OpcodeArgs) {
   auto* Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

   auto Result = _Xor(Size, _Sub(Size, Src, _InlineConstant(1)), Src);
-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

   InvalidatePF_AF();
   // CF set according to the Src
@@ -1750,7 +1749,7 @@ void OpDispatchBuilder::BLSRBMIOp(OpcodeArgs) {
   auto Size = OpSizeFromSrc(Op);

   auto Result = _And(Size, _Sub(Size, Src, _InlineConstant(1)), Src);
-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

   auto Zero = _Constant(0);
   auto One = _Constant(1);
@@ -1765,8 +1764,8 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
   // In the event the source is a memory operand, use the
   // exact width instead of the GPR size.
-  const auto GPRSize = CTX->GetGPRSize();
-  const auto Size = GetSrcSize(Op);
+  const auto GPRSize = CTX->GetGPROpSize();
+  const auto Size = OpSizeFromSrc(Op);
   const auto SrcSize = Op->Src[0].IsGPR() ? GPRSize : Size;

   auto* Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
@@ -1784,7 +1783,7 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
     Result = _Lshr(IR::SizeToOpSize(Size), Src, Shift);
   }

-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::BZHI(OpcodeArgs) {
@@ -1812,7 +1811,7 @@ void OpDispatchBuilder::BZHI(OpcodeArgs) {
   // shenanigans and use the raw versions here.
   _TestNZ(OpSize::i64Bit, Index, _Constant(0xFF & ~(OperandSize - 1)));
   auto Result = _NZCVSelect(IR::SizeToOpSize(Size), {COND_NEQ}, Src, MaskResult);
-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

   auto Zero = _InlineConstant(0);
   auto One = _InlineConstant(1);
@@ -1849,17 +1848,17 @@ void OpDispatchBuilder::RORX(OpcodeArgs) {
     Result = _Ror(OpSizeFromSrc(Op), Src, _InlineConstant(Amount));
   }

-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::MULX(OpcodeArgs) {
   // RDX is the implied source operand in the instruction
-  const auto OperandSize = GetSrcSize(Op);
+  const auto OperandSize = OpSizeFromSrc(Op);
   const auto OpSize = IR::SizeToOpSize(OperandSize);

   // Src1 can be a memory operand, so ensure we constrain to the
   // absolute width of the access in that scenario.
-  const auto GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();
   const auto Src1Size = Op->Src[1].IsGPR() ? GPRSize : OperandSize;

   Ref Src1 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], Src1Size, Op->Flags);
@@ -1870,13 +1869,13 @@ void OpDispatchBuilder::MULX(OpcodeArgs) {
   // will be the high half of the multiplication result.
   if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) {
     Ref ResultHi = _UMulH(OpSize, Src1, Src2);
-    StoreResult(GPRClass, Op, Op->Dest, ResultHi, -1);
+    StoreResult(GPRClass, Op, Op->Dest, ResultHi, OpSize::iInvalid);
   } else {
     Ref ResultLo = _UMul(OpSize, Src1, Src2);
     Ref ResultHi = _UMulH(OpSize, Src1, Src2);

-    StoreResult(GPRClass, Op, Op->Src[0], ResultLo, -1);
-    StoreResult(GPRClass, Op, Op->Dest, ResultHi, -1);
+    StoreResult(GPRClass, Op, Op->Src[0], ResultLo, OpSize::iInvalid);
+    StoreResult(GPRClass, Op, Op->Dest, ResultHi, OpSize::iInvalid);
   }
 }

@@ -1886,7 +1885,7 @@ void OpDispatchBuilder::PDEP(OpcodeArgs) {
   auto* Mask = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
   auto Result = _PDep(OpSizeFromSrc(Op), Input, Mask);

-  StoreResult(GPRClass, Op, Op->Dest, Result, -1);
+  StoreResult(GPRClass, Op, Op->Dest, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::PEXT(OpcodeArgs) {
@@ -1895,7 +1894,7 @@ void OpDispatchBuilder::PEXT(OpcodeArgs) {
   auto* Mask = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
   auto Result = _PExt(OpSizeFromSrc(Op), Input, Mask);

-  StoreResult(GPRClass, Op, Op->Dest, Result, -1);
+  StoreResult(GPRClass, Op, Op->Dest, Result, OpSize::iInvalid);
 }

 void OpDispatchBuilder::ADXOp(OpcodeArgs) {
@@ -1924,7 +1923,7 @@ void OpDispatchBuilder::ADXOp(OpcodeArgs) {
   // Do the actual add.
   HandleNZCV_RMW();
   auto Result = _AdcWithFlags(OpSize, Src, Before);
-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

   // Now restore all flags except the one we're updating.
   if (CTX->HostFeatures.SupportsFlagM) {
@@ -1973,7 +1972,7 @@ void OpDispatchBuilder::RCROp1Bit(OpcodeArgs) {
     Res = _Orlshl(OpSize::i32Bit, Res, CF, Size - Shift);
   }

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   // OF is the top two MSBs XOR'd together
   // Only when Shift == 1, it is undefined otherwise
@@ -1995,7 +1994,7 @@ void OpDispatchBuilder::RCROp8x1Bit(OpcodeArgs) {
   Ref Res = _Bfe(OpSize::i32Bit, 7, 1, Dest);
   Res = _Bfi(OpSize::i32Bit, 1, 7, Res, CF);

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   // OF is the top two MSBs XOR'd together
   SetRFLAG(_XorShift(OpSize::i32Bit, Res, Res, ShiftType::LSR, 1), SizeBit - 2, true);
@@ -2050,7 +2049,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
       SetRFLAG(Xor, Size - 2, true);
     }

-    StoreResult(GPRClass, Op, Res, -1);
+    StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
     return;
   }
@@ -2092,7 +2091,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
       auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
       SetRFLAG(Xor, Size - 2, true);

-      StoreResult(GPRClass, Op, Res, -1);
+      StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
     },
     GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
 }

@@ -2178,7 +2177,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
   // rather than zeroes.
   Ref Res = _Lshr(OpSize::i64Bit, Tmp, Src);

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   uint64_t SrcConst;
   bool IsSrcConst = IsValueConstant(WrapNode(Src), &SrcConst);
@@ -2223,7 +2222,7 @@ void OpDispatchBuilder::RCLOp1Bit(OpcodeArgs) {
   // Top two MSBs is CF and top bit of result
   SetRFLAG(_Xor(OpSize, Res, Dest), Size - 1, true);

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
 }

 void OpDispatchBuilder::RCLOp(OpcodeArgs) {
@@ -2275,7 +2274,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
       SetRFLAG(NewOF, Size - 1, true);
     }

-    StoreResult(GPRClass, Op, Res, -1);
+    StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
     return;
   }
@@ -2314,7 +2313,7 @@ void OpDispatchBuilder::RCLOp(OpcodeArgs) {
       auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
       SetRFLAG(NewOF, Size - 1, true);

-      StoreResult(GPRClass, Op, Res, -1);
+      StoreResult(GPRClass, Op, Res, OpSize::iInvalid);
     },
     GetSrcSize(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
 }

@@ -2358,7 +2357,7 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {
   // Which we emulate with a _Ror
   Ref Res = _Ror(OpSize::i64Bit, Tmp, _Neg(OpSize::i32Bit, Src));

-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   // Our new CF is now at the bit position that we are shifting
   // Either 0 if CF hasn't changed (CF is living in bit 0)
@@ -2401,7 +2400,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
     if (Op->Dest.IsGPR()) {
       // When the destination is a GPR, we don't care about garbage in the upper bits.
       // Load the full register.
-      auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPRSize(), Op->Flags);
+      auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPROpSize(), Op->Flags);
       Value = Dest;

       // Get the bit selection from the src. We need to mask for 8/16-bit, but
@@ -2429,14 +2428,14 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
     case BTAction::BTClear: {
       Ref BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect);
       Dest = _Andn(IR::SizeToOpSize(LshrSize), Dest, BitMask);
-      StoreResult(GPRClass, Op, Dest, -1);
+      StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
       break;
     }

     case BTAction::BTSet: {
       Ref BitMask = _Lshl(IR::SizeToOpSize(LshrSize), _Constant(1), BitSelect);
       Dest = _Or(IR::SizeToOpSize(LshrSize), Dest, BitMask);
-      StoreResult(GPRClass, Op, Dest, -1);
+      StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
       break;
     }
@@ -2451,7 +2450,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
       }

       SetCFInverted(Value, ConstantShift, true);
-      StoreResult(GPRClass, Op, Dest, -1);
+      StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
       break;
     }
     }
@@ -2473,7 +2472,7 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {

     switch (Action) {
     case BTAction::BTNone: {
-      Value = _LoadMemAutoTSO(GPRClass, 1, Address, 1);
+      Value = _LoadMemAutoTSO(GPRClass, OpSize::i8Bit, Address, OpSize::i8Bit);
       break;
     }
@@ -2484,10 +2483,10 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
         HandledLock = true;
         Value = _AtomicFetchCLR(OpSize::i8Bit, BitMask, LoadEffectiveAddress(Address, true));
       } else {
-        Value = _LoadMemAutoTSO(GPRClass, 1, Address, 1);
+        Value = _LoadMemAutoTSO(GPRClass, OpSize::i8Bit, Address, OpSize::i8Bit);
         auto Modified = _Andn(OpSize::i64Bit, Value, BitMask);
-        _StoreMemAutoTSO(GPRClass, 1, Address, Modified, 1);
+        _StoreMemAutoTSO(GPRClass, OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
       }
       break;
     }
@@ -2499,10 +2498,10 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
         HandledLock = true;
         Value = _AtomicFetchOr(OpSize::i8Bit, BitMask, LoadEffectiveAddress(Address, true));
       } else {
-        Value = _LoadMemAutoTSO(GPRClass, 1, Address, 1);
+        Value = _LoadMemAutoTSO(GPRClass, OpSize::i8Bit, Address, OpSize::i8Bit);
         auto Modified = _Or(OpSize::i64Bit, Value, BitMask);
-        _StoreMemAutoTSO(GPRClass, 1, Address, Modified, 1);
+        _StoreMemAutoTSO(GPRClass, OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
       }
       break;
     }
@@ -2514,10 +2513,10 @@ void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
         HandledLock = true;
         Value = _AtomicFetchXor(OpSize::i8Bit, BitMask, LoadEffectiveAddress(Address, true));
       } else {
-        Value = _LoadMemAutoTSO(GPRClass, 1, Address, 1);
+        Value = _LoadMemAutoTSO(GPRClass, OpSize::i8Bit, Address, OpSize::i8Bit);
         auto Modified = _Xor(OpSize::i64Bit, Value, BitMask);
-        _StoreMemAutoTSO(GPRClass, 1, Address, Modified, 1);
+        _StoreMemAutoTSO(GPRClass, OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
       }
       break;
     }
@@ -2565,7 +2564,7 @@ void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) {
   default: FEX_UNREACHABLE;
   }

-  StoreResult(GPRClass, Op, Dest, -1);
+  StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
   CalculateFlags_MUL(Size, Dest, ResultHigh);
 }

@@ -2603,7 +2602,7 @@ void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) {
   default: FEX_UNREACHABLE;
   }

-  StoreResult(GPRClass, Op, Dest, -1);
+  StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
   CalculateFlags_MUL(Size, Dest, ResultHigh);
 }

@@ -2725,7 +2724,7 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) {
     // GPR version plays fast and loose with sizes, be safe for memory tho.
     Ref Src = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
     Src = _Xor(OpSize::i64Bit, Src, MaskConst);
-    StoreResult(GPRClass, Op, Src, -1);
+    StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
   } else {
     // Specially handle high bits so we can invert in place with the correct
     // mask and a larger type.
@@ -2738,7 +2737,7 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) {

     // Always load full size, we explicitly want the upper bits to get the
     // insert behaviour for free/implicitly.
-    const uint8_t GPRSize = CTX->GetGPRSize();
+    const auto GPRSize = CTX->GetGPROpSize();
     Ref Src = LoadSource_WithOpSize(GPRClass, Op, Dest, GPRSize, Op->Flags);

     // For 8/16-bit, use 64-bit invert so we invert in place, while getting
@@ -2754,7 +2753,7 @@ void OpDispatchBuilder::NOTOp(OpcodeArgs) {

     // Always store 64-bit, the Not/Xor correctly handle the upper bits and this
     // way we can delete the store.
-    StoreResult_WithOpSize(GPRClass, Op, Dest, Src, GPRSize, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Dest, Src, GPRSize, OpSize::iInvalid);
   }
 }

@@ -2768,23 +2767,23 @@ void OpDispatchBuilder::XADDOp(OpcodeArgs) {
     Result = CalculateFlags_ADD(GetSrcSize(Op), Dest, Src);

     // Previous value in dest gets stored in src
-    StoreResult(GPRClass, Op, Op->Src[0], Dest, -1);
+    StoreResult(GPRClass, Op, Op->Src[0], Dest, OpSize::iInvalid);

     // Calculated value gets stored in dst (order is important if dst is same as src)
-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   } else {
     HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;
     Dest = AppendSegmentOffset(Dest, Op->Flags);
     auto Before = _AtomicFetchAdd(OpSizeFromSrc(Op), Src, Dest);
     CalculateFlags_ADD(GetSrcSize(Op), Before, Src);
-    StoreResult(GPRClass, Op, Op->Src[0], Before, -1);
+    StoreResult(GPRClass, Op, Op->Src[0], Before, OpSize::iInvalid);
   }
 }

 void OpDispatchBuilder::PopcountOp(OpcodeArgs) {
   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = GetSrcSize(Op) >= 4});
   Src = _Popcount(OpSizeFromSrc(Op), Src);
-  StoreResult(GPRClass, Op, Src, -1);
+  StoreResult(GPRClass, Op, Src, OpSize::iInvalid);

   // We need to set ZF while clearing the rest of NZCV. The result of a popcount
   // is in the range [0, 63]. In particular, it is always positive. So a
@@ -2932,7 +2931,7 @@ void OpDispatchBuilder::XLATOp(OpcodeArgs) {
 void OpDispatchBuilder::ReadSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment Seg) {
   // 64-bit only
   // Doesn't hit the segment register optimization
-  auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref Src {};
   if (Seg == Segment::FS) {
     Src = _LoadContext(Size, GPRClass, offsetof(FEXCore::Core::CPUState, fs_cached));
@@ -2940,13 +2939,13 @@ void OpDispatchBuilder::ReadSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment Se
     Src = _LoadContext(Size, GPRClass, offsetof(FEXCore::Core::CPUState, gs_cached));
   }

-  StoreResult(GPRClass, Op, Src, -1);
+  StoreResult(GPRClass, Op, Src, OpSize::iInvalid);
 }

 void OpDispatchBuilder::WriteSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment Seg) {
   // Documentation claims that the 32-bit version of this instruction inserts in to the lower 32-bits of the segment
   // This is incorrect and it instead zero extends the 32-bit value to 64-bit
-  auto Size = GetDstSize(Op);
+  const auto Size = OpSizeFromDst(Op);
   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
   if (Seg == Segment::FS) {
     _StoreContext(Size, GPRClass, Src, offsetof(FEXCore::Core::CPUState, fs_cached));
@@ -2956,14 +2955,14 @@ void OpDispatchBuilder::WriteSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment S
 }

 void OpDispatchBuilder::EnterOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   const uint64_t Value = Op->Src[0].Literal();

   const uint16_t AllocSpace = Value & 0xFFFF;
   const uint8_t Level = (Value >> 16) & 0x1F;

-  const auto PushValue = [&](uint8_t Size, Ref Src) -> Ref {
-    const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto PushValue = [&](IR::OpSize Size, Ref Src) -> Ref {
+    const auto GPRSize = CTX->GetGPROpSize();

     auto OldSP = LoadGPRRegister(X86State::REG_RSP);
     auto NewSP = _Push(GPRSize, Size, Src, OldSP);
@@ -3003,7 +3002,7 @@ void OpDispatchBuilder::SGDTOp(OpcodeArgs) {
   //
   // Operand size prefix is ignored on this instruction, size purely depends on operating mode.
   uint64_t GDTAddress = 0xFFFFFFFFFFFE0000ULL;
-  size_t GDTStoreSize = OpSize::i64Bit;
+  auto GDTStoreSize = OpSize::i64Bit;
   if (!CTX->Config.Is64BitMode) {
     // Mask off upper bits if 32-bit result.
     GDTAddress &= ~0U;
@@ -3017,7 +3016,7 @@ void OpDispatchBuilder::SMSWOp(OpcodeArgs) {
   const bool IsMemDst = DestIsMem(Op);

-  uint32_t DstSize {};
+  IR::OpSize DstSize {OpSize::iInvalid};
   Ref Const = _Constant((1U << 31) |  ///< PG - Paging
                         (0U << 30) |  ///< CD - Cache Disable
                         (0U << 29) |  ///< NW - Not Writethrough (Legacy, now ignored)
@@ -3034,19 +3033,19 @@ void OpDispatchBuilder::SMSWOp(OpcodeArgs) {
                         (1U << 0));   ///< PE - Protection Enabled

   if (CTX->Config.Is64BitMode) {
-    DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? 2 :
-              X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? 8 :
-              4;
+    DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? OpSize::i16Bit :
+              X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST ? OpSize::i64Bit :
+              OpSize::i32Bit;

     if (!IsMemDst && DstSize == OpSize::i32Bit) {
       // Special-case version of `smsw ebx`. This instruction does an insert in to the lower 32-bits on 64-bit hosts.
       // Override and insert.
-      auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPRSize(), Op->Flags);
+      auto Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPROpSize(), Op->Flags);
       Const = _Bfi(OpSize::i64Bit, 32, 0, Dest, Const);
       DstSize = OpSize::i64Bit;
     }
   } else {
-    DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? 2 : 4;
+    DstSize = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0) == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST ? OpSize::i16Bit : OpSize::i32Bit;
   }

   if (IsMemDst) {
@@ -3054,7 +3053,7 @@ void OpDispatchBuilder::SMSWOp(OpcodeArgs) {
     DstSize = OpSize::i16Bit;
   }

-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Const, DstSize, -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Const, DstSize, OpSize::iInvalid);
 }

 OpDispatchBuilder::CycleCounterPair OpDispatchBuilder::CycleCounter() {
@@ -3118,7 +3117,7 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) {
   }

   if (!IsLocked) {
-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   }
 }

@@ -3161,7 +3160,7 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
   }

   if (!IsLocked) {
-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   }
 }

@@ -3172,7 +3171,7 @@ void OpDispatchBuilder::STOSOp(OpcodeArgs) {
     return;
   }

-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) != 0;

   if (!Repeat) {
@@ -3214,7 +3213,7 @@ void OpDispatchBuilder::MOVSOp(OpcodeArgs) {
   }

   // RA now can handle these to be here, to avoid DF accesses
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   if (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) {
     auto SrcAddr = LoadGPRRegister(X86State::REG_RSI);
@@ -3272,7 +3271,7 @@ void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
     return;
   }

-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);

   bool Repeat = Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX);
   if (!Repeat) {
@@ -3392,7 +3391,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {
     return;
   }

-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) != 0;

   if (!Repeat) {
@@ -3400,7 +3399,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {

     auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);

-    StoreResult(GPRClass, Op, Src, -1);
+    StoreResult(GPRClass, Op, Src, OpSize::iInvalid);

     // Offset the pointer
     Ref TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
@@ -3441,7 +3440,7 @@ void OpDispatchBuilder::LODSOp(OpcodeArgs) {

       auto Src = _LoadMemAutoTSO(GPRClass, Size, Dest_RSI, Size);

-      StoreResult(GPRClass, Op, Src, -1);
+      StoreResult(GPRClass, Op, Src, OpSize::iInvalid);

       Ref TailCounter = LoadGPRRegister(X86State::REG_RCX);
       Ref TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
@@ -3475,7 +3474,7 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
     return;
   }

-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX)) != 0;

   if (!Repeat) {
@@ -3560,26 +3559,26 @@ void OpDispatchBuilder::SCASOp(OpcodeArgs) {
 void OpDispatchBuilder::BSWAPOp(OpcodeArgs) {
   Ref Dest;
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   if (Size == OpSize::i16Bit) {
     // BSWAP of 16bit is undef. ZEN+ causes the lower 16bits to get zero'd
     Dest = _Constant(0);
   } else {
-    Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPRSize(), Op->Flags);
-    Dest = _Rev(IR::SizeToOpSize(Size), Dest);
+    Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, CTX->GetGPROpSize(), Op->Flags);
+    Dest = _Rev(Size, Dest);
   }
-  StoreResult(GPRClass, Op, Dest, -1);
+  StoreResult(GPRClass, Op, Dest, OpSize::iInvalid);
 }

 void OpDispatchBuilder::PUSHFOp(OpcodeArgs) {
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref Src = GetPackedRFLAG();
   Push(Size, Src);
 }

 void OpDispatchBuilder::POPFOp(OpcodeArgs) {
-  const uint8_t Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref Src = Pop(Size);

   // Add back our flag constants
@@ -3605,7 +3604,7 @@ void OpDispatchBuilder::NEGOp(OpcodeArgs) {
     Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
     Ref Result = CalculateFlags_SUB(Size, ZeroConst, Dest);

-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
   }
 }

@@ -3712,8 +3711,8 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::BSFOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
-  const uint8_t DstSize = GetDstSize(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;
+  const auto GPRSize = CTX->GetGPROpSize();
+  const auto DstSize = OpSizeFromDst(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;
   Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags, {.AllowUpperGarbage = true});
   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
@@ -3730,12 +3729,12 @@ void OpDispatchBuilder::BSFOp(OpcodeArgs) {
   // hardware satisfies it. We provide the stronger AMD behaviour as
   // applications might rely on that in the wild.
   auto SelectOp = NZCVSelect(IR::SizeToOpSize(GPRSize), {COND_EQ}, Dest, Result);
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, OpSize::iInvalid);
 }

 void OpDispatchBuilder::BSROp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
-  const uint8_t DstSize = GetDstSize(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;
+  const auto GPRSize = CTX->GetGPROpSize();
+  const auto DstSize = OpSizeFromDst(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;
   Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, DstSize, Op->Flags, {.AllowUpperGarbage = true});
   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
@@ -3748,7 +3747,7 @@ void OpDispatchBuilder::BSROp(OpcodeArgs) {

   // If Src was zero then the destination doesn't get modified
   auto SelectOp = NZCVSelect(IR::SizeToOpSize(GPRSize), {COND_EQ}, Dest, Result);
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, SelectOp, DstSize, OpSize::iInvalid);
 }

 void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
@@ -3767,8 +3766,8 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
   // *Xn = Xt
   // Xs = MemData

-  const auto GPRSize = CTX->GetGPRSize();
-  auto Size = GetSrcSize(Op);
+  const auto GPRSize = CTX->GetGPROpSize();
+  auto Size = OpSizeFromSrc(Op);

   if (Op->Dest.IsGPR()) {
     // This is our source register
@@ -3814,9 +3813,9 @@ void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
     // Store in to GPR Dest
     if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
-      StoreResult_WithOpSize(GPRClass, Op, Op->Dest, DestResult, GPRSize, -1);
+      StoreResult_WithOpSize(GPRClass, Op, Op->Dest, DestResult, GPRSize, OpSize::iInvalid);
     } else {
-      StoreResult(GPRClass, Op, DestResult, -1);
+      StoreResult(GPRClass, Op, DestResult, OpSize::iInvalid);
     }
   } else {
     Ref Src2 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
@@ -3972,7 +3971,7 @@ uint32_t OpDispatchBuilder::GetDstBitSize(X86Tables::DecodedOp Op) const {
 }

 Ref OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefix, bool Override) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   if (CTX->Config.Is64BitMode) {
     if (Flags & FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX) {
@@ -4114,7 +4113,7 @@ void OpDispatchBuilder::UpdatePrefixFromSegment(Ref Segment, uint32_t SegmentReg
   // Use BFE to extract the selector index in bits [15,3] of the segment register.
   // In some cases the upper 16-bits of the 32-bit GPR contain garbage to ignore.
   Segment = _Bfe(OpSize::i32Bit, 16 - 3, 3, Segment);
-  auto NewSegment = _LoadContextIndexed(Segment, 4, offsetof(FEXCore::Core::CPUState, gdt[0]), 4, GPRClass);
+  auto NewSegment = _LoadContextIndexed(Segment, OpSize::i32Bit, offsetof(FEXCore::Core::CPUState, gdt[0]), 4, GPRClass);
   CheckLegacySegmentWrite(NewSegment, SegmentReg);
   switch (SegmentReg) {
   case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
@@ -4292,7 +4291,7 @@ AddressMode OpDispatchBuilder::DecodeAddress(const X86Tables::DecodedOp& Op, con

 Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand,
-                                             uint8_t OpSize, uint32_t Flags, const LoadSourceOptions& Options) {
+                                             IR::OpSize OpSize, uint32_t Flags, const LoadSourceOptions& Options) {
   auto [Align, LoadData, ForceLoad, AccessType, AllowUpperGarbage] = Options;

   AddressMode A = DecodeAddress(Op, Operand, AccessType, true /* IsLoad */);
@@ -4322,7 +4321,7 @@ Ref OpDispatchBuilder::LoadSource_WithOpSize(RegisterClassType Class, const X86T
   }

   if ((IsOperandMem(Operand, true) && LoadData) || ForceLoad) {
-    return _LoadMemAutoTSO(Class, OpSize, A, Align == -1 ? OpSize : Align);
+    return _LoadMemAutoTSO(Class, OpSize, A, Align == OpSize::iInvalid ? OpSize : Align);
   } else {
     return LoadEffectiveAddress(A, false, AllowUpperGarbage);
   }
@@ -4373,17 +4372,17 @@ void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, const Ref Src) {

 Ref OpDispatchBuilder::LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand,
                                   uint32_t Flags, const LoadSourceOptions& Options) {
-  const uint8_t OpSize = GetSrcSize(Op);
+  const auto OpSize = OpSizeFromSrc(Op);
   return LoadSource_WithOpSize(Class, Op, Operand, OpSize, Flags, Options);
 }

 void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op,
-                                               const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, uint8_t OpSize,
-                                               int8_t Align, MemoryAccessType AccessType) {
+                                               const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, IR::OpSize OpSize,
+                                               IR::OpSize Align, MemoryAccessType AccessType) {
   if (Operand.IsGPR()) {
     // 8Bit and 16bit destination types store their result without effecting the upper bits
     // 32bit ops ZEXT the result to 64bit
-    const uint8_t GPRSize = CTX->GetGPRSize();
+    const auto GPRSize = CTX->GetGPROpSize();
     const auto gpr = Operand.Data.GPR.GPR;
     if (gpr >= FEXCore::X86State::REG_MM_0) {
@@ -4450,18 +4449,19 @@ void OpDispatchBuilder::StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Cl
     // For X87 extended doubles, split before storing
     _StoreMem(FPRClass, OpSize::i64Bit, MemStoreDst, Src, Align);
     auto Upper = _VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, Src, 1);
-    _StoreMem(GPRClass, OpSize::i16Bit, Upper, MemStoreDst, _Constant(8), std::min(Align, 8), MEM_OFFSET_SXTX, 1);
+    _StoreMem(GPRClass, OpSize::i16Bit, Upper, MemStoreDst, _Constant(8), std::min(Align, OpSize::i64Bit), MEM_OFFSET_SXTX, 1);
   } else {
-    _StoreMemAutoTSO(Class, OpSize, A, Src, Align == -1 ? OpSize : Align);
+    _StoreMemAutoTSO(Class, OpSize, A, Src, Align == OpSize::iInvalid ? OpSize : Align);
   }
 }

 void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op,
-                                    const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, int8_t Align, MemoryAccessType AccessType) {
-  StoreResult_WithOpSize(Class, Op, Operand, Src, GetDstSize(Op), Align, AccessType);
+                                    const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, IR::OpSize Align,
+                                    MemoryAccessType AccessType) {
+  StoreResult_WithOpSize(Class, Op, Operand, Src, OpSizeFromDst(Op), Align, AccessType);
 }

-void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, const Ref Src, int8_t Align,
+void OpDispatchBuilder::StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, const Ref Src, IR::OpSize Align,
                                     MemoryAccessType AccessType) {
   StoreResult(Class, Op, Op->Dest, Src, Align, AccessType);
 }
@@ -4504,12 +4504,12 @@ void OpDispatchBuilder::UnhandledOp(OpcodeArgs) {
 void OpDispatchBuilder::MOVGPROp(OpcodeArgs, uint32_t SrcIndex) {
   // StoreResult will store with the same size as the input, so we allow upper
   // garbage on the input. The zero extension would be pointless.
-  Ref Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.Align = 1, .AllowUpperGarbage = true});
+  Ref Src = LoadSource(GPRClass, Op, Op->Src[SrcIndex], Op->Flags, {.Align = OpSize::i8Bit, .AllowUpperGarbage = true});
   StoreResult(GPRClass, Op, Src, OpSize::i8Bit);
 }

 void OpDispatchBuilder::MOVGPRNTOp(OpcodeArgs) {
-  Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1});
+  Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
   StoreResult(GPRClass, Op, Src, OpSize::i8Bit, MemoryAccessType::STREAM);
 }

@@ -4524,17 +4524,17 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
       Op->Dest.Data.GPR == Op->Src[SrcIdx].Data.GPR) {
     auto Result = _Constant(0);

-    StoreResult(GPRClass, Op, Result, -1);
+    StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
     CalculateFlags_Logical(GetSrcSize(Op), Result, Result, Result);
     return;
   }

-  auto Size = GetDstSize(Op);
-  uint8_t ResultSize = Size;
+  auto Size = OpSizeFromDst(Op);
+  auto ResultSize = Size;
   auto RoundedSize = Size;

   if (ALUIROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) {
-    RoundedSize = std::max(OpSize::i32Bit, RoundedSize);
+    RoundedSize = std::max(OpSize::i32Bit, RoundedSize);
   }

   // X86 basic ALU ops just do the operation between the destination and a single source
@@ -4546,7 +4546,7 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
   if (Size < OpSize::i32Bit && !DestIsLockedMem(Op) && Op->Dest.IsGPR() && !Op->Dest.Data.GPR.HighBits &&
       IsValueConstant(WrapNode(Src), &Const) &&
       (ALUIROp == IR::IROps::OP_XOR || ALUIROp == IR::IROps::OP_OR || ALUIROp == IR::IROps::OP_ANDWITHFLAGS)) {
-    RoundedSize = ResultSize = CTX->GetGPRSize();
+    RoundedSize = ResultSize = CTX->GetGPROpSize();
    LOGMAN_THROW_A_FMT(Const < (1ull << (Size * 8)), "does not clobber");

     // For AND, we can play the same trick but we instead need the upper bits of
@@ -4600,7 +4600,7 @@ void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::I
   }

   if (!DestIsLockedMem(Op)) {
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, ResultSize, -1, MemoryAccessType::DEFAULT);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, ResultSize, OpSize::iInvalid, MemoryAccessType::DEFAULT);
   }
 }

@@ -4686,7 +4686,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) {
   // Calculate flags early.
   FlushRegisterCache();

-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   if (SetRIPToNext) {
     BlockSetRIP = SetRIPToNext;
@@ -4730,7 +4730,7 @@ void OpDispatchBuilder::TZCNT(OpcodeArgs) {

   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
   Src = _FindTrailingZeroes(OpSizeFromSrc(Op), Src);
-  StoreResult(GPRClass, Op, Src, -1);
+  StoreResult(GPRClass, Op, Src, OpSize::iInvalid);

   CalculateFlags_ZCNT(GetSrcSize(Op), Src);
 }

@@ -4740,12 +4740,12 @@ void OpDispatchBuilder::LZCNT(OpcodeArgs) {

   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
   auto Res = _CountLeadingZeroes(OpSizeFromSrc(Op), Src);
-  StoreResult(GPRClass, Op, Res, -1);
+  StoreResult(GPRClass, Op, Res, OpSize::iInvalid);

   CalculateFlags_ZCNT(GetSrcSize(Op), Res);
 }

 void OpDispatchBuilder::MOVBEOp(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();
   const auto SrcSize = GetSrcSize(Op);

   Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
@@ -4757,10 +4757,10 @@ void OpDispatchBuilder::MOVBEOp(OpcodeArgs) {
     // bfxil the 16-bit result in to the GPR.
     Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags);
     auto Result = _Bfxil(IR::SizeToOpSize(GPRSize), 16, 16, Dest, Src);
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid);
   } else {
     // 32-bit does regular zext
-    StoreResult(GPRClass, Op, Op->Dest, Src, -1);
+    StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid);
   }
 }

@@ -4833,14 +4833,14 @@ void OpDispatchBuilder::RDTSCPOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::RDPIDOp(OpcodeArgs) {
-  StoreResult(GPRClass, Op, _ProcessorID(), -1);
+  StoreResult(GPRClass, Op, _ProcessorID(), OpSize::iInvalid);
 }

 void OpDispatchBuilder::CRC32(OpcodeArgs) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   // Destination GPR size is always 4 or 8 bytes depending on widening
-  uint8_t DstSize = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING ? OpSize::i64Bit : OpSize::i32Bit;
+  const auto DstSize = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING ? OpSize::i64Bit : OpSize::i32Bit;
   Ref Dest = LoadSource_WithOpSize(GPRClass, Op, Op->Dest, GPRSize, Op->Flags);

   // Incoming memory is 8, 16, 32, or 64
@@ -4848,15 +4848,15 @@ void OpDispatchBuilder::CRC32(OpcodeArgs) {
   if (Op->Src[0].IsGPR()) {
     Src = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], GPRSize, Op->Flags);
   } else {
-    Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1});
+    Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
   }

-  auto Result = _CRC32(Dest, Src, GetSrcSize(Op));
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, DstSize, -1);
+  auto Result = _CRC32(Dest, Src, OpSizeFromSrc(Op));
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid);
 }

 template<bool Reseed>
 void OpDispatchBuilder::RDRANDOp(OpcodeArgs) {
-  StoreResult(GPRClass, Op, _RDRAND(Reseed), -1);
+  StoreResult(GPRClass, Op, _RDRAND(Reseed), OpSize::iInvalid);

   // If the rng number is valid then NZCV is 0b0000, otherwise NZCV is 0b0100
   auto CF_inv = GetRFLAG(X86State::RFLAG_ZF_RAW_LOC);
@@ -4876,7 +4876,7 @@ void OpDispatchBuilder::RDRANDOp(OpcodeArgs) {
 }

 void OpDispatchBuilder::BreakOp(OpcodeArgs, FEXCore::IR::BreakDefinition BreakDefinition) {
-  const uint8_t GPRSize = CTX->GetGPRSize();
+  const auto GPRSize = CTX->GetGPROpSize();

   // We don't actually support this instruction
   // Multiblock may hit it though
@@ -5008,11 +5008,11 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
     {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},
     {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},

-    {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>},
-    {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>},
+    {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i64Bit>},

-    {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 4>},
-    {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 8>},
+    {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i64Bit>},

     {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
     {OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
@@ -5025,121 +5025,121 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
     {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
     {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},

-    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<4>},
-    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<8>},
+    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i32Bit>},
+    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i64Bit>},

     {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},
     {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},

-    {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<4, false>},
-    {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, false>},
+    {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, false>},
+    {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, false>},

-    {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<4, true>},
-    {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, true>},
+    {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>},
+    {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, true>},

-    {OPD(1, 0b00, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<4>},
-    {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<8>},
-    {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<4>},
-    {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<8>},
+    {OPD(1, 0b00, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},
+    {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},

-    {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 4>},
-    {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 8>},
+    {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i64Bit>},

-    {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, 4>},
-    {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, 8>},
-    {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},
-    {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},
+    {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},
+    {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},

-    {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRSQRT, 4>},
-    {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},
+    {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>},
+    {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},

-    {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRECP, 4>},
-    {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},
+    {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>},
+    {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp},

-    {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, 16>},
-    {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, 16>},
+    {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
+    {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>},

     {OPD(1, 0b00, 0x55), 1, &OpDispatchBuilder::VANDNOp},
     {OPD(1, 0b01, 0x55), 1, &OpDispatchBuilder::VANDNOp},

-    {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, 16>},
-    {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, 16>},
+    {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
+    {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>},

     {OPD(1, 0b00, 0x57), 1, &OpDispatchBuilder::AVXVectorXOROp},
     {OPD(1, 0b01, 0x57), 1, &OpDispatchBuilder::AVXVectorXOROp},

-    {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, 4>},
-    {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, 8>},
-    {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, 4>},
-    {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, 8>},
-    {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, 8, 4, true>},
-    {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, 4, 8, true>},
-    {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<8, 4>},
-    {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<4, 8>},
-
-    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, false>},
-    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, true>},
-    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<4, false, false>},
-
-    {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, 4>},
-    {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, 8>},
-    {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b00, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, 4>},
-    {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, 8>},
-    {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, 4>},
-    {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, 8>},
-    {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, 4>},
-    {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, 8>},
-    {OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-    {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
-
-    {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 1>},
-    {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 2>},
-    {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 4>},
-    {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, 2>},
-    {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 1>},
-    {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 2>},
-    {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 4>},
-    {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, 2>},
-    {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 1>},
-    {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 2>},
-    {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 4>},
-    {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, 4>},
-    {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, 8>},
-    {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, 8>},
+    {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+    {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+
+    {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+    {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+
+    {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit, true>},
+    {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit, true>},
+    {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>},
+    {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>},
+
+    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<OpSize::i32Bit, false>},
+    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, true>},
+    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
+
+    {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+    {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+
+    {OPD(1, 0b00, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+    {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+
+    {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+    {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp},
+
+    {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>},
+    {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, OpSize::i64Bit>},
+    {OPD(1, 0b10, 0x5F), 1,
&OpDispatchBuilder::AVXVectorScalarInsertALUOp}, + {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp}, + + {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i64Bit>}, + {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i64Bit>}, {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp}, {OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp}, - {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 4, true>}, - {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 2, false>}, - {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, 2, true>}, + {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i32Bit, true>}, + {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i16Bit, false>}, + {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i16Bit, true>}, - {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 1>}, - {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 2>}, - {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 4>}, + {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>}, {OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::VZEROOp}, - {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::VHADDPOp}, - {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::VHADDPOp}, - {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, 8>}, - {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, 4>}, + {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::VHADDPOp}, + {OPD(1, 0b11, 0x7C), 1, 
&OpDispatchBuilder::VHADDPOp}, + {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, OpSize::i64Bit>}, + {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, OpSize::i32Bit>}, {OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>}, {OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>}, @@ -5147,151 +5147,151 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp}, {OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp}, - {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<4>}, - {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<8>}, - {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<4>}, - {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<8>}, + {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp}, + {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp}, + {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp}, + {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp}, {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp}, - {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, + {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>}, - {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 4>}, - {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, 8>}, + {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, OpSize::i64Bit>}, - {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<8>}, - {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<4>}, + {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp}, + {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp}, - {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 2>}, - {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 4>}, - {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, 8>}, - {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 8>}, - {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, 2>}, + {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, OpSize::i16Bit>}, {OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>}, {OPD(1, 0b01, 0xD7), 1, &OpDispatchBuilder::MOVMSKOpOne}, - {OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, 1>}, - {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, 2>}, - {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, 
IR::OP_VUMIN, 1>}, - {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, 16>}, - {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, 1>}, - {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, 2>}, - {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, 1>}, + {OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>}, + {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>}, {OPD(1, 0b01, 0xDF), 1, &OpDispatchBuilder::VANDNOp}, - {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, 1>}, - {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, 2>}, - {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, 4>}, - {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, 2>}, + {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>}, {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::VPMULHWOp}, {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::VPMULHWOp}, - {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, false>}, - {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float<4, true>}, - {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int<8, true, true>}, + {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int}, + {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Int_To_Float}, + {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVXVector_CVT_Float_To_Int}, {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::MOVVectorNTOp}, - {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, 1>}, - {OPD(1, 0b01, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, 2>}, - {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, 2>}, - {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, 16>}, - {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, 1>}, - {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, 2>}, - {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, 
2>}, + {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>}, + {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>}, {OPD(1, 0b01, 0xEF), 1, &OpDispatchBuilder::AVXVectorXOROp}, {OPD(1, 0b11, 0xF0), 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, - {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 2>}, - {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 4>}, - {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, 8>}, - {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::VPMULLOp<4, false>}, + {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::VPMULLOp}, {OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::VPMADDWDOp}, {OPD(1, 0b01, 0xF6), 1, &OpDispatchBuilder::VPSADBWOp}, {OPD(1, 0b01, 0xF7), 1, &OpDispatchBuilder::MASKMOVOp}, - {OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, 1>}, - {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, 2>}, - {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, 4>}, - {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, 8>}, - {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 1>}, - {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 2>}, - {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, 4>}, + {OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i32Bit>}, {OPD(2, 0b01, 0x00), 1, &OpDispatchBuilder::VPSHUFBOp}, - {OPD(2, 0b01, 0x01), 1, &OpDispatchBuilder::VHADDPOp}, - {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::VHADDPOp}, + {OPD(2, 0b01, 
0x01), 1, &OpDispatchBuilder::VHADDPOp}, + {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::VHADDPOp}, {OPD(2, 0b01, 0x03), 1, &OpDispatchBuilder::VPHADDSWOp}, {OPD(2, 0b01, 0x04), 1, &OpDispatchBuilder::VPMADDUBSWOp}, - {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, 2>}, - {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, 4>}, + {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, OpSize::i32Bit>}, {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::VPHSUBSWOp}, - {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::VPSIGN<1>}, - {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::VPSIGN<2>}, - {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::VPSIGN<4>}, + {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::VPSIGN}, + {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::VPSIGN}, + {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::VPSIGN}, {OPD(2, 0b01, 0x0B), 1, &OpDispatchBuilder::VPMULHRSWOp}, - {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::VPERMILRegOp<4>}, - {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::VPERMILRegOp<8>}, - {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::VTESTPOp<4>}, - {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::VTESTPOp<8>}, + {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::VPERMILRegOp}, + {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::VPERMILRegOp}, + {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::VTESTPOp}, + {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::VTESTPOp}, {OPD(2, 0b01, 0x13), 1, &OpDispatchBuilder::VCVTPH2PSOp}, {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::VPERMDOp}, {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::PTestOp}, - {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 4>}, - {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 8>}, - {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 16>}, - {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 1>}, - {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 2>}, - {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, 4>}, - - {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, true>}, - {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, true>}, - {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, true>}, - {OPD(2, 0b01, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, true>}, - {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, true>}, - {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, true>}, - - {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::VPMULLOp<4, true>}, - {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, 8>}, + {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i64Bit>}, + {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i128Bit>}, + {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x1E), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>}, + + {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements}, + + {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::VPMULLOp}, + {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i64Bit>}, {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp}, - {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, 4>}, - {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::VMASKMOVOp<4, false>}, - {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::VMASKMOVOp<8, false>}, - {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::VMASKMOVOp<4, true>}, - {OPD(2, 0b01, 0x2F), 1, &OpDispatchBuilder::VMASKMOVOp<8, true>}, - - {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, false>}, - {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, false>}, - {OPD(2, 0b01, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, false>}, - {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, false>}, - {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, false>}, - {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, false>}, + {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::VMASKMOVOp}, + {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::VMASKMOVOp}, + {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::VMASKMOVOp}, + {OPD(2, 0b01, 0x2F), 1, &OpDispatchBuilder::VMASKMOVOp}, + + {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements}, + {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements}, {OPD(2, 0b01, 0x36), 1, &OpDispatchBuilder::VPERMDOp}, - {OPD(2, 0b01, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, 8>}, - {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, 1>}, - {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, 4>}, - {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, 2>}, - {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, 4>}, - {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, 1>}, - {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, 4>}, - {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, 2>}, - {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, 4>}, - - {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, 4>}, + {OPD(2, 0b01, 0x37), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i64Bit>}, + {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i32Bit>}, + + {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, OpSize::i32Bit>}, {OPD(2, 0b01, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp}, {OPD(2, 0b01, 0x45), 1, &OpDispatchBuilder::VPSRLVOp}, {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::VPSRAVDOp}, {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::VPSLLVOp}, - {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 4>}, - {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 8>}, - {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 16>}, + {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i64Bit>}, + {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i128Bit>}, - {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 1>}, - {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, 2>}, + {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i16Bit>}, {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::VPMASKMOVOp}, {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::VPMASKMOVOp}, @@ -5346,22 +5346,22 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::VPERMQOp}, {OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::VPERMQOp}, {OPD(3, 0b01, 0x02), 1, &OpDispatchBuilder::VPBLENDDOp}, - {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, 4>}, - {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, 8>}, + {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, OpSize::i32Bit>}, + {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, OpSize::i64Bit>}, {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::VPERM2Op}, - {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVXVectorRound<4>}, - {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVXVectorRound<8>}, - {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVXInsertScalarRound<4>}, - {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::AVXInsertScalarRound<8>}, + {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVXVectorRound}, + {OPD(3, 0b01, 0x09), 1, 
&OpDispatchBuilder::AVXVectorRound}, + {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVXInsertScalarRound}, + {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::AVXInsertScalarRound}, {OPD(3, 0b01, 0x0C), 1, &OpDispatchBuilder::VPBLENDDOp}, {OPD(3, 0b01, 0x0D), 1, &OpDispatchBuilder::VBLENDPDOp}, {OPD(3, 0b01, 0x0E), 1, &OpDispatchBuilder::VPBLENDWOp}, {OPD(3, 0b01, 0x0F), 1, &OpDispatchBuilder::VPALIGNROp}, - {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 1>}, - {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, - {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, - {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>}, + {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>}, + {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>}, + {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>}, + {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>}, {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::VINSERTOp}, {OPD(3, 0b01, 0x19), 1, &OpDispatchBuilder::VEXTRACT128Op}, @@ -5373,15 +5373,15 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp}, {OPD(3, 0b01, 0x39), 1, &OpDispatchBuilder::VEXTRACT128Op}, - {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::VDPPOp<4>}, - {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::VDPPOp<8>}, + {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::VDPPOp}, + {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::VDPPOp}, {OPD(3, 0b01, 0x42), 1, &OpDispatchBuilder::VMPSADBWOp}, {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op}, - {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 4>}, - {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 8>}, - {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, 1>}, + {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i32Bit>}, + {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i64Bit>}, + {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i8Bit>}, {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp}, {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp}, @@ -5394,17 +5394,17 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() { #define OPD(group, pp, opcode) (((group - X86Tables::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) static constexpr std::tuple VEXTableGroupOps[] { - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 2>}, - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 2>}, - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, 2>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i16Bit>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i16Bit>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, OpSize::i16Bit>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 4>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 4>}, - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, 4>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i32Bit>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i32Bit>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, OpSize::i32Bit>}, - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, 8>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i64Bit>}, {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b011), 1, &OpDispatchBuilder::VPSRLDQOp}, - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, 8>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i64Bit>}, {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b111), 1, &OpDispatchBuilder::VPSLLDQOp}, {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b010), 1, &OpDispatchBuilder::LDMXCSR}, @@ -5453,38 +5453,46 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { #define OPDReg(op, reg) ((1 << 15) | ((op - 0xD8) << 8) | (reg << 3)) #define OPD(op, modrmop) (((op - 0xD8) << 8) | modrmop) constexpr static std::tuple X87F64OpTable[] = { - {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 32, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 32, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xD8, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xD8, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xD8, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xD8, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 32, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 32, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 
6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xD0), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xD8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, 32>}, + {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::i32Bit>}, // 1 = Invalid - {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, 32>}, + {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, OpSize::i32Bit>}, - {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, 32>}, + {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, OpSize::i32Bit>}, {OPDReg(0xD9, 4) | 0x00, 8, &OpDispatchBuilder::X87LDENVF64}, @@ -5531,21 +5539,27 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINSTACK, true>}, {OPD(0xD9, 0xFF), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80COSSTACK, true>}, - {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 32, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 32, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDA, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDA, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDA, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDA, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 32, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 32, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, @@ -5553,7 +5567,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, // E0 = Invalid // E8 = Invalid - {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDA, 0xE9), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, // EA = Invalid // F0 = Invalid // F8 = Invalid @@ -5568,11 +5583,11 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { // 4 = Invalid - {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, 80>}, + {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::f80Bit>}, // 6 = Invalid - {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, 80>}, + {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, OpSize::f80Bit>}, {OPD(0xDB, 0xC0), 8, 
&OpDispatchBuilder::X87FCMOV}, @@ -5583,41 +5598,49 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT}, // E4 = Invalid - {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xE8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xF0), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, // F8 = Invalid - {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 64, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 64, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDC, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDC, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDC, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDC, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 64, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 64, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 
80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, 64>}, + {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::i64Bit>}, {OPDReg(0xDD, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, true>}, - {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, 64>}, + {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, OpSize::i64Bit>}, - {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, 64>}, + {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTF64, OpSize::i64Bit>}, {OPDReg(0xDD, 4) | 0x00, 8, &OpDispatchBuilder::X87FRSTOR}, @@ -5630,32 +5653,41 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, // register-register from regular X87 {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, //^ - {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE0), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 16, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 16, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 2) | 0x00, 8, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDE, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDE, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 16, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 16, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xD9), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, 
OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILDF64}, @@ -5678,43 +5710,51 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>}, {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xE8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xF0), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, }; constexpr static std::tuple X87OpTable[] = { - {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 32, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 32, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xD8, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xD8, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 32, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xD8, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xD8, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 32, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 32, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD8, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 32, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xD8, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 80, false, 
OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 80, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, 32>}, + {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::i32Bit>}, // 1 = Invalid - {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, 32>}, + {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>}, - {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, 32>}, + {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>}, {OPDReg(0xD9, 4) | 0x00, 8, &OpDispatchBuilder::X87LDENV}, @@ -5761,21 +5801,25 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINSTACK, true>}, {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80COSSTACK, true>}, - {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 32, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 32, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i32Bit, true, 
OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDA, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDA, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 32, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDA, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDA, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 32, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 32, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 32, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV}, @@ -5783,7 +5827,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV}, // E0 = Invalid // E8 = Invalid - {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, // EA = Invalid // F0 = Invalid // F8 = Invalid @@ -5798,11 +5842,11 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { // 4 = Invalid - {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, 80>}, + {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::f80Bit>}, // 6 = Invalid - {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, 80>}, + {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::f80Bit>}, {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV}, @@ -5813,41 +5857,49 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::NOPOp}, // FNCLEX {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT}, // E4 = Invalid - {OPD(0xDB, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDB, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xE8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, 
OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDB, 0xF0), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, // F8 = Invalid - {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 64, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 64, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDC, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDC, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 64, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDC, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDC, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 64, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 5) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 64, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDC, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 64, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDC, 7) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, 
OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, 64>}, + {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::i64Bit>}, {OPDReg(0xDD, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, true>}, - {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, 64>}, + {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>}, - {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, 64>}, + {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>}, {OPDReg(0xDD, 4) | 0x00, 8, &OpDispatchBuilder::X87FRSTOR}, @@ -5860,32 +5912,36 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, - {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 16, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 16, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDE, 2) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 16, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, + {OPDReg(0xDE, 3) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>}, - {OPDReg(0xDE, 4) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 16, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 4) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i16Bit, true, false, 
OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 6) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 16, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 6) | 0x00, 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPDReg(0xDE, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 16, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, + {OPDReg(0xDE, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>}, - {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, 80, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, - {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, true, OpDispatchBuilder::OpResult::RES_STI>}, - {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, 80, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>}, + {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>}, + {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>}, {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILD}, @@ -5908,8 +5964,10 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>}, {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW}, - {OPD(0xDF, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, - {OPD(0xDF, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, 80, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xE8), 8, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, + {OPD(0xDF, 0xF0), 8, + 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>}, }; #undef OPD #undef OPDReg diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 7daee35ad0..6b0f9fcedd 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -47,8 +47,8 @@ enum class BTAction { }; struct LoadSourceOptions { - // Alignment of the load in bytes. -1 signifies unaligned - int8_t Align = -1; + // Alignment of the load in bytes. iInvalid signifies opsize aligned. + IR::OpSize Align = OpSize::iInvalid; // Whether or not to load the data if a memory access occurs. // If set to false, then the address that would have been loaded from @@ -136,8 +136,8 @@ class OpDispatchBuilder final : public IREmitter { FlushRegisterCache(); return _Jump(_TargetBlock); } - IRPair - CondJump(Ref _Cmp1, Ref _Cmp2, Ref _TrueBlock, Ref _FalseBlock, CondClassType _Cond = {COND_NEQ}, uint8_t _CompareSize = 0) { + IRPair CondJump(Ref _Cmp1, Ref _Cmp2, Ref _TrueBlock, Ref _FalseBlock, CondClassType _Cond = {COND_NEQ}, + IR::OpSize _CompareSize = OpSize::iInvalid) { FlushRegisterCache(); return _CondJump(_Cmp1, _Cmp2, _TrueBlock, _FalseBlock, _Cond, _CompareSize); } @@ -151,12 +151,12 @@ class OpDispatchBuilder final : public IREmitter { } IRPair CondJumpNZCV(CondClassType Cond) { FlushRegisterCache(); - return _CondJump(InvalidNode, InvalidNode, InvalidNode, InvalidNode, Cond, 0, true); + return _CondJump(InvalidNode, InvalidNode, InvalidNode, InvalidNode, Cond, OpSize::iInvalid, true); } IRPair CondJumpBit(Ref Src, unsigned Bit, bool Set) { FlushRegisterCache(); auto InlineConst = _InlineConstant(Bit); - return _CondJump(Src, InlineConst, InvalidNode, InvalidNode, {Set ? COND_TSTNZ : COND_TSTZ}, 0, false); + return _CondJump(Src, InlineConst, InvalidNode, InvalidNode, {Set ? 
COND_TSTNZ : COND_TSTZ}, OpSize::iInvalid, false); } IRPair ExitFunction(Ref NewRIP) { FlushRegisterCache(); @@ -430,63 +430,63 @@ class OpDispatchBuilder final : public IREmitter { void MOVHPDOp(OpcodeArgs); void MOVSDOp(OpcodeArgs); void MOVSSOp(OpcodeArgs); - void VectorALUOp(OpcodeArgs, IROps IROp, size_t ElementSize); + void VectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); void VectorXOROp(OpcodeArgs); - void VectorALUROp(OpcodeArgs, IROps IROp, size_t ElementSize); - void VectorUnaryOp(OpcodeArgs, IROps IROp, size_t ElementSize); - template + void VectorALUROp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); + void VectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); + template void VectorUnaryDuplicateOp(OpcodeArgs); void MOVQOp(OpcodeArgs, VectorOpType VectorType); void MOVQMMXOp(OpcodeArgs); - void MOVMSKOp(OpcodeArgs, size_t ElementSize); + void MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize); void MOVMSKOpOne(OpcodeArgs); - void PUNPCKLOp(OpcodeArgs, size_t ElementSize); - void PUNPCKHOp(OpcodeArgs, size_t ElementSize); + void PUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize); + void PUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize); void PSHUFBOp(OpcodeArgs); - Ref PShufWLane(size_t Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane, uint8_t Shuffle); + Ref PShufWLane(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane, uint8_t Shuffle); void PSHUFWOp(OpcodeArgs, bool Low); void PSHUFW8ByteOp(OpcodeArgs); void PSHUFDOp(OpcodeArgs); - void PSRLDOp(OpcodeArgs, size_t ElementSize); - void PSRLI(OpcodeArgs, size_t ElementSize); - void PSLLI(OpcodeArgs, size_t ElementSize); - void PSLL(OpcodeArgs, size_t ElementSize); - void PSRAOp(OpcodeArgs, size_t ElementSize); + void PSRLDOp(OpcodeArgs, IR::OpSize ElementSize); + void PSRLI(OpcodeArgs, IR::OpSize ElementSize); + void PSLLI(OpcodeArgs, IR::OpSize ElementSize); + void PSLL(OpcodeArgs, IR::OpSize ElementSize); + void PSRAOp(OpcodeArgs, IR::OpSize ElementSize); void PSRLDQ(OpcodeArgs); void PSLLDQ(OpcodeArgs); - void PSRAIOp(OpcodeArgs, size_t ElementSize); + void PSRAIOp(OpcodeArgs, IR::OpSize ElementSize); void MOVDDUPOp(OpcodeArgs); - template + template void CVTGPR_To_FPR(OpcodeArgs); - template + template void CVTFPR_To_GPR(OpcodeArgs); - template + template void Vector_CVT_Int_To_Float(OpcodeArgs); - template + template void Scalar_CVT_Float_To_Float(OpcodeArgs); - void Vector_CVT_Float_To_Float(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, bool IsAVX); - template + void Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, bool IsAVX); + template void Vector_CVT_Float_To_Int(OpcodeArgs); void MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs); - template + template void XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs); void MASKMOVOp(OpcodeArgs); void MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType); void TZCNT(OpcodeArgs); void LZCNT(OpcodeArgs); - template + template void VFCMPOp(OpcodeArgs); - void SHUFOp(OpcodeArgs, size_t ElementSize); - template + void SHUFOp(OpcodeArgs, IR::OpSize ElementSize); + template void PINSROp(OpcodeArgs); void InsertPSOp(OpcodeArgs); - void PExtrOp(OpcodeArgs, size_t ElementSize); + void PExtrOp(OpcodeArgs, IR::OpSize ElementSize); - template + template void PSIGN(OpcodeArgs); - template + template void VPSIGN(OpcodeArgs); // BMI1 Ops @@ -510,58 +510,58 @@ class OpDispatchBuilder final : public IREmitter { // AVX Ops void 
AVXVectorXOROp(OpcodeArgs); - template + template void AVXVectorRound(OpcodeArgs); - template + template void AVXScalar_CVT_Float_To_Float(OpcodeArgs); - template + template void AVXVector_CVT_Float_To_Int(OpcodeArgs); - template + template void AVXVector_CVT_Int_To_Float(OpcodeArgs); - template + template void VectorScalarInsertALUOp(OpcodeArgs); - template + template void AVXVectorScalarInsertALUOp(OpcodeArgs); - template + template void VectorScalarUnaryInsertALUOp(OpcodeArgs); - template + template void AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); void InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs); - template + template void InsertCVTGPR_To_FPR(OpcodeArgs); - template + template void AVXInsertCVTGPR_To_FPR(OpcodeArgs); - template + template void InsertScalar_CVT_Float_To_Float(OpcodeArgs); - template + template void AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs); RoundType TranslateRoundType(uint8_t Mode); - template + template void InsertScalarRound(OpcodeArgs); - template + template void AVXInsertScalarRound(OpcodeArgs); - template + template void InsertScalarFCMPOp(OpcodeArgs); - template + template void AVXInsertScalarFCMPOp(OpcodeArgs); - template + template void AVXCVTGPR_To_FPR(OpcodeArgs); - template + template void AVXVFCMPOp(OpcodeArgs); - template + template void VADDSUBPOp(OpcodeArgs); void VAESDecOp(OpcodeArgs); @@ -571,26 +571,26 @@ class OpDispatchBuilder final : public IREmitter { void VANDNOp(OpcodeArgs); - Ref VBLENDOpImpl(uint32_t VecSize, uint32_t ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector); + Ref VBLENDOpImpl(IR::OpSize VecSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector); void VBLENDPDOp(OpcodeArgs); void VPBLENDDOp(OpcodeArgs); void VPBLENDWOp(OpcodeArgs); - void VBROADCASTOp(OpcodeArgs, size_t ElementSize); + void VBROADCASTOp(OpcodeArgs, IR::OpSize ElementSize); - template + template void VDPPOp(OpcodeArgs); void VEXTRACT128Op(OpcodeArgs); - template + template void VHADDPOp(OpcodeArgs); - void VHSUBPOp(OpcodeArgs, size_t ElementSize); + void VHSUBPOp(OpcodeArgs, IR::OpSize ElementSize); void VINSERTOp(OpcodeArgs); void VINSERTPSOp(OpcodeArgs); - template + template void VMASKMOVOp(OpcodeArgs); void VMOVHPOp(OpcodeArgs); @@ -608,9 +608,9 @@ class OpDispatchBuilder final : public IREmitter { void VMPSADBWOp(OpcodeArgs); - void VPACKSSOp(OpcodeArgs, size_t ElementSize); + void VPACKSSOp(OpcodeArgs, IR::OpSize ElementSize); - void VPACKUSOp(OpcodeArgs, size_t ElementSize); + void VPACKUSOp(OpcodeArgs, IR::OpSize ElementSize); void VPALIGNROp(OpcodeArgs); @@ -627,15 +627,15 @@ class OpDispatchBuilder final : public IREmitter { void VPERMDOp(OpcodeArgs); void VPERMQOp(OpcodeArgs); - void VPERMILImmOp(OpcodeArgs, size_t ElementSize); + void VPERMILImmOp(OpcodeArgs, IR::OpSize ElementSize); - Ref VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref Src, Ref Indices); - template + Ref VPERMILRegOpImpl(OpSize DstSize, IR::OpSize ElementSize, Ref Src, Ref Indices); + template void VPERMILRegOp(OpcodeArgs); void VPHADDSWOp(OpcodeArgs); - void VPHSUBOp(OpcodeArgs, size_t ElementSize); + void VPHSUBOp(OpcodeArgs, IR::OpSize ElementSize); void VPHSUBSWOp(OpcodeArgs); void VPINSRBOp(OpcodeArgs); @@ -653,39 +653,39 @@ class OpDispatchBuilder final : public IREmitter { template void VPMULHWOp(OpcodeArgs); - template + template void VPMULLOp(OpcodeArgs); void VPSADBWOp(OpcodeArgs); void VPSHUFBOp(OpcodeArgs); - void VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low); + void VPSHUFWOp(OpcodeArgs, 
IR::OpSize ElementSize, bool Low); - void VPSLLOp(OpcodeArgs, size_t ElementSize); + void VPSLLOp(OpcodeArgs, IR::OpSize ElementSize); void VPSLLDQOp(OpcodeArgs); - void VPSLLIOp(OpcodeArgs, size_t ElementSize); + void VPSLLIOp(OpcodeArgs, IR::OpSize ElementSize); void VPSLLVOp(OpcodeArgs); - void VPSRAOp(OpcodeArgs, size_t ElementSize); + void VPSRAOp(OpcodeArgs, IR::OpSize ElementSize); - void VPSRAIOp(OpcodeArgs, size_t ElementSize); + void VPSRAIOp(OpcodeArgs, IR::OpSize ElementSize); void VPSRAVDOp(OpcodeArgs); void VPSRLVOp(OpcodeArgs); - void VPSRLDOp(OpcodeArgs, size_t ElementSize); + void VPSRLDOp(OpcodeArgs, IR::OpSize ElementSize); void VPSRLDQOp(OpcodeArgs); - void VPUNPCKHOp(OpcodeArgs, size_t ElementSize); + void VPUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize); - void VPUNPCKLOp(OpcodeArgs, size_t ElementSize); + void VPUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize); - void VPSRLIOp(OpcodeArgs, size_t ElementSize); + void VPSRLIOp(OpcodeArgs, IR::OpSize ElementSize); - void VSHUFOp(OpcodeArgs, size_t ElementSize); + void VSHUFOp(OpcodeArgs, IR::OpSize ElementSize); - template + template void VTESTPOp(OpcodeArgs); void VZEROOp(OpcodeArgs); @@ -694,7 +694,7 @@ class OpDispatchBuilder final : public IREmitter { Ref ReconstructFSW_Helper(Ref T = nullptr); // Returns new x87 stack top from FSW. Ref ReconstructX87StateFromFSW_Helper(Ref FSW); - void FLD(OpcodeArgs, size_t Width); + void FLD(OpcodeArgs, IR::OpSize Width); void FLDFromStack(OpcodeArgs); void FLD_Const(OpcodeArgs, NamedVectorConstant Constant); @@ -703,7 +703,7 @@ class OpDispatchBuilder final : public IREmitter { void FILD(OpcodeArgs); - void FST(OpcodeArgs, size_t Width); + void FST(OpcodeArgs, IR::OpSize Width); void FSTToStack(OpcodeArgs); void FIST(OpcodeArgs, bool Truncate); @@ -717,10 +717,10 @@ class OpDispatchBuilder final : public IREmitter { }; void X87OpHelper(OpcodeArgs, FEXCore::IR::IROps IROp, bool ZeroC2); - void FADD(OpcodeArgs, size_t Width, bool Integer, OpResult ResInST0); - void FMUL(OpcodeArgs, size_t Width, bool Integer, OpResult ResInST0); - void FDIV(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpResult ResInST0); - void FSUB(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpResult ResInST0); + void FADD(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0); + void FMUL(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0); + void FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0); + void FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0); void FTST(OpcodeArgs); void FNINIT(OpcodeArgs); @@ -747,10 +747,10 @@ class OpDispatchBuilder final : public IREmitter { FLAGS_X87, FLAGS_RFLAGS, }; - void FCOMI(OpcodeArgs, size_t Width, bool Integer, FCOMIFlags WhichFlags, bool PopTwice); + void FCOMI(OpcodeArgs, IR::OpSize Width, bool Integer, FCOMIFlags WhichFlags, bool PopTwice); // F64 X87 Ops - void FLDF64(OpcodeArgs, size_t Width); + void FLDF64(OpcodeArgs, IR::OpSize Width); void FLDF64_Const(OpcodeArgs, uint64_t Num); void FBLDF64(OpcodeArgs); @@ -758,14 +758,14 @@ class OpDispatchBuilder final : public IREmitter { void FILDF64(OpcodeArgs); - void FSTF64(OpcodeArgs, size_t Width); + void FSTF64(OpcodeArgs, IR::OpSize Width); void FISTF64(OpcodeArgs, bool Truncate); - void FADDF64(OpcodeArgs, size_t Width, bool Integer, OpResult ResInST0); - void FMULF64(OpcodeArgs, size_t Width, bool Integer, OpResult ResInST0); - void FDIVF64(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpResult ResInST0); - void 
FSUBF64(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpResult ResInST0); + void FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0); + void FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0); + void FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0); + void FSUBF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0); void FCHSF64(OpcodeArgs); void FABSF64(OpcodeArgs); void FTSTF64(OpcodeArgs); @@ -781,7 +781,7 @@ class OpDispatchBuilder final : public IREmitter { void X87FXTRACTF64(OpcodeArgs); void X87LDENVF64(OpcodeArgs); - void FCOMIF64(OpcodeArgs, size_t width, bool Integer, FCOMIFlags whichflags, bool poptwice); + void FCOMIF64(OpcodeArgs, IR::OpSize width, bool Integer, FCOMIFlags whichflags, bool poptwice); void FXSaveOp(OpcodeArgs); void FXRStoreOp(OpcodeArgs); @@ -790,24 +790,24 @@ class OpDispatchBuilder final : public IREmitter { void XSaveOp(OpcodeArgs); void PAlignrOp(OpcodeArgs); - template + template void UCOMISxOp(OpcodeArgs); void LDMXCSR(OpcodeArgs); void STMXCSR(OpcodeArgs); - template + template void PACKUSOp(OpcodeArgs); - template + template void PACKSSOp(OpcodeArgs); - template + template void PMULLOp(OpcodeArgs); template void MOVQ2DQ(OpcodeArgs); - template + template void ADDSUBPOp(OpcodeArgs); void PFNACCOp(OpcodeArgs); @@ -830,9 +830,9 @@ class OpDispatchBuilder final : public IREmitter { void PMULHRSW(OpcodeArgs); void MOVBEOp(OpcodeArgs); - template + template void HSUBP(OpcodeArgs); - template + template void PHSUB(OpcodeArgs); void PHADDS(OpcodeArgs); @@ -883,21 +883,21 @@ class OpDispatchBuilder final : public IREmitter { template void VPGATHER(OpcodeArgs); - template + template void ExtendVectorElements(OpcodeArgs); - template + template void VectorRound(OpcodeArgs); - Ref VectorBlend(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2, uint8_t Selector); + Ref VectorBlend(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Selector); - template + template void VectorBlend(OpcodeArgs); - void VectorVariableBlend(OpcodeArgs, size_t ElementSize); + void VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize); void PTestOpImpl(OpSize Size, Ref Dest, Ref Src); void PTestOp(OpcodeArgs); void PHMINPOSUWOp(OpcodeArgs); - template + template void DPPOp(OpcodeArgs); void MPSADBWOp(OpcodeArgs); @@ -934,13 +934,14 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, const FEXCore::X86Tables::DecodedOperand& Operand, const RefPair Src, MemoryAccessType AccessType = MemoryAccessType::DEFAULT); void InstallAVX128Handlers(); - void AVX128_VMOVScalarImpl(OpcodeArgs, size_t ElementSize); - void AVX128_VectorALU(OpcodeArgs, IROps IROp, size_t ElementSize); - void AVX128_VectorUnary(OpcodeArgs, IROps IROp, size_t ElementSize); - void AVX128_VectorUnaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize, std::function Helper); - void AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize, std::function Helper); - void AVX128_VectorShiftWideImpl(OpcodeArgs, size_t ElementSize, IROps IROp); - void AVX128_VectorShiftImmImpl(OpcodeArgs, size_t ElementSize, IROps IROp); + void AVX128_VMOVScalarImpl(OpcodeArgs, IR::OpSize ElementSize); + void AVX128_VectorALU(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); + void AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); + void AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, std::function 
Helper); + void AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, IR::OpSize ElementSize, + std::function Helper); + void AVX128_VectorShiftWideImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp); + void AVX128_VectorShiftImmImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp); void AVX128_VectorTrinaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize, Ref Src3, std::function Helper); @@ -961,42 +962,42 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_VMOVDDUP(OpcodeArgs); void AVX128_VMOVSLDUP(OpcodeArgs); void AVX128_VMOVSHDUP(OpcodeArgs); - template + template void AVX128_VBROADCAST(OpcodeArgs); - template + template void AVX128_VPUNPCKL(OpcodeArgs); - template + template void AVX128_VPUNPCKH(OpcodeArgs); void AVX128_MOVVectorUnaligned(OpcodeArgs); - template + template void AVX128_InsertCVTGPR_To_FPR(OpcodeArgs); - template + template void AVX128_CVTFPR_To_GPR(OpcodeArgs); void AVX128_VANDN(OpcodeArgs); - template + template void AVX128_VPACKSS(OpcodeArgs); - template + template void AVX128_VPACKUS(OpcodeArgs); - Ref AVX128_PSIGNImpl(size_t ElementSize, Ref Src1, Ref Src2); - template + Ref AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2); + template void AVX128_VPSIGN(OpcodeArgs); - template + template void AVX128_UCOMISx(OpcodeArgs); - void AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, size_t ElementSize); - Ref AVX128_VFCMPImpl(size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType); - template + void AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, IR::OpSize ElementSize); + Ref AVX128_VFCMPImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType); + template void AVX128_VFCMP(OpcodeArgs); - template + template void AVX128_InsertScalarFCMP(OpcodeArgs); void AVX128_MOVBetweenGPR_FPR(OpcodeArgs); - template + template void AVX128_PExtr(OpcodeArgs); - void AVX128_ExtendVectorElements(OpcodeArgs, size_t ElementSize, size_t DstElementSize, bool Signed); + void AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed); template void AVX128_MOVMSK(OpcodeArgs); void AVX128_MOVMSKB(OpcodeArgs); - void AVX128_PINSRImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, - const X86Tables::DecodedOperand& Imm); + void AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm); void AVX128_VPINSRB(OpcodeArgs); void AVX128_VPINSRW(OpcodeArgs); void AVX128_VPINSRDQ(OpcodeArgs); @@ -1007,15 +1008,15 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_VINSERTPS(OpcodeArgs); Ref AVX128_PHSUBImpl(Ref Src1, Ref Src2, size_t ElementSize); - template + template void AVX128_VPHSUB(OpcodeArgs); void AVX128_VPHSUBSW(OpcodeArgs); - template + template void AVX128_VADDSUBP(OpcodeArgs); - template + template void AVX128_VPMULL(OpcodeArgs); void AVX128_VPMULHRSW(OpcodeArgs); @@ -1023,16 +1024,16 @@ class OpDispatchBuilder final : public IREmitter { template void AVX128_VPMULHW(OpcodeArgs); - template + template void AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs); - template + template void AVX128_Vector_CVT_Float_To_Float(OpcodeArgs); - template + template void AVX128_Vector_CVT_Float_To_Int(OpcodeArgs); - template + template void AVX128_Vector_CVT_Int_To_Float(OpcodeArgs); void AVX128_VEXTRACT128(OpcodeArgs); @@ -1050,24 +1051,24 @@ class OpDispatchBuilder final : public IREmitter { void 
AVX128_PHMINPOSUW(OpcodeArgs); - template + template void AVX128_VectorRound(OpcodeArgs); - template + template void AVX128_InsertScalarRound(OpcodeArgs); - template + template void AVX128_VDPP(OpcodeArgs); void AVX128_VPERMQ(OpcodeArgs); void AVX128_VPSHUFW(OpcodeArgs, bool Low); - template + template void AVX128_VSHUF(OpcodeArgs); template void AVX128_VPERMILImm(OpcodeArgs); - template + template void AVX128_VHADDP(OpcodeArgs); void AVX128_VPHADDSW(OpcodeArgs); @@ -1075,10 +1076,10 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_VPMADDUBSW(OpcodeArgs); void AVX128_VPMADDWD(OpcodeArgs); - template + template void AVX128_VBLEND(OpcodeArgs); - template + template void AVX128_VHSUBP(OpcodeArgs); void AVX128_VPSHUFB(OpcodeArgs); @@ -1087,18 +1088,18 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_VMPSADBW(OpcodeArgs); void AVX128_VPALIGNR(OpcodeArgs); - void AVX128_VMASKMOVImpl(OpcodeArgs, size_t ElementSize, size_t DstSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp, + void AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp); template void AVX128_VPMASKMOV(OpcodeArgs); - template + template void AVX128_VMASKMOV(OpcodeArgs); void AVX128_MASKMOV(OpcodeArgs); - template + template void AVX128_VectorVariableBlend(OpcodeArgs); void AVX128_SaveAVXState(Ref MemBase); @@ -1106,11 +1107,11 @@ class OpDispatchBuilder final : public IREmitter { void AVX128_DefaultAVXState(); void AVX128_VPERM2(OpcodeArgs); - template + template void AVX128_VTESTP(OpcodeArgs); void AVX128_PTest(OpcodeArgs); - template + template void AVX128_VPERMILReg(OpcodeArgs); void AVX128_VPERMD(OpcodeArgs); @@ -1134,7 +1135,7 @@ class OpDispatchBuilder final : public IREmitter { // AVX 256-bit operations void StoreResult_WithAVXInsert(VectorOpType Type, FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, Ref Value, - int8_t Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) { + IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) { if (Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= X86State::REG_XMM_0 && Op->Dest.Data.GPR.GPR <= X86State::REG_XMM_15 && GetGuestVectorLength() == Core::CPUState::XMM_AVX_REG_SIZE && Type == VectorOpType::SSE) { const auto gpr = Op->Dest.Data.GPR.GPR; @@ -1188,7 +1189,7 @@ class OpDispatchBuilder final : public IREmitter { CalculateDeferredFlags(); - const uint8_t GPRSize = CTX->GetGPRSize(); + const auto GPRSize = CTX->GetGPROpSize(); const auto VectorSize = GetGuestVectorLength(); // Write backwards. This is a heuristic to improve coalescing, since we @@ -1221,10 +1222,10 @@ class OpDispatchBuilder final : public IREmitter { } else if (Index >= FPR0Index && Index <= FPR15Index) { _StoreRegister(Value, Index - FPR0Index, FPRClass, VectorSize); } else if (Index == DFIndex) { - _StoreContext(1, GPRClass, Value, offsetof(Core::CPUState, flags[X86State::RFLAG_DF_RAW_LOC])); + _StoreContext(OpSize::i8Bit, GPRClass, Value, offsetof(Core::CPUState, flags[X86State::RFLAG_DF_RAW_LOC])); } else { bool Partial = RegCache.Partial & (1ull << Index); - unsigned Size = Partial ? 8 : CacheIndexToSize(Index); + auto Size = Partial ? 
OpSize::i64Bit : CacheIndexToOpSize(Index); uint64_t NextBit = (1ull << (Index - 1)); uint32_t Offset = CacheIndexToContextOffset(Index); auto Class = CacheIndexClass(Index); @@ -1243,7 +1244,7 @@ class OpDispatchBuilder final : public IREmitter { _StoreContext(Size, Class, Value, Offset); // If Partial and MMX register, then we need to store all 1s in bits 64-80 if (Partial && Index >= MM0Index && Index <= MM7Index) { - _StoreContext(2, IR::GPRClass, _Constant(0xFFFF), Offset + 8); + _StoreContext(OpSize::i16Bit, IR::GPRClass, _Constant(0xFFFF), Offset + 8); } } } @@ -1355,26 +1356,26 @@ class OpDispatchBuilder final : public IREmitter { // Opcode helpers for generalizing behavior across VEX and non-VEX variants. - Ref ADDSUBPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2); + Ref ADDSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2); - void AVXVectorALUOp(OpcodeArgs, IROps IROp, size_t ElementSize); - void AVXVectorUnaryOp(OpcodeArgs, IROps IROp, size_t ElementSize); + void AVXVectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); + void AVXVectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); - void AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize); + void AVXVectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize); void AVXVariableShiftImpl(OpcodeArgs, IROps IROp); Ref AESKeyGenAssistImpl(OpcodeArgs); - Ref CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op); + Ref CVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op); - Ref DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize); + Ref DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t Mask, IR::OpSize ElementSize); Ref VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm); - Ref ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, size_t DstElementSize, bool Signed); + Ref ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed); - Ref HSUBPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2); + Ref HSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2); Ref InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm); @@ -1390,51 +1391,51 @@ class OpDispatchBuilder final : public IREmitter { Ref PHMINPOSUWOpImpl(OpcodeArgs); - Ref PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, size_t ElementSize); + Ref PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, IR::OpSize ElementSize); Ref PHSUBSOpImpl(OpSize Size, Ref Src1, Ref Src2); - Ref PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, + Ref PINSROpImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm); - Ref PMADDWDOpImpl(size_t Size, Ref Src1, Ref Src2); + Ref PMADDWDOpImpl(IR::OpSize Size, Ref Src1, Ref Src2); - Ref PMADDUBSWOpImpl(size_t Size, Ref Src1, Ref Src2); + Ref PMADDUBSWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2); Ref PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2); Ref PMULHWOpImpl(OpcodeArgs, bool Signed, Ref Src1, Ref Src2); - Ref PMULLOpImpl(OpSize Size, size_t ElementSize, bool Signed, Ref Src1, Ref Src2); + Ref 
PMULLOpImpl(OpSize Size, IR::OpSize ElementSize, bool Signed, Ref Src1, Ref Src2); - Ref PSADBWOpImpl(size_t Size, Ref Src1, Ref Src2); + Ref PSADBWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2); - Ref GeneratePSHUFBMask(uint8_t SrcSize); - Ref PSHUFBOpImpl(uint8_t SrcSize, Ref Src1, Ref Src2, Ref MaskVector); + Ref GeneratePSHUFBMask(IR::OpSize SrcSize); + Ref PSHUFBOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, Ref MaskVector); - Ref PSIGNImpl(OpcodeArgs, size_t ElementSize, Ref Src1, Ref Src2); + Ref PSIGNImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src1, Ref Src2); - Ref PSLLIImpl(OpcodeArgs, size_t ElementSize, Ref Src, uint64_t Shift); + Ref PSLLIImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, uint64_t Shift); - Ref PSLLImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec); + Ref PSLLImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec); - Ref PSRAOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec); + Ref PSRAOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec); - Ref PSRLDOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec); + Ref PSRLDOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec); - Ref SHUFOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle); + Ref SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle); - void VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t DataSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp, + void VMASKMOVOpImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DataSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp); - void MOVScalarOpImpl(OpcodeArgs, size_t ElementSize); - void VMOVScalarOpImpl(OpcodeArgs, size_t ElementSize); + void MOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize); + void VMOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize); - Ref VFCMPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType); + Ref VFCMPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType); - void VTESTOpImpl(OpSize SrcSize, size_t ElementSize, Ref Src1, Ref Src2); + void VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2); - void VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize); + void VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, IR::OpSize ElementSize); // x86 ALU scalar operations operate in three different ways // - AVX512: Writemask shenanigans that we don't care about. 
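The comment spanning this hunk boundary describes how SSE scalar math inserts into the low element of the destination while leaving everything above it untouched. As a minimal editorial sketch of that rule, in plain standalone C++ with hypothetical names rather than FEX IR, using the 32-bit ADDSS case from the comment's own example:

#include <cstdint>
#include <cstring>

// A 128-bit SSE-style register as raw bytes (illustrative only).
struct Vec128 {
  uint8_t Bytes[16];
};

// SSE ADDSS semantics: Dest[31:0] = Dest[31:0] + Src[31:0],
// while Dest[127:32] passes through unmodified (no AVX-style upper zeroing).
inline Vec128 AddScalarInsert(Vec128 Dest, const Vec128& Src) {
  float DestLow = 0.0f;
  float SrcLow = 0.0f;
  std::memcpy(&DestLow, Dest.Bytes, sizeof(float));
  std::memcpy(&SrcLow, Src.Bytes, sizeof(float));
  const float Result = DestLow + SrcLow;
  std::memcpy(Dest.Bytes, &Result, sizeof(float));
  return Dest; // Upper 96 bits are exactly the incoming Dest bits.
}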
@@ -1446,30 +1447,30 @@ class OpDispatchBuilder final : public IREmitter { // - Example 32bit ADDSS Dest, Src // - Dest[31:0] = Dest[31:0] + Src[31:0] // - Dest[{256,128}:32] = (Unmodified) - Ref VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); + Ref VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize, + const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); - Ref VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); + Ref VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize, + const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); - Ref InsertCVTGPR_To_FPRImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, + Ref InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); - Ref InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, size_t SrcElementSize, + Ref InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits); - Ref InsertScalarRoundImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, + Ref InsertScalarRoundImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, uint64_t Mode, bool ZeroUpperBits); - Ref InsertScalarFCMPOpImpl(OpSize Size, uint8_t OpDstSize, size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType, bool ZeroUpperBits); + Ref InsertScalarFCMPOpImpl(OpSize Size, IR::OpSize OpDstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType, bool ZeroUpperBits); - Ref VectorRoundImpl(OpSize Size, size_t ElementSize, Ref Src, uint64_t Mode); + Ref VectorRoundImpl(OpSize Size, IR::OpSize ElementSize, Ref Src, uint64_t Mode); - Ref Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, const X86Tables::DecodedOperand& Src1Op, - const X86Tables::DecodedOperand& Src2Op); + Ref Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, + const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op); - Ref Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElementSize, bool Narrow, bool HostRoundingMode); + Ref Vector_CVT_Float_To_IntImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode); - Ref Vector_CVT_Int_To_FloatImpl(OpcodeArgs, size_t SrcElementSize, bool Widen); + Ref Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen); void XSaveOpImpl(OpcodeArgs); void SaveX87State(OpcodeArgs, Ref MemBase); @@ -1518,25 +1519,25 @@ class OpDispatchBuilder final : public IREmitter { Ref LoadSource(RegisterClassType Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags, const LoadSourceOptions& Options = {}); Ref 
LoadSource_WithOpSize(RegisterClassType Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, - uint8_t OpSize, uint32_t Flags, const LoadSourceOptions& Options = {}); + IR::OpSize OpSize, uint32_t Flags, const LoadSourceOptions& Options = {}); void StoreResult_WithOpSize(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, - const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, uint8_t OpSize, int8_t Align, + const FEXCore::X86Tables::DecodedOperand& Operand, const Ref Src, IR::OpSize OpSize, IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT); void StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, const FEXCore::X86Tables::DecodedOperand& Operand, - const Ref Src, int8_t Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT); - void StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, const Ref Src, int8_t Align, + const Ref Src, IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT); + void StoreResult(FEXCore::IR::RegisterClassType Class, FEXCore::X86Tables::DecodedOp Op, const Ref Src, IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT); // In several instances, it's desirable to get a base address with the segment offset // applied to it. This pulls all the common-case appending into a single set of functions. [[nodiscard]] - Ref MakeSegmentAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint8_t OpSize) { + Ref MakeSegmentAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, IR::OpSize OpSize) { Ref Mem = LoadSource_WithOpSize(GPRClass, Op, Operand, OpSize, Op->Flags, {.LoadData = false}); return AppendSegmentOffset(Mem, Op->Flags); } [[nodiscard]] Ref MakeSegmentAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand) { - return MakeSegmentAddress(Op, Operand, GetSrcSize(Op)); + return MakeSegmentAddress(Op, Operand, OpSizeFromSrc(Op)); } [[nodiscard]] Ref MakeSegmentAddress(X86State::X86Reg Reg, uint32_t Flags, uint32_t DefaultPrefix = 0, bool Override = false) { @@ -1771,7 +1772,7 @@ class OpDispatchBuilder final : public IREmitter { // For DF, we need to transform 0/1 into 1/-1 StoreDF(_SubShift(OpSize::i64Bit, _Constant(1), Value, ShiftType::LSL, 1)); } else { - _StoreContext(1, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags[BitOffset])); + _StoreContext(OpSize::i8Bit, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags[BitOffset])); } } @@ -1849,6 +1850,17 @@ class OpDispatchBuilder final : public IREmitter { } } + // TODO: Temporary while OpcodeDispatcher shifts over + IR::OpSize CacheIndexToOpSize(int Index) { + // MMX registers are rounded up to 128-bit since they are shared with 80-bit + // x87 registers, even though MMX is logically only 64-bit. 
+ if (Index >= AVXHigh0Index || ((Index >= MM0Index && Index <= MM7Index))) { + return OpSize::i128Bit; + } else { + return OpSize::i64Bit; + } + } + struct { uint64_t Cached; uint64_t Written; @@ -1866,7 +1878,7 @@ class OpDispatchBuilder final : public IREmitter { RegCache.Written &= ~Bit; } - Ref LoadRegCache(uint64_t Offset, uint8_t Index, RegisterClassType RegClass, uint8_t Size) { + Ref LoadRegCache(uint64_t Offset, uint8_t Index, RegisterClassType RegClass, IR::OpSize Size) { LOGMAN_THROW_AA_FMT(Index < 64, "valid index"); uint64_t Bit = (1ull << (uint64_t)Index); @@ -1877,7 +1889,7 @@ // If we did a partial store, we're inserting into the full register if (RegCache.Written & Bit) { - Full = _VInsElement(16, 8, 0, 0, Full, Value); + Full = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 0, Full, Value); } RegCache.Value[Index] = Full; @@ -1907,7 +1919,7 @@ return RegCache.Value[Index]; } - RefPair AllocatePair(FEXCore::IR::RegisterClassType Class, uint8_t Size) { + RefPair AllocatePair(FEXCore::IR::RegisterClassType Class, IR::OpSize Size) { if (Class == FPRClass) { return {_AllocateFPR(Size, Size), _AllocateFPR(Size, Size)}; } else { @@ -1915,13 +1927,13 @@ } - RefPair LoadContextPair_Uncached(FEXCore::IR::RegisterClassType Class, uint8_t Size, unsigned Offset) { + RefPair LoadContextPair_Uncached(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, unsigned Offset) { RefPair Values = AllocatePair(Class, Size); _LoadContextPair(Size, Class, Offset, Values.Low, Values.High); return Values; } - RefPair LoadRegCachePair(uint64_t Offset, uint8_t Index, RegisterClassType RegClass, uint8_t Size) { + RefPair LoadRegCachePair(uint64_t Offset, uint8_t Index, RegisterClassType RegClass, IR::OpSize Size) { LOGMAN_THROW_AA_FMT(Index != DFIndex, "must be pairable"); // Try to load a pair into the cache @@ -1945,19 +1957,19 @@ } Ref LoadGPR(uint8_t Reg) { - return LoadRegCache(Reg, GPR0Index + Reg, GPRClass, CTX->GetGPRSize()); + return LoadRegCache(Reg, GPR0Index + Reg, GPRClass, CTX->GetGPROpSize()); } - Ref LoadContext(uint8_t Size, uint8_t Index) { + Ref LoadContext(IR::OpSize Size, uint8_t Index) { return LoadRegCache(CacheIndexToContextOffset(Index), Index, CacheIndexClass(Index), Size); } - RefPair LoadContextPair(uint8_t Size, uint8_t Index) { + RefPair LoadContextPair(IR::OpSize Size, uint8_t Index) { return LoadRegCachePair(CacheIndexToContextOffset(Index), Index, CacheIndexClass(Index), Size); } Ref LoadContext(uint8_t Index) { - return LoadContext(CacheIndexToSize(Index), Index); + return LoadContext(CacheIndexToOpSize(Index), Index); } Ref LoadXMMRegister(uint8_t Reg) { @@ -2018,7 +2030,7 @@ // Recover the sign bit, it is the logical DF value return _Lshr(OpSize::i64Bit, LoadDF(), _Constant(63)); } else { - return _LoadContext(1, GPRClass, offsetof(Core::CPUState, flags[BitOffset])); + return _LoadContext(OpSize::i8Bit, GPRClass, offsetof(Core::CPUState, flags[BitOffset])); } } @@ -2067,7 +2079,7 @@ } // Compares two floats and sets flags for a COMISS instruction - void Comiss(size_t ElementSize, Ref Src1, Ref Src2, bool InvalidateAF = false) { + void Comiss(IR::OpSize ElementSize, Ref Src1, Ref Src2, bool InvalidateAF = false) { // First, set flags according to Arm FCMP.
HandleNZCVWrite(); _FCmp(ElementSize, Src1, Src2); @@ -2144,7 +2156,7 @@ class OpDispatchBuilder final : public IREmitter { HandleNZCV_RMW(); CalculatePF(_ShiftFlags(OpSizeFromSrc(Op), Result, Dest, Shift, Src, OldPF, CFInverted)); - StoreResult(GPRClass, Op, Result, -1); + StoreResult(GPRClass, Op, Result, OpSize::iInvalid); } // Helper to derive Dest by a given builder-using Expression with the opcode @@ -2175,8 +2187,8 @@ class OpDispatchBuilder final : public IREmitter { fextl::unordered_map CachedIndexedNamedVectorConstants; // Load and cache a named vector constant. - Ref LoadAndCacheNamedVectorConstant(uint8_t Size, FEXCore::IR::NamedVectorConstant NamedConstant) { - auto log2_size_bytes = FEXCore::ilog2(Size); + Ref LoadAndCacheNamedVectorConstant(IR::OpSize Size, FEXCore::IR::NamedVectorConstant NamedConstant) { + auto log2_size_bytes = FEXCore::ilog2(IR::OpSizeToSize(Size)); if (CachedNamedVectorConstants[NamedConstant][log2_size_bytes]) { return CachedNamedVectorConstants[NamedConstant][log2_size_bytes]; } @@ -2185,11 +2197,11 @@ class OpDispatchBuilder final : public IREmitter { CachedNamedVectorConstants[NamedConstant][log2_size_bytes] = Constant; return Constant; } - Ref LoadAndCacheIndexedNamedVectorConstant(uint8_t Size, FEXCore::IR::IndexNamedVectorConstant NamedIndexedConstant, uint32_t Index) { + Ref LoadAndCacheIndexedNamedVectorConstant(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant NamedIndexedConstant, uint32_t Index) { IndexNamedVectorMapKey Key { .Index = Index, .NamedIndexedConstant = NamedIndexedConstant, - .log2_size_in_bytes = FEXCore::ilog2(Size), + .log2_size_in_bytes = FEXCore::ilog2(IR::OpSizeToSize(Size)), }; auto it = CachedIndexedNamedVectorConstants.find(Key); @@ -2202,11 +2214,11 @@ class OpDispatchBuilder final : public IREmitter { return Constant; } - Ref LoadUncachedZeroVector(uint8_t Size) { + Ref LoadUncachedZeroVector(IR::OpSize Size) { return _LoadNamedVectorConstant(Size, IR::NamedVectorConstant::NAMED_VECTOR_ZERO); } - Ref LoadZeroVector(uint8_t Size) { + Ref LoadZeroVector(IR::OpSize Size) { return LoadAndCacheNamedVectorConstant(Size, IR::NamedVectorConstant::NAMED_VECTOR_ZERO); } @@ -2241,7 +2253,7 @@ class OpDispatchBuilder final : public IREmitter { return; } auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - StoreResult(GPRClass, Op, Dest, -1); + StoreResult(GPRClass, Op, Dest, OpSize::iInvalid); } using ZeroShiftFunctionPtr = void (OpDispatchBuilder::*)(FEXCore::X86Tables::DecodedOp Op); @@ -2387,7 +2399,7 @@ class OpDispatchBuilder final : public IREmitter { } } - Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Addr, Ref Value, uint8_t Align = 1) { + Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref Addr, Ref Value, IR::OpSize Align = IR::OpSize::i8Bit) { if (IsTSOEnabled(Class)) { return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1); } else { @@ -2395,7 +2407,7 @@ class OpDispatchBuilder final : public IREmitter { } } - Ref _LoadMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref ssa0, uint8_t Align = 1) { + Ref _LoadMemAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref ssa0, IR::OpSize Align = IR::OpSize::i8Bit) { if (IsTSOEnabled(Class)) { return _LoadMemTSO(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1); } else { @@ -2403,7 +2415,7 @@ class OpDispatchBuilder final : public IREmitter { } } - Ref _LoadMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, uint8_t 
Align = 1) { + Ref _LoadMemAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, AddressMode A, IR::OpSize Align = IR::OpSize::i8Bit) { bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO; A = SelectAddressMode(A, AtomicTSO, Class != GPRClass, Size); @@ -2428,17 +2440,17 @@ class OpDispatchBuilder final : public IREmitter { } - RefPair LoadMemPair(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Base, unsigned Offset) { + RefPair LoadMemPair(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref Base, unsigned Offset) { RefPair Values = AllocatePair(Class, Size); _LoadMemPair(Class, Size, Base, Offset, Values.Low, Values.High); return Values; } - RefPair _LoadMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, uint8_t Align = 1) { + RefPair _LoadMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, AddressMode A, IR::OpSize Align = IR::OpSize::i8Bit) { bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO; // Use ldp if possible, otherwise fallback on two loads. - if (!AtomicTSO && !A.Segment && Size >= 4 & Size <= 16) { + if (!AtomicTSO && !A.Segment && Size >= OpSize::i32Bit & Size <= OpSize::i128Bit) { A = SelectPairAddressMode(A, Size); return LoadMemPair(Class, Size, A.Base, A.Offset); } else { @@ -2452,7 +2464,7 @@ class OpDispatchBuilder final : public IREmitter { } } - Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, Ref Value, uint8_t Align = 1) { + Ref _StoreMemAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, AddressMode A, Ref Value, IR::OpSize Align = IR::OpSize::i8Bit) { bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO; A = SelectAddressMode(A, AtomicTSO, Class != GPRClass, Size); @@ -2463,27 +2475,28 @@ class OpDispatchBuilder final : public IREmitter { } } - void _StoreMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, AddressMode A, Ref Value1, Ref Value2, uint8_t Align = 1) { + void _StoreMemPairAutoTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, AddressMode A, Ref Value1, Ref Value2, + IR::OpSize Align = IR::OpSize::i8Bit) { bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO; // Use stp if possible, otherwise fallback on two stores. 
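    // Editorial note, not part of the patch: pairing is attempted only when the
    // access is not TSO-atomic, carries no segment base, and each element is a
    // 32-, 64-, or 128-bit register -- the only widths Arm64 LDP/STP encode.
    // Spelled as a standalone predicate (hypothetical; assumes the IR::OpSize
    // enumerators order by byte width, as the patch's comparisons already do):
    //   const bool CanPair = !AtomicTSO && !A.Segment &&
    //                        Size >= OpSize::i32Bit && Size <= OpSize::i128Bit;
    // The guard below keeps the pre-existing bitwise '&' between the two
    // comparisons; '&&' would be the idiomatic spelling.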
- if (!AtomicTSO && !A.Segment && Size >= 4 & Size <= 16) { + if (!AtomicTSO && !A.Segment && Size >= OpSize::i32Bit & Size <= OpSize::i128Bit) { A = SelectPairAddressMode(A, Size); _StoreMemPair(Class, Size, Value1, Value2, A.Base, A.Offset); } else { - _StoreMemAutoTSO(Class, Size, A, Value1, 1); + _StoreMemAutoTSO(Class, Size, A, Value1, OpSize::i8Bit); A.Offset += Size; - _StoreMemAutoTSO(Class, Size, A, Value2, 1); + _StoreMemAutoTSO(Class, Size, A, Value2, OpSize::i8Bit); } } - Ref Pop(uint8_t Size, Ref SP_RMW) { + Ref Pop(IR::OpSize Size, Ref SP_RMW) { Ref Value = _AllocateGPR(false); _Pop(Size, SP_RMW, Value); return Value; } - Ref Pop(uint8_t Size) { + Ref Pop(IR::OpSize Size) { Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP)); Ref Value = _AllocateGPR(false); @@ -2494,9 +2507,9 @@ class OpDispatchBuilder final : public IREmitter { return Value; } - void Push(uint8_t Size, Ref Value) { + void Push(IR::OpSize Size, Ref Value) { auto OldSP = LoadGPRRegister(X86State::REG_RSP); - auto NewSP = _Push(CTX->GetGPRSize(), Size, Value, OldSP); + auto NewSP = _Push(CTX->GetGPROpSize(), Size, Value, OldSP); StoreGPRRegister(X86State::REG_RSP, NewSP); } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp index 9158c87fc2..7427e087b0 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp @@ -40,11 +40,11 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP}, {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP}, - {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<4>}, - {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<8>}, + {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, + {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, - {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<4>}, - {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<8>}, + {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, + {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::AVX128_VMOVHP}, {OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_VMOVHP}, @@ -57,102 +57,102 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS}, {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS}, - {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<4>}, - {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR<8>}, + {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR}, + {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR}, {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT}, {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT}, - {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR<4, false>}, - {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR<8, false>}, + {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR}, + {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR}, - {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR<4, true>}, - {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR<8, true>}, + {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR}, + {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::AVX128_CVTFPR_To_GPR}, - {OPD(1, 0b00, 0x2E), 1, 
&OpDispatchBuilder::AVX128_UCOMISx<4>}, - {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::AVX128_UCOMISx<8>}, - {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx<4>}, - {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx<8>}, + {OPD(1, 0b00, 0x2E), 1, &OpDispatchBuilder::AVX128_UCOMISx}, + {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::AVX128_UCOMISx}, + {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx}, + {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::AVX128_UCOMISx}, {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<4>}, {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::AVX128_MOVMSK<8>}, - {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, 4>}, - {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, 8>}, - {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, 8>}, + {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>}, - {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRSQRT, 4>}, - {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRSQRTSCALARINSERT, 4>}, + {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRSQRT, OpSize::i32Bit>}, + {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>}, - {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRECP, 4>}, - {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRECPSCALARINSERT, 4>}, + {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRECP, OpSize::i32Bit>}, + {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>}, - {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, 16>}, - {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, 16>}, + {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>}, + {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>}, {OPD(1, 0b00, 0x55), 1, &OpDispatchBuilder::AVX128_VANDN}, {OPD(1, 0b01, 0x55), 1, &OpDispatchBuilder::AVX128_VANDN}, - {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, 16>}, - {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, 16>}, + {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, 
IR::OP_VOR, OpSize::i128Bit>}, + {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, OpSize::i128Bit>}, {OPD(1, 0b00, 0x57), 1, &OpDispatchBuilder::AVX128_VectorXOR}, {OPD(1, 0b01, 0x57), 1, &OpDispatchBuilder::AVX128_VectorXOR}, - {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, 4>}, - {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, 8>}, - {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, 8>}, - - {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, 4>}, - {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, 8>}, - {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, 8>}, - - {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float<8, 4>}, - {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float<4, 8>}, - {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float<8, 4>}, - {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float<4, 8>}, - - {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float<4, false>}, - {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<4, false, true>}, - {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<4, false, false>}, - - {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, 4>}, - {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, 8>}, - {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, 8>}, - - {OPD(1, 0b00, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, 4>}, - {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, 8>}, - {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, 8>}, - - {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, 4>}, - {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, 8>}, - {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, 8>}, - - {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, 4>}, - {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, 8>}, - 
{OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, 4>}, - {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, 8>}, - - {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<1>}, - {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<2>}, - {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<4>}, - {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::AVX128_VPACKSS<2>}, - {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, 1>}, - {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, 2>}, - {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, 4>}, - {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::AVX128_VPACKUS<2>}, - {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<1>}, - {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<2>}, - {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<4>}, - {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::AVX128_VPACKSS<4>}, - {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::AVX128_VPUNPCKL<8>}, - {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::AVX128_VPUNPCKH<8>}, + {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float}, + {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float}, + {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float}, + {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float}, + + {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float}, + {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int}, + {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int}, + + {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b00, 0x5D), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, OpSize::i64Bit>}, + {OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>}, + {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>}, + + {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, + {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, + {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, + {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::AVX128_VPACKSS}, + {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i32Bit>}, + {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::AVX128_VPACKUS}, + {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, + {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, + {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, + {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::AVX128_VPACKSS}, + {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::AVX128_VPUNPCKL}, + {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::AVX128_VPUNPCKH}, {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR}, {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::AVX128_VMOVAPS}, @@ -162,14 +162,14 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, false>}, {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, true>}, - {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, 1>}, - {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, 2>}, - {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, 4>}, + {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i8Bit>}, + {OPD(1, 0b01, 0x75), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i16Bit>}, + {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i32Bit>}, {OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::AVX128_VZERO}, - {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::AVX128_VHADDP}, - {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::AVX128_VHADDP}, + {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::AVX128_VHADDP}, + {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::AVX128_VHADDP}, {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::AVX128_VHSUBP}, {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::AVX128_VHSUBP}, @@ -179,92 +179,92 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::AVX128_VMOVAPS}, {OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::AVX128_VMOVAPS}, - {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<4>}, - {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP<8>}, - {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<4>}, - {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP<8>}, + {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP}, + {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVX128_VFCMP}, + {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP}, + {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVX128_InsertScalarFCMP}, {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::AVX128_VPINSRW}, - {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::AVX128_PExtr<2>}, + {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::AVX128_PExtr}, - {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::AVX128_VSHUF<4>}, - {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::AVX128_VSHUF<8>}, + {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::AVX128_VSHUF}, + {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::AVX128_VSHUF}, - {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::AVX128_VADDSUBP<8>}, - {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::AVX128_VADDSUBP<4>}, + {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::AVX128_VADDSUBP}, + {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::AVX128_VADDSUBP}, - {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 2, IROps::OP_VUSHRSWIDE>}, // VPSRL - {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 4, IROps::OP_VUSHRSWIDE>}, // VPSRL - {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 8, IROps::OP_VUSHRSWIDE>}, // VPSRL - {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, 8>}, - {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, 2>}, + {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL + {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL + {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i64Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL + {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, OpSize::i16Bit>}, {OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::AVX128_MOVQ}, {OPD(1, 0b01, 0xD7), 1, &OpDispatchBuilder::AVX128_MOVMSKB}, - {OPD(1, 0b01, 0xD8), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, 1>}, - {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, 2>}, - {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, 1>}, - {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, 16>}, - {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, 1>}, - {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, 2>}, - {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, 1>}, + {OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>}, + {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i8Bit>}, {OPD(1, 0b01, 0xDF), 1, &OpDispatchBuilder::AVX128_VANDN}, - {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, 1>}, - {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 2, IROps::OP_VSSHRSWIDE>}, // VPSRA - {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 4, IROps::OP_VSSHRSWIDE>}, // VPSRA - {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, 2>}, + {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VSSHRSWIDE>}, // VPSRA + {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VSSHRSWIDE>}, // VPSRA + {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, OpSize::i16Bit>}, {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::AVX128_VPMULHW}, {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::AVX128_VPMULHW}, - {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<8, true, false>}, - {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float<4, true>}, - {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int<8, true, true>}, + {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int}, + {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float}, + {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int}, {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::AVX128_MOVVectorNT}, - {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, 1>}, - {OPD(1, 0b01, 0xE9), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, 2>}, - {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, 2>}, - {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, 16>}, - {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, 1>}, - {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, 2>}, - {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, 2>}, + {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, OpSize::i128Bit>}, + {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i16Bit>}, {OPD(1, 0b01, 0xEF), 1, &OpDispatchBuilder::AVX128_VectorXOR}, {OPD(1, 0b11, 0xF0), 1, &OpDispatchBuilder::AVX128_MOVVectorUnaligned}, - {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 2, IROps::OP_VUSHLSWIDE>}, // VPSLL - {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 4, IROps::OP_VUSHLSWIDE>}, // VPSLL - {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, 8, IROps::OP_VUSHLSWIDE>}, // VPSLL - {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::AVX128_VPMULL<4, false>}, + {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL + {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL + {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i64Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL + {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::AVX128_VPMULL}, {OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::AVX128_VPMADDWD}, {OPD(1, 0b01, 0xF6), 1, &OpDispatchBuilder::AVX128_VPSADBW}, {OPD(1, 0b01, 0xF7), 1, &OpDispatchBuilder::AVX128_MASKMOV}, - {OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, 1>}, - {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, 2>}, - {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, 4>}, - {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, 8>}, - {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, 1>}, - {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, 2>}, - {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, 4>}, + {OPD(1, 0b01, 0xF8), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i32Bit>}, + {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i64Bit>}, + {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i8Bit>}, + {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i16Bit>}, + {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i32Bit>}, {OPD(2, 0b01, 0x00), 1, &OpDispatchBuilder::AVX128_VPSHUFB}, - {OPD(2, 0b01, 0x01), 1, &OpDispatchBuilder::AVX128_VHADDP}, - {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::AVX128_VHADDP}, + {OPD(2, 0b01, 0x01), 1, &OpDispatchBuilder::AVX128_VHADDP}, + {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::AVX128_VHADDP}, {OPD(2, 0b01, 0x03), 1, &OpDispatchBuilder::AVX128_VPHADDSW}, {OPD(2, 0b01, 0x04), 1, &OpDispatchBuilder::AVX128_VPMADDUBSW}, - {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPHSUB<2>}, - {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPHSUB<4>}, + {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPHSUB}, + {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPHSUB}, {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::AVX128_VPHSUBSW}, - {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VPSIGN<1>}, - {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VPSIGN<2>}, - {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::AVX128_VPSIGN<4>}, + {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VPSIGN}, + {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VPSIGN}, + {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::AVX128_VPSIGN}, {OPD(2, 0b01, 0x0B), 1, &OpDispatchBuilder::AVX128_VPMULHRSW}, - {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::AVX128_VPERMILReg<4>}, - {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::AVX128_VPERMILReg<8>}, + {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::AVX128_VPERMILReg}, + {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::AVX128_VPERMILReg}, {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::AVX128_VTESTP}, {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::AVX128_VTESTP}, @@ -272,59 +272,59 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(2, 0b01, 0x13), 1, &OpDispatchBuilder::AVX128_VCVTPH2PS}, {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_VPERMD}, {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::AVX128_PTest}, - {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::AVX128_VBROADCAST<4>}, - {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::AVX128_VBROADCAST<8>}, - {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::AVX128_VBROADCAST<16>}, - {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, 1>}, - {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, 2>}, - {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, 4>}, - - {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 2, true>}, - {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 4, true>}, - {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 8, true>}, - {OPD(2, 0b01, 0x23), 
1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 2, 4, true>}, - {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 2, 8, true>}, - {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 4, 8, true>}, - - {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::AVX128_VPMULL<4, true>}, - {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, 8>}, + {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i32Bit>}, + + {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i16Bit, true>}, + {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i32Bit, true>}, + {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i64Bit, true>}, + {OPD(2, 0b01, 0x23), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i32Bit, true>}, + {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i64Bit, true>}, + {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i32Bit, OpSize::i64Bit, true>}, + + {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::AVX128_VPMULL}, + {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i64Bit>}, {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::AVX128_MOVVectorNT}, - {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::AVX128_VPACKUS<4>}, + {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::AVX128_VPACKUS}, {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::AVX128_VMASKMOV}, {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::AVX128_VMASKMOV}, {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::AVX128_VMASKMOV}, {OPD(2, 0b01, 0x2F), 1, &OpDispatchBuilder::AVX128_VMASKMOV}, - {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 2, false>}, - {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 4, false>}, - {OPD(2, 0b01, 0x32), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 1, 8, false>}, - {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 2, 4, false>}, - {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 2, 8, false>}, - {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, 4, 8, false>}, + {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i16Bit, false>}, + {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i32Bit, false>}, + {OPD(2, 0b01, 0x32), 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i64Bit, false>}, + {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i32Bit, false>}, + {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i64Bit, false>}, + {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i32Bit, OpSize::i64Bit, false>}, {OPD(2, 0b01, 0x36), 1, &OpDispatchBuilder::AVX128_VPERMD}, - {OPD(2, 0b01, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, 8>}, - {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, 1>}, - {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, 4>}, - {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, 2>}, - {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, 4>}, - {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, 1>}, - {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, 4>}, - {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, 2>}, - {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, 4>}, - - {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, 4>}, + {OPD(2, 0b01, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i64Bit>}, + {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i8Bit>}, + {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i32Bit>}, + {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i16Bit>}, + {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i32Bit>}, + + {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, OpSize::i32Bit>}, {OPD(2, 0b01, 0x41), 1, &OpDispatchBuilder::AVX128_PHMINPOSUW}, {OPD(2, 0b01, 0x45), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VUSHR>}, // VPSRLV {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VSSHR>}, // VPSRAVD {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VUSHL>}, // VPSLLV - {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::AVX128_VBROADCAST<4>}, - {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::AVX128_VBROADCAST<8>}, - {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::AVX128_VBROADCAST<16>}, + {OPD(2, 0b01, 
0x58), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, - {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::AVX128_VBROADCAST<1>}, - {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::AVX128_VBROADCAST<2>}, + {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, + {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::AVX128_VBROADCAST}, {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::AVX128_VPMASKMOV}, {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::AVX128_VPMASKMOV}, @@ -382,19 +382,19 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::AVX128_VPERMILImm<4>}, {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::AVX128_VPERMILImm<8>}, {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPERM2}, - {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VectorRound<4>}, - {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VectorRound<8>}, - {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVX128_InsertScalarRound<4>}, - {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::AVX128_InsertScalarRound<8>}, + {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVX128_VectorRound}, + {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVX128_VectorRound}, + {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVX128_InsertScalarRound}, + {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::AVX128_InsertScalarRound}, {OPD(3, 0b01, 0x0C), 1, &OpDispatchBuilder::AVX128_VBLEND}, {OPD(3, 0b01, 0x0D), 1, &OpDispatchBuilder::AVX128_VBLEND}, {OPD(3, 0b01, 0x0E), 1, &OpDispatchBuilder::AVX128_VBLEND}, {OPD(3, 0b01, 0x0F), 1, &OpDispatchBuilder::AVX128_VPALIGNR}, - {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::AVX128_PExtr<1>}, - {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::AVX128_PExtr<2>}, - {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_PExtr<4>}, - {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::AVX128_PExtr<4>}, + {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::AVX128_PExtr}, + {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::AVX128_PExtr}, + {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_PExtr}, + {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::AVX128_PExtr}, {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::AVX128_VINSERT}, {OPD(3, 0b01, 0x19), 1, &OpDispatchBuilder::AVX128_VEXTRACT128}, @@ -406,15 +406,15 @@ void OpDispatchBuilder::InstallAVX128Handlers() { {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::AVX128_VINSERT}, {OPD(3, 0b01, 0x39), 1, &OpDispatchBuilder::AVX128_VEXTRACT128}, - {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::AVX128_VDPP<4>}, - {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::AVX128_VDPP<8>}, + {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::AVX128_VDPP}, + {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::AVX128_VDPP}, {OPD(3, 0b01, 0x42), 1, &OpDispatchBuilder::AVX128_VMPSADBW}, {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::AVX128_VPERM2}, - {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<4>}, - {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<8>}, - {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend<1>}, + {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend}, + {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend}, + {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::AVX128_VectorVariableBlend}, {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::AVX128_VPCMPESTRM}, {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::AVX128_VPCMPESTRI}, @@ -428,25 +428,33 @@ void OpDispatchBuilder::InstallAVX128Handlers() { #define OPD(group, pp, opcode) (((group - 
X86Tables::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode)) static constexpr std::tuple VEX128TableGroupOps[] { // VPSRLI - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 2, IROps::OP_VUSHRI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VUSHRI>}, // VPSLLI - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 2, IROps::OP_VSHLI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VSHLI>}, // VPSRAI - {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 2, IROps::OP_VSSHRI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VSSHRI>}, // VPSRLI - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 4, IROps::OP_VUSHRI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VUSHRI>}, // VPSLLI - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 4, IROps::OP_VSHLI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VSHLI>}, // VPSRAI - {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 4, IROps::OP_VSSHRI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VSSHRI>}, // VPSRLI - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 8, IROps::OP_VUSHRI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i64Bit, IROps::OP_VUSHRI>}, // VPSRLDQ {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b011), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ShiftDoubleImm, ShiftDirection::RIGHT>}, // VPSLLI - {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, 8, IROps::OP_VSHLI>}, + {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, + &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i64Bit, IROps::OP_VSHLI>}, // VPSLLDQ {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b111), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ShiftDoubleImm, ShiftDirection::LEFT>}, @@ -592,7 +600,7 @@ void OpDispatchBuilder::AVX128_VMOVAPS(OpcodeArgs) { } } -void OpDispatchBuilder::AVX128_VMOVScalarImpl(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::AVX128_VMOVScalarImpl(OpcodeArgs, IR::OpSize ElementSize) { if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Src[1].IsGPR()) { // VMOVSS/SD xmm1, xmm2, xmm3 // Lower 128-bits are merged @@ -610,7 +618,7 @@ void OpDispatchBuilder::AVX128_VMOVScalarImpl(OpcodeArgs, size_t ElementSize) { } else { // VMOVSS/SD mem32/mem64, xmm1 auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false); - 
StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src.Low, ElementSize, -1);
+    StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src.Low, ElementSize, OpSize::iInvalid);
   }
 }
@@ -622,7 +630,7 @@ void OpDispatchBuilder::AVX128_VMOVScalarImpl(OpcodeArgs, size_t ElementSize) {
   AVX128_VMOVScalarImpl(Op, OpSize::i32Bit);
 }
-void OpDispatchBuilder::AVX128_VectorALU(OpcodeArgs, IROps IROp, size_t ElementSize) {
+void OpDispatchBuilder::AVX128_VectorALU(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
   const auto SrcSize = GetSrcSize(Op);
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -639,7 +647,7 @@ void OpDispatchBuilder::AVX128_VectorALU(OpcodeArgs, IROps IROp, size_t ElementS
   }
 }
-void OpDispatchBuilder::AVX128_VectorUnary(OpcodeArgs, IROps IROp, size_t ElementSize) {
+void OpDispatchBuilder::AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
   const auto SrcSize = GetSrcSize(Op);
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -655,8 +663,8 @@ void OpDispatchBuilder::AVX128_VectorUnary(OpcodeArgs, IROps IROp, size_t Elemen
   }
 }
-void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize,
-                                               std::function<Ref(size_t ElementSize, Ref Src)> Helper) {
+void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
+                                               std::function<Ref(IR::OpSize ElementSize, Ref Src)> Helper) {
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
   auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
@@ -672,8 +680,8 @@ void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, size_t SrcSize, size_
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
-void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, size_t ElementSize,
-                                                std::function<Ref(size_t ElementSize, Ref Src1, Ref Src2)> Helper) {
+void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, size_t SrcSize, IR::OpSize ElementSize,
+                                                std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2)> Helper) {
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
@@ -708,7 +716,7 @@ void OpDispatchBuilder::AVX128_VectorTrinaryImpl(OpcodeArgs, size_t SrcSize, siz
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
-void OpDispatchBuilder::AVX128_VectorShiftWideImpl(OpcodeArgs, size_t ElementSize, IROps IROp) {
+void OpDispatchBuilder::AVX128_VectorShiftWideImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp) {
   const auto Is128Bit = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
@@ -730,7 +738,7 @@ void OpDispatchBuilder::AVX128_VectorShiftWideImpl(OpcodeArgs, size_t ElementSiz
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
-void OpDispatchBuilder::AVX128_VectorShiftImmImpl(OpcodeArgs, size_t ElementSize, IROps IROp) {
+void OpDispatchBuilder::AVX128_VectorShiftImmImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp) {
   const auto DstSize = GetDstSize(Op);
   const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
   const uint64_t ShiftConstant = Op->Src[1].Literal();
@@ -938,16 +946,16 @@ void OpDispatchBuilder::AVX128_VMOVDDUP(OpcodeArgs) {
 }
 void OpDispatchBuilder::AVX128_VMOVSLDUP(OpcodeArgs) {
-  AVX128_VectorUnaryImpl(Op, GetSrcSize(Op), OpSize::i32Bit,
-                         [this](size_t ElementSize, Ref Src) { return _VTrn(OpSize::i128Bit, ElementSize, Src, Src); });
+  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i32Bit,
+                         [this](IR::OpSize ElementSize, Ref Src) { return _VTrn(OpSize::i128Bit, ElementSize, Src, Src); });
 }
 void OpDispatchBuilder::AVX128_VMOVSHDUP(OpcodeArgs) {
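// Editorial note, not part of the patch: with both operands equal, Arm TRN1/TRN2
// implement MOVSLDUP/MOVSHDUP directly. TRN1 keeps the even-indexed lanes of its
// inputs and TRN2 the odd-indexed ones, so for four 32-bit lanes {s0, s1, s2, s3}:
//   _VTrn (Src, Src) -> {s0, s0, s2, s2}  // MOVSLDUP: duplicate even lanes
//   _VTrn2(Src, Src) -> {s1, s1, s3, s3}  // MOVSHDUP: duplicate odd lanes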
-  AVX128_VectorUnaryImpl(Op, GetSrcSize(Op), OpSize::i32Bit,
-                         [this](size_t ElementSize, Ref Src) { return _VTrn2(OpSize::i128Bit, ElementSize, Src, Src); });
+  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i32Bit,
+                         [this](IR::OpSize ElementSize, Ref Src) { return _VTrn2(OpSize::i128Bit, ElementSize, Src, Src); });
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs) {
   const auto DstSize = GetDstSize(Op);
   const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -961,7 +969,7 @@ void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs) {
     }
   } else {
     // Get the address to broadcast from into a GPR.
-    Ref Address = MakeSegmentAddress(Op, Op->Src[0], CTX->GetGPRSize());
+    Ref Address = MakeSegmentAddress(Op, Op->Src[0], CTX->GetGPROpSize());
     Src.Low = _VBroadcastFromMem(OpSize::i128Bit, ElementSize, Address);
   }
@@ -974,16 +982,16 @@ void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPUNPCKL(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VZip(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VZip(OpSize::i128Bit, _ElementSize, Src1, Src2); });
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPUNPCKH(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VZip2(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VZip2(OpSize::i128Bit, _ElementSize, Src1, Src2); });
 }
 void OpDispatchBuilder::AVX128_MOVVectorUnaligned(OpcodeArgs) {
@@ -1004,10 +1012,10 @@ void OpDispatchBuilder::AVX128_MOVVectorUnaligned(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
 }
-template<size_t DstElementSize>
+template<IR::OpSize DstElementSize>
 void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
-  const auto SrcSize = GetSrcSize(Op);
-  const auto DstSize = GetDstSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
+  const auto DstSize = OpSizeFromDst(Op);
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
@@ -1015,20 +1023,20 @@ void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
   Ref Result {};
   if (Op->Src[1].IsGPR()) {
     // If the source is a GPR then convert directly from the GPR.
-    auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], CTX->GetGPRSize(), Op->Flags);
+    auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Op->Src[1], CTX->GetGPROpSize(), Op->Flags);
     Result.Low = _VSToFGPRInsert(OpSize::i128Bit, DstElementSize, SrcSize, Src1.Low, Src2, false);
   } else if (SrcSize != DstElementSize) {
     // If the source is from memory but the Source size and destination size aren't the same,
     // then it is more optimal to load in to a GPR and convert between GPR->FPR.
    // ARM GPR->FPR conversion supports different size source and destinations while FPR->FPR doesn't.
     auto Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
-    Result.Low = _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1.Low, Src2, false);
+    Result.Low = _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1.Low, Src2, false);
   } else {
     // In the case of cvtsi2s{s,d} where the source and destination are the same size,
     // then it is more optimal to load in to the FPR register directly and convert there.
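    // Editorial summary, not part of the patch: the if/else chain above selects
    // one of three source paths for cvtsi2s{s,d}:
    //   GPR source                 -> convert GPR -> FPR directly
    //   memory, SrcSize != DstElem -> load into a GPR, then GPR -> FPR convert
    //   memory, SrcSize == DstElem -> load into the FPR and convert in place
    // per the comments above, since Arm's GPR->FPR convert accepts mixed source
    // and destination sizes while its FPR->FPR convert does not.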
    auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
     // Always signed
-    Result.Low = _VSToFVectorInsert(IR::SizeToOpSize(DstSize), DstElementSize, DstElementSize, Src1.Low, Src2.Low, false, false);
+    Result.Low = _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1.Low, Src2.Low, false, false);
   }
   [[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -1038,7 +1046,7 @@ void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
-template<size_t SrcElementSize, bool HostRoundingMode>
+template<IR::OpSize SrcElementSize, bool HostRoundingMode>
 void OpDispatchBuilder::AVX128_CVTFPR_To_GPR(OpcodeArgs) {
   // If loading a vector, use the full size, so we don't
   // unnecessarily zero extend the vector. Otherwise, if
@@ -1047,12 +1055,12 @@ void OpDispatchBuilder::AVX128_CVTFPR_To_GPR(OpcodeArgs) {
   if (Op->Src[0].IsGPR()) {
     Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   } else {
-    Src.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], GetSrcSize(Op), Op->Flags);
+    Src.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSizeFromSrc(Op), Op->Flags);
   }
   // GPR size is determined by REX.W
   // Source Element size is determined by instruction
-  size_t GPRSize = GetDstSize(Op);
+  const auto GPRSize = OpSizeFromDst(Op);
   Ref Result {};
   if constexpr (HostRoundingMode) {
@@ -1061,41 +1069,43 @@
     Result = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src.Low);
   }
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid);
 }
 void OpDispatchBuilder::AVX128_VANDN(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VAndn(OpSize::i128Bit, _ElementSize, Src2, Src1); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VAndn(OpSize::i128Bit, _ElementSize, Src2, Src1); });
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPACKSS(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VSQXTNPair(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
+    return _VSQXTNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
+  });
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPACKUS(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VSQXTUNPair(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
+    return _VSQXTUNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
+  });
 }
-Ref OpDispatchBuilder::AVX128_PSIGNImpl(size_t ElementSize, Ref Src1, Ref Src2) {
+Ref OpDispatchBuilder::AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2) {
   Ref Control = _VSQSHL(OpSize::i128Bit, ElementSize, Src2, (ElementSize * 8) - 1);
   Control = _VSRSHR(OpSize::i128Bit, ElementSize, Control, (ElementSize * 8) - 1);
   return _VMul(OpSize::i128Bit, ElementSize, Src1, Control);
 }
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPSIGN(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return AVX128_PSIGNImpl(_ElementSize, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return AVX128_PSIGNImpl(_ElementSize, Src1, Src2); });
 void OpDispatchBuilder::AVX128_VANDN(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VAndn(OpSize::i128Bit, _ElementSize, Src2, Src1); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return _VAndn(OpSize::i128Bit, _ElementSize, Src2, Src1); });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPACKSS(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VSQXTNPair(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
+    return _VSQXTNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
+  });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPACKUS(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return _VSQXTUNPair(OpSize::i128Bit, _ElementSize, Src1, Src2); });
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
+    return _VSQXTUNPair(OpSize::i128Bit, _ElementSize, Src1, Src2);
+  });
 }
 
-Ref OpDispatchBuilder::AVX128_PSIGNImpl(size_t ElementSize, Ref Src1, Ref Src2) {
+Ref OpDispatchBuilder::AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2) {
   Ref Control = _VSQSHL(OpSize::i128Bit, ElementSize, Src2, (ElementSize * 8) - 1);
   Control = _VSRSHR(OpSize::i128Bit, ElementSize, Control, (ElementSize * 8) - 1);
   return _VMul(OpSize::i128Bit, ElementSize, Src1, Control);
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPSIGN(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return AVX128_PSIGNImpl(_ElementSize, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return AVX128_PSIGNImpl(_ElementSize, Src1, Src2); });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_UCOMISx(OpcodeArgs) {
-  const auto SrcSize = Op->Src[0].IsGPR() ? GetGuestVectorLength() : GetSrcSize(Op);
+  const auto SrcSize = Op->Src[0].IsGPR() ? GetGuestVectorLength() : OpSizeFromSrc(Op);
 
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, false);
@@ -1112,11 +1122,11 @@ void OpDispatchBuilder::AVX128_UCOMISx(OpcodeArgs) {
   Comiss(ElementSize, Src1.Low, Src2.Low);
 }
 
-void OpDispatchBuilder::AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, size_t ElementSize) {
+void OpDispatchBuilder::AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, IR::OpSize ElementSize) {
   // We load the full vector width when dealing with a source vector,
   // so that we don't do any unnecessary zero extension to the scalar
   // element that we're going to operate on.
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
 
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   RefPair Src2 {};
@@ -1132,7 +1142,7 @@ void OpDispatchBuilder::AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IR
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Result_Low, .High = High});
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs) {
   const uint8_t CompType = Op->Src[2].Literal();
@@ -1144,17 +1154,17 @@ void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs) {
     .CompType = CompType,
   };
 
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, &Capture](size_t _ElementSize, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, &Capture](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
     return VFCMPOpImpl(OpSize::i128Bit, _ElementSize, Src1, Src2, Capture.CompType);
   });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_InsertScalarFCMP(OpcodeArgs) {
   // We load the full vector width when dealing with a source vector,
   // so that we don't do any unnecessary zero extension to the scalar
   // element that we're going to operate on.
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
 
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   RefPair Src2 {};
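Context for the `RefPair` values threaded through the AVX128 helpers above: the AVX128 backend models every 256-bit guest register as two independent 128-bit halves, which is why 128-bit forms zero the upper half (`AVX128_Zext`) and 256-bit forms process `Low` and `High` separately. A rough sketch of the assumed shape of this representation, simplified from the code in this diff:

```cpp
// Ref is an opaque handle to an IR SSA value in FEXCore.
struct Ref;

// Assumed shape: a 256-bit guest value modelled as two 128-bit IR values,
// so hosts with only 128-bit vector units can emulate AVX.
struct RefPair {
  Ref* Low;  // lanes [127:0]
  Ref* High; // lanes [255:128]; a zero vector for 128-bit operations
};

// 256-bit operations then decompose into the same 128-bit op applied twice.
template<typename Fn>
RefPair ForEachHalf(RefPair Src, Fn&& Op) {
  return RefPair {Op(Src.Low), Op(Src.High)};
}
```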
@@ -1180,9 +1190,9 @@ void OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR(OpcodeArgs) {
     RefPair Result {};
     if (Op->Src[0].IsGPR()) {
       // Loading from GPR and moving to Vector.
-      Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags);
+      Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPROpSize(), Op->Flags);
       // zext to 128bit
-      Result.Low = _VCastFromGPR(OpSize::i128Bit, GetSrcSize(Op), Src);
+      Result.Low = _VCastFromGPR(OpSize::i128Bit, OpSizeFromSrc(Op), Src);
     } else {
       // Loading from Memory as a scalar. Zero extend
       Result.Low = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
@@ -1195,21 +1205,21 @@ void OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR(OpcodeArgs) {
     auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
 
     if (Op->Dest.IsGPR()) {
-      auto ElementSize = GetDstSize(Op);
+      auto ElementSize = OpSizeFromDst(Op);
       // Extract element from GPR. Zero extending in the process.
-      Src.Low = _VExtractToGPR(GetSrcSize(Op), ElementSize, Src.Low, 0);
-      StoreResult(GPRClass, Op, Op->Dest, Src.Low, -1);
+      Src.Low = _VExtractToGPR(OpSizeFromSrc(Op), ElementSize, Src.Low, 0);
+      StoreResult(GPRClass, Op, Op->Dest, Src.Low, OpSize::iInvalid);
     } else {
       // Storing first element to memory.
       Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
-      _StoreMem(FPRClass, GetDstSize(Op), Dest, Src.Low, 1);
+      _StoreMem(FPRClass, OpSizeFromDst(Op), Dest, Src.Low, OpSize::i8Bit);
     }
   }
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
 
   auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   uint64_t Index = Op->Src[1].Literal();
@@ -1218,7 +1228,7 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
   // When the element size is 32-bit then it can be overriden as 64-bit because the encoding of PEXTRD/PEXTRQ
   // is the same except that REX.W or VEX.W is set to 1. Incredibly frustrating.
   // Use the destination size as the element size in this case.
-  size_t OverridenElementSize = ElementSize;
+  auto OverridenElementSize = ElementSize;
   if constexpr (ElementSize == OpSize::i32Bit) {
     OverridenElementSize = DstSize;
   }
@@ -1228,10 +1238,10 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
   Index &= NumElements - 1;
 
   if (Op->Dest.IsGPR()) {
-    const uint8_t GPRSize = CTX->GetGPRSize();
+    const auto GPRSize = CTX->GetGPROpSize();
     // Extract already zero extends the result.
     Ref Result = _VExtractToGPR(OpSize::i128Bit, OverridenElementSize, Src.Low, Index);
-    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1);
+    StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid);
     return;
   }
@@ -1240,7 +1250,7 @@ void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs) {
   _VStoreVectorElement(OpSize::i128Bit, OverridenElementSize, Src.Low, Index, Dest);
 }
 
-void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, size_t ElementSize, size_t DstElementSize, bool Signed) {
+void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed) {
   const auto DstSize = GetDstSize(Op);
 
   const auto GetSrc = [&] {
@@ -1249,15 +1259,16 @@ void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, size_t ElementSi
     } else {
       // For memory operands the 256-bit variant loads twice the size specified in the table.
       const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE;
-      const auto SrcSize = GetSrcSize(Op);
-      const auto LoadSize = Is256Bit ? SrcSize * 2 : SrcSize;
+      const auto SrcSize = OpSizeFromSrc(Op);
+      const auto LoadSize = Is256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize;
 
       return LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags);
     }
   };
 
   auto Transform = [=, this](Ref Src) {
-    for (size_t CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; CurrentElementSize <<= 1) {
+    for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize;
+         CurrentElementSize = IR::MultiplyOpSize(CurrentElementSize, 2)) {
       if (Signed) {
         Src = _VSXTL(OpSize::i128Bit, CurrentElementSize, Src);
       } else {
@@ -1279,7 +1290,7 @@ void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, size_t ElementSi
     size_t TotalElementsToSplitSize = (TotalElementCount / 2) * ElementSize;
 
     // Split the number of elements in half between lower and upper.
-    Ref SrcHigh = _VDupElement(OpSize::i128Bit, TotalElementsToSplitSize, Src, 1);
+    Ref SrcHigh = _VDupElement(OpSize::i128Bit, IR::SizeToOpSize(TotalElementsToSplitSize), Src, 1);
     Result.Low = Transform(Src);
     Result.High = Transform(SrcHigh);
   }
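`AVX128_ExtendVectorElements` widens in steps: each `VSXTL`/`VUXTL` doubles the element width, so the loop keeps doubling `CurrentElementSize` until it reaches the destination width, exactly what the new `IR::MultiplyOpSize(CurrentElementSize, 2)` step expresses. A standalone trace of that stepping, under the byte-width `OpSize` assumption from the earlier sketch:

```cpp
#include <cstdint>
#include <cstdio>

enum class OpSize : uint8_t { i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8 };

int main() {
  // pmovsxbq-style extension: 8-bit elements widened to 64-bit takes three doubling steps.
  auto Current = OpSize::i8Bit;
  const auto Dst = OpSize::i64Bit;
  while (Current != Dst) {
    // Equivalent to MultiplyOpSize(Current, 2) in the patch above.
    Current = static_cast<OpSize>(static_cast<uint8_t>(Current) * 2);
    std::printf("widened to %u bytes\n", static_cast<unsigned>(Current));
  }
}
```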
@@ -1340,7 +1351,7 @@ void OpDispatchBuilder::AVX128_MOVMSK(OpcodeArgs) {
     auto GPRHigh = Mask8Byte(Src.High);
     GPR = _Orlshl(OpSize::i64Bit, GPRLow, GPRHigh, 2);
   }
-  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1);
+  StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPROpSize(), OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AVX128_MOVMSKB(OpcodeArgs) {
@@ -1369,10 +1380,10 @@ void OpDispatchBuilder::AVX128_MOVMSKB(OpcodeArgs) {
     Result = _Orlshl(OpSize::i64Bit, Result, ResultHigh, 16);
   }
 
-  StoreResult(GPRClass, Op, Result, -1);
+  StoreResult(GPRClass, Op, Result, OpSize::iInvalid);
 }
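The MOVMSK handlers above build a 256-bit sign mask by computing a mask per 128-bit half and merging with `_Orlshl` (or-with-left-shift): for VMOVMSKPD each half contributes 2 bits, so the high half is shifted by 2, while for VPMOVMSKB each half contributes 16 bits. The merge reduced to plain integer arithmetic, with the same shift constants as the code above (the operand values are hypothetical):

```cpp
#include <cstdint>

// Orlshl(a, b, n) in the IR above is simply a | (b << n).
constexpr uint64_t Orlshl(uint64_t A, uint64_t B, unsigned Shift) {
  return A | (B << Shift);
}

static_assert(Orlshl(0b01, 0b10, 2) == 0b1001);           // VMOVMSKPD: 2 bits per half
static_assert(Orlshl(0x00FF, 0x8001, 16) == 0x8001'00FF); // VPMOVMSKB: 16 bits per half
```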
-void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op,
+void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                                          const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) {
   const auto NumElements = OpSize::i128Bit / ElementSize;
   const uint64_t Index = Imm.Literal() & (NumElements - 1);
@@ -1382,7 +1393,7 @@ void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, size_t ElementSize, const X
 
   if (Src2Op.IsGPR()) {
     // If the source is a GPR then convert directly from the GPR.
-    auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags);
+    auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPROpSize(), Op->Flags);
     Result.Low = _VInsGPR(OpSize::i128Bit, ElementSize, Index, Src1.Low, Src2);
   } else {
     // If loading from memory then we only load the element size
@@ -1403,12 +1414,12 @@ void OpDispatchBuilder::AVX128_VPINSRW(OpcodeArgs) {
 }
 
 void OpDispatchBuilder::AVX128_VPINSRDQ(OpcodeArgs) {
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
   AVX128_PINSRImpl(Op, SrcSize, Op->Src[0], Op->Src[1], Op->Src[2]);
 }
 
 void OpDispatchBuilder::AVX128_VariableShiftImpl(OpcodeArgs, IROps IROp) {
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), GetSrcSize(Op), [this, IROp](size_t ElementSize, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSizeFromSrc(Op), [this, IROp](IR::OpSize ElementSize, Ref Src1, Ref Src2) {
     DeriveOp(Shift, IROp, _VUShr(OpSize::i128Bit, ElementSize, Src1, Src2, true));
     return Shift;
   });
@@ -1473,41 +1484,42 @@ void OpDispatchBuilder::AVX128_VINSERTPS(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Result));
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPHSUB(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return PHSUBOpImpl(OpSize::i128Bit, Src1, Src2, _ElementSize); });
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
+    return PHSUBOpImpl(OpSize::i128Bit, Src1, Src2, _ElementSize);
+  });
 }
 
 void OpDispatchBuilder::AVX128_VPHSUBSW(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return PHSUBSOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PHSUBSOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VADDSUBP(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](size_t _ElementSize, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) {
     return ADDSUBPOpImpl(OpSize::i128Bit, _ElementSize, Src1, Src2);
   });
 }
 
-template<size_t ElementSize, bool Signed>
+template<IR::OpSize ElementSize, bool Signed>
 void OpDispatchBuilder::AVX128_VPMULL(OpcodeArgs) {
   static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit");
 
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](size_t _ElementSize, Ref Src1, Ref Src2) -> Ref {
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
     return PMULLOpImpl(OpSize::i128Bit, ElementSize, Signed, Src1, Src2);
   });
 }
 
 void OpDispatchBuilder::AVX128_VPMULHRSW(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) -> Ref { return PMULHRSWOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref { return PMULHRSWOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
 template<bool Signed>
 void OpDispatchBuilder::AVX128_VPMULHW(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit, [this](size_t _ElementSize, Ref Src1, Ref Src2) -> Ref {
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit, [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) -> Ref {
     if (Signed) {
       return _VSMulH(OpSize::i128Bit, _ElementSize, Src1, Src2);
     } else {
@@ -1516,12 +1528,12 @@ void OpDispatchBuilder::AVX128_VPMULHW(OpcodeArgs) {
   });
 }
 
-template<size_t DstElementSize, size_t SrcElementSize>
+template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
 void OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs) {
   // Gotta be careful with this operation.
   // It inserts in to the lowest element, retaining the remainder of the lower 128-bits.
   // Then zero extends the top 128-bit.
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags, {.AllowUpperGarbage = true});
@@ -1529,10 +1541,10 @@ void OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Result));
 }
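The float-to-float conversions that follow load only half of the source register from memory when widening floats to doubles: a 128-bit cvtps2pd result consumes just two floats, that is 8 bytes. The size arithmetic that the `IR::SizeToOpSize(IR::OpSizeToSize(...) / 2)` round-trip performs, shown in isolation (the helper name here is hypothetical):

```cpp
#include <cstdint>

enum class OpSize : uint8_t { i64Bit = 8, i128Bit = 16, i256Bit = 32 };

// A float->double memory source only needs half the destination width:
// 2 floats for a 128-bit result, 4 floats for a 256-bit one.
constexpr OpSize HalfLoadSize(OpSize SrcSize) {
  return static_cast<OpSize>(static_cast<uint8_t>(SrcSize) / 2);
}

static_assert(HalfLoadSize(OpSize::i128Bit) == OpSize::i64Bit);
```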
-template<size_t DstElementSize, size_t SrcElementSize>
+template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
 void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs) {
-  const auto SrcSize = GetSrcSize(Op);
-  const auto DstSize = GetDstSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
+  const auto DstSize = OpSizeFromDst(Op);
 
   const auto IsFloatSrc = SrcElementSize == 4;
   auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -1545,7 +1557,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs) {
     Is128BitDst = true;
   }
 
-  const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? SrcSize / 2 : SrcSize;
+  const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2) : SrcSize;
 
   RefPair Src {};
   if (Op->Src[0].IsGPR() || LoadSize >= OpSize::i128Bit) {
@@ -1593,7 +1605,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<size_t SrcElementSize, bool HostRoundingMode, bool Narrow>
+template<IR::OpSize SrcElementSize, bool HostRoundingMode, bool Narrow>
 void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
   const auto SrcSize = GetSrcSize(Op);
@@ -1616,9 +1628,9 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
     }
   } else {
     auto Convert = [this](Ref Src) -> Ref {
-      size_t ElementSize = SrcElementSize;
+      auto ElementSize = SrcElementSize;
       if (Narrow) {
-        ElementSize >>= 1;
+        ElementSize = IR::DivideOpSize(ElementSize, 2);
         Src = _Vector_FToF(OpSize::i128Bit, ElementSize, Src, SrcElementSize);
       }
@@ -1648,9 +1660,9 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<size_t SrcElementSize, bool Widen>
+template<IR::OpSize SrcElementSize, bool Widen>
 void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs) {
-  const size_t Size = GetDstSize(Op);
+  const auto Size = OpSizeFromDst(Op);
   const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
 
   RefPair Src = [&] {
@@ -1658,7 +1670,7 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs) {
       // If loading a vector, use the full size, so we don't
      // unnecessarily zero extend the vector. Otherwise, if
       // memory, then we want to load the element size exactly.
-      const auto LoadSize = 8 * (Size / 16);
+      const auto LoadSize = IR::SizeToOpSize(8 * (IR::OpSizeToSize(Size) / 16));
       return RefPair {.Low = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags)};
     } else {
       return AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
@@ -1666,11 +1678,11 @@ void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs) {
   }();
 
   auto Convert = [this](Ref Src, IROps Op) -> Ref {
-    size_t ElementSize = SrcElementSize;
+    auto ElementSize = SrcElementSize;
     if (Widen) {
      DeriveOp(Extended, Op, _VSXTL(OpSize::i128Bit, ElementSize, Src));
      Src = Extended;
-      ElementSize <<= 1;
+      ElementSize = IR::MultiplyOpSize(ElementSize, 2);
     }
 
     return _Vector_SToF(OpSize::i128Bit, ElementSize, Src);
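The `8 * (IR::OpSizeToSize(Size) / 16)` expression in the widening int-to-float path above derives the memory load size from the destination width: a 128-bit destination loads 8 bytes and a 256-bit destination loads 16, since the `VSXTL` that follows doubles the element width in place. Spelled out as plain arithmetic (assuming, as before, that `OpSize` values are byte counts):

```cpp
constexpr unsigned WidenedLoadBytes(unsigned DstBytes) {
  // Half of the destination register width, computed per 128-bit block.
  return 8 * (DstBytes / 16);
}

static_assert(WidenedLoadBytes(16) == 8);  // 128-bit dst: load 64 bits, widen in place
static_assert(WidenedLoadBytes(32) == 16); // 256-bit dst: load 128 bits, widen per half
```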
@@ -1716,7 +1728,7 @@ void OpDispatchBuilder::AVX128_VEXTRACT128(OpcodeArgs) {
 
 void OpDispatchBuilder::AVX128_VAESImc(OpcodeArgs) {
   ///< 128-bit only.
-  AVX128_VectorUnaryImpl(Op, OpSize::i128Bit, OpSize::i128Bit, [this](size_t, Ref Src) { return _VAESImc(Src); });
+  AVX128_VectorUnaryImpl(Op, OpSize::i128Bit, OpSize::i128Bit, [this](IR::OpSize, Ref Src) { return _VAESImc(Src); });
 }
 
 void OpDispatchBuilder::AVX128_VAESEnc(OpcodeArgs) {
@@ -1755,7 +1767,7 @@ void OpDispatchBuilder::AVX128_VAESKeyGenAssist(OpcodeArgs) {
     .RCON = RCON,
   };
 
-  AVX128_VectorUnaryImpl(Op, OpSize::i128Bit, OpSize::i128Bit, [this, &Capture](size_t, Ref Src) {
+  AVX128_VectorUnaryImpl(Op, OpSize::i128Bit, OpSize::i128Bit, [this, &Capture](IR::OpSize, Ref Src) {
     return _VAESKeyGenAssist(Src, Capture.KeyGenSwizzle, Capture.ZeroRegister, Capture.RCON);
   });
 }
@@ -1791,21 +1803,21 @@ void OpDispatchBuilder::AVX128_PHMINPOSUW(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Result));
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VectorRound(OpcodeArgs) {
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const auto Mode = Op->Src[1].Literal();
 
   AVX128_VectorUnaryImpl(Op, Size, ElementSize,
-                         [this, Mode](size_t, Ref Src) { return VectorRoundImpl(OpSize::i128Bit, ElementSize, Src, Mode); });
+                         [this, Mode](IR::OpSize, Ref Src) { return VectorRoundImpl(OpSize::i128Bit, ElementSize, Src, Mode); });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_InsertScalarRound(OpcodeArgs) {
   // We load the full vector width when dealing with a source vector,
   // so that we don't do any unnecessary zero extension to the scalar
   // element that we're going to operate on.
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
 
   auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
   RefPair Src2 {};
@@ -1822,11 +1834,11 @@ void OpDispatchBuilder::AVX128_InsertScalarRound(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Result));
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VDPP(OpcodeArgs) {
   const uint64_t Literal = Op->Src[2].Literal();
 
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, Literal](size_t, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this, Literal](IR::OpSize, Ref Src1, Ref Src2) {
     return DPPOpImpl(OpSize::i128Bit, Src1, Src2, Literal, ElementSize);
   });
 }
@@ -1886,7 +1898,7 @@ void OpDispatchBuilder::AVX128_VPSHUFW(OpcodeArgs, bool Low) {
     .Low = Low,
   };
 
-  AVX128_VectorUnaryImpl(Op, GetSrcSize(Op), OpSize::i16Bit, [Pack](size_t _, Ref Src) {
+  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i16Bit, [Pack](IR::OpSize, Ref Src) {
     const auto IndexedVectorConstant = Pack.Low ? FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW :
                                                   FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW;
@@ -1894,7 +1906,7 @@ void OpDispatchBuilder::AVX128_VPSHUFW(OpcodeArgs, bool Low) {
   });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VSHUF(OpcodeArgs) {
   const auto SrcSize = GetSrcSize(Op);
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -1953,9 +1965,9 @@ void OpDispatchBuilder::AVX128_VPERMILImm(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<FEXCore::IR::IROps IROp, size_t ElementSize>
+template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VHADDP(OpcodeArgs) {
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](size_t, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](IR::OpSize, Ref Src1, Ref Src2) {
     DeriveOp(Res, IROp, _VFAddP(OpSize::i128Bit, ElementSize, Src1, Src2));
     return Res;
   });
@@ -1963,22 +1975,22 @@ void OpDispatchBuilder::AVX128_VHADDP(OpcodeArgs) {
 
 void OpDispatchBuilder::AVX128_VPHADDSW(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i16Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return PHADDSOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PHADDSOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
 void OpDispatchBuilder::AVX128_VPMADDUBSW(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return PMADDUBSWOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PMADDUBSWOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
 void OpDispatchBuilder::AVX128_VPMADDWD(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::i128Bit,
-                          [this](size_t _ElementSize, Ref Src1, Ref Src2) { return PMADDWDOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize _ElementSize, Ref Src1, Ref Src2) { return PMADDWDOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs) {
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
   const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
   const uint64_t Selector = Op->Src[2].Literal();
@@ -2003,21 +2015,22 @@ void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VHSUBP(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetDstSize(Op), ElementSize,
-                          [this](size_t, Ref Src1, Ref Src2) { return HSUBPOpImpl(OpSize::i128Bit, ElementSize, Src1, Src2); });
+                          [this](IR::OpSize, Ref Src1, Ref Src2) { return HSUBPOpImpl(OpSize::i128Bit, ElementSize, Src1, Src2); });
 }
 
 void OpDispatchBuilder::AVX128_VPSHUFB(OpcodeArgs) {
   auto MaskVector = GeneratePSHUFBMask(OpSize::i128Bit);
-  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i8Bit,
-                          [this, MaskVector](size_t, Ref Src1, Ref Src2) { return PSHUFBOpImpl(OpSize::i128Bit, Src1, Src2, MaskVector); });
+  AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i8Bit, [this, MaskVector](IR::OpSize, Ref Src1, Ref Src2) {
+    return PSHUFBOpImpl(OpSize::i128Bit, Src1, Src2, MaskVector);
+  });
 }
 
 void OpDispatchBuilder::AVX128_VPSADBW(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetDstSize(Op), OpSize::i8Bit,
-                          [this](size_t, Ref Src1, Ref Src2) { return PSADBWOpImpl(OpSize::i128Bit, Src1, Src2); });
+                          [this](IR::OpSize, Ref Src1, Ref Src2) { return PSADBWOpImpl(OpSize::i128Bit, Src1, Src2); });
 }
 
 void OpDispatchBuilder::AVX128_VMPSADBW(OpcodeArgs) {
@@ -2044,10 +2057,10 @@ void OpDispatchBuilder::AVX128_VMPSADBW(OpcodeArgs) {
 
 void OpDispatchBuilder::AVX128_VPALIGNR(OpcodeArgs) {
   const auto Index = Op->Src[2].Literal();
-  const auto Size = GetDstSize(Op);
-  const auto SanitizedDstSize = std::min(Size, uint8_t {16});
+  const auto Size = OpSizeFromDst(Op);
+  const auto SanitizedDstSize = std::min(Size, OpSize::i128Bit);
 
-  AVX128_VectorBinaryImpl(Op, Size, SanitizedDstSize, [this, Index](size_t SanitizedDstSize, Ref Src1, Ref Src2) -> Ref {
+  AVX128_VectorBinaryImpl(Op, Size, SanitizedDstSize, [this, Index](IR::OpSize SanitizedDstSize, Ref Src1, Ref Src2) -> Ref {
     if (Index >= (SanitizedDstSize * 2)) {
       // If the immediate is greater than both vectors combined then it zeroes the vector
       return LoadZeroVector(OpSize::i128Bit);
@@ -2061,14 +2074,14 @@ void OpDispatchBuilder::AVX128_VPALIGNR(OpcodeArgs) {
   });
 }
 
-void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, size_t ElementSize, size_t DstSize, bool IsStore,
+void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstSize, bool IsStore,
                                             const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) {
   const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   auto Mask = AVX128_LoadSource_WithOpSize(Op, MaskOp, Op->Flags, !Is128Bit);
 
   const auto MakeAddress = [this, Op](const X86Tables::DecodedOperand& Data) {
-    return MakeSegmentAddress(Op, Data, CTX->GetGPRSize());
+    return MakeSegmentAddress(Op, Data, CTX->GetGPROpSize());
   };
 
   if (IsStore) {
@@ -2085,14 +2098,14 @@ void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, size_t ElementSize, size
     auto Address = MakeAddress(DataOp);
 
     RefPair Result {};
-    Result.Low = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Address, Invalid(), MEM_OFFSET_SXTX, 1);
+    Result.Low = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Address, Invalid(), MEM_OFFSET_SXTX, OpSize::i8Bit);
 
     if (Is128Bit) {
       Result.High = LoadZeroVector(OpSize::i128Bit);
     } else {
       ///< TODO: This can be cleaner if AVX128_LoadSource_WithOpSize could return both constructed addresses.
       auto AddressHigh = _Add(OpSize::i64Bit, Address, _Constant(16));
-      Result.High = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, AddressHigh, Invalid(), MEM_OFFSET_SXTX, 1);
+      Result.High = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, AddressHigh, Invalid(), MEM_OFFSET_SXTX, OpSize::i8Bit);
     }
     AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
   }
 }
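The masked-load path above splits a 256-bit VMASKMOV into two 128-bit masked loads, bumping the address by 16 bytes for the upper half. The addressing logic, reduced to a plain-C++ model (a sketch only; the real code emits IR with per-lane masks, while this model elides the masking):

```cpp
#include <cstdint>
#include <cstring>

// Model of the two-half load: each half reads 16 bytes, from Address
// and Address + 16 respectively, each under its own lane mask in the real IR.
struct Halves {
  uint8_t Low[16];
  uint8_t High[16];
};

Halves LoadMasked256(const uint8_t* Address) {
  Halves Out {};
  std::memcpy(Out.Low, Address, 16);       // Mask.Low applies here in the real code
  std::memcpy(Out.High, Address + 16, 16); // AddressHigh = Address + 16
  return Out;
}
```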
@@ -2100,17 +2113,17 @@ void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, size_t ElementSize, size
 
 template<bool IsStore>
 void OpDispatchBuilder::AVX128_VPMASKMOV(OpcodeArgs) {
-  AVX128_VMASKMOVImpl(Op, GetSrcSize(Op), GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]);
+  AVX128_VMASKMOVImpl(Op, OpSizeFromSrc(Op), OpSizeFromDst(Op), IsStore, Op->Src[0], Op->Src[1]);
 }
 
-template<size_t ElementSize, bool IsStore>
+template<IR::OpSize ElementSize, bool IsStore>
 void OpDispatchBuilder::AVX128_VMASKMOV(OpcodeArgs) {
-  AVX128_VMASKMOVImpl(Op, ElementSize, GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]);
+  AVX128_VMASKMOVImpl(Op, ElementSize, OpSizeFromDst(Op), IsStore, Op->Src[0], Op->Src[1]);
 }
 
 void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) {
   ///< This instruction only supports 128-bit.
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
 
   auto MaskSrc = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
@@ -2131,9 +2144,9 @@ void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) {
   _StoreMem(FPRClass, Size, MemDest, XMMReg, OpSize::i8Bit);
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs) {
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;
   const auto Src3Selector = Op->Src[2].Literal();
@@ -2213,7 +2226,7 @@ void OpDispatchBuilder::AVX128_VPERM2(OpcodeArgs) {
   AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs) {
   InvalidateDeferredFlags();
@@ -2331,7 +2344,7 @@ void OpDispatchBuilder::AVX128_PTest(OpcodeArgs) {
   ZeroPF_AF();
 }
 
-template<size_t ElementSize>
+template<IR::OpSize ElementSize>
 void OpDispatchBuilder::AVX128_VPERMILReg(OpcodeArgs) {
   AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), ElementSize, [this](size_t _ElementSize, Ref Src, Ref Indices) {
     return VPERMILRegOpImpl(OpSize::i128Bit, ElementSize, Src, Indices);
@@ -2363,7 +2376,7 @@ void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
 
 void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
   const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());
 
-  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), 0, [this, Selector](size_t _, Ref Src1, Ref Src2) {
+  AVX128_VectorBinaryImpl(Op, GetSrcSize(Op), OpSize::iInvalid, [this, Selector](size_t _, Ref Src1, Ref Src2) {
     return _PCLMUL(OpSize::i128Bit, Src1, Src2, Selector & 0b1'0001);
   });
 }
@@ -2739,8 +2752,8 @@ void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs) {
 }
 
 void OpDispatchBuilder::AVX128_VCVTPH2PS(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
-  const auto SrcSize = DstSize / 2;
+  const auto DstSize = OpSizeFromDst(Op);
+  const auto SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) / 2);
   const auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
   const auto Is128BitDst = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
@@ -2769,9 +2782,9 @@ void OpDispatchBuilder::AVX128_VCVTPH2PS(OpcodeArgs) {
 }
 
 void OpDispatchBuilder::AVX128_VCVTPS2PH(OpcodeArgs) {
-  const auto SrcSize = GetSrcSize(Op);
+  const auto SrcSize = OpSizeFromSrc(Op);
   const auto Is128BitSrc = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
-  const auto StoreSize = Op->Dest.IsGPR() ? OpSize::i128Bit : SrcSize / 2;
+  const auto StoreSize = Op->Dest.IsGPR() ? OpSize::i128Bit : IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2);
 
   const auto Imm8 = Op->Src[1].Literal();
   const auto UseMXCSR = (Imm8 & 0b100) != 0;
@@ -2806,7 +2819,7 @@ void OpDispatchBuilder::AVX128_VCVTPS2PH(OpcodeArgs) {
   }
 
   if (!Op->Dest.IsGPR()) {
-    StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result.Low, StoreSize, -1);
+    StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result.Low, StoreSize, OpSize::iInvalid);
   } else {
     AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
   }
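For reference when reading the VCVTPS2PH hunk above: the instruction's imm8 encodes rounding control, and bit 2 selects whether MXCSR's rounding mode is used instead of the immediate's bits 1:0, which is exactly what the `UseMXCSR = (Imm8 & 0b100) != 0` check decodes. A small decoder for those two fields:

```cpp
#include <cstdint>

struct Ps2PhControl {
  bool UseMXCSR;     // imm8[2]: take the rounding mode from MXCSR.RC instead
  uint8_t RoundMode; // imm8[1:0]: 0 nearest-even, 1 down, 2 up, 3 truncate
};

constexpr Ps2PhControl DecodePs2PhImm(uint8_t Imm8) {
  return {(Imm8 & 0b100) != 0, static_cast<uint8_t>(Imm8 & 0b11)};
}

static_assert(DecodePs2PhImm(0b100).UseMXCSR);
static_assert(DecodePs2PhImm(0b011).RoundMode == 3); // round toward zero
```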
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
index ac3e3dbdc9..cf15aec2c1 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
@@ -43,7 +43,7 @@ void OpDispatchBuilder::SHA1NEXTEOp(OpcodeArgs) {
   auto Tmp = _VAdd(OpSize::i128Bit, OpSize::i32Bit, Src, RotatedNode);
   auto Result = _VInsElement(OpSize::i128Bit, OpSize::i32Bit, 3, 3, Src, Tmp);
 
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::SHA1MSG1Op(OpcodeArgs) {
@@ -55,7 +55,7 @@ void OpDispatchBuilder::SHA1MSG1Op(OpcodeArgs) {
 
   // [W0, W1, W2, W3] ^ [W2, W3, W4, W5]
   Ref Result = _VXor(OpSize::i128Bit, OpSize::i8Bit, Dest, NewVec);
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::SHA1MSG2Op(OpcodeArgs) {
@@ -86,7 +86,7 @@ void OpDispatchBuilder::SHA1MSG2Op(OpcodeArgs) {
   auto Result = _VInsElement(OpSize::i128Bit, OpSize::i32Bit, 0, 0, RotatedXor1, RotatedXorLower);
 
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) {
@@ -147,7 +147,7 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) {
   };
 
   const auto Round1To3 = [&](Ref A, Ref B, Ref C, Ref D, Ref E, Ref Src, unsigned W_idx) -> RoundResult {
     // Kill W and E at the beginning
-    auto W = _VExtractToGPR(OpSize::i128Bit, 4, Src, W_idx);
+    auto W = _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Src, W_idx);
     auto Q = _Add(OpSize::i32Bit, W, E);
 
     auto ANext =
@@ -170,7 +170,7 @@ void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) {
   auto Dest1 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 1, Dest2, std::get<2>(Final));
   auto Dest0 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, Dest1, std::get<3>(Final));
 
-  StoreResult(FPRClass, Op, Dest0, -1);
+  StoreResult(FPRClass, Op, Dest0, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) {
@@ -204,7 +204,7 @@ void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) {
     Result = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, D1, Sig0);
   }
 
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) {
@@ -228,7 +228,7 @@ void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) {
   auto D1 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 1, D2, W17);
   auto D0 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, D1, W16);
 
-  StoreResult(FPRClass, Op, D0, -1);
+  StoreResult(FPRClass, Op, D0, OpSize::iInvalid);
 }
 
 Ref OpDispatchBuilder::BitwiseAtLeastTwo(Ref A, Ref B, Ref C) {
@@ -298,24 +298,24 @@ void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) {
   auto Res1 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 1, Res2, E2);
   auto Res0 = _VInsGPR(OpSize::i128Bit, OpSize::i32Bit, 0, Res1, E1);
 
-  StoreResult(FPRClass, Op, Res0, -1);
+  StoreResult(FPRClass, Op, Res0, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AESImcOp(OpcodeArgs) {
   Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Result = _VAESImc(Src);
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AESEncOp(OpcodeArgs) {
   Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
   Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Result = _VAESEnc(OpSize::i128Bit, Dest, Src, LoadZeroVector(OpSize::i128Bit));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::VAESEncOp(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
   [[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   // TODO: Handle 256-bit VAESENC.
@@ -325,18 +325,18 @@ void OpDispatchBuilder::VAESEncOp(OpcodeArgs) {
   Ref Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
   Ref Result = _VAESEnc(DstSize, State, Key, LoadZeroVector(DstSize));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AESEncLastOp(OpcodeArgs) {
   Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
   Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Result = _VAESEncLast(OpSize::i128Bit, Dest, Src, LoadZeroVector(OpSize::i128Bit));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
   [[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   // TODO: Handle 256-bit VAESENCLAST.
@@ -346,18 +346,18 @@ void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) {
   Ref Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
   Ref Result = _VAESEncLast(DstSize, State, Key, LoadZeroVector(DstSize));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AESDecOp(OpcodeArgs) {
   Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
   Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Result = _VAESDec(OpSize::i128Bit, Dest, Src, LoadZeroVector(OpSize::i128Bit));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::VAESDecOp(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
   [[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   // TODO: Handle 256-bit VAESDEC.
@@ -367,18 +367,18 @@ void OpDispatchBuilder::VAESDecOp(OpcodeArgs) {
   Ref Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
   Ref Result = _VAESDec(DstSize, State, Key, LoadZeroVector(DstSize));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::AESDecLastOp(OpcodeArgs) {
   Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags);
   Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Result = _VAESDecLast(OpSize::i128Bit, Dest, Src, LoadZeroVector(OpSize::i128Bit));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
   [[maybe_unused]] const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;
 
   // TODO: Handle 256-bit VAESDECLAST.
@@ -388,7 +388,7 @@ void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) {
   Ref Key = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
   Ref Result = _VAESDecLast(DstSize, State, Key, LoadZeroVector(DstSize));
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 Ref OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) {
@@ -401,7 +401,7 @@ Ref OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) {
 
 void OpDispatchBuilder::AESKeyGenAssist(OpcodeArgs) {
   Ref Result = AESKeyGenAssistImpl(Op);
-  StoreResult(FPRClass, Op, Result, -1);
+  StoreResult(FPRClass, Op, Result, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) {
@@ -410,18 +410,18 @@ void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) {
   const auto Selector = static_cast<uint8_t>(Op->Src[1].Literal());
 
   auto Res = _PCLMUL(OpSize::i128Bit, Dest, Src, Selector & 0b1'0001);
-  StoreResult(FPRClass, Op, Res, -1);
+  StoreResult(FPRClass, Op, Res, OpSize::iInvalid);
 }
 
 void OpDispatchBuilder::VPCLMULQDQOp(OpcodeArgs) {
-  const auto DstSize = GetDstSize(Op);
+  const auto DstSize = OpSizeFromDst(Op);
 
   Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags);
   const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());
 
   Ref Res = _PCLMUL(DstSize, Src1, Src2, Selector & 0b1'0001);
-  StoreResult(FPRClass, Op, Res, -1);
+  StoreResult(FPRClass, Op, Res, OpSize::iInvalid);
 }
 } // namespace FEXCore::IR
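A note on the `Selector & 0b1'0001` masking in the PCLMULQDQ handlers above: only imm8 bit 0 (which 64-bit half of the first source) and bit 4 (which half of the second source) are architecturally meaningful for this instruction, so all other bits are stripped before the selector reaches `_PCLMUL`. In isolation:

```cpp
#include <cstdint>

constexpr uint8_t SanitizePclmulSelector(uint8_t Imm8) {
  return Imm8 & 0b1'0001; // keep bit 0 (src1 half) and bit 4 (src2 half)
}

static_assert(SanitizePclmulSelector(0xFF) == 0x11);
static_assert(SanitizePclmulSelector(0x10) == 0x10); // high qword of src2, low qword of src1
```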
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
index cb5218dd06..7f986b177f 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
@@ -5,41 +5,41 @@ namespace FEXCore::IR {
 constexpr std::tuple<uint8_t, uint8_t, X86Tables::OpDispatchPtr> OpDispatch_DDDTable[] = {
   {0x0C, 1, &OpDispatchBuilder::PI2FWOp},
-  {0x0D, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<4, false>},
+  {0x0D, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
 
   {0x1C, 1, &OpDispatchBuilder::PF2IWOp},
-  {0x1D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, false>},
+  {0x1D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false, false>},
 
-  {0x86, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, 4>},
-  {0x87, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, 4>},
+  {0x86, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>},
+  {0x87, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>},
   {0x8A, 1, &OpDispatchBuilder::PFNACCOp},
   {0x8E, 1, &OpDispatchBuilder::PFPNACCOp},
   {0x90, 1, &OpDispatchBuilder::VPFCMPOp<1>},
-  {0x94, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, 4>},
-  {0x96, 1, &OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRECP, 4>},
-  {0x97, 1, &OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRSQRT, 4>},
+  {0x94, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>},
+  {0x96, 1, &OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRECP, OpSize::i32Bit>},
+  {0x97, 1, &OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRSQRT, OpSize::i32Bit>},
 
-  {0x9A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, 4>},
-  {0x9E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, 4>},
+  {0x9A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
+  {0x9E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i32Bit>},
 
   {0xA0, 1, &OpDispatchBuilder::VPFCMPOp<2>},
-  {0xA4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 4>},
+  {0xA4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>},
   // Can be treated as a move
   {0xA6, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
   {0xA7, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
 
-  {0xAA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VFSUB, 4>},
-  {0xAE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, 4>},
+  {0xAA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VFSUB, OpSize::i32Bit>},
+  {0xAE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i32Bit>},
 
   {0xB0, 1, &OpDispatchBuilder::VPFCMPOp<0>},
-  {0xB4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, 4>},
+  {0xB4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>},
   // Can be treated as a move
   {0xB6, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
   {0xB7, 1, &OpDispatchBuilder::PMULHRWOp},
 
   {0xBB, 1, &OpDispatchBuilder::PSWAPDOp},
 
-  {0xBF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 1>},
+  {0xBF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>},
 };
 } // namespace FEXCore::IR
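The table files in this patch route through `OpDispatchBuilder::Bind`, which adapts a member function taking extra trailing arguments (an IR op, and now an `OpSize` instead of a bare integer) into the uniform `OpDispatchPtr` signature that the dispatch tables store. The mechanism is presumably along these lines; this is a sketch under that assumption, not FEX's actual definition:

```cpp
// Sketch of a Bind-style adapter: the member pointer and its bound trailing
// arguments are non-type template parameters, so each table entry names one
// concrete instantiation with the table's uniform signature.
struct DecodedOp;

struct Dispatcher {
  using Handler = void (Dispatcher::*)(DecodedOp*);

  void VectorALUOp(DecodedOp* Op, int IROp, unsigned ElementSize) { /* emit IR here */ }

  template<auto Fn, auto... Args>
  void Bind(DecodedOp* Op) {
    (this->*Fn)(Op, Args...);
  }
};

// A table entry then stores one concrete instantiation:
constexpr Dispatcher::Handler Entry = &Dispatcher::Bind<&Dispatcher::VectorALUOp, 1, 2u>;
```

Switching the bound size argument from `4` to `OpSize::i32Bit` changes nothing about this dispatch machinery; it only makes each instantiation's element width a typed value.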
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F38Tables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F38Tables.h
index b5c9702c70..8ae2a47a84 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F38Tables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F38Tables.h
@@ -11,64 +11,64 @@ constexpr uint16_t PF_38_F3 = (1U << 2);
 constexpr std::tuple<uint16_t, uint8_t, X86Tables::OpDispatchPtr> OpDispatch_H0F38Table[] = {
   {OPD(PF_38_NONE, 0x00), 1, &OpDispatchBuilder::PSHUFBOp},
   {OPD(PF_38_66, 0x00), 1, &OpDispatchBuilder::PSHUFBOp},
-  {OPD(PF_38_NONE, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, 2>},
-  {OPD(PF_38_66, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, 2>},
-  {OPD(PF_38_NONE, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, 4>},
-  {OPD(PF_38_66, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, 4>},
+  {OPD(PF_38_NONE, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i16Bit>},
+  {OPD(PF_38_NONE, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i32Bit>},
   {OPD(PF_38_NONE, 0x03), 1, &OpDispatchBuilder::PHADDS},
   {OPD(PF_38_66, 0x03), 1, &OpDispatchBuilder::PHADDS},
 
   {OPD(PF_38_NONE, 0x04), 1, &OpDispatchBuilder::PMADDUBSW},
   {OPD(PF_38_66, 0x04), 1, &OpDispatchBuilder::PMADDUBSW},
-  {OPD(PF_38_NONE, 0x05), 1, &OpDispatchBuilder::PHSUB<2>},
-  {OPD(PF_38_66, 0x05), 1, &OpDispatchBuilder::PHSUB<2>},
-  {OPD(PF_38_NONE, 0x06), 1, &OpDispatchBuilder::PHSUB<4>},
-  {OPD(PF_38_66, 0x06), 1, &OpDispatchBuilder::PHSUB<4>},
+  {OPD(PF_38_NONE, 0x05), 1, &OpDispatchBuilder::PHSUB<OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x05), 1, &OpDispatchBuilder::PHSUB<OpSize::i16Bit>},
+  {OPD(PF_38_NONE, 0x06), 1, &OpDispatchBuilder::PHSUB<OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x06), 1, &OpDispatchBuilder::PHSUB<OpSize::i32Bit>},
   {OPD(PF_38_NONE, 0x07), 1, &OpDispatchBuilder::PHSUBS},
   {OPD(PF_38_66, 0x07), 1, &OpDispatchBuilder::PHSUBS},
 
-  {OPD(PF_38_NONE, 0x08), 1, &OpDispatchBuilder::PSIGN<1>},
-  {OPD(PF_38_66, 0x08), 1, &OpDispatchBuilder::PSIGN<1>},
-  {OPD(PF_38_NONE, 0x09), 1, &OpDispatchBuilder::PSIGN<2>},
-  {OPD(PF_38_66, 0x09), 1, &OpDispatchBuilder::PSIGN<2>},
-  {OPD(PF_38_NONE, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>},
-  {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<4>},
+  {OPD(PF_38_NONE, 0x08), 1, &OpDispatchBuilder::PSIGN<OpSize::i8Bit>},
+  {OPD(PF_38_66, 0x08), 1, &OpDispatchBuilder::PSIGN<OpSize::i8Bit>},
+  {OPD(PF_38_NONE, 0x09), 1, &OpDispatchBuilder::PSIGN<OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x09), 1, &OpDispatchBuilder::PSIGN<OpSize::i16Bit>},
+  {OPD(PF_38_NONE, 0x0A), 1, &OpDispatchBuilder::PSIGN<OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<OpSize::i32Bit>},
   {OPD(PF_38_NONE, 0x0B), 1, &OpDispatchBuilder::PMULHRSW},
   {OPD(PF_38_66, 0x0B), 1, &OpDispatchBuilder::PMULHRSW},
 
-  {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 1>},
-  {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 4>},
-  {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, 8>},
+  {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i8Bit>},
+  {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i64Bit>},
   {OPD(PF_38_66, 0x17), 1, &OpDispatchBuilder::PTestOp},
-  {OPD(PF_38_NONE, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 1>},
-  {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 1>},
-  {OPD(PF_38_NONE, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 2>},
-  {OPD(PF_38_66, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 2>},
-  {OPD(PF_38_NONE, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 4>},
-  {OPD(PF_38_66, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, 4>},
-  {OPD(PF_38_66, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, true>},
-  {OPD(PF_38_66, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, true>},
-  {OPD(PF_38_66, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, true>},
-  {OPD(PF_38_66, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, true>},
-  {OPD(PF_38_66, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, true>},
-  {OPD(PF_38_66, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, true>},
-  {OPD(PF_38_66, 0x28), 1, &OpDispatchBuilder::PMULLOp<4, true>},
-  {OPD(PF_38_66, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 8>},
+  {OPD(PF_38_NONE, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>},
+  {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>},
+  {OPD(PF_38_NONE, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>},
+  {OPD(PF_38_NONE, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, true>},
+  {OPD(PF_38_66, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, true>},
+  {OPD(PF_38_66, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, true>},
+  {OPD(PF_38_66, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, true>},
+  {OPD(PF_38_66, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, true>},
+  {OPD(PF_38_66, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, true>},
+  {OPD(PF_38_66, 0x28), 1, &OpDispatchBuilder::PMULLOp<OpSize::i32Bit, true>},
+  {OPD(PF_38_66, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i64Bit>},
   {OPD(PF_38_66, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp},
-  {OPD(PF_38_66, 0x2B), 1, &OpDispatchBuilder::PACKUSOp<4>},
-  {OPD(PF_38_66, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<1, 2, false>},
-  {OPD(PF_38_66, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<1, 4, false>},
-  {OPD(PF_38_66, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<1, 8, false>},
-  {OPD(PF_38_66, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<2, 4, false>},
-  {OPD(PF_38_66, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<2, 8, false>},
-  {OPD(PF_38_66, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<4, 8, false>},
-  {OPD(PF_38_66, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 8>},
-  {OPD(PF_38_66, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, 1>},
-  {OPD(PF_38_66, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, 4>},
-  {OPD(PF_38_66, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, 2>},
-  {OPD(PF_38_66, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, 4>},
-  {OPD(PF_38_66, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 1>},
-  {OPD(PF_38_66, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 4>},
-  {OPD(PF_38_66, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 2>},
-  {OPD(PF_38_66, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 4>},
-  {OPD(PF_38_66, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 4>},
+  {OPD(PF_38_66, 0x2B), 1, &OpDispatchBuilder::PACKUSOp<OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, false>},
+  {OPD(PF_38_66, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, false>},
+  {OPD(PF_38_66, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, false>},
+  {OPD(PF_38_66, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, false>},
+  {OPD(PF_38_66, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, false>},
+  {OPD(PF_38_66, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, false>},
+  {OPD(PF_38_66, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i64Bit>},
+  {OPD(PF_38_66, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i8Bit>},
+  {OPD(PF_38_66, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i8Bit>},
+  {OPD(PF_38_66, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i16Bit>},
+  {OPD(PF_38_66, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i32Bit>},
+  {OPD(PF_38_66, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i32Bit>},
   {OPD(PF_38_66, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp},
 
   {OPD(PF_38_NONE, 0xF0), 2, &OpDispatchBuilder::MOVBEOp},
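Both the 0F38 table above and the 0F3A table below key their entries with an `OPD` macro that packs the mandatory-prefix class (and, for 0F3A, the REX.W bit) together with the opcode byte into one lookup index. The exact layout is FEX's own; the shape below is an assumption for illustration only:

```cpp
#include <cstdint>

// Hypothetical packing: prefix class in the high bits, opcode in the low byte.
constexpr uint16_t OPD_0F38(uint16_t Prefix, uint8_t Opcode) {
  return static_cast<uint16_t>((Prefix << 8) | Opcode);
}

// The 0F3A variant also folds in REX.W, since e.g. PINSRQ differs from
// PINSRD only by that bit.
constexpr uint16_t OPD_0F3A(uint16_t REXW, uint16_t Prefix, uint8_t Opcode) {
  return static_cast<uint16_t>((REXW << 9) | (Prefix << 8) | Opcode);
}

static_assert(OPD_0F3A(1, 1, 0x22) != OPD_0F3A(0, 1, 0x22)); // PINSRQ vs PINSRD
```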
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h
index b346129d7d..2e142e356d 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h
@@ -7,27 +7,27 @@ namespace FEXCore::IR {
 #define PF_3A_NONE 0
 #define PF_3A_66 1
 constexpr std::tuple<uint16_t, uint8_t, X86Tables::OpDispatchPtr> OpDispatch_H0F3ATable[] = {
-  {OPD(0, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<4>},
-  {OPD(0, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<8>},
-  {OPD(0, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<4>},
-  {OPD(0, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<8>},
-  {OPD(0, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<4>},
-  {OPD(0, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<8>},
-  {OPD(0, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<2>},
+  {OPD(0, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<OpSize::i64Bit>},
+  {OPD(0, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i64Bit>},
+  {OPD(0, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<OpSize::i64Bit>},
+  {OPD(0, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<OpSize::i16Bit>},
 
   {OPD(0, PF_3A_NONE, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
   {OPD(0, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
-  {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 1>},
-  {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>},
-  {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>},
-  {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 4>},
+  {OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>},
+  {OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
+  {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
 
-  {OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<1>},
+  {OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<OpSize::i8Bit>},
   {OPD(0, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp},
-  {OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<4>},
-  {OPD(0, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<4>},
-  {OPD(0, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<8>},
+  {OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<OpSize::i32Bit>},
+  {OPD(0, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<OpSize::i64Bit>},
   {OPD(0, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp},
 
   {OPD(0, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp},
@@ -40,8 +40,8 @@ constexpr std::tuple OpDis
 constexpr std::tuple<uint16_t, uint8_t, X86Tables::OpDispatchPtr> OpDispatch_H0F3ATable_64[] = {
   {OPD(1, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
-  {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 8>},
-  {OPD(1, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<8>},
+  {OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i64Bit>},
+  {OPD(1, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i64Bit>},
 };
 
 #undef PF_3A_NONE
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h
index e26f10e2fd..e42ffe10ff 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h
@@ -66,30 +66,30 @@ constexpr std::tuple OpDis
   {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_F3, 7), 1, &OpDispatchBuilder::RDPIDOp},
 
   // GROUP 12
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 2>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 2>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 2>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i16Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i16Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i16Bit>},
 
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 2>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 2>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 2>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i16Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i16Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i16Bit>},
 
   // GROUP 13
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 4>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 4>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 4>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i32Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i32Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i32Bit>},
 
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 4>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, 4>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 4>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i32Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i32Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i32Bit>},
 
   // GROUP 14
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 8>},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 8>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i64Bit>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i64Bit>},
 
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, 8>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i64Bit>},
   {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 3), 1, &OpDispatchBuilder::PSRLDQ},
-  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, 8>},
+  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i64Bit>},
   {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 7), 1, &OpDispatchBuilder::PSLLDQ},
 
   // GROUP 15
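The GROUP 12/13/14 rewrites above encode the long-standing x86 convention that these immediate shift groups operate on words, doublewords, and quadwords respectively; with typed sizes the mapping reads directly as `OpSize::i16Bit`/`i32Bit`/`i64Bit`. If `OpSize` encodes byte widths as assumed earlier, that mapping can even be checked at compile time:

```cpp
#include <cstdint>

enum class OpSize : uint8_t { i16Bit = 2, i32Bit = 4, i64Bit = 8 };

// GROUP 12 -> word shifts, GROUP 13 -> doubleword, GROUP 14 -> quadword.
static_assert(static_cast<uint8_t>(OpSize::i16Bit) == sizeof(uint16_t));
static_assert(static_cast<uint8_t>(OpSize::i32Bit) == sizeof(uint32_t));
static_assert(static_cast<uint8_t>(OpSize::i64Bit) == sizeof(uint64_t));
```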
&OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, 16>}, + {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int}, + {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int}, + {0x2E, 2, &OpDispatchBuilder::UCOMISxOp}, + {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i32Bit>}, + {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, OpSize::i32Bit>}, + {0x52, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>}, + {0x53, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>}, + {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>}, + {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>}, + {0x56, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>}, {0x57, 1, &OpDispatchBuilder::VectorXOROp}, - {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, 4>}, - {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, 4>}, - {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, 8, 4, false>}, - {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<4, false>}, - {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, 4>}, - {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, 4>}, - {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, 4>}, - {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 4>}, - {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 1>}, - {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 2>}, - {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, - {0x63, 1, &OpDispatchBuilder::PACKSSOp<2>}, - {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 1>}, - {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, - {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 4>}, - {0x67, 1, &OpDispatchBuilder::PACKUSOp<2>}, - {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 1>}, - {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 2>}, - {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 4>}, - {0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>}, + {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i32Bit>}, + {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>}, + {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit, false>}, + {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float}, + {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>}, + {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>}, + {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, OpSize::i32Bit>}, + {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>}, + {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i8Bit>}, + {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i16Bit>}, + {0x62, 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i32Bit>}, + {0x63, 1, &OpDispatchBuilder::PACKSSOp}, + {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>}, + {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>}, + {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>}, + {0x67, 1, &OpDispatchBuilder::PACKUSOp}, + {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i8Bit>}, + {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i16Bit>}, + {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i32Bit>}, + {0x6B, 1, &OpDispatchBuilder::PACKSSOp}, {0x70, 1, &OpDispatchBuilder::PSHUFW8ByteOp}, - {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 1>}, - {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 2>}, - {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 4>}, + {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>}, + {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>}, + {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>}, {0x77, 1, &OpDispatchBuilder::X87EMMS}, - {0xC2, 1, &OpDispatchBuilder::VFCMPOp<4>}, - {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 4>}, + {0xC2, 1, &OpDispatchBuilder::VFCMPOp}, + {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, OpSize::i32Bit>}, - {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 2>}, - {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 4>}, - {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 8>}, - {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 8>}, - {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 2>}, + {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i16Bit>}, + {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i32Bit>}, + {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i64Bit>}, + {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i64Bit>}, + {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i16Bit>}, {0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB - {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 1>}, - {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 2>}, - {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, 1>}, - {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, 8>}, - {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, 1>}, - {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, 2>}, - {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 1>}, - {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, - {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 1>}, - {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 2>}, - {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 4>}, - {0xE3, 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 2>}, + {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>}, + {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>}, + {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>}, + {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i64Bit>}, + {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>}, + {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>}, + {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>}, + {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>}, + {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>}, + {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i16Bit>}, + {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i32Bit>}, + {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>}, {0xE4, 1, &OpDispatchBuilder::PMULHW}, {0xE5, 1, &OpDispatchBuilder::PMULHW}, {0xE7, 1, &OpDispatchBuilder::MOVVectorNTOp}, - {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, 1>}, - {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, 2>}, - {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, 2>}, - {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, 8>}, - {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, 1>}, - {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, 2>}, - {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 2>}, + {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>}, + {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>}, + {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>}, + {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i64Bit>}, + {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>}, + {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>}, + {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>}, {0xEF, 1, &OpDispatchBuilder::VectorXOROp}, - {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 2>}, - {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 4>}, - {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 8>}, - {0xF4, 1, &OpDispatchBuilder::PMULLOp<4, false>}, + {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i16Bit>}, + {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i32Bit>}, + {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i64Bit>}, + {0xF4, 1, &OpDispatchBuilder::PMULLOp}, {0xF5, 1, &OpDispatchBuilder::PMADDWD}, {0xF6, 1, &OpDispatchBuilder::PSADBW}, {0xF7, 1, &OpDispatchBuilder::MASKMOVOp}, - {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 1>}, - {0xF9, 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 2>}, - {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 4>}, - {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 8>}, - {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 1>}, - {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 2>}, - {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 4>}, + {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i8Bit>}, + {0xF9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i16Bit>}, + {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i32Bit>}, + {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i64Bit>}, + {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i8Bit>}, + {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i16Bit>}, + {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i32Bit>}, // FEX reserved instructions {0x37, 1, &OpDispatchBuilder::CallbackReturnOp}, @@ -151,21 +151,21 @@ constexpr std::tuple OpDisp {0x10, 2, &OpDispatchBuilder::MOVSSOp}, {0x12, 1, &OpDispatchBuilder::VMOVSLDUPOp}, {0x16, 1, &OpDispatchBuilder::VMOVSHDUPOp}, - {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<4>}, + {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR}, {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp}, - {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<4, false>}, - {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<4, true>}, - {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, - {0x52, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, - {0x53, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, - {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<8, 4>}, - {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, false>}, - {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR}, + {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR}, + {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, + {0x52, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, + {0x53, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, + {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float}, + {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int}, + {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x6F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, false>}, {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>}, @@ -173,142 +173,142 @@ constexpr std::tuple OpDisp {0xB8, 1, &OpDispatchBuilder::PopcountOp}, {0xBC, 1, &OpDispatchBuilder::TZCNT}, 
{0xBD, 1, &OpDispatchBuilder::LZCNT}, - {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp<4>}, + {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp}, {0xD6, 1, &OpDispatchBuilder::MOVQ2DQ}, - {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<4, true>}, + {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float}, }; constexpr std::tuple OpDispatch_SecondaryRepNEModTables[] = { {0x10, 2, &OpDispatchBuilder::MOVSDOp}, {0x12, 1, &OpDispatchBuilder::MOVDDUPOp}, - {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<8>}, + {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR}, {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp}, - {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, false>}, - {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<8, true>}, - {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, + {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR}, + {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR}, + {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp}, // x52 = Invalid - {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<4, 8>}, - {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, - {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float}, + {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, + {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp}, {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, true>}, - {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, 4>}, - {0x7D, 1, &OpDispatchBuilder::HSUBP<4>}, - {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<4>}, + {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i32Bit>}, + {0x7D, 1, &OpDispatchBuilder::HSUBP}, + {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp}, {0xD6, 1, &OpDispatchBuilder::MOVQ2DQ}, - {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp<8>}, - {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, true>}, + {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp}, + {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int}, {0xF0, 1, &OpDispatchBuilder::MOVVectorUnalignedOp}, }; constexpr std::tuple OpDispatch_SecondaryOpSizeModTables[] = { {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp}, {0x12, 2, &OpDispatchBuilder::MOVLPOp}, - {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, - {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 8>}, + {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i64Bit>}, + {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i64Bit>}, {0x16, 2, &OpDispatchBuilder::MOVHPDOp}, {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp}, {0x2A, 1, &OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float}, {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp}, - {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, false>}, - {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<8, true, true>}, - {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<8>}, + {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int}, + {0x2D, 
1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int}, + {0x2E, 2, &OpDispatchBuilder::UCOMISxOp}, - {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, 8>}, - {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, 8>}, - {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, 16>}, - {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, - {0x56, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, 16>}, + {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i64Bit>}, + {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, OpSize::i64Bit>}, + {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>}, + {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>}, + {0x56, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>}, {0x57, 1, &OpDispatchBuilder::VectorXOROp}, - {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, 8>}, - {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, 8>}, - {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, 4, 8, false>}, - {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<4, false, true>}, - {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, 8>}, - {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, 8>}, - {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, 8>}, - {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 8>}, - {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 1>}, - {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 2>}, - {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 4>}, - {0x63, 1, &OpDispatchBuilder::PACKSSOp<2>}, - {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 1>}, - {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 2>}, - {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, 4>}, - {0x67, 1, &OpDispatchBuilder::PACKUSOp<2>}, - {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 1>}, - {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 2>}, - {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 4>}, - {0x6B, 1, &OpDispatchBuilder::PACKSSOp<4>}, - {0x6C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, 8>}, - {0x6D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, 8>}, + {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i64Bit>}, + {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i64Bit>}, + {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit, false>}, + {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int}, + {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>}, + {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i64Bit>}, + {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, OpSize::i64Bit>}, + {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, 
OpSize::i64Bit>}, + {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i8Bit>}, + {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i16Bit>}, + {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i32Bit>}, + {0x63, 1, &OpDispatchBuilder::PACKSSOp}, + {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>}, + {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>}, + {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>}, + {0x67, 1, &OpDispatchBuilder::PACKUSOp}, + {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i8Bit>}, + {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i16Bit>}, + {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i32Bit>}, + {0x6B, 1, &OpDispatchBuilder::PACKSSOp}, + {0x6C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i64Bit>}, + {0x6D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i64Bit>}, {0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>}, {0x6F, 1, &OpDispatchBuilder::MOVVectorAlignedOp}, {0x70, 1, &OpDispatchBuilder::PSHUFDOp}, - {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 1>}, - {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 2>}, - {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, 4>}, + {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>}, + {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>}, + {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>}, {0x78, 1, nullptr}, // GROUP 17 - {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, 8>}, - {0x7D, 1, &OpDispatchBuilder::HSUBP<8>}, + {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i64Bit>}, + {0x7D, 1, &OpDispatchBuilder::HSUBP}, {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>}, {0x7F, 1, &OpDispatchBuilder::MOVVectorAlignedOp}, - {0xC2, 1, &OpDispatchBuilder::VFCMPOp<8>}, - {0xC4, 1, &OpDispatchBuilder::PINSROp<2>}, - {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, 2>}, - {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, 8>}, + {0xC2, 1, &OpDispatchBuilder::VFCMPOp}, + {0xC4, 1, &OpDispatchBuilder::PINSROp}, + {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>}, + {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, OpSize::i64Bit>}, - {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<8>}, - {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 2>}, - {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 4>}, - {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, 8>}, - {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 8>}, - {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, 2>}, + {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp}, + {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i16Bit>}, + {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i32Bit>}, + {0xD3, 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i64Bit>}, + {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i64Bit>}, + {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i16Bit>}, {0xD6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>}, {0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB - {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 1>}, - {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, 2>}, - {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, 1>}, - {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, 16>}, - {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, 1>}, - {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, 2>}, - {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, 1>}, - {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, 8>}, - {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 1>}, - {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 2>}, - {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, 4>}, - {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, 2>}, + {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>}, + {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>}, + {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>}, + {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>}, + {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>}, + {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>}, + {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>}, + {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>}, + {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>}, + {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i16Bit>}, + {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i32Bit>}, + {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>}, {0xE4, 1, &OpDispatchBuilder::PMULHW}, {0xE5, 1, &OpDispatchBuilder::PMULHW}, - {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<8, true, false>}, + {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int}, {0xE7, 1, &OpDispatchBuilder::MOVVectorNTOp}, - {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, 1>}, - {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, 2>}, - {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, 2>}, - {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, 16>}, - {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, 1>}, - {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, 2>}, - {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, 2>}, + {0xE8, 1, 
&OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>}, + {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>}, + {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>}, + {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>}, + {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>}, + {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>}, + {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>}, {0xEF, 1, &OpDispatchBuilder::VectorXOROp}, - {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 2>}, - {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 4>}, - {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, 8>}, - {0xF4, 1, &OpDispatchBuilder::PMULLOp<4, false>}, + {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i16Bit>}, + {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i32Bit>}, + {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i64Bit>}, + {0xF4, 1, &OpDispatchBuilder::PMULLOp}, {0xF5, 1, &OpDispatchBuilder::PMADDWD}, {0xF6, 1, &OpDispatchBuilder::PSADBW}, {0xF7, 1, &OpDispatchBuilder::MASKMOVOp}, - {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 1>}, - {0xF9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 2>}, - {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 4>}, - {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, 8>}, - {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 1>}, - {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 2>}, - {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, 4>}, + {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i8Bit>}, + {0xF9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i16Bit>}, + {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i32Bit>}, + {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i64Bit>}, + {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i8Bit>}, + {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i16Bit>}, + {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i32Bit>}, }; constexpr std::tuple OpDispatch_TwoByteOpTable_64[] = { diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 28d31266af..cf9e3e5752 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -30,7 +30,7 @@ void OpDispatchBuilder::MOVVectorAlignedOp(OpcodeArgs) { return; } Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } void OpDispatchBuilder::MOVVectorUnalignedOp(OpcodeArgs) { @@ -38,12 +38,12 @@ void OpDispatchBuilder::MOVVectorUnalignedOp(OpcodeArgs) { // Nop return; } - Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); - 
StoreResult(FPRClass, Op, Src, 1); + Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit}); + StoreResult(FPRClass, Op, Src, OpSize::i8Bit); } void OpDispatchBuilder::MOVVectorNTOp(OpcodeArgs) { - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); if (Op->Dest.IsGPR() && Size >= OpSize::i128Bit) { ///< MOVNTDQA load non-temporal comes from SSE4.1 and is extended by AVX/AVX2. @@ -80,19 +80,19 @@ void OpDispatchBuilder::VMOVAPS_VMOVAPDOp(OpcodeArgs) { if (Is128Bit && Op->Dest.IsGPR()) { Src = _VMov(OpSize::i128Bit, Src); } - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } void OpDispatchBuilder::VMOVUPS_VMOVUPDOp(OpcodeArgs) { const auto SrcSize = GetSrcSize(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); + Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit}); if (Is128Bit && Op->Dest.IsGPR()) { Src = _VMov(OpSize::i128Bit, Src); } - StoreResult(FPRClass, Op, Src, 1); + StoreResult(FPRClass, Op, Src, OpSize::i8Bit); } void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) { @@ -102,14 +102,14 @@ void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) { Ref Src = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, OpSize::i128Bit, Op->Flags); auto Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } else { // If the destination is a GPR then the source is memory // xmm1[127:64] = src Ref Src = MakeSegmentAddress(Op, Op->Src[0]); Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, OpSize::i128Bit, Op->Flags); auto Result = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, Dest, 1, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } } else { // In this case memory is the destination and the high bits of the XMM are source @@ -126,7 +126,7 @@ void OpDispatchBuilder::VMOVHPOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = OpSize::i64Bit}); Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } else { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i128Bit}); Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src, Src); @@ -143,11 +143,11 @@ void OpDispatchBuilder::MOVLPOp(OpcodeArgs) { auto Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Dest, Src); StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, OpSize::i128Bit, OpSize::i128Bit); } else { - auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src = MakeSegmentAddress(Op, Op->Src[0]); Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); auto Result = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, Dest, 0, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } } else { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i64Bit}); @@ -167,45 +167,45 @@ void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) { // Bits[127:64] come from Src1[127:64] Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = OpSize::i64Bit}); Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 
1, 1, Src2, Src1); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } else { ///< VMOVHLPS/PD xmm1, xmm2, xmm3 Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags, {.Align = OpSize::i128Bit}); Ref Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } } void OpDispatchBuilder::VMOVSHDUPOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = _VTrn2(SrcSize, OpSize::i32Bit, Src, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VMOVSLDUPOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = _VTrn(SrcSize, OpSize::i32Bit, Src, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::MOVScalarOpImpl(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::MOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize) { if (Op->Dest.IsGPR() && Op->Src[0].IsGPR()) { // MOVSS/SD xmm1, xmm2 Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto Result = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } else if (Op->Dest.IsGPR()) { // MOVSS/SD xmm1, mem32/mem64 // xmm1[127:0] <- zext(mem32/mem64) Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ElementSize, Op->Flags); - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } else { // MOVSS/SD mem32/mem64, xmm1 Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, OpSize::iInvalid); } } @@ -217,21 +217,21 @@ void OpDispatchBuilder::MOVSDOp(OpcodeArgs) { MOVScalarOpImpl(Op, OpSize::i64Bit); } -void OpDispatchBuilder::VMOVScalarOpImpl(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VMOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize) { if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Src[1].IsGPR()) { // VMOVSS/SD xmm1, xmm2, xmm3 Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } else if (Op->Dest.IsGPR()) { // VMOVSS/SD xmm1, mem32/mem64 Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], ElementSize, Op->Flags); - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } else { // VMOVSS/SD mem32/mem64, xmm1 Ref Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, ElementSize, OpSize::iInvalid); } } @@ -243,23 +243,23 @@ void OpDispatchBuilder::VMOVSSOp(OpcodeArgs) { VMOVScalarOpImpl(Op, OpSize::i32Bit); } -void OpDispatchBuilder::VectorALUOp(OpcodeArgs, IROps IROp, size_t ElementSize) { - const auto Size = GetSrcSize(Op); +void 
OpDispatchBuilder::VectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Dest, Src)); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } void OpDispatchBuilder::VectorXOROp(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); // Special case for vector xor with itself being the optimal way for x86 to zero vector registers. if (Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { const auto ZeroRegister = LoadZeroVector(Size); - StoreResult(FPRClass, Op, ZeroRegister, -1); + StoreResult(FPRClass, Op, ZeroRegister, OpSize::iInvalid); return; } @@ -267,23 +267,23 @@ void OpDispatchBuilder::VectorXOROp(OpcodeArgs) { VectorALUOp(Op, OP_VXOR, Size); } -void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs, IROps IROp, size_t ElementSize) { - const auto Size = GetSrcSize(Op); +void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src1, Src2)); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } void OpDispatchBuilder::AVXVectorXOROp(OpcodeArgs) { // Special case for vector xor with itself being the optimal way for x86 to zero vector registers. if (Op->Src[0].IsGPR() && Op->Src[1].IsGPR() && Op->Src[0].Data.GPR.GPR == Op->Src[1].Data.GPR.GPR) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto ZeroRegister = LoadZeroVector(DstSize); - StoreResult(FPRClass, Op, ZeroRegister, -1); + StoreResult(FPRClass, Op, ZeroRegister, OpSize::iInvalid); return; } @@ -291,37 +291,37 @@ void OpDispatchBuilder::AVXVectorXOROp(OpcodeArgs) { AVXVectorALUOp(Op, OP_VXOR, OpSize::i128Bit); } -void OpDispatchBuilder::VectorALUROp(OpcodeArgs, IROps IROp, size_t ElementSize) { - const auto Size = GetSrcSize(Op); +void OpDispatchBuilder::VectorALUROp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); DeriveOp(ALUOp, IROp, _VAdd(Size, ElementSize, Src, Dest)); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } -Ref OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, +Ref OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
- const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op - DeriveOp(ALUOp, IROp, _VFAddScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); + DeriveOp(ALUOp, IROp, _VFAddScalarInsert(DstSize, ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } -template +template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Dest, Op->Src[0], false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); @@ -337,11 +337,11 @@ template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); template void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Src[0], Op->Src[1], true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); @@ -357,27 +357,27 @@ template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); template void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs); -Ref OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, size_t DstSize, size_t ElementSize, +Ref OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
- const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); // If OpSize == ElementSize then it only does the lower scalar op - DeriveOp(ALUOp, IROp, _VFSqrtScalarInsert(IR::SizeToOpSize(DstSize), ElementSize, Src1, Src2, ZeroUpperBits)); + DeriveOp(ALUOp, IROp, _VFSqrtScalarInsert(DstSize, ElementSize, Src1, Src2, ZeroUpperBits)); return ALUOp; } -template +template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Dest, Op->Src[0], false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); @@ -389,11 +389,11 @@ template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = VectorScalarInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Src[0], Op->Src[1], true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs); @@ -410,74 +410,74 @@ void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. const auto DstSize = GetGuestVectorLength(); - const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i64Bit : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i64Bit : OpSizeFromSrc(Op); Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // Always 32-bit. - const size_t ElementSize = OpSize::i32Bit; + const auto ElementSize = OpSize::i32Bit; // Always signed Dest = _VSToFVectorInsert(IR::SizeToOpSize(DstSize), ElementSize, ElementSize, Dest, Src, true, false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Dest, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Dest, DstSize, OpSize::iInvalid); } -Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, +Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. 
- auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags); - return _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits); + auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPROpSize(), Op->Flags); + return _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits); } else if (SrcSize != DstElementSize) { // If the source is from memory but the Source size and destination size aren't the same, // then it is more optimal to load in to a GPR and convert between GPR->FPR. // ARM GPR->FPR conversion supports different size source and destinations while FPR->FPR doesn't. auto Src2 = LoadSource(GPRClass, Op, Src2Op, Op->Flags); - return _VSToFGPRInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits); + return _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1, Src2, ZeroUpperBits); } // In the case of cvtsi2s{s,d} where the source and destination are the same size, // then it is more optimal to load in to the FPR register directly and convert there. auto Src2 = LoadSource(FPRClass, Op, Src2Op, Op->Flags); // Always signed - return _VSToFVectorInsert(IR::SizeToOpSize(DstSize), DstElementSize, DstElementSize, Src1, Src2, false, ZeroUpperBits); + return _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1, Src2, false, ZeroUpperBits); } -template +template void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); auto Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Dest, Op->Src[0], false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs); template void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); Ref Result = InsertCVTGPR_To_FPRImpl(Op, DstSize, DstElementSize, Op->Src[0], Op->Src[1], true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs); template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs); -Ref OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstSize, size_t DstElementSize, size_t SrcElementSize, - const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, - bool ZeroUpperBits) { +Ref OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, + IR::OpSize SrcElementSize, const X86Tables::DecodedOperand& Src1Op, + const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
- const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); @@ -485,21 +485,21 @@ Ref OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t Ds return _VFToFScalarInsert(IR::SizeToOpSize(DstSize), DstElementSize, SrcElementSize, Src1, Src2, ZeroUpperBits); } -template +template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); Ref Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0], false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float(OpcodeArgs); template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs) { const auto DstSize = GetGuestVectorLength(); Ref Result = InsertScalar_CVT_Float_To_FloatImpl(Op, DstSize, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1], true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs); @@ -519,12 +519,12 @@ RoundType OpDispatchBuilder::TranslateRoundType(uint8_t Mode) { return RoundControlSource ? Round_Host : SourceModes[RoundControl]; } -Ref OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, +Ref OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, uint64_t Mode, bool ZeroUpperBits) { // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar // element that we're going to operate on. 
- const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Src2Op, SrcSize, Op->Flags, {.AllowUpperGarbage = true}); @@ -535,33 +535,33 @@ Ref OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, size_t DstSize, size_t return ALUOp; } -template +template void OpDispatchBuilder::InsertScalarRound(OpcodeArgs) { const uint64_t Mode = Op->Src[1].Literal(); const auto DstSize = GetGuestVectorLength(); Ref Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::InsertScalarRound(OpcodeArgs); template void OpDispatchBuilder::InsertScalarRound(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs) { const uint64_t Mode = Op->Src[2].Literal(); const auto DstSize = GetGuestVectorLength(); Ref Result = InsertScalarRoundImpl(Op, DstSize, ElementSize, Op->Dest, Op->Src[0], Mode, true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs); template void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs); -Ref OpDispatchBuilder::InsertScalarFCMPOpImpl(OpSize Size, uint8_t OpDstSize, size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType, - bool ZeroUpperBits) { +Ref OpDispatchBuilder::InsertScalarFCMPOpImpl(OpSize Size, IR::OpSize OpDstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, + uint8_t CompType, bool ZeroUpperBits) { switch (CompType & 7) { case 0x0: // EQ return _VFCMPScalarInsert(Size, ElementSize, Src1, Src2, FloatCompareOp::EQ, ZeroUpperBits); @@ -591,27 +591,27 @@ Ref OpDispatchBuilder::InsertScalarFCMPOpImpl(OpSize Size, uint8_t OpDstSize, si FEX_UNREACHABLE; } -template +template void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs) { const uint8_t CompType = Op->Src[1].Literal(); const auto DstSize = GetGuestVectorLength(); - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags, {.AllowUpperGarbage = true}); - Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), GetDstSize(Op), ElementSize, Src1, Src2, CompType, false); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, false); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs); template void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs); -template +template void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) { const uint8_t CompType = Op->Src[2].Literal(); const auto DstSize = GetGuestVectorLength(); - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); // We load the full vector width when dealing with a source vector, // so that we don't do any unnecessary zero extension to the scalar @@ -619,50 +619,48 @@ void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) { Ref Src1 = 
LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags, {.AllowUpperGarbage = true}); - Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), GetDstSize(Op), ElementSize, Src1, Src2, CompType, true); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + Ref Result = InsertScalarFCMPOpImpl(IR::SizeToOpSize(DstSize), OpSizeFromDst(Op), ElementSize, Src1, Src2, CompType, true); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs); template void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs); -void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs, IROps IROp, size_t ElementSize) { +void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { // In the event of a scalar operation and a vector source, then // we can specify the entire vector length in order to avoid // unnecessary sign extension on the element to be operated on. // In the event of a memory operand, we load the exact element size. - const auto SrcSize = GetSrcSize(Op); - const auto OpSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); + DeriveOp(ALUOp, IROp, _VFSqrt(SrcSize, ElementSize, Src)); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } -void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs, IROps IROp, size_t ElementSize) { +void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { // In the event of a scalar operation and a vector source, then // we can specify the entire vector length in order to avoid // unnecessary sign extension on the element to be operated on. // In the event of a memory operand, we load the exact element size. - const auto SrcSize = GetSrcSize(Op); - const auto OpSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - DeriveOp(ALUOp, IROp, _VFSqrt(OpSize, ElementSize, Src)); + DeriveOp(ALUOp, IROp, _VFSqrt(SrcSize, ElementSize, Src)); // NOTE: We don't need to clear the upper lanes here, since the // IR ops make use of 128-bit AdvSimd for 128-bit cases, // which, on hardware with SVE, zero-extends as part of // storing into the destination. 
- StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } -void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_t ElementSize) { - const auto Size = GetSrcSize(Op); +void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -670,10 +668,10 @@ void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, size_ // Duplicate the lower bits auto Result = _VDupElement(Size, ElementSize, ALUOp, 0); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -template +template void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs) { VectorUnaryDuplicateOpImpl(Op, IROp, ElementSize); } @@ -682,7 +680,7 @@ template void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs); void OpDispatchBuilder::MOVQOp(OpcodeArgs, VectorOpType VectorType) { - const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // This instruction is a bit special that if the destination is a register then it'll ZEXT the 64bit source to 128bit if (Op->Dest.IsGPR()) { @@ -693,7 +691,7 @@ void OpDispatchBuilder::MOVQOp(OpcodeArgs, VectorOpType VectorType) { StoreXMMRegister_WithAVXInsert(VectorType, gprIndex, Reg); } else { // This is simple, just store the result - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } } @@ -702,12 +700,12 @@ void OpDispatchBuilder::MOVQMMXOp(OpcodeArgs) { if (MMXState == MMXState_X87) { ChgStateX87_MMX(); } - Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = 1}); - StoreResult(FPRClass, Op, Src, 1); + Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit}); + StoreResult(FPRClass, Op, Src, OpSize::i8Bit); } -void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, size_t ElementSize) { - auto Size = GetSrcSize(Op); +void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); uint8_t NumElements = Size / ElementSize; Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -724,7 +722,7 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, size_t ElementSize) { GPR = _Bfi(OpSize::i64Bit, 32, 31, GPR, GPR); // Shift right to only get the two sign bits we care about. GPR = _Lshr(OpSize::i64Bit, GPR, _Constant(62)); - StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1); + StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPROpSize(), OpSize::iInvalid); } else if (Size == OpSize::i128Bit && ElementSize == OpSize::i32Bit) { // Shift all the sign bits to the bottom of their respective elements. Src = _VUShrI(Size, OpSize::i32Bit, Src, 31); @@ -736,7 +734,7 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, size_t ElementSize) { Src = _VAddV(Size, OpSize::i32Bit, Src); // Extract to a GPR. 
Ref GPR = _VExtractToGPR(Size, OpSize::i32Bit, Src, 0); - StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPRSize(), -1); + StoreResult_WithOpSize(GPRClass, Op, Op->Dest, GPR, CTX->GetGPROpSize(), OpSize::iInvalid); } else { Ref CurrentVal = _Constant(0); @@ -751,12 +749,12 @@ void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, size_t ElementSize) { // Or it with the current value CurrentVal = _Or(OpSize::i64Bit, CurrentVal, Tmp); } - StoreResult(GPRClass, Op, CurrentVal, -1); + StoreResult(GPRClass, Op, CurrentVal, OpSize::iInvalid); } } void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto ExtractSize = Is256Bit ? OpSize::i32Bit : OpSize::i16Bit; @@ -768,8 +766,8 @@ void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { // Since we also handle the MM MOVMSKB here too, // we need to clamp the lower bound. - const auto VAdd1Size = std::max(SrcSize, OpSize::i128Bit); - const auto VAdd2Size = std::max(SrcSize / 2, OpSize::i64Bit); + const auto VAdd1Size = std::max(SrcSize, OpSize::i128Bit); + const auto VAdd2Size = std::max(IR::DivideOpSize(SrcSize, 2), OpSize::i64Bit); auto VAdd1 = _VAddP(VAdd1Size, OpSize::i8Bit, VAnd, VAnd); auto VAdd2 = _VAddP(VAdd2Size, OpSize::i8Bit, VAdd1, VAdd1); @@ -777,21 +775,21 @@ void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) { auto Result = _VExtractToGPR(SrcSize, ExtractSize, VAdd3, 0); - StoreResult(GPRClass, Op, Result, -1); + StoreResult(GPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs, size_t ElementSize) { - auto Size = GetSrcSize(Op); +void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto ALUOp = _VZip(Size, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } -void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, size_t ElementSize) { - const auto SrcSize = GetSrcSize(Op); +void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto SrcSize = OpSizeFromSrc(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -807,20 +805,20 @@ void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, size_t ElementSize) { Result = _VInsElement(SrcSize, OpSize::i128Bit, 1, 0, ZipLo, ZipHi); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs, size_t ElementSize) { - auto Size = GetSrcSize(Op); +void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto ALUOp = _VZip2(Size, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, ALUOp, -1); + StoreResult(FPRClass, Op, ALUOp, OpSize::iInvalid); } -void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, size_t ElementSize) { - const auto SrcSize = GetSrcSize(Op); +void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto SrcSize = OpSizeFromSrc(Op); const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -836,10 +834,10 @@ void 
OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, size_t ElementSize) { Result = _VInsElement(SrcSize, OpSize::i128Bit, 0, 1, ZipHi, ZipLo); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::GeneratePSHUFBMask(uint8_t SrcSize) { +Ref OpDispatchBuilder::GeneratePSHUFBMask(IR::OpSize SrcSize) { // PSHUFB doesn't 100% match VTBL behaviour // VTBL will set the element zero if the index is greater than // the number of elements in the array @@ -850,15 +848,15 @@ Ref OpDispatchBuilder::GeneratePSHUFBMask(uint8_t SrcSize) { // Bits [6:3] is reserved for 64-bit const uint8_t MaskImm = SrcSize == OpSize::i64Bit ? 0b1000'0111 : 0b1000'1111; - return _VectorImm(SrcSize, 1, MaskImm); + return _VectorImm(SrcSize, OpSize::i8Bit, MaskImm); } -Ref OpDispatchBuilder::PSHUFBOpImpl(uint8_t SrcSize, Ref Src1, Ref Src2, Ref MaskVector) { +Ref OpDispatchBuilder::PSHUFBOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, Ref MaskVector) { const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; // We perform the 256-bit version as two 128-bit operations due to // the lane splitting behavior, so cap the maximum size at 16. - const auto SanitizedSrcSize = std::min(SrcSize, OpSize::i128Bit); + const auto SanitizedSrcSize = std::min(SrcSize, OpSize::i128Bit); Ref MaskedIndices = _VAnd(SrcSize, SrcSize, Src2, MaskVector); @@ -873,24 +871,25 @@ Ref OpDispatchBuilder::PSHUFBOpImpl(uint8_t SrcSize, Ref Src1, Ref Src2, Ref Mas } void OpDispatchBuilder::PSHUFBOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSHUFBOpImpl(SrcSize, Src1, Src2, GeneratePSHUFBMask(SrcSize)); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPSHUFBOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PSHUFBOpImpl(SrcSize, Src1, Src2, GeneratePSHUFBMask(SrcSize)); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PShufWLane(size_t Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane, uint8_t Shuffle) { +Ref OpDispatchBuilder::PShufWLane(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane, + uint8_t Shuffle) { constexpr auto IdentityCopy = 0b11'10'01'00; const bool Is128BitLane = Size == OpSize::i128Bit; @@ -935,22 +934,22 @@ Ref OpDispatchBuilder::PShufWLane(size_t Size, FEXCore::IR::IndexNamedVectorCons void OpDispatchBuilder::PSHUFW8ByteOp(OpcodeArgs) { uint16_t Shuffle = Op->Src[1].Data.Literal.Value; - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Dest = PShufWLane(Size, FEXCore::IR::INDEXED_NAMED_VECTOR_PSHUFLW, true, Src, Shuffle); - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } void OpDispatchBuilder::PSHUFWOp(OpcodeArgs, bool Low) { uint16_t Shuffle = Op->Src[1].Data.Literal.Value; - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto IndexedVectorConstant = Low ? 
FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW : FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW; Ref Dest = PShufWLane(Size, IndexedVectorConstant, Low, Src, Shuffle); - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } Ref OpDispatchBuilder::Single128Bit4ByteVectorShuffle(Ref Src, uint8_t Shuffle) { @@ -1169,11 +1168,11 @@ Ref OpDispatchBuilder::Single128Bit4ByteVectorShuffle(Ref Src, uint8_t Shuffle) void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) { uint16_t Shuffle = Op->Src[1].Data.Literal.Value; Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - StoreResult(FPRClass, Op, Single128Bit4ByteVectorShuffle(Src, Shuffle), -1); + StoreResult(FPRClass, Op, Single128Bit4ByteVectorShuffle(Src, Shuffle), OpSize::iInvalid); } -void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low) { - const auto SrcSize = GetSrcSize(Op); +void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, IR::OpSize ElementSize, bool Low) { + const auto SrcSize = OpSizeFromSrc(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; auto Shuffle = Op->Src[1].Literal(); @@ -1218,10 +1217,10 @@ void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, size_t ElementSize, bool Low) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle) { +Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle) { // Since 256-bit variants and up don't lane cross, we can construct // everything in terms of the 128-variant, as each lane is essentially // its own 128-bit segment. 
@@ -1412,37 +1411,37 @@ Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, size_t DstSize, size_t ElementSize return Dest; } -void OpDispatchBuilder::SHUFOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::SHUFOp(OpcodeArgs, IR::OpSize ElementSize) { Ref Src1Node = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2Node = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); uint8_t Shuffle = Op->Src[1].Literal(); - Ref Result = SHUFOpImpl(Op, GetDstSize(Op), ElementSize, Src1Node, Src2Node, Shuffle); - StoreResult(FPRClass, Op, Result, -1); + Ref Result = SHUFOpImpl(Op, OpSizeFromDst(Op), ElementSize, Src1Node, Src2Node, Shuffle); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VSHUFOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VSHUFOp(OpcodeArgs, IR::OpSize ElementSize) { Ref Src1Node = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2Node = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); uint8_t Shuffle = Op->Src[2].Literal(); - Ref Result = SHUFOpImpl(Op, GetDstSize(Op), ElementSize, Src1Node, Src2Node, Shuffle); - StoreResult(FPRClass, Op, Result, -1); + Ref Result = SHUFOpImpl(Op, OpSizeFromDst(Op), ElementSize, Src1Node, Src2Node, Shuffle); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VANDNOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Dest = _VAndn(SrcSize, SrcSize, Src2, Src1); - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } -template +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -1456,16 +1455,16 @@ void OpDispatchBuilder::VHADDPOp(OpcodeArgs) { Dest = _VInsElement(SrcSize, OpSize::i64Bit, 2, 1, Dest, Res); } - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } -template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); +template void OpDispatchBuilder::VHADDPOp(OpcodeArgs); -void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); Ref Result {}; if (Op->Src[0].IsGPR()) { @@ -1473,26 +1472,26 @@ void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, size_t ElementSize) { Result = _VDupElement(DstSize, ElementSize, Src, 0); } else { // Get the address to broadcast from into a GPR. - Ref Address = MakeSegmentAddress(Op, Op->Src[0], CTX->GetGPRSize()); + Ref Address = MakeSegmentAddress(Op, Op->Src[0], CTX->GetGPROpSize()); Result = _VBroadcastFromMem(DstSize, ElementSize, Address); } // No need to zero-extend result, since implementations // use zero extending AdvSIMD or zeroing SVE loads internally. 
- StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tables::DecodedOperand& Src1Op, +Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) { - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); const auto NumElements = Size / ElementSize; const uint64_t Index = Imm.Literal() & (NumElements - 1); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, Size, Op->Flags); if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. - auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags); + auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPROpSize(), Op->Flags); return _VInsGPR(Size, ElementSize, Index, Src1, Src2); } @@ -1501,10 +1500,10 @@ Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, size_t ElementSize, const X86Tabl return _VLoadVectorElement(Size, ElementSize, Src1, Index, Src2); } -template +template void OpDispatchBuilder::PINSROp(OpcodeArgs) { Ref Result = PINSROpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::PINSROp(OpcodeArgs); @@ -1517,16 +1516,16 @@ void OpDispatchBuilder::VPINSRBOp(OpcodeArgs) { if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPINSRDQOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Result = PINSROpImpl(Op, SrcSize, Op->Src[0], Op->Src[1], Op->Src[2]); if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPINSRWOp(OpcodeArgs) { @@ -1534,7 +1533,7 @@ void OpDispatchBuilder::VPINSRWOp(OpcodeArgs) { if (Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } Ref OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, @@ -1544,7 +1543,7 @@ Ref OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperan uint8_t CountD = (ImmValue >> 4) & 0b11; const uint8_t ZMask = ImmValue & 0xF; - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Dest {}; if (ZMask != 0xF) { @@ -1585,16 +1584,16 @@ Ref OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperan void OpDispatchBuilder::InsertPSOp(OpcodeArgs) { Ref Result = InsertPSOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VINSERTPSOp(OpcodeArgs) { Ref Result = InsertPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::PExtrOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::PExtrOp(OpcodeArgs, IR::OpSize 
ElementSize) { + const auto DstSize = OpSizeFromDst(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); uint64_t Index = Op->Src[1].Literal(); @@ -1603,7 +1602,7 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs, size_t ElementSize) { // When the element size is 32-bit then it can be overriden as 64-bit because the encoding of PEXTRD/PEXTRQ // is the same except that REX.W or VEX.W is set to 1. Incredibly frustrating. // Use the destination size as the element size in this case. - size_t OverridenElementSize = ElementSize; + auto OverridenElementSize = ElementSize; if (ElementSize == OpSize::i32Bit) { OverridenElementSize = DstSize; } @@ -1613,10 +1612,10 @@ void OpDispatchBuilder::PExtrOp(OpcodeArgs, size_t ElementSize) { Index &= NumElements - 1; if (Op->Dest.IsGPR()) { - const uint8_t GPRSize = CTX->GetGPRSize(); + const auto GPRSize = CTX->GetGPROpSize(); // Extract already zero extends the result. Ref Result = _VExtractToGPR(OpSize::i128Bit, OverridenElementSize, Src, Index); - StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, -1); + StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Result, GPRSize, OpSize::iInvalid); return; } @@ -1635,7 +1634,7 @@ void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) { // A selector of zero is the same as doing a 128-bit vector move. if (Selector == 0) { Ref Result = DstIsXMM ? _VMov(OpSize::i128Bit, Src) : Src; - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, OpSize::iInvalid); return; } @@ -1644,59 +1643,59 @@ void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) { if (DstIsXMM) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, OpSize::iInvalid); } -Ref OpDispatchBuilder::PSIGNImpl(OpcodeArgs, size_t ElementSize, Ref Src1, Ref Src2) { - const auto Size = GetSrcSize(Op); +Ref OpDispatchBuilder::PSIGNImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src1, Ref Src2) { + const auto Size = OpSizeFromSrc(Op); Ref Control = _VSQSHL(Size, ElementSize, Src2, (ElementSize * 8) - 1); Control = _VSRSHR(Size, ElementSize, Control, (ElementSize * 8) - 1); return _VMul(Size, ElementSize, Src1, Control); } -template +template void OpDispatchBuilder::PSIGN(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Res = PSIGNImpl(Op, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } template void OpDispatchBuilder::PSIGN(OpcodeArgs); template void OpDispatchBuilder::PSIGN(OpcodeArgs); template void OpDispatchBuilder::PSIGN(OpcodeArgs); -template +template void OpDispatchBuilder::VPSIGN(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Res = PSIGNImpl(Op, ElementSize, Src1, Src2); - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } template void OpDispatchBuilder::VPSIGN(OpcodeArgs); template void OpDispatchBuilder::VPSIGN(OpcodeArgs); template void OpDispatchBuilder::VPSIGN(OpcodeArgs); -Ref OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec) { - const auto Size = GetSrcSize(Op); +Ref OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) { + const auto Size = OpSizeFromSrc(Op); // Incoming element 
size for the shift source is always 8 return _VUShrSWide(Size, ElementSize, Src, ShiftVec); } -void OpDispatchBuilder::PSRLDOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSRLDOp(OpcodeArgs, IR::OpSize ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSRLDOpImpl(Op, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VPSRLDOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VPSRLDOp(OpcodeArgs, IR::OpSize ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1707,25 +1706,25 @@ void OpDispatchBuilder::VPSRLDOp(OpcodeArgs, size_t ElementSize) { if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::PSRLI(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSRLI(OpcodeArgs, IR::OpSize ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); if (ShiftConstant == 0) [[unlikely]] { // Nothing to do, value is already in Dest. return; } - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Shift = _VUShrI(Size, ElementSize, Dest, ShiftConstant); - StoreResult(FPRClass, Op, Shift, -1); + StoreResult(FPRClass, Op, Shift, OpSize::iInvalid); } -void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, size_t ElementSize) { - const auto Size = GetSrcSize(Op); +void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; const uint64_t ShiftConstant = Op->Src[1].Literal(); @@ -1740,19 +1739,19 @@ void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, size_t ElementSize) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PSLLIImpl(OpcodeArgs, size_t ElementSize, Ref Src, uint64_t Shift) { +Ref OpDispatchBuilder::PSLLIImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, uint64_t Shift) { if (Shift == 0) [[unlikely]] { // If zero-shift then just return the source. return Src; } - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); return _VShlI(Size, ElementSize, Src, Shift); } -void OpDispatchBuilder::PSLLI(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSLLI(OpcodeArgs, IR::OpSize ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); if (ShiftConstant == 0) [[unlikely]] { // Nothing to do, value is already in Dest. 
@@ -1762,10 +1761,10 @@ void OpDispatchBuilder::PSLLI(OpcodeArgs, size_t ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Result = PSLLIImpl(Op, ElementSize, Dest, ShiftConstant); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VPSLLIOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VPSLLIOp(OpcodeArgs, IR::OpSize ElementSize) { const uint64_t ShiftConstant = Op->Src[1].Literal(); const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1776,54 +1775,54 @@ void OpDispatchBuilder::VPSLLIOp(OpcodeArgs, size_t ElementSize) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PSLLImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec) { - const auto Size = GetDstSize(Op); +Ref OpDispatchBuilder::PSLLImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) { + const auto Size = OpSizeFromDst(Op); // Incoming element size for the shift source is always 8 return _VUShlSWide(Size, ElementSize, Src, ShiftVec); } -void OpDispatchBuilder::PSLL(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSLL(OpcodeArgs, IR::OpSize ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSLLImpl(Op, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VPSLLOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VPSLLOp(OpcodeArgs, IR::OpSize ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], 16, Op->Flags); + Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], OpSize::i128Bit, Op->Flags); Ref Result = PSLLImpl(Op, ElementSize, Src1, Src2); if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, size_t ElementSize, Ref Src, Ref ShiftVec) { - const auto Size = GetDstSize(Op); +Ref OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) { + const auto Size = OpSizeFromDst(Op); // Incoming element size for the shift source is always 8 return _VSShrSWide(Size, ElementSize, Src, ShiftVec); } -void OpDispatchBuilder::PSRAOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSRAOp(OpcodeArgs, IR::OpSize ElementSize) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSRAOpImpl(Op, ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VPSRAOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VPSRAOp(OpcodeArgs, IR::OpSize ElementSize) { const auto DstSize = GetDstSize(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; @@ -1834,7 +1833,7 @@ void OpDispatchBuilder::VPSRAOp(OpcodeArgs, size_t ElementSize) { if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } 
void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { @@ -1844,7 +1843,7 @@ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { return; } - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Result = LoadZeroVector(Size); @@ -1852,11 +1851,11 @@ void OpDispatchBuilder::PSRLDQ(OpcodeArgs) { if (Shift < Size) { Result = _VExtr(Size, OpSize::i8Bit, Result, Dest, Shift); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; const uint64_t Shift = Op->Src[1].Literal(); @@ -1878,7 +1877,7 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) { } } else { if (Shift < Core::CPUState::XMM_SSE_REG_SIZE) { - Ref ResultBottom = _VExtr(OpSize::i128Bit, 1, Result, Src, Shift); + Ref ResultBottom = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Result, Src, Shift); Ref ResultTop = _VExtr(DstSize, OpSize::i8Bit, Result, Src, 16 + Shift); Result = _VInsElement(DstSize, OpSize::i128Bit, 1, 0, ResultBottom, ResultTop); @@ -1886,7 +1885,7 @@ void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::PSLLDQ(OpcodeArgs) { @@ -1896,7 +1895,7 @@ void OpDispatchBuilder::PSLLDQ(OpcodeArgs) { return; } - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Result = LoadZeroVector(Size); @@ -1904,11 +1903,11 @@ void OpDispatchBuilder::PSLLDQ(OpcodeArgs) { Result = _VExtr(Size, OpSize::i8Bit, Dest, Result, Size - Shift); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; const uint64_t Shift = Op->Src[1].Literal(); @@ -1935,26 +1934,26 @@ void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) { } } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::PSRAIOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::PSRAIOp(OpcodeArgs, IR::OpSize ElementSize) { const uint64_t Shift = Op->Src[1].Literal(); if (Shift == 0) [[unlikely]] { // Nothing to do, value is already in Dest. 
return; } - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Result = _VSShrI(Size, ElementSize, Dest, Shift); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, size_t ElementSize) { +void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, IR::OpSize ElementSize) { const uint64_t Shift = Op->Src[1].Literal(); - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -1968,19 +1967,19 @@ void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, size_t ElementSize) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) { - const auto DstSize = GetDstSize(Op); - const auto SrcSize = GetSrcSize(Op); + const auto DstSize = OpSizeFromDst(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Vector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); Ref ShiftVector = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], DstSize, Op->Flags); DeriveOp(Shift, IROp, _VUShr(DstSize, SrcSize, Vector, ShiftVector, true)); - StoreResult(FPRClass, Op, Shift, -1); + StoreResult(FPRClass, Op, Shift, OpSize::iInvalid); } void OpDispatchBuilder::VPSLLVOp(OpcodeArgs) { @@ -1999,15 +1998,15 @@ void OpDispatchBuilder::MOVDDUPOp(OpcodeArgs) { // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. - const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - Ref Res = _VDupElement(OpSize::i128Bit, GetSrcSize(Op), Src, 0); + Ref Res = _VDupElement(OpSize::i128Bit, OpSizeFromSrc(Op), Src, 0); - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); const auto IsSrcGPR = Op->Src[0].IsGPR(); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto MemSize = Is256Bit ? OpSize::i256Bit : OpSize::i64Bit; @@ -2022,18 +2021,18 @@ void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) { Res = _VDupElement(SrcSize, OpSize::i64Bit, Src, 0); } - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } -Ref OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, const X86Tables::DecodedOperand& Src1Op, +Ref OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Src1Op, OpSize::i128Bit, Op->Flags); Ref Converted {}; if (Src2Op.IsGPR()) { // If the source is a GPR then convert directly from the GPR. 
- auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPRSize(), Op->Flags); + auto Src2 = LoadSource_WithOpSize(GPRClass, Op, Src2Op, CTX->GetGPROpSize(), Op->Flags); Converted = _Float_FromGPR_S(DstElementSize, SrcSize, Src2); } else if (SrcSize != DstElementSize) { // If the source is from memory but the Source size and destination size aren't the same, @@ -2051,34 +2050,34 @@ Ref OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, size_t DstElementSize, cons return _VInsElement(OpSize::i128Bit, DstElementSize, 0, 0, Src1, Converted); } -template +template void OpDispatchBuilder::CVTGPR_To_FPR(OpcodeArgs) { Ref Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Dest, Op->Src[0]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::CVTGPR_To_FPR(OpcodeArgs); template void OpDispatchBuilder::CVTGPR_To_FPR(OpcodeArgs); -template +template void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs) { Ref Result = CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Src[0], Op->Src[1]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs); template void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs); -template +template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) { // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. - const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); // GPR size is determined by REX.W // Source Element size is determined by instruction - size_t GPRSize = GetDstSize(Op); + const auto GPRSize = OpSizeFromDst(Op); if constexpr (HostRoundingMode) { Src = _Float_ToGPR_S(GPRSize, SrcElementSize, Src); @@ -2086,7 +2085,7 @@ void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) { Src = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src); } - StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, GPRSize, -1); + StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Src, GPRSize, OpSize::iInvalid); } template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs); @@ -2095,57 +2094,57 @@ template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs); template void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs); -Ref OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, size_t SrcElementSize, bool Widen) { - const size_t Size = GetDstSize(Op); +Ref OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen) { + const auto Size = OpSizeFromDst(Op); Ref Src = [&] { if (Widen) { // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. - const auto LoadSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : 8 * (Size / 16); + const auto LoadSize = Op->Src[0].IsGPR() ? 
OpSize::i128Bit : IR::SizeToOpSize(8 * (IR::OpSizeToSize(Size) / 16)); return LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags); } else { return LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); } }(); - size_t ElementSize = SrcElementSize; + auto ElementSize = SrcElementSize; if (Widen) { Src = _VSXTL(Size, ElementSize, Src); - ElementSize <<= 1; + ElementSize = IR::MultiplyOpSize(ElementSize, 2); } return _Vector_SToF(Size, ElementSize, Src); } -template +template void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs) { Ref Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs); template void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float(OpcodeArgs) { Ref Result = Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float(OpcodeArgs); template void OpDispatchBuilder::AVXVector_CVT_Int_To_Float(OpcodeArgs); -Ref OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElementSize, bool Narrow, bool HostRoundingMode) { - const size_t DstSize = GetDstSize(Op); - size_t ElementSize = SrcElementSize; +Ref OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Narrow, bool HostRoundingMode) { + const auto DstSize = OpSizeFromDst(Op); + auto ElementSize = SrcElementSize; Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Narrow) { - Src = _Vector_FToF(DstSize, SrcElementSize >> 1, Src, SrcElementSize); - ElementSize >>= 1; + Src = _Vector_FToF(DstSize, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize); + ElementSize = IR::DivideOpSize(ElementSize, 2); } if (HostRoundingMode) { @@ -2155,9 +2154,9 @@ Ref OpDispatchBuilder::Vector_CVT_Float_To_IntImpl(OpcodeArgs, size_t SrcElement } } -template +template void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) { - const size_t DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Result {}; if (SrcElementSize == OpSize::i64Bit && Narrow) { @@ -2168,7 +2167,7 @@ void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) { Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); } - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs); @@ -2178,9 +2177,9 @@ template void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs); template void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Result {}; if (SrcElementSize == OpSize::i64Bit && Narrow) { @@ -2191,7 +2190,7 @@ void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs) { Result = Vector_CVT_Float_To_IntImpl(Op, SrcElementSize, Narrow, HostRoundingMode); } - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, DstSize, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs); @@ -2200,7 +2199,7 @@ 
template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs); template void OpDispatchBuilder::AVXVector_CVT_Float_To_Int(OpcodeArgs); -Ref OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, +Ref OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) { // In the case of vectors, we can just specify the full vector length, // so that we don't unnecessarily zero-extend the entire vector. @@ -2215,39 +2214,39 @@ Ref OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, size_t DstEleme return _VInsElement(OpSize::i128Bit, DstElementSize, 0, 0, Src1, Converted); } -template +template void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs) { Ref Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs); template void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs); -template +template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float(OpcodeArgs) { Ref Result = Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1]); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float(OpcodeArgs); template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float(OpcodeArgs); -void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs, size_t DstElementSize, size_t SrcElementSize, bool IsAVX) { - const auto SrcSize = GetSrcSize(Op); +void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, bool IsAVX) { + const auto SrcSize = OpSizeFromSrc(Op); const auto IsFloatSrc = SrcElementSize == OpSize::i32Bit; const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE; - const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? SrcSize / 2 : SrcSize; + const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2) : SrcSize; Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags); Ref Result {}; if (DstElementSize > SrcElementSize) { - Result = _Vector_FToF(SrcSize, SrcElementSize << 1, Src, SrcElementSize); + Result = _Vector_FToF(SrcSize, IR::MultiplyOpSize(SrcElementSize, 2), Src, SrcElementSize); } else { - Result = _Vector_FToF(SrcSize, SrcElementSize >> 1, Src, SrcElementSize); + Result = _Vector_FToF(SrcSize, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize); } if (IsAVX) { @@ -2259,26 +2258,26 @@ void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs, size_t DstElementS Result = _VMov(OpSize::i128Bit, Result); } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); // Always 32-bit. 
- size_t ElementSize = OpSize::i32Bit; - size_t DstSize = GetDstSize(Op); + auto ElementSize = OpSize::i32Bit; + const auto DstSize = OpSizeFromDst(Op); Src = _VSXTL(DstSize, ElementSize, Src); - ElementSize <<= 1; + ElementSize = IR::MultiplyOpSize(ElementSize, 2); // Always signed Src = _Vector_SToF(DstSize, ElementSize, Src); - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } -template +template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { // This function causes a change in MMX state from X87 to MMX if (MMXState == MMXState_X87) { @@ -2288,15 +2287,15 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { // If loading a vector, use the full size, so we don't // unnecessarily zero extend the vector. Otherwise, if // memory, then we want to load the element size exactly. - const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); - size_t ElementSize = SrcElementSize; - size_t Size = GetDstSize(Op); + auto ElementSize = SrcElementSize; + const auto Size = OpSizeFromDst(Op); if (Narrow) { - Src = _Vector_FToF(Size, SrcElementSize >> 1, Src, SrcElementSize); - ElementSize >>= 1; + Src = _Vector_FToF(Size, IR::DivideOpSize(SrcElementSize, 2), Src, SrcElementSize); + ElementSize = IR::DivideOpSize(ElementSize, 2); } if constexpr (HostRoundingMode) { @@ -2305,7 +2304,7 @@ void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) { Src = _Vector_FToZS(Size, ElementSize, Src); } - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid); } template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs); @@ -2314,7 +2313,7 @@ template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs); void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref MaskSrc = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); // Mask only cares about the top bit of each byte @@ -2333,11 +2332,11 @@ void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) { _StoreMem(FPRClass, Size, MemDest, XMMReg, OpSize::i8Bit); } -void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t DataSize, bool IsStore, +void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DataSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) { const auto MakeAddress = [this, Op](const X86Tables::DecodedOperand& Data) { - return MakeSegmentAddress(Op, Data, CTX->GetGPRSize()); + return MakeSegmentAddress(Op, Data, CTX->GetGPROpSize()); }; Ref Mask = LoadSource_WithOpSize(FPRClass, Op, MaskOp, DataSize, Op->Flags); @@ -2355,13 +2354,13 @@ void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, size_t ElementSize, size_t Da if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } } -template +template void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs) { - VMASKMOVOpImpl(Op, ElementSize, GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]); + VMASKMOVOpImpl(Op, ElementSize, OpSizeFromDst(Op), IsStore, Op->Src[0], Op->Src[1]); } template void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs); template void 
OpDispatchBuilder::VMASKMOVOp(OpcodeArgs); @@ -2370,7 +2369,7 @@ template void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs); template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs) { - VMASKMOVOpImpl(Op, GetSrcSize(Op), GetDstSize(Op), IsStore, Op->Src[0], Op->Src[1]); + VMASKMOVOpImpl(Op, OpSizeFromSrc(Op), OpSizeFromDst(Op), IsStore, Op->Src[0], Op->Src[1]); } template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); template void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs); @@ -2380,32 +2379,32 @@ void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType) { Ref Result {}; if (Op->Src[0].IsGPR()) { // Loading from GPR and moving to Vector. - Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPRSize(), Op->Flags); + Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], CTX->GetGPROpSize(), Op->Flags); // zext to 128bit - Result = _VCastFromGPR(OpSize::i128Bit, GetSrcSize(Op), Src); + Result = _VCastFromGPR(OpSize::i128Bit, OpSizeFromSrc(Op), Src); } else { // Loading from Memory as a scalar. Zero extend Result = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); } - StoreResult_WithAVXInsert(VectorType, FPRClass, Op, Result, -1); + StoreResult_WithAVXInsert(VectorType, FPRClass, Op, Result, OpSize::iInvalid); } else { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); if (Op->Dest.IsGPR()) { - auto ElementSize = GetDstSize(Op); + const auto ElementSize = OpSizeFromDst(Op); // Extract element from GPR. Zero extending in the process. - Src = _VExtractToGPR(GetSrcSize(Op), ElementSize, Src, 0); - StoreResult(GPRClass, Op, Op->Dest, Src, -1); + Src = _VExtractToGPR(OpSizeFromSrc(Op), ElementSize, Src, 0); + StoreResult(GPRClass, Op, Op->Dest, Src, OpSize::iInvalid); } else { // Storing first element to memory. Ref Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); - _StoreMem(FPRClass, GetDstSize(Op), Dest, Src, 1); + _StoreMem(FPRClass, OpSizeFromDst(Op), Dest, Src, OpSize::i8Bit); } } } -Ref OpDispatchBuilder::VFCMPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2, uint8_t CompType) { +Ref OpDispatchBuilder::VFCMPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType) { Ref Result {}; switch (CompType & 0x7) { case 0x0: // EQ @@ -2430,12 +2429,12 @@ Ref OpDispatchBuilder::VFCMPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Re FEX_UNREACHABLE; } -template +template void OpDispatchBuilder::VFCMPOp(OpcodeArgs) { // No need for zero-extending in the scalar case, since // all we need is an insert at the end of the operation. - const auto SrcSize = GetSrcSize(Op); - const auto DstSize = GetDstSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, DstSize, Op->Flags); @@ -2443,25 +2442,25 @@ void OpDispatchBuilder::VFCMPOp(OpcodeArgs) { Ref Result = VFCMPOpImpl(OpSizeFromSrc(Op), ElementSize, Dest, Src, CompType); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VFCMPOp(OpcodeArgs); template void OpDispatchBuilder::VFCMPOp(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) { // No need for zero-extending in the scalar case, since // all we need is an insert at the end of the operation. 
- const auto SrcSize = GetSrcSize(Op); - const auto DstSize = GetDstSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); + const auto DstSize = OpSizeFromDst(Op); const uint8_t CompType = Op->Src[2].Literal(); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], DstSize, Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], SrcSize, Op->Flags); Ref Result = VFCMPOpImpl(OpSizeFromSrc(Op), ElementSize, Src1, Src2, CompType); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs); @@ -2786,7 +2785,7 @@ void OpDispatchBuilder::DefaultX87State(OpcodeArgs) { // On top of resetting the flags to a default state, we also need to clear // all of the ST0-7/MM0-7 registers to zero. - Ref ZeroVector = LoadZeroVector(Core::CPUState::MM_REG_SIZE); + Ref ZeroVector = LoadZeroVector(OpSize::i64Bit); for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) { StoreContext(MM0Index + i, ZeroVector); } @@ -2795,7 +2794,7 @@ void OpDispatchBuilder::DefaultX87State(OpcodeArgs) { void OpDispatchBuilder::DefaultSSEState() { const auto NumRegs = CTX->Config.Is64BitMode ? 16U : 8U; - Ref ZeroVector = LoadZeroVector(Core::CPUState::XMM_SSE_REG_SIZE); + Ref ZeroVector = LoadZeroVector(OpSize::i128Bit); for (uint32_t i = 0; i < NumRegs; ++i) { StoreXMMRegister(i, ZeroVector); } @@ -2814,8 +2813,8 @@ void OpDispatchBuilder::DefaultAVXState() { Ref OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm, bool IsAVX) { // For the 256-bit case we handle it as pairs of 128-bit halves. - const auto DstSize = GetDstSize(Op); - const auto SanitizedDstSize = std::min(DstSize, OpSize::i128Bit); + const auto DstSize = OpSizeFromDst(Op); + const auto SanitizedDstSize = std::min(DstSize, OpSize::i128Bit); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Index = Imm.Literal(); @@ -2836,30 +2835,30 @@ Ref OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand return LoadZeroVector(DstSize); } - Ref Low = _VExtr(SanitizedDstSize, 1, Src1Node, Src2Node, Index); + Ref Low = _VExtr(SanitizedDstSize, OpSize::i8Bit, Src1Node, Src2Node, Index); if (!Is256Bit) { return Low; } Ref HighSrc1 = _VInsElement(DstSize, OpSize::i128Bit, 0, 1, Src1Node, Src1Node); Ref HighSrc2 = _VInsElement(DstSize, OpSize::i128Bit, 0, 1, Src2Node, Src2Node); - Ref High = _VExtr(SanitizedDstSize, 1, HighSrc1, HighSrc2, Index); + Ref High = _VExtr(SanitizedDstSize, OpSize::i8Bit, HighSrc1, HighSrc2, Index); return _VInsElement(DstSize, OpSize::i128Bit, 1, 0, Low, High); } void OpDispatchBuilder::PAlignrOp(OpcodeArgs) { Ref Result = PALIGNROpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], false); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPALIGNROp(OpcodeArgs) { Ref Result = PALIGNROpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], true); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -template +template void OpDispatchBuilder::UCOMISxOp(OpcodeArgs) { - const auto SrcSize = Op->Src[0].IsGPR() ? GetGuestVectorLength() : GetSrcSize(Op); + const auto SrcSize = Op->Src[0].IsGPR() ? 
GetGuestVectorLength() : OpSizeFromSrc(Op); Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetGuestVectorLength(), Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); @@ -2875,66 +2874,66 @@ void OpDispatchBuilder::LDMXCSR(OpcodeArgs) { } void OpDispatchBuilder::STMXCSR(OpcodeArgs) { - StoreResult(GPRClass, Op, GetMXCSR(), -1); + StoreResult(GPRClass, Op, GetMXCSR(), OpSize::iInvalid); } -template +template void OpDispatchBuilder::PACKUSOp(OpcodeArgs) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Ref Result = _VSQXTUNPair(GetSrcSize(Op), ElementSize, Dest, Src); + Ref Result = _VSQXTUNPair(OpSizeFromSrc(Op), ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::PACKUSOp(OpcodeArgs); template void OpDispatchBuilder::PACKUSOp(OpcodeArgs); -void OpDispatchBuilder::VPACKUSOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VPACKUSOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - Ref Result = _VSQXTUNPair(GetSrcSize(Op), ElementSize, Src1, Src2); + Ref Result = _VSQXTUNPair(OpSizeFromSrc(Op), ElementSize, Src1, Src2); if (Is256Bit) { // We do a little cheeky 64-bit swapping to interleave the result. Ref Swapped = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Result, Result); Result = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Swapped, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -template +template void OpDispatchBuilder::PACKSSOp(OpcodeArgs) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Ref Result = _VSQXTNPair(GetSrcSize(Op), ElementSize, Dest, Src); + Ref Result = _VSQXTNPair(OpSizeFromSrc(Op), ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::PACKSSOp(OpcodeArgs); template void OpDispatchBuilder::PACKSSOp(OpcodeArgs); -void OpDispatchBuilder::VPACKSSOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VPACKSSOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - Ref Result = _VSQXTNPair(GetSrcSize(Op), ElementSize, Src1, Src2); + Ref Result = _VSQXTNPair(OpSizeFromSrc(Op), ElementSize, Src1, Src2); if (Is256Bit) { // We do a little cheeky 64-bit swapping to interleave the result. 
Ref Swapped = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Result, Result); Result = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Swapped, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, size_t ElementSize, bool Signed, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, IR::OpSize ElementSize, bool Signed, Ref Src1, Ref Src2) { if (Size == OpSize::i64Bit) { if (Signed) { return _VSMull(OpSize::i128Bit, ElementSize, Src1, Src2); @@ -2953,7 +2952,7 @@ Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, size_t ElementSize, bool Signed, } } -template +template void OpDispatchBuilder::PMULLOp(OpcodeArgs) { static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit"); @@ -2961,13 +2960,13 @@ void OpDispatchBuilder::PMULLOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Res = PMULLOpImpl(OpSizeFromSrc(Op), ElementSize, Signed, Src1, Src2); - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } template void OpDispatchBuilder::PMULLOp(OpcodeArgs); template void OpDispatchBuilder::PMULLOp(OpcodeArgs); -template +template void OpDispatchBuilder::VPMULLOp(OpcodeArgs) { static_assert(ElementSize == sizeof(uint32_t), "Currently only handles 32-bit -> 64-bit"); @@ -2975,7 +2974,7 @@ void OpDispatchBuilder::VPMULLOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PMULLOpImpl(OpSizeFromSrc(Op), ElementSize, Signed, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VPMULLOp(OpcodeArgs); @@ -2993,20 +2992,20 @@ void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs) { StoreXMMRegister(Index, Src); } else { // This is simple, just store the result - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } } template void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); template void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs); -Ref OpDispatchBuilder::ADDSUBPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::ADDSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2) { if (CTX->HostFeatures.SupportsFCMA) { if (ElementSize == OpSize::i32Bit) { - auto Swizzle = _VRev64(Size, 4, Src2); + auto Swizzle = _VRev64(Size, OpSize::i32Bit, Src2); return _VFCADD(Size, ElementSize, Src1, Swizzle, 90); } else { - auto Swizzle = _VExtr(Size, 1, Src2, Src2, 8); + auto Swizzle = _VExtr(Size, OpSize::i8Bit, Src2, Src2, 8); return _VFCADD(Size, ElementSize, Src1, Swizzle, 90); } } else { @@ -3017,32 +3016,32 @@ Ref OpDispatchBuilder::ADDSUBPOpImpl(OpSize Size, size_t ElementSize, Ref Src1, } } -template +template void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = ADDSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs); template void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs); -template +template void OpDispatchBuilder::VADDSUBPOp(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = ADDSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, Src1, Src2); - 
StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VADDSUBPOp(OpcodeArgs); template void OpDispatchBuilder::VADDSUBPOp(OpcodeArgs); void OpDispatchBuilder::PFNACCOp(OpcodeArgs) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3051,11 +3050,11 @@ void OpDispatchBuilder::PFNACCOp(OpcodeArgs) { auto SrcUnzip = _VUnZip2(Size, OpSize::i32Bit, Dest, Src); auto Result = _VFSub(Size, OpSize::i32Bit, DestUnzip, SrcUnzip); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::PFPNACCOp(OpcodeArgs) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3069,21 +3068,21 @@ void OpDispatchBuilder::PFPNACCOp(OpcodeArgs) { auto Result = _VInsElement(OpSize::i64Bit, OpSize::i32Bit, 1, 0, ResSub, ResAdd); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::PSWAPDOp(OpcodeArgs) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); auto Result = _VRev64(Size, OpSize::i32Bit, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::PI2FWOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - size_t Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); // We now need to transpose the lower 16-bits of each element together // Only needing to move the upper element down in this case @@ -3095,13 +3094,13 @@ void OpDispatchBuilder::PI2FWOp(OpcodeArgs) { // int32_t to float Src = _Vector_SToF(Size, OpSize::i32Bit, Src); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid); } void OpDispatchBuilder::PF2IWOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - size_t Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); // Float to int32_t Src = _Vector_FToZS(Size, OpSize::i32Bit, Src); @@ -3112,11 +3111,11 @@ void OpDispatchBuilder::PF2IWOp(OpcodeArgs) { // Now we need to sign extend the 16bit value to 32-bit Src = _VSXTL(Size, OpSize::i16Bit, Src); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Src, Size, OpSize::iInvalid); } void OpDispatchBuilder::PMULHRWOp(OpcodeArgs) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3125,24 +3124,24 @@ void OpDispatchBuilder::PMULHRWOp(OpcodeArgs) { // Implementation is more efficient for 8byte registers // Multiplies 4 16bit values in to 4 32bit values - Res = _VSMull(Size * 2, OpSize::i16Bit, Dest, Src); + Res = _VSMull(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, Dest, Src); // Load 0x0000_8000 in to each 32-bit element. 
Ref VConstant = _VectorImm(OpSize::i128Bit, OpSize::i32Bit, 0x80, 8); - Res = _VAdd(Size * 2, OpSize::i32Bit, Res, VConstant); + Res = _VAdd(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, VConstant); // Now shift and narrow to convert 32-bit values to 16bit, storing the top 16bits - Res = _VUShrNI(Size * 2, OpSize::i32Bit, Res, 16); + Res = _VUShrNI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 16); - StoreResult(FPRClass, Op, Res, -1); + StoreResult(FPRClass, Op, Res, OpSize::iInvalid); } template void OpDispatchBuilder::VPFCMPOp(OpcodeArgs) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, GetDstSize(Op), Op->Flags); + Ref Dest = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, OpSizeFromDst(Op), Op->Flags); Ref Result {}; // This maps 1:1 to an AArch64 NEON Op @@ -3160,14 +3159,14 @@ void OpDispatchBuilder::VPFCMPOp(OpcodeArgs) { default: LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType); break; } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VPFCMPOp<0>(OpcodeArgs); template void OpDispatchBuilder::VPFCMPOp<1>(OpcodeArgs); template void OpDispatchBuilder::VPFCMPOp<2>(OpcodeArgs); -Ref OpDispatchBuilder::PMADDWDOpImpl(size_t Size, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::PMADDWDOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) { // This is a pretty curious operation // Does two MADD operations across 4 16bit signed integers and accumulates to 32bit integers in the destination // @@ -3178,7 +3177,7 @@ Ref OpDispatchBuilder::PMADDWDOpImpl(size_t Size, Ref Src1, Ref Src2) { if (Size == OpSize::i64Bit) { // MMX implementation can be slightly more optimal - Size <<= 1; + Size = IR::DivideOpSize(Size, 2); auto MullResult = _VSMull(Size, OpSize::i16Bit, Src1, Src2); return _VAddP(Size, OpSize::i32Bit, MullResult, MullResult); } @@ -3191,43 +3190,44 @@ Ref OpDispatchBuilder::PMADDWDOpImpl(size_t Size, Ref Src1, Ref Src2) { } void OpDispatchBuilder::PMADDWD(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PMADDWDOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPMADDWDOp(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PMADDWDOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::PMADDUBSWOpImpl(size_t Size, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::PMADDUBSWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) { if (Size == OpSize::i64Bit) { + const auto MultSize = IR::MultiplyOpSize(Size, 2); // 64bit is more efficient // Src1 is unsigned - auto Src1_16b = _VUXTL(Size * 2, OpSize::i8Bit, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto Src1_16b = _VUXTL(MultSize, OpSize::i8Bit, Src1); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] // Src2 is signed - auto Src2_16b = _VSXTL(Size * 2, OpSize::i8Bit, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] + auto 
Src2_16b = _VSXTL(MultSize, OpSize::i8Bit, Src2); // [7:0 ], [15:8], [23:16], [31:24], [39:32], [47:40], [55:48], [63:56] - auto ResMul_L = _VSMull(Size * 2, OpSize::i16Bit, Src1_16b, Src2_16b); - auto ResMul_H = _VSMull2(Size * 2, OpSize::i16Bit, Src1_16b, Src2_16b); + auto ResMul_L = _VSMull(MultSize, OpSize::i16Bit, Src1_16b, Src2_16b); + auto ResMul_H = _VSMull2(MultSize, OpSize::i16Bit, Src1_16b, Src2_16b); // Now add pairwise across the vector - auto ResAdd = _VAddP(Size * 2, OpSize::i32Bit, ResMul_L, ResMul_H); + auto ResAdd = _VAddP(MultSize, OpSize::i32Bit, ResMul_L, ResMul_H); // Add saturate back down to 16bit - return _VSQXTN(Size * 2, OpSize::i32Bit, ResAdd); + return _VSQXTN(MultSize, OpSize::i32Bit, ResAdd); } // V{U,S}XTL{,2}/ and VUnZip{,2} can be optimized in this solution to save about one instruction. @@ -3252,27 +3252,27 @@ Ref OpDispatchBuilder::PMADDUBSWOpImpl(size_t Size, Ref Src1, Ref Src2) { } void OpDispatchBuilder::PMADDUBSW(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PMADDUBSWOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPMADDUBSWOp(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PMADDUBSWOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } Ref OpDispatchBuilder::PMULHWOpImpl(OpcodeArgs, bool Signed, Ref Src1, Ref Src2) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); if (Signed) { return _VSMulH(Size, OpSize::i16Bit, Src1, Src2); } else { @@ -3286,7 +3286,7 @@ void OpDispatchBuilder::PMULHW(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PMULHWOpImpl(Op, Signed, Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::PMULHW(OpcodeArgs); @@ -3304,7 +3304,7 @@ void OpDispatchBuilder::VPMULHWOp(OpcodeArgs) { if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VPMULHWOp(OpcodeArgs); @@ -3314,11 +3314,11 @@ Ref OpDispatchBuilder::PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2) { Ref Res {}; if (Size == OpSize::i64Bit) { // Implementation is more efficient for 8byte registers - Res = _VSMull(Size * 2, OpSize::i16Bit, Src1, Src2); - Res = _VSShrI(Size * 2, OpSize::i32Bit, Res, 14); - auto OneVector = _VectorImm(Size * 2, OpSize::i32Bit, 1); - Res = _VAdd(Size * 2, OpSize::i32Bit, Res, OneVector); - return _VUShrNI(Size * 2, OpSize::i32Bit, Res, 1); + Res = _VSMull(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, Src1, Src2); + Res = _VSShrI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 14); + auto OneVector = _VectorImm(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, 1); + Res = _VAdd(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, OneVector); + return _VUShrNI(IR::MultiplyOpSize(Size, 2), OpSize::i32Bit, Res, 1); } else { // 128-bit and 256-bit are less efficient Ref ResultLow; @@ -3345,7 +3345,7 @@ void OpDispatchBuilder::PMULHRSW(OpcodeArgs) { Ref Src = 
LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PMULHRSWOpImpl(OpSizeFromSrc(Op), Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPMULHRSWOp(OpcodeArgs) { @@ -3353,28 +3353,28 @@ void OpDispatchBuilder::VPMULHRSWOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PMULHRSWOpImpl(OpSizeFromSrc(Op), Dest, Src); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::HSUBPOpImpl(OpSize SrcSize, size_t ElementSize, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::HSUBPOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2) { auto Even = _VUnZip(SrcSize, ElementSize, Src1, Src2); auto Odd = _VUnZip2(SrcSize, ElementSize, Src1, Src2); return _VFSub(SrcSize, ElementSize, Even, Odd); } -template +template void OpDispatchBuilder::HSUBP(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = HSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::HSUBP(OpcodeArgs); template void OpDispatchBuilder::HSUBP(OpcodeArgs); -void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3387,28 +3387,28 @@ void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, size_t ElementSize) { Dest = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Dest, Result); } - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } -Ref OpDispatchBuilder::PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, size_t ElementSize) { +Ref OpDispatchBuilder::PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, IR::OpSize ElementSize) { auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); return _VSub(Size, ElementSize, Even, Odd); } -template +template void OpDispatchBuilder::PHSUB(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PHSUBOpImpl(OpSizeFromSrc(Op), Src1, Src2, ElementSize); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -template void OpDispatchBuilder::PHSUB<2>(OpcodeArgs); -template void OpDispatchBuilder::PHSUB<4>(OpcodeArgs); +template void OpDispatchBuilder::PHSUB(OpcodeArgs); +template void OpDispatchBuilder::PHSUB(OpcodeArgs); -void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3418,11 +3418,11 @@ void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, size_t ElementSize) { Ref Inserted = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Result, Result); Result = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Inserted, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } Ref 
OpDispatchBuilder::PHADDSOpImpl(OpSize Size, Ref Src1, Ref Src2) { - const uint8_t ElementSize = 2; + const auto ElementSize = OpSize::i16Bit; auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); @@ -3436,11 +3436,11 @@ void OpDispatchBuilder::PHADDS(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PHADDSOpImpl(OpSizeFromSrc(Op), Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); const auto Is256Bit = SrcSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3454,11 +3454,11 @@ void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) { Dest = _VInsElement(SrcSize, OpSize::i64Bit, 2, 1, Dest, Result); } - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } Ref OpDispatchBuilder::PHSUBSOpImpl(OpSize Size, Ref Src1, Ref Src2) { - const uint8_t ElementSize = OpSize::i16Bit; + const auto ElementSize = OpSize::i16Bit; auto Even = _VUnZip(Size, ElementSize, Src1, Src2); auto Odd = _VUnZip2(Size, ElementSize, Src1, Src2); @@ -3471,11 +3471,11 @@ void OpDispatchBuilder::PHSUBS(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PHSUBSOpImpl(OpSizeFromSrc(Op), Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3488,10 +3488,10 @@ void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) { Dest = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Dest, Result); } - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } -Ref OpDispatchBuilder::PSADBWOpImpl(size_t Size, Ref Src1, Ref Src2) { +Ref OpDispatchBuilder::PSADBWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) { // The documentation is actually incorrect in how this instruction operates // It strongly implies that the `abs(dest[i] - src[i])` operates in 8bit space // but it actually operates in more than 8bit space @@ -3500,10 +3500,10 @@ Ref OpDispatchBuilder::PSADBWOpImpl(size_t Size, Ref Src1, Ref Src2) { const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; if (Size == OpSize::i64Bit) { - auto AbsResult = _VUABDL(Size * 2, OpSize::i8Bit, Src1, Src2); + auto AbsResult = _VUABDL(IR::MultiplyOpSize(Size, 2), OpSize::i8Bit, Src1, Src2); // Now vector-wide add the results for each - return _VAddV(Size * 2, OpSize::i16Bit, AbsResult); + return _VAddV(IR::MultiplyOpSize(Size, 2), OpSize::i16Bit, AbsResult); } auto AbsResult_Low = _VUABDL(Size, OpSize::i8Bit, Src1, Src2); @@ -3531,27 +3531,27 @@ Ref OpDispatchBuilder::PSADBWOpImpl(size_t Size, Ref Src1, Ref Src2) { } void OpDispatchBuilder::PSADBW(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = PSADBWOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void 
OpDispatchBuilder::VPSADBWOp(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = PSADBWOpImpl(Size, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, size_t DstElementSize, bool Signed) { - const auto DstSize = GetDstSize(Op); +Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed) { + const auto DstSize = OpSizeFromDst(Op); const auto GetSrc = [&] { if (Op->Src[0].IsGPR()) { @@ -3559,8 +3559,8 @@ Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, } else { // For memory operands the 256-bit variant loads twice the size specified in the table. const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; - const auto SrcSize = GetSrcSize(Op); - const auto LoadSize = Is256Bit ? SrcSize * 2 : SrcSize; + const auto SrcSize = OpSizeFromSrc(Op); + const auto LoadSize = Is256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize; return LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], LoadSize, Op->Flags); } @@ -3569,7 +3569,8 @@ Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, Ref Src = GetSrc(); Ref Result {Src}; - for (size_t CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; CurrentElementSize <<= 1) { + for (auto CurrentElementSize = ElementSize; CurrentElementSize != DstElementSize; + CurrentElementSize = IR::MultiplyOpSize(CurrentElementSize, 2)) { if (Signed) { Result = _VSXTL(DstSize, CurrentElementSize, Result); } else { @@ -3580,10 +3581,10 @@ Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, size_t ElementSize, return Result; } -template +template void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs) { Ref Result = ExtendVectorElementsImpl(Op, ElementSize, DstElementSize, Signed); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs); @@ -3600,44 +3601,44 @@ template void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs); template void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs); -Ref OpDispatchBuilder::VectorRoundImpl(OpSize Size, size_t ElementSize, Ref Src, uint64_t Mode) { +Ref OpDispatchBuilder::VectorRoundImpl(OpSize Size, IR::OpSize ElementSize, Ref Src, uint64_t Mode) { return _Vector_FToI(Size, ElementSize, Src, TranslateRoundType(Mode)); } -template +template void OpDispatchBuilder::VectorRound(OpcodeArgs) { // No need to zero extend the vector in the event we have a // scalar source, especially since it's only inserted into another vector. 
- const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); const uint64_t Mode = Op->Src[1].Literal(); Src = VectorRoundImpl(OpSizeFromDst(Op), ElementSize, Src, Mode); - StoreResult(FPRClass, Op, Src, -1); + StoreResult(FPRClass, Op, Src, OpSize::iInvalid); } template void OpDispatchBuilder::VectorRound(OpcodeArgs); template void OpDispatchBuilder::VectorRound(OpcodeArgs); -template +template void OpDispatchBuilder::AVXVectorRound(OpcodeArgs) { const auto Mode = Op->Src[1].Literal(); // No need to zero extend the vector in the event we have a // scalar source, especially since it's only inserted into another vector. - const auto SrcSize = GetSrcSize(Op); + const auto SrcSize = OpSizeFromSrc(Op); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcSize, Op->Flags); Ref Result = VectorRoundImpl(OpSizeFromDst(Op), ElementSize, Src, Mode); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::AVXVectorRound(OpcodeArgs); template void OpDispatchBuilder::AVXVectorRound(OpcodeArgs); -Ref OpDispatchBuilder::VectorBlend(OpSize Size, size_t ElementSize, Ref Src1, Ref Src2, uint8_t Selector) { +Ref OpDispatchBuilder::VectorBlend(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Selector) { if (ElementSize == OpSize::i32Bit) { Selector &= 0b1111; switch (Selector) { @@ -3873,22 +3874,22 @@ Ref OpDispatchBuilder::VectorBlend(OpSize Size, size_t ElementSize, Ref Src1, Re FEX_UNREACHABLE; } -template +template void OpDispatchBuilder::VectorBlend(OpcodeArgs) { uint8_t Select = Op->Src[1].Literal(); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Dest = VectorBlend(OpSize::i128Bit, ElementSize, Dest, Src, Select); - StoreResult(FPRClass, Op, Dest, -1); + StoreResult(FPRClass, Op, Dest, OpSize::iInvalid); } template void OpDispatchBuilder::VectorBlend(OpcodeArgs); template void OpDispatchBuilder::VectorBlend(OpcodeArgs); template void OpDispatchBuilder::VectorBlend(OpcodeArgs); -void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, size_t ElementSize) { - auto Size = GetSrcSize(Op); +void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) { + const auto Size = OpSizeFromSrc(Op); Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3904,11 +3905,11 @@ void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, size_t ElementSize) { auto Result = _VBSL(Size, Mask, Src, Dest); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize) { - const auto SrcSize = GetSrcSize(Op); +void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) { + const auto SrcSize = OpSizeFromSrc(Op); const auto ElementSizeBits = ElementSize * 8; Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -3920,7 +3921,7 @@ void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, size_t ElementSize) { Ref Shifted = _VSShrI(SrcSize, ElementSize, Mask, ElementSizeBits - 1); Ref Result = _VBSL(SrcSize, Shifted, Src2, Src1); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::PTestOpImpl(OpSize Size, Ref Dest, Ref Src) { @@ -3958,7 +3959,7 @@ void 
OpDispatchBuilder::PTestOp(OpcodeArgs) { PTestOpImpl(OpSizeFromSrc(Op), Dest, Src); } -void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, size_t ElementSize, Ref Src1, Ref Src2) { +void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2) { InvalidateDeferredFlags(); const auto ElementSizeInBits = ElementSize * 8; @@ -3989,7 +3990,7 @@ void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, size_t ElementSize, Ref Src1 ZeroPF_AF(); } -template +template void OpDispatchBuilder::VTESTPOp(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -4000,7 +4001,7 @@ template void OpDispatchBuilder::VTESTPOp(OpcodeArgs); template void OpDispatchBuilder::VTESTPOp(OpcodeArgs); Ref OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) { - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); @@ -4040,10 +4041,10 @@ Ref OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) { void OpDispatchBuilder::PHMINPOSUWOp(OpcodeArgs) { Ref Result = PHMINPOSUWOpImpl(Op); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mask, size_t ElementSize) { +Ref OpDispatchBuilder::DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t Mask, IR::OpSize ElementSize) { const auto SizeMask = [ElementSize]() { if (ElementSize == OpSize::i32Bit) { return 0b1111; @@ -4126,13 +4127,13 @@ Ref OpDispatchBuilder::DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mas // Dest[63:32] = Result // Dest[95:64] = Zero // Dest[127:96] = Zero - return _VZip(DstSize / 2, ElementSize, ZeroVec, Temp); + return _VZip(IR::DivideOpSize(DstSize, 2), ElementSize, ZeroVec, Temp); case 0b0011: // Dest[31:0] = Result // Dest[63:32] = Result // Dest[95:64] = Zero // Dest[127:96] = Zero - return _VDupElement(DstSize / 2, ElementSize, Temp, 0); + return _VDupElement(IR::DivideOpSize(DstSize, 2), ElementSize, Temp, 0); case 0b0100: // Dest[31:0] = Zero // Dest[63:32] = Zero @@ -4219,14 +4220,14 @@ Ref OpDispatchBuilder::DPPOpImpl(size_t DstSize, Ref Src1, Ref Src2, uint8_t Mas FEX_UNREACHABLE; } -template +template void OpDispatchBuilder::DPPOp(OpcodeArgs) { Ref Dest = LoadSource(FPRClass, Op, Op->Dest, Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Ref Result = DPPOpImpl(GetDstSize(Op), Dest, Src, Op->Src[1].Literal(), ElementSize); - StoreResult(FPRClass, Op, Result, -1); + Ref Result = DPPOpImpl(OpSizeFromDst(Op), Dest, Src, Op->Src[1].Literal(), ElementSize); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::DPPOp(OpcodeArgs); @@ -4234,12 +4235,12 @@ template void OpDispatchBuilder::DPPOp(OpcodeArgs); Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm) { - constexpr size_t ElementSize = OpSize::i32Bit; + constexpr auto ElementSize = OpSize::i32Bit; const uint8_t Mask = Imm.Literal(); const uint8_t SrcMask = Mask >> 4; const uint8_t DstMask = Mask & 0xF; - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src1V = LoadSource(FPRClass, Op, Src1, Op->Flags); Ref Src2V = LoadSource(FPRClass, Op, Src2, Op->Flags); @@ -4282,7 +4283,7 @@ Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& return Result; } -template 
+template void OpDispatchBuilder::VDPPOp(OpcodeArgs) { const auto DstSize = GetDstSize(Op); @@ -4294,12 +4295,12 @@ void OpDispatchBuilder::VDPPOp(OpcodeArgs) { Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); - Result = DPPOpImpl(GetDstSize(Op), Src1, Src2, Op->Src[2].Literal(), ElementSize); + Result = DPPOpImpl(OpSizeFromDst(Op), Src1, Src2, Op->Src[2].Literal(), ElementSize); } // We don't need to emit a _VMov to clear the upper lane, since DPPOpImpl uses a zero vector // to construct the results, so the upper lane will always be cleared for the 128-bit version. - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VDPPOp(OpcodeArgs); @@ -4399,7 +4400,7 @@ void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = MPSADBWOpImpl(SrcSize, Src1, Src2, Select); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VMPSADBWOp(OpcodeArgs) { @@ -4409,36 +4410,36 @@ void OpDispatchBuilder::VMPSADBWOp(OpcodeArgs) { Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = MPSADBWOpImpl(SrcSize, Src1, Src2, Select); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VINSERTOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[1], OpSize::i128Bit, Op->Flags); const auto Selector = Op->Src[2].Literal() & 1; Ref Result = _VInsElement(DstSize, OpSize::i128Bit, Selector, 0, Src1, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VCVTPH2PSOp(OpcodeArgs) { // In the event that a memory operand is used as the source operand, // the access width will always be half the size of the destination vector width // (i.e. 128-bit vector -> 64-bit mem, 256-bit vector -> 128-bit mem) - const auto DstSize = GetDstSize(Op); - const auto SrcLoadSize = Op->Src[0].IsGPR() ? DstSize : DstSize / 2; + const auto DstSize = OpSizeFromDst(Op); + const auto SrcLoadSize = Op->Src[0].IsGPR() ? DstSize : IR::SizeToOpSize(IR::OpSizeToSize(DstSize) / 2); Ref Src = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], SrcLoadSize, Op->Flags); Ref Result = _Vector_FToF(DstSize, OpSize::i32Bit, Src, OpSize::i16Bit); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VCVTPS2PHOp(OpcodeArgs) { - const auto SrcSize = GetSrcSize(Op); - const auto StoreSize = Op->Dest.IsGPR() ? OpSize::i128Bit : SrcSize / 2; + const auto SrcSize = OpSizeFromSrc(Op); + const auto StoreSize = Op->Dest.IsGPR() ? 
OpSize::i128Bit : IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2); const auto Imm8 = Op->Src[1].Literal(); const auto UseMXCSR = (Imm8 & 0b100) != 0; @@ -4466,11 +4467,11 @@ void OpDispatchBuilder::VCVTPS2PHOp(OpcodeArgs) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, Result, StoreSize, OpSize::iInvalid); } void OpDispatchBuilder::VPERM2Op(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src1 = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src2 = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); @@ -4494,7 +4495,7 @@ void OpDispatchBuilder::VPERM2Op(OpcodeArgs) { Result = SelectElement(1, (Selector >> 4) & 0b11); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } Ref OpDispatchBuilder::VPERMDIndices(OpSize DstSize, Ref Indices, Ref IndexMask, Ref Repeating3210) { @@ -4560,7 +4561,7 @@ Ref OpDispatchBuilder::VPERMDIndices(OpSize DstSize, Ref Indices, Ref IndexMask, } void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Indices = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Src = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); @@ -4575,11 +4576,11 @@ void OpDispatchBuilder::VPERMDOp(OpcodeArgs) { // Now lets finally shuffle this bad boy around. Ref Result = _VTBL1(DstSize, Src, FinalIndices); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPERMQOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); const auto Selector = Op->Src[1].Literal(); @@ -4597,10 +4598,10 @@ void OpDispatchBuilder::VPERMQOp(OpcodeArgs) { Result = _VInsElement(DstSize, OpSize::i64Bit, i, SrcIndex, Result, Src); } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::VBLENDOpImpl(uint32_t VecSize, uint32_t ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector) { +Ref OpDispatchBuilder::VBLENDOpImpl(IR::OpSize VecSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector) { const std::array Sources {Src1, Src2}; Ref Result = ZeroRegister; @@ -4615,7 +4616,7 @@ Ref OpDispatchBuilder::VBLENDOpImpl(uint32_t VecSize, uint32_t ElementSize, Ref } void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Selector = Op->Src[2].Literal(); @@ -4624,23 +4625,23 @@ void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) { if (Selector == 0) { Ref Result = Is256Bit ? Src1 : _VMov(OpSize::i128Bit, Src1); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } // Only the first four bits of the 8-bit immediate are used, so only check them. if (((Selector & 0b11) == 0b11 && !Is256Bit) || (Selector & 0b1111) == 0b1111) { Ref Result = Is256Bit ? 
Src2 : _VMov(OpSize::i128Bit, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } const auto ZeroRegister = LoadZeroVector(DstSize); Ref Result = VBLENDOpImpl(DstSize, OpSize::i64Bit, Src1, Src2, ZeroRegister, Selector); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Selector = Op->Src[2].Literal(); @@ -4658,12 +4659,12 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { if (Selector == 0) { Ref Result = Is256Bit ? Src1 : _VMov(OpSize::i128Bit, Src1); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } if (Selector == 0xFF && Is256Bit) { Ref Result = Is256Bit ? Src2 : _VMov(OpSize::i128Bit, Src2); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } // The only bits we care about from the 8-bit immediate for 128-bit operations @@ -4671,7 +4672,7 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { // silliness is going on and the upper bits are being set even when they'll // be ignored if ((Selector & 0xF) == 0xF && !Is256Bit) { - StoreResult(FPRClass, Op, _VMov(OpSize::i128Bit, Src2), -1); + StoreResult(FPRClass, Op, _VMov(OpSize::i128Bit, Src2), OpSize::iInvalid); return; } @@ -4680,11 +4681,11 @@ void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) { if (!Is256Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE; const auto Selector = Op->Src[2].Literal(); @@ -4693,12 +4694,12 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) { if (Selector == 0) { Ref Result = Is128Bit ? _VMov(OpSize::i128Bit, Src1) : Src1; - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } if (Selector == 0xFF) { Ref Result = Is128Bit ? _VMov(OpSize::i128Bit, Src2) : Src2; - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); return; } @@ -4712,11 +4713,11 @@ void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) { if (Is128Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VZEROOp(OpcodeArgs) { - const auto DstSize = GetDstSize(Op); + const auto DstSize = OpSizeFromDst(Op); const auto IsVZEROALL = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto NumRegs = CTX->Config.Is64BitMode ? 
16U : 8U; @@ -4740,15 +4741,15 @@ void OpDispatchBuilder::VZEROOp(OpcodeArgs) { } } -void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, size_t ElementSize) { - const auto DstSize = GetDstSize(Op); +void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, IR::OpSize ElementSize) { + const auto DstSize = OpSizeFromDst(Op); const auto Is256Bit = DstSize == Core::CPUState::XMM_AVX_REG_SIZE; const auto Selector = Op->Src[1].Literal() & 0xFF; Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Result = LoadZeroVector(DstSize); - if (ElementSize == 8) { + if (ElementSize == OpSize::i64Bit) { Result = _VInsElement(DstSize, ElementSize, 0, Selector & 0b0001, Result, Src); Result = _VInsElement(DstSize, ElementSize, 1, (Selector & 0b0010) >> 1, Result, Src); @@ -4770,10 +4771,10 @@ void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, size_t ElementSize) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } -Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref Src, Ref Indices) { +Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, IR::OpSize ElementSize, Ref Src, Ref Indices) { // NOTE: See implementation of VPERMD for the gist of what we do to make this work. // // The only difference here is that we need to add 16 to the upper lane @@ -4809,7 +4810,7 @@ Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref if (Is256Bit) { const auto ZeroRegister = LoadZeroVector(DstSize); - Ref Vector16 = _VInsElement(DstSize, OpSize::i128Bit, 1, 0, ZeroRegister, _VectorImm(DstSize, 1, 16)); + Ref Vector16 = _VInsElement(DstSize, OpSize::i128Bit, 1, 0, ZeroRegister, _VectorImm(DstSize, OpSize::i8Bit, 16)); Ref IndexOffsets = _VAdd(DstSize, OpSize::i8Bit, VectorConst, Vector16); FinalIndices = _VAdd(DstSize, OpSize::i8Bit, IndexOffsets, ShiftedIndices); @@ -4820,13 +4821,13 @@ Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, size_t ElementSize, Ref return _VTBL1(DstSize, Src, FinalIndices); } -template +template void OpDispatchBuilder::VPERMILRegOp(OpcodeArgs) { Ref Src = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); Ref Indices = LoadSource(FPRClass, Op, Op->Src[1], Op->Flags); Ref Result = VPERMILRegOpImpl(OpSizeFromDst(Op), ElementSize, Src, Indices); - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } template void OpDispatchBuilder::VPERMILRegOp(OpcodeArgs); @@ -4846,7 +4847,7 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask // // So, we specify Src2 as having an alignment of 1 to indicate this. 
Ref Src1 = LoadSource_WithOpSize(FPRClass, Op, Op->Dest, OpSize::i128Bit, Op->Flags); - Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSize::i128Bit, Op->Flags, {.Align = 1}); + Ref Src2 = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], OpSize::i128Bit, Op->Flags, {.Align = OpSize::i8Bit}); Ref IntermediateResult {}; if (IsExplicit) { @@ -4886,10 +4887,10 @@ void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask const auto ElementSize = 1U << (Control & 1); const auto NumElements = 16U >> (Control & 1); - Ref Result = LoadZeroVector(Core::CPUState::XMM_SSE_REG_SIZE); + Ref Result = LoadZeroVector(OpSize::i128Bit); for (uint32_t i = 0; i < NumElements; i++) { Ref SignBit = _Sbfe(OpSize::i64Bit, 1, i, IntermediateResult); - Result = _VInsGPR(Core::CPUState::XMM_SSE_REG_SIZE, ElementSize, i, Result, SignBit); + Result = _VInsGPR(OpSize::i128Bit, IR::SizeToOpSize(ElementSize), i, Result, SignBit); } StoreXMMRegister(0, Result); } else { @@ -4933,7 +4934,7 @@ void OpDispatchBuilder::VPCMPISTRMOp(OpcodeArgs) { } void OpDispatchBuilder::VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) { - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); const auto Is256Bit = Size == Core::CPUState::XMM_AVX_REG_SIZE; const OpSize ElementSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit; @@ -4958,11 +4959,11 @@ void OpDispatchBuilder::VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Sr if (!Is256Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } void OpDispatchBuilder::VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) { - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); const auto Is256Bit = Size == Core::CPUState::XMM_AVX_REG_SIZE; const OpSize ElementSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit; @@ -4992,7 +4993,7 @@ void OpDispatchBuilder::VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, if (!Is256Bit) { Result = _VMov(OpSize::i128Bit, Result); } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); } OpDispatchBuilder::RefVSIB OpDispatchBuilder::LoadVSIB(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags) { @@ -5022,7 +5023,7 @@ template void OpDispatchBuilder::VPGATHER(OpcodeArgs) { LOGMAN_THROW_A_FMT(AddrElementSize == OpSize::i32Bit || AddrElementSize == OpSize::i64Bit, "Unknown address element size"); - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE; ///< Element size is determined by W flag. @@ -5100,11 +5101,11 @@ void OpDispatchBuilder::VPGATHER(OpcodeArgs) { } } - StoreResult(FPRClass, Op, Result, -1); + StoreResult(FPRClass, Op, Result, OpSize::iInvalid); ///< Assume non-faulting behaviour and clear the mask register. 
auto Zero = LoadZeroVector(Size); - StoreResult_WithOpSize(FPRClass, Op, Op->Src[1], Zero, Size, -1); + StoreResult_WithOpSize(FPRClass, Op, Op->Src[1], Zero, Size, OpSize::iInvalid); } template void OpDispatchBuilder::VPGATHER(OpcodeArgs); diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp index b162e4d2f8..8731dd95f6 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp @@ -60,13 +60,13 @@ void OpDispatchBuilder::SetX87Top(Ref Value) { } // Float LoaD operation with memory operand -void OpDispatchBuilder::FLD(OpcodeArgs, size_t Width) { - size_t ReadWidth = (Width == 80) ? 16 : Width / 8; +void OpDispatchBuilder::FLD(OpcodeArgs, IR::OpSize Width) { + const auto ReadWidth = (Width == OpSize::f80Bit) ? OpSize::i128Bit : Width; Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ReadWidth, Op->Flags); Ref ConvertedData = Data; // Convert to 80bit float - if (Width == 32 || Width == 64) { + if (Width == OpSize::i32Bit || Width == OpSize::i64Bit) { ConvertedData = _F80CVTTo(Data, ReadWidth); } _PushStack(ConvertedData, Data, ReadWidth, true); @@ -86,7 +86,7 @@ void OpDispatchBuilder::FBLD(OpcodeArgs) { void OpDispatchBuilder::FBSTP(OpcodeArgs) { Ref converted = _F80BCDStore(_ReadStackValue(0)); - StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, 10, 1); + StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, OpSize::f80Bit, OpSize::i8Bit); _PopStackDestroy(); } @@ -97,12 +97,12 @@ void OpDispatchBuilder::FLD_Const(OpcodeArgs, NamedVectorConstant Constant) { } void OpDispatchBuilder::FILD(OpcodeArgs) { - size_t ReadWidth = GetSrcSize(Op); + const auto ReadWidth = OpSizeFromSrc(Op); // Read from memory Ref Data = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], ReadWidth, Op->Flags); // Sign extend to 64bits - if (ReadWidth != 8) { + if (ReadWidth != OpSize::i64Bit) { Data = _Sbfe(OpSize::i64Bit, ReadWidth * 8, 0, Data); } @@ -128,9 +128,9 @@ void OpDispatchBuilder::FILD(OpcodeArgs) { _PushStack(ConvertedData, Data, ReadWidth, false); } -void OpDispatchBuilder::FST(OpcodeArgs, size_t Width) { +void OpDispatchBuilder::FST(OpcodeArgs, IR::OpSize Width) { Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); - _StoreStackMemory(Mem, OpSize::i128Bit, true, Width / 8); + _StoreStackMemory(Mem, OpSize::i128Bit, true, Width); if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) { _PopStackDestroy(); } @@ -149,18 +149,18 @@ void OpDispatchBuilder::FSTToStack(OpcodeArgs) { // Store integer to memory (possibly with truncation) void OpDispatchBuilder::FIST(OpcodeArgs, bool Truncate) { - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Data = _ReadStackValue(0); Data = _F80CVTInt(Size, Data, Truncate); - StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Data, Size, 1); + StoreResult_WithOpSize(GPRClass, Op, Op->Dest, Data, Size, OpSize::i8Bit); if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) { _PopStackDestroy(); } } -void OpDispatchBuilder::FADD(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) { +void OpDispatchBuilder::FADD(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) { if (Op->Src[0].IsNone()) { // Implicit argument case auto Offset = Op->OP & 7; auto St0 = 0; @@ -175,22 +175,22 @@ void OpDispatchBuilder::FADD(OpcodeArgs, size_t Width, bool Integer, OpDispatchB return; } - LOGMAN_THROW_A_FMT(Width != 80, 
"No 80-bit floats from memory"); + LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory"); // We have one memory argument Ref Arg {}; if (Integer) { Arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - Arg = _F80CVTToInt(Arg, Width / 8); + Arg = _F80CVTToInt(Arg, Width); } else { Arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Arg = _F80CVTTo(Arg, Width / 8); + Arg = _F80CVTTo(Arg, Width); } // top of stack is at offset zero _F80AddValue(0, Arg); } -void OpDispatchBuilder::FMUL(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) { +void OpDispatchBuilder::FMUL(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) { if (Op->Src[0].IsNone()) { // Implicit argument case auto offset = Op->OP & 7; auto st0 = 0; @@ -205,15 +205,15 @@ void OpDispatchBuilder::FMUL(OpcodeArgs, size_t Width, bool Integer, OpDispatchB return; } - LOGMAN_THROW_A_FMT(Width != 80, "No 80-bit floats from memory"); + LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory"); // We have one memory argument Ref arg {}; if (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - arg = _F80CVTToInt(arg, Width / 8); + arg = _F80CVTToInt(arg, Width); } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - arg = _F80CVTTo(arg, Width / 8); + arg = _F80CVTTo(arg, Width); } // top of stack is at offset zero @@ -224,7 +224,7 @@ void OpDispatchBuilder::FMUL(OpcodeArgs, size_t Width, bool Integer, OpDispatchB } } -void OpDispatchBuilder::FDIV(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) { +void OpDispatchBuilder::FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) { if (Op->Src[0].IsNone()) { const auto Offset = Op->OP & 7; const auto St0 = 0; @@ -242,15 +242,15 @@ void OpDispatchBuilder::FDIV(OpcodeArgs, size_t Width, bool Integer, bool Revers return; } - LOGMAN_THROW_A_FMT(Width != 80, "No 80-bit floats from memory"); + LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory"); // We have one memory argument Ref arg {}; if (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - arg = _F80CVTToInt(arg, Width / 8); + arg = _F80CVTToInt(arg, Width); } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - arg = _F80CVTTo(arg, Width / 8); + arg = _F80CVTTo(arg, Width); } // top of stack is at offset zero @@ -265,7 +265,7 @@ void OpDispatchBuilder::FDIV(OpcodeArgs, size_t Width, bool Integer, bool Revers } } -void OpDispatchBuilder::FSUB(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) { +void OpDispatchBuilder::FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) { if (Op->Src[0].IsNone()) { const auto Offset = Op->OP & 7; const auto St0 = 0; @@ -283,15 +283,15 @@ void OpDispatchBuilder::FSUB(OpcodeArgs, size_t Width, bool Integer, bool Revers return; } - LOGMAN_THROW_A_FMT(Width != 80, "No 80-bit floats from memory"); + LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory"); // We have one memory argument Ref Arg {}; if (Integer) { Arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - Arg = _F80CVTToInt(Arg, Width / 8); + Arg = _F80CVTToInt(Arg, Width); } else { Arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - Arg = _F80CVTTo(Arg, Width / 8); + Arg = _F80CVTTo(Arg, Width); } // top of stack is at offset zero @@ -342,7 +342,7 @@ void 
OpDispatchBuilder::X87FNSTENV(OpcodeArgs) { // Before we store anything we need to sync our stack to the registers. _SyncStackToSlow(); - auto Size = GetDstSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); @@ -351,33 +351,33 @@ void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) { _StoreMem(GPRClass, Size, Mem, FCW, Size); } - { _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, 1); } + { _StoreMem(GPRClass, Size, ReconstructFSW_Helper(), Mem, _Constant(Size * 1), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } auto ZeroConst = _Constant(0); { // FTW - _StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, 1); + _StoreMem(GPRClass, Size, GetX87FTW_Helper(), Mem, _Constant(Size * 2), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } { // Instruction Offset - _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 3), Size, MEM_OFFSET_SXTX, 1); + _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 3), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } { // Instruction CS selector (+ Opcode) - _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 4), Size, MEM_OFFSET_SXTX, 1); + _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 4), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } { // Data pointer offset - _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 5), Size, MEM_OFFSET_SXTX, 1); + _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 5), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } { // Data pointer selector - _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 6), Size, MEM_OFFSET_SXTX, 1); + _StoreMem(GPRClass, Size, ZeroConst, Mem, _Constant(Size * 6), Size, MEM_OFFSET_SXTX, OpSize::i8Bit); } } @@ -400,7 +400,7 @@ Ref OpDispatchBuilder::ReconstructX87StateFromFSW_Helper(Ref FSW) { void OpDispatchBuilder::X87LDENV(OpcodeArgs) { _StackForceSlow(); - auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Mem = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.LoadData = false}); Mem = AppendSegmentOffset(Mem, Op->Flags); @@ -439,7 +439,7 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { // 2 bytes : Opcode // 4 bytes : data pointer offset // 4 bytes : data pointer selector - const auto Size = GetDstSize(Op); + const auto Size = OpSizeFromDst(Op); Ref Mem = MakeSegmentAddress(Op, Op->Dest); Ref Top = GetX87Top(); { @@ -478,7 +478,7 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { auto OneConst = _Constant(1); auto SevenConst = _Constant(7); - size_t LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit; + const auto LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit; for (int i = 0; i < 7; ++i) { Ref data = _LoadContextIndexed(Top, LoadSize, MMBaseOffset(), OpSize::i128Bit, FPRClass); if (ReducedPrecisionMode) { @@ -506,7 +506,7 @@ void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) { void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { _StackForceSlow(); - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Mem = MakeSegmentAddress(Op, Op->Src[0]); auto NewFCW = _LoadMem(GPRClass, OpSize::i16Bit, Mem, OpSize::i16Bit); @@ -536,7 +536,7 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { auto high = _Constant(0xFFFF); Ref Mask = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, low); Mask = _VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, Mask, high); - size_t StoreSize = ReducedPrecisionMode ? 
OpSize::i64Bit : OpSize::i128Bit; + const auto StoreSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit; for (int i = 0; i < 7; ++i) { Ref Reg = _LoadMem(FPRClass, OpSize::i128Bit, Mem, _Constant((Size * 7) + (10 * i)), OpSize::i8Bit, MEM_OFFSET_SXTX, 1); // Mask off the top bits @@ -566,7 +566,7 @@ void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) { // Load / Store Control Word void OpDispatchBuilder::X87FSTCW(OpcodeArgs) { auto FCW = _LoadContext(OpSize::i16Bit, GPRClass, offsetof(FEXCore::Core::CPUState, FCW)); - StoreResult(GPRClass, Op, FCW, -1); + StoreResult(GPRClass, Op, FCW, OpSize::iInvalid); } void OpDispatchBuilder::X87FLDCW(OpcodeArgs) { @@ -598,7 +598,7 @@ void OpDispatchBuilder::X87FYL2X(OpcodeArgs, bool IsFYL2XP1) { _F80FYL2XStack(); } -void OpDispatchBuilder::FCOMI(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) { +void OpDispatchBuilder::FCOMI(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) { Ref arg {}; Ref b {}; @@ -609,13 +609,13 @@ void OpDispatchBuilder::FCOMI(OpcodeArgs, size_t Width, bool Integer, OpDispatch Res = _F80CmpStack(Offset); } else { // Memory arg - if (Width == 16 || Width == 32 || Width == 64) { + if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) { if (Integer) { arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags); - b = _F80CVTToInt(arg, Width / 8); + b = _F80CVTToInt(arg, Width); } else { arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags); - b = _F80CVTTo(arg, Width / 8); + b = _F80CVTTo(arg, Width); } } Res = _F80CmpValue(b); @@ -722,7 +722,7 @@ Ref OpDispatchBuilder::ReconstructFSW_Helper(Ref T) { void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) { Ref TopValue = _SyncStackToSlow(); Ref StatusWord = ReconstructFSW_Helper(TopValue); - StoreResult(GPRClass, Op, StatusWord, -1); + StoreResult(GPRClass, Op, StatusWord, OpSize::iInvalid); } void OpDispatchBuilder::FNINIT(OpcodeArgs) { @@ -834,8 +834,8 @@ void OpDispatchBuilder::X87FXTRACT(OpcodeArgs) { _PopStackDestroy(); auto Exp = _F80XTRACT_EXP(Top); auto Sig = _F80XTRACT_SIG(Top); - _PushStack(Exp, Exp, 80, true); - _PushStack(Sig, Sig, 80, true); + _PushStack(Exp, Exp, OpSize::f80Bit, true); + _PushStack(Sig, Sig, OpSize::f80Bit, true); } } // namespace FEXCore::IR diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp index 334f25e20b..4537132e3e 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp @@ -26,7 +26,7 @@ class OrderedNode; void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) { _StackForceSlow(); - const auto Size = GetSrcSize(Op); + const auto Size = OpSizeFromSrc(Op); Ref Mem = MakeSegmentAddress(Op, Op->Src[0]); auto NewFCW = _LoadMem(GPRClass, OpSize::i16Bit, Mem, OpSize::i16Bit); @@ -58,14 +58,14 @@ void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) { // F64 ops // Float load op with memory operand -void OpDispatchBuilder::FLDF64(OpcodeArgs, size_t Width) { - size_t ReadWidth = (Width == 80) ? 16 : Width / 8; +void OpDispatchBuilder::FLDF64(OpcodeArgs, IR::OpSize Width) { + const auto ReadWidth = (Width == OpSize::f80Bit) ? 
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
index 334f25e20b..4537132e3e 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
@@ -26,7 +26,7 @@ class OrderedNode;

 void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) {
   _StackForceSlow();
-  const auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref Mem = MakeSegmentAddress(Op, Op->Src[0]);

   auto NewFCW = _LoadMem(GPRClass, OpSize::i16Bit, Mem, OpSize::i16Bit);
@@ -58,14 +58,14 @@ void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) {

 // F64 ops
 // Float load op with memory operand
-void OpDispatchBuilder::FLDF64(OpcodeArgs, size_t Width) {
-  size_t ReadWidth = (Width == 80) ? 16 : Width / 8;
+void OpDispatchBuilder::FLDF64(OpcodeArgs, IR::OpSize Width) {
+  const auto ReadWidth = (Width == OpSize::f80Bit) ? OpSize::i128Bit : Width;

   Ref Data = LoadSource_WithOpSize(FPRClass, Op, Op->Src[0], ReadWidth, Op->Flags);
   // Convert to 64bit float
   Ref ConvertedData = Data;
-  if (Width == 32) {
+  if (Width == OpSize::i32Bit) {
     ConvertedData = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Data);
-  } else if (Width == 80) {
+  } else if (Width == OpSize::f80Bit) {
     ConvertedData = _F80CVT(OpSize::i64Bit, Data);
   }
   _PushStack(ConvertedData, Data, ReadWidth, true);
@@ -82,7 +82,7 @@ void OpDispatchBuilder::FBLDF64(OpcodeArgs) {

 void OpDispatchBuilder::FBSTPF64(OpcodeArgs) {
   Ref converted = _F80CVTTo(_ReadStackValue(0), OpSize::i64Bit);
   converted = _F80BCDStore(converted);
-  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, 10, 1);
+  StoreResult_WithOpSize(FPRClass, Op, Op->Dest, converted, OpSize::f80Bit, OpSize::i8Bit);
   _PopStackDestroy();
 }

@@ -92,20 +92,20 @@ void OpDispatchBuilder::FLDF64_Const(OpcodeArgs, uint64_t Num) {
 }

 void OpDispatchBuilder::FILDF64(OpcodeArgs) {
-  size_t ReadWidth = GetSrcSize(Op);
+  const auto ReadWidth = OpSizeFromSrc(Op);
   // Read from memory
   Ref Data = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], ReadWidth, Op->Flags);
   if (ReadWidth == OpSize::i16Bit) {
     Data = _Sbfe(OpSize::i64Bit, ReadWidth * 8, 0, Data);
   }
-  auto ConvertedData = _Float_FromGPR_S(OpSize::i64Bit, ReadWidth == 4 ? OpSize::i32Bit : OpSize::i64Bit, Data);
+  auto ConvertedData = _Float_FromGPR_S(OpSize::i64Bit, ReadWidth == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit, Data);
   _PushStack(ConvertedData, Data, ReadWidth, false);
 }

-void OpDispatchBuilder::FSTF64(OpcodeArgs, size_t Width) {
+void OpDispatchBuilder::FSTF64(OpcodeArgs, IR::OpSize Width) {
   Ref Mem = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.LoadData = false});
-  _StoreStackMemory(Mem, OpSize::i64Bit, true, Width / 8);
+  _StoreStackMemory(Mem, OpSize::i64Bit, true, Width);

   if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
     _PopStackDestroy();
@@ -113,7 +113,7 @@
 }

 void OpDispatchBuilder::FISTF64(OpcodeArgs, bool Truncate) {
-  auto Size = GetSrcSize(Op);
+  const auto Size = OpSizeFromSrc(Op);
   Ref data = _ReadStackValue(0);

   if (Truncate) {
@@ -128,7 +128,7 @@
 }

-void OpDispatchBuilder::FADDF64(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
+void OpDispatchBuilder::FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
   if (Op->Src[0].IsNone()) { // Implicit argument case
     auto Offset = Op->OP & 7;
     auto St0 = 0;
@@ -148,14 +148,14 @@ void OpDispatchBuilder::FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispat

   if (Integer) {
     arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
-    if (Width == 16) {
+    if (Width == OpSize::i16Bit) {
       arg = _Sbfe(OpSize::i64Bit, 16, 0, arg);
     }
-    arg = _Float_FromGPR_S(OpSize::i64Bit, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, arg);
-  } else if (Width == 32) {
+    arg = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, arg);
+  } else if (Width == OpSize::i32Bit) {
     arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
     arg = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, arg);
-  } else if (Width == 64) {
+  } else if (Width == OpSize::i64Bit) {
     arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   }
@@ -164,7 +164,7 @@ void OpDispatchBuilder::FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispat
 }

 // FIXME: following is very similar to FADDF64
-void OpDispatchBuilder::FMULF64(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
+void OpDispatchBuilder::FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
   if (Op->Src[0].IsNone()) { // Implicit argument case
     auto offset = Op->OP & 7;
     auto st0 = 0;
@@ -184,14 +184,14 @@ void OpDispatchBuilder::FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispat

   if (Integer) {
     arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
-    if (Width == 16) {
+    if (Width == OpSize::i16Bit) {
       arg = _Sbfe(OpSize::i64Bit, 16, 0, arg);
     }
-    arg = _Float_FromGPR_S(8, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, arg);
-  } else if (Width == 32) {
+    arg = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, arg);
+  } else if (Width == OpSize::i32Bit) {
     arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
     arg = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, arg);
-  } else if (Width == 64) {
+  } else if (Width == OpSize::i64Bit) {
     arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
   }
@@ -203,7 +203,7 @@ void OpDispatchBuilder::FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispat
   }
 }

-void OpDispatchBuilder::FDIVF64(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
+void OpDispatchBuilder::FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
   if (Op->Src[0].IsNone()) {
     const auto offset = Op->OP & 7;
     const auto st0 = 0;
@@ -231,17 +231,17 @@ void OpDispatchBuilder::FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Rev
   // We have one memory argument
   Ref Arg {};

-  if (Width == 16 || Width == 32 || Width == 64) {
+  if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
     if (Integer) {
       Arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
-      if (Width == 16) {
+      if (Width == OpSize::i16Bit) {
         Arg = _Sbfe(OpSize::i64Bit, 16, 0, Arg);
       }
-      Arg = _Float_FromGPR_S(8, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, Arg);
-    } else if (Width == 32) {
+      Arg = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, Arg);
+    } else if (Width == OpSize::i32Bit) {
       Arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
       Arg = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Arg);
-    } else if (Width == 64) {
+    } else if (Width == OpSize::i64Bit) {
       Arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
     }
   }
@@ -258,7 +258,7 @@ void OpDispatchBuilder::FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Rev
   }
 }

-void OpDispatchBuilder::FSUBF64(OpcodeArgs, size_t Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
+void OpDispatchBuilder::FSUBF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
   if (Op->Src[0].IsNone()) {
     const auto Offset = Op->OP & 7;
     const auto St0 = 0;
@@ -286,17 +286,17 @@ void OpDispatchBuilder::FSUBF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Rev
   // We have one memory argument
   Ref arg {};

-  if (Width == 16 || Width == 32 || Width == 64) {
+  if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
     if (Integer) {
       arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
-      if (Width == 16) {
+      if (Width == OpSize::i16Bit) {
         arg = _Sbfe(OpSize::i64Bit, 16, 0, arg);
       }
-      arg = _Float_FromGPR_S(8, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, arg);
-    } else if (Width == 32) {
+      arg = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, arg);
+    } else if (Width == OpSize::i32Bit) {
       arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
       arg = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, arg);
-    } else if (Width == 64) {
+    } else if (Width == OpSize::i64Bit) {
       arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
     }
   }
@@ -323,7 +323,7 @@ void OpDispatchBuilder::FTSTF64(OpcodeArgs) {
   ConvertNZCVToX87();
 }

-void OpDispatchBuilder::FCOMIF64(OpcodeArgs, size_t Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) {
+void OpDispatchBuilder::FCOMIF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) {
   Ref arg {};
   Ref b {};
@@ -333,17 +333,17 @@ void OpDispatchBuilder::FCOMIF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispa
     b = _ReadStackValue(offset);
   } else {
     // Memory arg
-    if (Width == 16 || Width == 32 || Width == 64) {
+    if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
       if (Integer) {
         arg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
-        if (Width == 16) {
+        if (Width == OpSize::i16Bit) {
          arg = _Sbfe(OpSize::i64Bit, 16, 0, arg);
        }
-        b = _Float_FromGPR_S(8, Width == 64 ? OpSize::i64Bit : OpSize::i32Bit, arg);
-      } else if (Width == 32) {
+        b = _Float_FromGPR_S(OpSize::i64Bit, Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit, arg);
+      } else if (Width == OpSize::i32Bit) {
         arg = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
         b = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, arg);
-      } else if (Width == 64) {
+      } else if (Width == OpSize::i64Bit) {
         b = LoadSource(FPRClass, Op, Op->Src[0], Op->Flags);
       }
     }
@@ -400,7 +400,7 @@ void OpDispatchBuilder::X87FXTRACTF64(OpcodeArgs) {
   Ref Exp = _NZCVSelectV(OpSize::i64Bit, {COND_EQ}, ExpZV, ExpNZV);

   _PopStackDestroy();
-  _PushStack(Exp, Exp, 64, true);
-  _PushStack(Sig, Sig, 64, true);
+  _PushStack(Exp, Exp, OpSize::i64Bit, true);
+  _PushStack(Sig, Sig, OpSize::i64Bit, true);
 }
 } // namespace FEXCore::IR
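One pitfall this migration exposes, and which the FCOMIF64 hunk above had to fix along with its siblings: the old code compared Width against raw bit counts (16, 32, 64), but an IR::OpSize enumerator holds a byte count, so a leftover comparison such as Width == 64 compiles yet can never match. A standalone sketch (not FEX code) of that failure mode:

    #include <cstdint>

    // Subset of FEXCore::IR::OpSize; enumerators are byte counts, not bit counts.
    enum OpSize : uint8_t { i16Bit = 2, i32Bit = 4, i64Bit = 8 };

    constexpr bool IsWide(OpSize Width) {
      // Wrong: `Width == 64` is always false here, because i64Bit is 8.
      // Right: compare against the enumerator.
      return Width == OpSize::i64Bit;
    }

    static_assert(IsWide(OpSize::i64Bit), "matches the 64-bit operand case");
    static_assert(!IsWide(OpSize::i32Bit), "and nothing else");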
diff --git a/FEXCore/Source/Interface/IR/IR.h b/FEXCore/Source/Interface/IR/IR.h
index edf6ea3c64..a823ff7ccd 100644
--- a/FEXCore/Source/Interface/IR/IR.h
+++ b/FEXCore/Source/Interface/IR/IR.h
@@ -553,8 +553,10 @@ enum OpSize : uint8_t {
   i16Bit = 2,
   i32Bit = 4,
   i64Bit = 8,
+  f80Bit = 10,
   i128Bit = 16,
   i256Bit = 32,
+  iInvalid = 0xFF,
 };

 enum class FloatCompareOp : uint8_t {
@@ -582,12 +584,37 @@ static inline OpSize SizeToOpSize(uint8_t Size) {
   case 2: return OpSize::i16Bit;
   case 4: return OpSize::i32Bit;
   case 8: return OpSize::i64Bit;
+  case 10: return OpSize::f80Bit;
   case 16: return OpSize::i128Bit;
   case 32: return OpSize::i256Bit;
+  case 0xFF: return OpSize::iInvalid;
   default: FEX_UNREACHABLE;
   }
 }

+// This is a no-op conversion and will be eliminated by the compiler.
+static inline uint8_t OpSizeToSize(IR::OpSize Size) {
+  switch (Size) {
+  case OpSize::i8Bit: return 1;
+  case OpSize::i16Bit: return 2;
+  case OpSize::i32Bit: return 4;
+  case OpSize::i64Bit: return 8;
+  case OpSize::f80Bit: return 10;
+  case OpSize::i128Bit: return 16;
+  case OpSize::i256Bit: return 32;
+  case OpSize::iInvalid: return 0xFF;
+  default: FEX_UNREACHABLE;
+  }
+}
+
+static inline OpSize MultiplyOpSize(IR::OpSize Size, uint8_t Multiplier) {
+  return IR::SizeToOpSize(IR::OpSizeToSize(Size) * Multiplier);
+}
+
+static inline OpSize DivideOpSize(IR::OpSize Size, uint8_t Divisor) {
+  return IR::SizeToOpSize(IR::OpSizeToSize(Size) / Divisor);
+}
+
 #define IROP_ENUM
 #define IROP_STRUCTS
 #define IROP_SIZES
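A standalone sketch (not FEX code) of how the helpers added to IR.h above compose. The constexpr bodies here are simplified stand-ins for the exhaustive switches in the diff, but both forms compile down to a plain byte move, and the multiply/divide helpers keep element-size math inside the enum domain, which is what the widening and narrowing vector ops defined later need:

    #include <cstdint>

    enum OpSize : uint8_t { i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8, i128Bit = 16 };

    // Simplified: the real helper is an exhaustive switch over the enumerators.
    constexpr uint8_t OpSizeToSize(OpSize Size) {
      return static_cast<uint8_t>(Size);
    }
    constexpr OpSize MultiplyOpSize(OpSize Size, uint8_t Multiplier) {
      return static_cast<OpSize>(OpSizeToSize(Size) * Multiplier);
    }
    constexpr OpSize DivideOpSize(OpSize Size, uint8_t Divisor) {
      return static_cast<OpSize>(OpSizeToSize(Size) / Divisor);
    }

    // Widening doubles the element size, narrowing halves it.
    static_assert(MultiplyOpSize(OpSize::i16Bit, 2) == OpSize::i32Bit, "VSXTL-style widen");
    static_assert(DivideOpSize(OpSize::i64Bit, 2) == OpSize::i32Bit, "VUShrNI-style narrow");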
diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json
index 1e88d6020f..58373d8829 100644
--- a/FEXCore/Source/Interface/IR/IR.json
+++ b/FEXCore/Source/Interface/IR/IR.json
@@ -255,7 +255,7 @@
         "If ForPair is set, RA will try to allocate the base of a register pair"],
       "DestSize": "8"
     },
-    "FPR = AllocateFPR u8:#RegisterSize, u8:#ElementSize": {
+    "FPR = AllocateFPR OpSize:#RegisterSize, OpSize:#ElementSize": {
       "Desc": ["Like AllocateGPR, but for FPR"],
       "DestSize": "RegisterSize",
      "NumElements": "RegisterSize / ElementSize"
@@ -289,7 +289,7 @@
       "HasSideEffects": true,
       "RAOverride": "0"
     },
-    "CondJump SSA:$Cmp1, SSA:$Cmp2, SSA:$TrueBlock, SSA:$FalseBlock, CondClass:$Cond{{COND_NEQ}}, u8:$CompareSize{0}, i1:$FromNZCV{false}": {
+    "CondJump SSA:$Cmp1, SSA:$Cmp2, SSA:$TrueBlock, SSA:$FalseBlock, CondClass:$Cond{{COND_NEQ}}, OpSize:$CompareSize{OpSize::iInvalid}, i1:$FromNZCV{false}": {
       "HasSideEffects": true,
       "RAOverride": "2"
     },
@@ -360,23 +360,23 @@
     }
   },
   "StaticRA": {
-    "SSA = LoadRegister u32:$Reg, RegisterClass:$Class, u8:#Size": {
+    "SSA = LoadRegister u32:$Reg, RegisterClass:$Class, OpSize:#Size": {
      "Desc": ["Loads a value from the given register",
        "Size must match the execution mode."],
      "DestSize": "Size"
     },
-    "GPR = LoadPF u8:#Size": {
+    "GPR = LoadPF OpSize:#Size": {
       "Desc": ["Loads raw PF"],
       "DestSize": "Size"
     },
-    "GPR = LoadAF u8:#Size": {
+    "GPR = LoadAF OpSize:#Size": {
       "Desc": ["Loads raw AF"],
       "DestSize": "Size"
     },
-    "StoreRegister SSA:$Value, u32:$Reg, RegisterClass:$Class, u8:#Size": {
+    "StoreRegister SSA:$Value, u32:$Reg, RegisterClass:$Class, OpSize:#Size": {
       "HasSideEffects": true,
       "Desc": ["Stores a value to a given register.",
         "Size must match the execution mode."],
@@ -386,20 +386,20 @@
       ]
     },
-    "StorePF GPR:$Value, u8:#Size": {
+    "StorePF GPR:$Value, OpSize:#Size": {
       "HasSideEffects": true,
       "Desc": ["Stores raw PF"],
       "DestSize": "Size"
     },
-    "StoreAF GPR:$Value, u8:#Size": {
+    "StoreAF GPR:$Value, OpSize:#Size": {
       "HasSideEffects": true,
       "Desc": ["Stores raw AF"],
       "DestSize": "Size"
     }
   },
   "Memory": {
-    "SSA = LoadContext u8:#ByteSize, RegisterClass:$Class, u32:$Offset": {
+    "SSA = LoadContext OpSize:#ByteSize, RegisterClass:$Class, u32:$Offset": {
       "Desc": ["Loads a value from the context with offset",
         "Dest = Ctx[Offset]"
       ],
@@ -412,7 +412,7 @@
       ]
     },
-    "SSA:$Value1, SSA:$Value2 = LoadContextPair u8:#ByteSize, RegisterClass:$Class, u32:$Offset": {
+    "SSA:$Value1, SSA:$Value2 = LoadContextPair OpSize:#ByteSize, RegisterClass:$Class, u32:$Offset": {
       "Desc": ["Loads a pair of values from the context with offset",
         "Value0 = Ctx[Offset], Value1 = Ctx[Offset + ByteSize]"
       ],
@@ -426,7 +426,7 @@
       ]
     },
-    "StoreContext u8:#ByteSize, RegisterClass:$Class, SSA:$Value, u32:$Offset": {
+    "StoreContext OpSize:#ByteSize, RegisterClass:$Class, SSA:$Value, u32:$Offset": {
       "Desc": ["Stores a value to the context with offset",
         "Ctx[Offset] = Value",
         "Zero Extends if value's type is too small",
@@ -443,7 +443,7 @@
       ]
     },
-    "StoreContextPair u8:#ByteSize, RegisterClass:$Class, SSA:$Value1, SSA:$Value2, u32:$Offset": {
+    "StoreContextPair OpSize:#ByteSize, RegisterClass:$Class, SSA:$Value1, SSA:$Value2, u32:$Offset": {
       "Desc": ["Stores a pair of values to the context with offset",
         "Ctx[Offset] = Value1, Ctx[Offset + ByteSize] = Value2",
         "Zero Extends if value's type is too small",
@@ -461,7 +461,7 @@
       ]
     },
-    "SSA = LoadContextIndexed GPR:$Index, u8:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
+    "SSA = LoadContextIndexed GPR:$Index, OpSize:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
       "Desc": ["Loads a value from the context with offset and indexed by SSA value",
         "Dest = Ctx[BaseOffset + Index * Stride]"
       ],
@@ -473,7 +473,7 @@
         "!($BaseOffset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $BaseOffset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't LoadContextIndexed to XMM\""
       ]
     },
-    "StoreContextIndexed SSA:$Value, GPR:$Index, u8:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
+    "StoreContextIndexed SSA:$Value, GPR:$Index, OpSize:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
       "HasSideEffects": true,
       "Desc": ["Stores a value to the context with offset and indexed by SSA value",
         "Ctx[BaseOffset + Index * Stride] = Value"
@@ -530,17 +530,17 @@
       "DestSize": "8"
     },
-    "SSA = LoadMem RegisterClass:$Class, u8:#Size, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "SSA = LoadMem RegisterClass:$Class, OpSize:#Size, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "DestSize": "Size"
     },
-    "SSA:$Value1, SSA:$Value2 = LoadMemPair RegisterClass:$Class, u8:#Size, GPR:$Addr, u32:$Offset": {
+    "SSA:$Value1, SSA:$Value2 = LoadMemPair RegisterClass:$Class, OpSize:#Size, GPR:$Addr, u32:$Offset": {
       "Desc": ["Load a pair of values from memory."],
       "DestSize": "Size",
       "HasSideEffects": true
     },
-    "StoreMem RegisterClass:$Class, u8:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "StoreMem RegisterClass:$Class, OpSize:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "Desc": [ "Stores a value to memory.",
         "Zero Extends if value's type is too small",
         "Truncates if value's type is too large"
@@ -552,7 +552,7 @@
       ]
     },
-    "StoreMemPair RegisterClass:$Class, u8:#Size, SSA:$Value1, SSA:$Value2, GPR:$Addr, u32:$Offset": {
+    "StoreMemPair RegisterClass:$Class, OpSize:#Size, SSA:$Value1, SSA:$Value2, GPR:$Addr, u32:$Offset": {
       "Desc": [ "Stores a pair of values to memory.",
         "Zero Extends if value's type is too small",
         "Truncates if value's type is too large"
@@ -565,14 +565,14 @@
       ]
     },
-    "SSA = LoadMemTSO RegisterClass:$Class, u8:#Size, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "SSA = LoadMemTSO RegisterClass:$Class, OpSize:#Size, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "Desc": ["Does an x86 TSO compatible load from memory. Offset must be Invalid()."
       ],
       "DestSize": "Size",
       "DynamicDispatch": true
     },
-    "StoreMemTSO RegisterClass:$Class, u8:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, u8:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "StoreMemTSO RegisterClass:$Class, OpSize:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "Desc": ["Does an x86 TSO compatible store to memory. Offset must be Invalid()."
       ],
       "HasSideEffects": true,
@@ -583,14 +583,14 @@
       ]
     },
-    "FPR = VLoadVectorMasked u8:#RegisterSize, u8:#ElementSize, FPR:$Mask, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "FPR = VLoadVectorMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Mask, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "Desc": ["Does a masked load similar to VPMASKMOV/VMASKMOV where the upper bit of each element",
         "determines whether or not that element will be loaded from memory"],
       "ImplicitFlagClobber": true,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "VStoreVectorMasked u8:#RegisterSize, u8:#ElementSize, FPR:$Mask, FPR:$Data, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
+    "VStoreVectorMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Mask, FPR:$Data, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
       "Desc": ["Does a masked store similar to VPMASKMOV/VMASKMOV where the upper bit of each element",
         "determines whether or not that element will be stored to memory"],
       "HasSideEffects": true,
@@ -598,7 +598,7 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VLoadVectorGatherMasked u8:#RegisterSize, u8:#ElementSize, FPR:$Incoming, FPR:$Mask, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, u8:$VectorIndexElementSize, u8:$OffsetScale, u8:$DataElementOffsetStart, u8:$IndexElementOffsetStart": {
+    "FPR = VLoadVectorGatherMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Incoming, FPR:$Mask, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, u8:$VectorIndexElementSize, u8:$OffsetScale, u8:$DataElementOffsetStart, u8:$IndexElementOffsetStart": {
       "Desc": [ "Does a masked load similar to VPGATHERD* where the upper bit of each element",
         "determines whether or not that element will be loaded from memory.",
@@ -612,7 +612,7 @@
         "$VectorIndexElementSize == OpSize::i32Bit || $VectorIndexElementSize == OpSize::i64Bit"
       ]
     },
-    "FPR = VLoadVectorGatherMaskedQPS u8:#RegisterSize, u8:#ElementSize, FPR:$Incoming, FPR:$MaskReg, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, u8:$OffsetScale": {
+    "FPR = VLoadVectorGatherMaskedQPS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Incoming, FPR:$MaskReg, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, u8:$OffsetScale": {
       "Desc": [ "Does a masked load similar to VPGATHERQPS where the upper bit of each element",
         "determines whether or not that element will be loaded from memory.",
@@ -628,7 +628,7 @@
         "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
       ]
     },
-    "FPR = VLoadVectorElement u8:#RegisterSize, u8:#ElementSize, FPR:$DstSrc, u8:$Index, GPR:$Addr": {
+    "FPR = VLoadVectorElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$DstSrc, u8:$Index, GPR:$Addr": {
       "Desc": ["Does a memory load to a single element of a vector.",
         "Leaves the rest of the vector's data intact.",
         "Matches arm64 ld1 semantics"],
@@ -636,19 +636,19 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "VStoreVectorElement u8:#RegisterSize, u8:#ElementSize, FPR:$Value, u8:$Index, GPR:$Addr": {
+    "VStoreVectorElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Value, u8:$Index, GPR:$Addr": {
       "Desc": ["Does a memory store of a single element of a vector.",
         "Matches arm64 st1 semantics"],
       "HasSideEffects": true,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VBroadcastFromMem u8:#RegisterSize, u8:#ElementSize, GPR:$Address": {
+    "FPR = VBroadcastFromMem OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Address": {
       "Desc": ["Broadcasts an ElementSize value from memory into each element of a vector."],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "GPR = Push u8:#Size, u8:$ValueSize, GPR:$Value, GPR:$Addr": {
+    "GPR = Push OpSize:#Size, OpSize:$ValueSize, GPR:$Value, GPR:$Addr": {
       "Desc": [ "Pushes a value to the address, returning the new pointer after incrementing.",
         "The address is decremented by the value size before the store.",
@@ -666,7 +666,7 @@
       "HasSideEffects": true,
       "TiedSource": 0
     },
-    "GPR:$Addr, GPR:$Value = Pop u8:$Size, GPR:$Addr": {
+    "GPR:$Addr, GPR:$Value = Pop OpSize:$Size, GPR:$Addr": {
       "Desc": [ "Pops a value from the address, updating the new pointer after incrementing.",
         "The address is incremented by the size via an RMW source/destination."
       ],
@@ -674,14 +674,14 @@
       "HasSideEffects": true,
       "DestSize": "Size"
     },
-    "GPR = MemSet i1:$IsAtomic, u8:$Size, GPR:$Prefix, GPR:$Addr, GPR:$Value, GPR:$Length, GPR:$Direction": {
+    "GPR = MemSet i1:$IsAtomic, OpSize:$Size, GPR:$Prefix, GPR:$Addr, GPR:$Value, GPR:$Length, GPR:$Direction": {
       "Desc": ["Duplicates behaviour of x86 STOS repeat",
         "Returns the final address that gets generated without the prefix appended."
       ],
       "HasSideEffects": true,
       "DestSize": "8"
     },
-    "GPR:$DstAddress, GPR:$SrcAddress = MemCpy i1:$IsAtomic, u8:$Size, GPR:$Dest, GPR:$Src, GPR:$Length, GPR:$Direction": {
+    "GPR:$DstAddress, GPR:$SrcAddress = MemCpy i1:$IsAtomic, OpSize:$Size, GPR:$Dest, GPR:$Src, GPR:$Length, GPR:$Direction": {
       "Desc": ["Duplicates behaviour of x86 MOVS repeat",
         "Returns the final addresses after they have been incremented or decremented"
       ],
@@ -728,7 +728,7 @@
       "HasSideEffects": true,
       "DestSize": "8"
     },
-    "VStoreNonTemporal u8:#RegisterSize, FPR:$Value, GPR:$Addr, i8:$Offset": {
+    "VStoreNonTemporal OpSize:#RegisterSize, FPR:$Value, GPR:$Addr, i8:$Offset": {
       "Desc": ["Does a non-temporal memory store of a vector.",
         "Matches arm64 SVE stnt1b semantics.",
         "Specifically weak-memory model ordered to match x86 non-temporal stores."
@@ -740,7 +740,7 @@
         "RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit"
       ]
     },
-    "VStoreNonTemporalPair u8:#RegisterSize, FPR:$ValueLow, FPR:$ValueHigh, GPR:$Addr, i8:$Offset": {
+    "VStoreNonTemporalPair OpSize:#RegisterSize, FPR:$ValueLow, FPR:$ValueHigh, GPR:$Addr, i8:$Offset": {
       "Desc": ["Does a non-temporal memory store of two vector registers.",
         "Matches arm64 stnp semantics.",
         "Specifically weak-memory model ordered to match x86 non-temporal stores."
@@ -752,7 +752,7 @@
         "RegisterSize == FEXCore::IR::OpSize::i128Bit"
       ]
     },
-    "FPR = VLoadNonTemporal u8:#RegisterSize, GPR:$Addr, i8:$Offset": {
+    "FPR = VLoadNonTemporal OpSize:#RegisterSize, GPR:$Addr, i8:$Offset": {
       "Desc": ["Does a non-temporal memory load of a vector.",
         "Matches arm64 SVE ldnt1b semantics.",
         "Specifically weak-memory model ordered to match x86 non-temporal stores."
"Ordering flag result is true if either float input is NaN" @@ -1677,7 +1677,7 @@ } }, "VectorScalar": { - "FPR = VFAddScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFAddScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'add' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1687,7 +1687,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFSubScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFSubScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'sub' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1697,7 +1697,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFMulScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFMulScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'mul' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1707,7 +1707,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFDivScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFDivScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'div' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1717,7 +1717,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFMinScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFMinScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'min' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1731,7 +1731,7 @@ "NumElements": "RegisterSize / ElementSize", "ImplicitFlagClobber": true }, - "FPR = VFMaxScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFMaxScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'max' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the 
destination.", @@ -1745,7 +1745,7 @@ "NumElements": "RegisterSize / ElementSize", "ImplicitFlagClobber": true }, - "FPR = VFSqrtScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFSqrtScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'sqrt' on Vector2, inserting in to Vector1 and storing in to the destination.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1755,7 +1755,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFRSqrtScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFRSqrtScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'rsqrt' on Vector2, inserting in to Vector1 and storing in to the destination.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1765,7 +1765,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFRecpScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFRecpScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'recip' on Vector2, inserting in to Vector1 and storing in to the destination.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1775,7 +1775,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFToFScalarInsert OpSize:#RegisterSize, u8:#DstElementSize, u8:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { + "FPR = VFToFScalarInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'cvt' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1785,7 +1785,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / DstElementSize" }, - "FPR = VSToFVectorInsert OpSize:#RegisterSize, u8:#DstElementSize, u8:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i8:$HasTwoElements, i1:$ZeroUpperBits": { + "FPR = VSToFVectorInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i8:$HasTwoElements, i1:$ZeroUpperBits": { "Desc": ["Does a Vector 'scvt' between Vector1 and Vector2.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1797,7 +1797,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / DstElementSize" }, - "FPR = VSToFGPRInsert OpSize:#RegisterSize, u8:#DstElementSize, u8:$SrcElementSize, FPR:$Vector, GPR:$Src, i1:$ZeroUpperBits": { + "FPR = VSToFGPRInsert OpSize:#RegisterSize, 
OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector, GPR:$Src, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'cvt' between Vector1 and GPR.", "Inserting the result in to the lower element of Vector1 and returning the results.", "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.", @@ -1807,7 +1807,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / DstElementSize" }, - "FPR = VFToIScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, RoundType:$Round, i1:$ZeroUpperBits": { + "FPR = VFToIScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, RoundType:$Round, i1:$ZeroUpperBits": { "Desc": ["Does a scalar round float to integral on Vector2, inserting in to Vector1 and storing in to the destination.", "Rounding mode determined by argument", "Inserting the result in to the lower element of Vector1 and returning the results.", @@ -1818,7 +1818,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPScalarInsert OpSize:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FloatCompareOp:$Op, i1:$ZeroUpperBits": { + "FPR = VFCMPScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FloatCompareOp:$Op, i1:$ZeroUpperBits": { "Desc": ["Does a scalar 'cmp' between Vector1 and Vecto2, inserting in to Vector1 and storing in to the destination.", "Compare op determined by argument", "Inserting the result in to the lower element of Vector1 and returning the results.", @@ -1829,7 +1829,7 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFMLAScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFMLAScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (Vector1 * Vector2) + Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.", @@ -1839,7 +1839,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 0 }, - "FPR = VFMLSScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFMLSScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (Vector1 * Vector2) - Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.", @@ -1849,7 +1849,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 0 }, - "FPR = VFNMLAScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFNMLAScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (-Vector1 * Vector2) + Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.", @@ -1859,7 +1859,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 0 }, - "FPR = VFNMLSScalarInsert u8:#RegisterSize, u8:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFNMLSScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (-Vector1 * Vector2) - Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.", @@ -1871,7 +1871,7 @@ } }, "Vector": { - "FPR = VMov u8:#RegisterSize, FPR:$Source": { + "FPR = 
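All of the VectorScalar definitions above derive their lane counts from the same two OpSize operands: "RegisterSize / ElementSize" for same-width ops, with "(ElementSize << 1)" or "(ElementSize >> 1)" in the divisor for the widening and narrowing ops in the Vector section that follows. A standalone sketch (not FEX code) of that arithmetic, again leaning on unscoped-enum promotion:

    #include <cstdint>

    enum OpSize : uint8_t { i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8, i128Bit = 16 };

    constexpr unsigned NumElements(OpSize RegisterSize, OpSize ElementSize) {
      return RegisterSize / ElementSize; // enums promote to int for the division
    }

    static_assert(NumElements(OpSize::i128Bit, OpSize::i32Bit) == 4, "4 x f32 lanes");
    // A widening op (result lanes are twice ElementSize) halves the lane count:
    static_assert(OpSize::i128Bit / (OpSize::i32Bit << 1) == 2, "2 x f64 result lanes");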
@@ -1871,7 +1871,7 @@
     }
   },
   "Vector": {
-    "FPR = VMov u8:#RegisterSize, FPR:$Source": {
+    "FPR = VMov OpSize:#RegisterSize, FPR:$Source": {
       "Desc" : ["Copy vector register",
         "When Register size is smaller than Source register size,",
         "this op is defined to truncate and zero extend"
@@ -1879,99 +1879,99 @@
       "DestSize": "RegisterSize"
     },

-    "FPR = VectorImm u8:#RegisterSize, u8:#ElementSize, u8:$Immediate, u8:$ShiftAmount{0}": {
+    "FPR = VectorImm OpSize:#RegisterSize, OpSize:#ElementSize, u8:$Immediate, u8:$ShiftAmount{0}": {
       "Desc": ["Generates a vector with each element containing the immediate zexted"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = LoadNamedVectorConstant u8:#RegisterSize, NamedVectorConstant:$Constant": {
+    "FPR = LoadNamedVectorConstant OpSize:#RegisterSize, NamedVectorConstant:$Constant": {
       "Desc": ["Load a named vector constant.",
         "The list of vector constants can be found in "
       ],
       "DestSize": "RegisterSize"
     },
-    "FPR = LoadNamedVectorIndexedConstant u8:#RegisterSize, IndexNamedVectorConstant:$Constant, u32:$Index": {
+    "FPR = LoadNamedVectorIndexedConstant OpSize:#RegisterSize, IndexNamedVectorConstant:$Constant, u32:$Index": {
       "Desc": ["Load a named vector constant from Indexable table.",
         "Index needs to be aligned to the register size.",
         "The list of indexable vector constants can be found in "
       ],
       "DestSize": "RegisterSize"
     },
-    "FPR = VNeg u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VNeg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VNot u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VNot OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VAbs u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VAbs OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a signed integer absolute"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VPopcount u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VPopcount OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a popcount for each element of the register"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VAddV u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VAddV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a horizontal vector add of elements across the source vector",
         "Result is a zero extended scalar"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUMinV u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VUMinV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a horizontal vector unsigned minimum of elements across the source vector",
         "Result is a zero extended scalar"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUMaxV u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VUMaxV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a horizontal vector unsigned maximum of elements across the source vector",
         "Result is a zero extended scalar"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFAbs u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFAbs OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFNeg u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFNeg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFRecp u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFRecp OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFSqrt u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFSqrt OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFRSqrt u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFRSqrt OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VCMPEQZ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VCMPEQZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VCMPGTZ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VCMPGTZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Vector compare signed greater than",
         "Each element is compared, if the result is true then the resulting element is ~0, else zero",
         "Compares the vector against zero"
@@ -1979,7 +1979,7 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VCMPLTZ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VCMPLTZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Vector compare signed less than",
         "Each element is compared, if the result is true then the resulting element is ~0, else zero",
         "Compares the vector against zero"
@@ -1987,40 +1987,40 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VDupElement u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$Index": {
+    "FPR = VDupElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$Index": {
       "Desc": ["Duplicates one element from the source register across the whole register"],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VShlI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VShlI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShrI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VUShrI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShraI u8:#RegisterSize, u8:#ElementSize, FPR:$DestVector, FPR:$Vector, u8:$BitShift": {
+    "FPR = VUShraI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$DestVector, FPR:$Vector, u8:$BitShift": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSShrI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VSShrI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShrNI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VUShrNI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
       "TiedSource": 0,
       "Desc": "Unsigned shift right on each element, then narrows to the next lower element size",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VUShrNI2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$BitShift": {
+    "FPR = VUShrNI2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$BitShift": {
       "TiedSource": 0,
       "Desc": ["Unsigned shift right on each element, then narrows to the next lower element size",
         "Inserts results in to the high elements of the first argument"
@@ -2028,73 +2028,73 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSXTL u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VSXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": "Sign extends elements from the source element size to the next size up",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSXTL2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VSXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Sign extends elements from the source element size to the next size up",
         "Source elements come from the upper half of the register"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSSHLL u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
+    "FPR = VSSHLL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
       "Desc": "Sign extends elements from the source element size to the next size up",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSSHLL2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
+    "FPR = VSSHLL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
       "Desc": ["Sign extends elements from the source element size to the next size up",
         "Source elements come from the upper half of the register"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUXTL u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VUXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": "Zero extends elements from the source element size to the next size up",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUXTL2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VUXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Zero extends elements from the source element size to the next size up",
         "Source elements come from the upper half of the register"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSQXTN u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VSQXTN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSQXTN2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VSQXTN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSQXTNPair u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VSQXTNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "Desc": ["Does both VSQXTN and VSQXTN2 in a combined operation."
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSQXTUN u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VSQXTUN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSQXTUN2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VSQXTUN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSQXTUNPair u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VSQXTUNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "Desc": ["Does both VSQXTUN and VSQXTUN2 in a combined operation."
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize >> 1)"
     },
-    "FPR = VSRSHR u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VSRSHR OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
      "Desc": ["Signed rounding shift right by immediate",
        "Exactly matching Arm64 srshr semantics"
      ],
@@ -2102,7 +2102,7 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSQSHL u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, u8:$BitShift": {
+    "FPR = VSQSHL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
       "Desc": ["Signed saturating shift left by immediate",
         "Exactly matching Arm64 sqshl semantics"
       ],
@@ -2110,14 +2110,14 @@
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VRev32 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VRev32 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc" : ["Reverses elements within each 32-bit word",
         "Available element size: 1byte, 2 byte"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VRev64 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VRev64 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc" : ["Reverses elements within each 64-bit word",
         "Available element size: 1byte, 2 byte, 4 byte"
       ],
@@ -2125,251 +2125,251 @@
       "NumElements": "RegisterSize / ElementSize"
     },

-    "FPR = VAdd u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSub u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VAnd u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VAnd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VAndn u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VAndn OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VOr u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VOr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VXor u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VXor OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUQAdd u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUQAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUQSub u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUQSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSQAdd u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSQAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSQSub u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSQSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VAddP u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VAddP OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "Desc": "Does a horizontal pairwise add of elements across the two source vectors",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VURAvg u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VURAvg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": ["Does an unsigned rounded average",
         "dst_elem = (src1_elem + src2_elem + 1) >> 1"],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUMin u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUMax u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSMin u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSMax u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VZip u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VZip OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VZip2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VZip2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUnZip u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VUnZip OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUnZip2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VUnZip2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VTrn u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VTrn OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VTrn2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VTrn2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFAdd u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFAddP u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
+    "FPR = VFAddP OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
       "Desc": "Does a horizontal pairwise add of elements across the two source vectors with float element types",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFAddV u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": {
+    "FPR = VFAddV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
       "Desc": ["Does a horizontal float vector add of elements across the source vector",
         "Result is a zero extended scalar"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFSub u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFMul u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFMul OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFDiv u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFDiv OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFMin u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VFMax u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VFMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VMul u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VMul OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUMull u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSMull u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": [ "Does a signed integer multiply with extend.",
         "ElementSize is the source size"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUMull2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": "Multiplies the high elements with size extension",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VSMull2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": "Multiplies the high elements with size extension",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUMulH u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUMulH OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": "Wide unsigned multiply returning the high results",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSMulH u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VSMulH OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": "Wide signed multiply returning the high results",
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUABDL u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUABDL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": ["Unsigned Absolute Difference Long"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUABDL2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
+    "FPR = VUABDL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
       "Desc": ["Unsigned Absolute Difference Long",
         "Using the high elements of the source vectors"
       ],
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / (ElementSize << 1)"
     },
-    "FPR = VUShl u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
+    "FPR = VUShl OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShr u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
+    "FPR = VUShr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSShr u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
+    "FPR = VSShr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShlS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VUShlS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShrS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VUShrS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSShrS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VSShrS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShrSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VUShrSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VSShrSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VSShrSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VUShlSWide u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
+    "FPR = VUShlSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VInsElement u8:#RegisterSize, u8:#ElementSize, u8:$DestIdx, u8:$SrcIdx, FPR:$DestVector, FPR:$SrcVector": {
+    "FPR = VInsElement OpSize:#RegisterSize, OpSize:#ElementSize, u8:$DestIdx, u8:$SrcIdx, FPR:$DestVector, FPR:$SrcVector": {
       "TiedSource": 0,
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VInsGPR u8:#RegisterSize, u8:#ElementSize, u8:$DestIdx, FPR:$DestVector, GPR:$Src": {
+    "FPR = VInsGPR OpSize:#RegisterSize, OpSize:#ElementSize, u8:$DestIdx, FPR:$DestVector, GPR:$Src": {
       "DestSize": "RegisterSize",
       "NumElements": "RegisterSize / ElementSize"
     },
-    "FPR = VExtr u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$Index": {
+    "FPR = VExtr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$Index": {
       "Desc": ["Concats two vector registers together and extracts a full width register from the element index",
         "Index is an element index. 
So it is offset by ElementSize argument", "op:", @@ -2380,12 +2380,12 @@ "NumElements": "RegisterSize / ElementSize" }, - "FPR = VCMPEQ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VCMPEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VCMPGT u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VCMPGT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "Desc": ["Vector compare signed greater than", "Each element is compared, if the result is true then the resulting element is ~0, else zero" ], @@ -2393,35 +2393,35 @@ "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPEQ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPNEQ u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPNEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPLT u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPLT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPGT u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPGT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPLE u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPLE OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPORD u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPORD OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFCMPUNO u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2": { + "FPR = VFCMPUNO OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VTBL1 u8:#RegisterSize, FPR:$VectorTable, FPR:$VectorIndices": { + "FPR = VTBL1 OpSize:#RegisterSize, FPR:$VectorTable, FPR:$VectorIndices": { "Desc": ["Does a vector table lookup from one register in to the destination", "Lookup is byte sized per byte element.", "Any index larger than what the registers provide will result in zero for that element", @@ -2430,7 +2430,7 @@ ], "DestSize": "RegisterSize" }, - "FPR = VTBL2 u8:#RegisterSize, FPR:$VectorTable1, FPR:$VectorTable2, FPR:$VectorIndices": { + "FPR = VTBL2 OpSize:#RegisterSize, FPR:$VectorTable1, FPR:$VectorTable2, FPR:$VectorIndices": { "Desc": ["Does a vector table lookup from two registers in to the destination", "Lookup is byte sized per byte element.", "Any index larger than what the registers provide will result in zero for that element", @@ -2440,7 +2440,7 @@ ], "DestSize": "RegisterSize" }, - "FPR = VTBX1 u8:#RegisterSize, FPR:$VectorSrcDst, FPR:$VectorTable, FPR:$VectorIndices": { + "FPR = VTBX1 OpSize:#RegisterSize, FPR:$VectorSrcDst, 
FPR:$VectorTable, FPR:$VectorIndices": { "Desc": ["Does a vector table lookup from one register in to the destination", "Lookup is byte sized per byte element.", "Any index larger than what the registers provide will result in not modifying that element", @@ -2450,7 +2450,7 @@ "TiedSource": 0, "DestSize": "RegisterSize" }, - "FPR = VBSL u8:#RegisterSize, FPR:$VectorMask, FPR:$VectorTrue, FPR:$VectorFalse": { + "FPR = VBSL OpSize:#RegisterSize, FPR:$VectorMask, FPR:$VectorTrue, FPR:$VectorFalse": { "Desc": ["Does a vector bitwise select.", "If the bit in the field is 1 then the corresponding bit is pulled from VectorTrue", "If the bit in the field is 0 then the corresponding bit is pulled from VectorFalse" @@ -2482,11 +2482,11 @@ "DestSize": "4", "JITDispatch": false }, - "FPR = VFCADD u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, u16:$Rotate": { + "FPR = VFCADD OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, u16:$Rotate": { "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = VFMLA u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFMLA OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (Vector1 * Vector2) + Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending." @@ -2495,7 +2495,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 2 }, - "FPR = VFMLS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFMLS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (Vector1 * Vector2) - Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending." @@ -2504,7 +2504,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 2 }, - "FPR = VFNMLA u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFNMLA OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (-Vector1 * Vector2) + Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending." @@ -2513,7 +2513,7 @@ "NumElements": "RegisterSize / ElementSize", "TiedSource": 2 }, - "FPR = VFNMLS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { + "FPR = VFNMLS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": { "Desc": [ "Dest = (-Vector1 * Vector2) - Addend", "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending." 
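For reference, the four fused multiply ops in the hunk above differ only in which input is negated, and their Desc strings pin the per-element semantics down exactly. A minimal scalar sketch of those semantics (illustrative helper names, with double standing in for the element type; this is not FEXCore API):

  // Per-element reference semantics for VFMLA/VFMLS/VFNMLA/VFNMLS as documented above.
  static double RefVFMLA (double V1, double V2, double Addend) { return ( V1 * V2) + Addend; }
  static double RefVFMLS (double V1, double V2, double Addend) { return ( V1 * V2) - Addend; }
  static double RefVFNMLA(double V1, double V2, double Addend) { return (-V1 * V2) + Addend; }
  static double RefVFNMLS(double V1, double V2, double Addend) { return (-V1 * V2) - Addend; }

All four carry "TiedSource": 2, tying the Addend operand to the destination register, which is presumably what lets an ARM backend pick the accumulating FMLA/FMLS forms without an extra register move.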
@@ -2524,7 +2524,7 @@ } }, "Conv": { - "FPR = VCastFromGPR u8:#RegisterSize, u8:#ElementSize, GPR:$Src": { + "FPR = VCastFromGPR OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Src": { "Desc": ["Moves a GPR to a Vector register with zero extension to full length of the register.", "No conversion is done on the data as it moves register files" ], @@ -2532,49 +2532,49 @@ "NumElements": "RegisterSize / ElementSize" }, - "FPR = VDupFromGPR u8:#RegisterSize, u8:#ElementSize, GPR:$Src": { + "FPR = VDupFromGPR OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Src": { "Desc": ["Broadcasts a value in a GPR into each ElementSize-sized element in a vector"], "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = Float_FromGPR_S u8:#DstElementSize, u8:$SrcElementSize, GPR:$Src": { + "FPR = Float_FromGPR_S OpSize:#DstElementSize, OpSize:$SrcElementSize, GPR:$Src": { "Desc": ["Scalar op: Converts signed GPR to Scalar float", "Zeroes the upper bits of the vector register" ], "DestSize": "DstElementSize" }, - "FPR = Float_FToF u8:#DstElementSize, u8:$SrcElementSize, FPR:$Scalar": { + "FPR = Float_FToF OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Scalar": { "Desc": ["Scalar op: Converts float from one size to another", "Zeroes the upper bits of the vector register" ], "DestSize": "DstElementSize" }, - "FPR = Vector_SToF u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": { + "FPR = Vector_SToF OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": { "Desc": "Vector op: Converts signed integer to same size float", "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = Vector_FToS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": { + "FPR = Vector_FToS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": { "Desc": ["Vector op: Converts float to signed integer, rounding towards zero", "Rounding mode determined by host rounding mode" ], "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = Vector_FToZS u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": { + "FPR = Vector_FToZS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": { "Desc": "Vector op: Converts float to signed integer, rounding towards zero", "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = Vector_FToF u8:#RegisterSize, u8:#DestElementSize, FPR:$Vector, u8:$SrcElementSize": { + "FPR = Vector_FToF OpSize:#RegisterSize, OpSize:#DestElementSize, FPR:$Vector, OpSize:$SrcElementSize": { "Desc": "Vector op: Converts float from source element size to destination size (fp32<->fp64)", "DestSize": "RegisterSize", "NumElements": "RegisterSize / DestElementSize" }, - "FPR = VFCVTL2 u8:#RegisterSize, u8:#ElementSize, FPR:$Vector": { + "FPR = VFCVTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": { "Desc": [ "Vector op: Converts float from source element size to destination size (fp32->fp64)", "Selecting from the high half of the register." 
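The Conv hunks above apply the same mechanical migration as the vector ops: every u8 size operand in the IR definition becomes a strongly typed OpSize. As a sketch of what that type plausibly looks like, assuming enumerator values equal to byte widths (consistent with the IR::SizeToOpSize casts elsewhere in this diff and with names such as i8Bit and f80Bit, but not the verbatim FEXCore definition):

  // Assumed shape only. Byte-width-valued enumerators keep expressions
  // such as "RegisterSize / ElementSize" meaningful after an explicit cast.
  enum class OpSize : uint8_t {
    iInvalid = 0,
    i8Bit = 1, i16Bit = 2, i32Bit = 4, i64Bit = 8,
    f80Bit = 10, i128Bit = 16, i256Bit = 32,
  };
  constexpr OpSize SizeToOpSize(uint8_t Size) { return static_cast<OpSize>(Size); }
  constexpr uint8_t OpSizeToSize(OpSize Size) { return static_cast<uint8_t>(Size); }
  // A "NumElements": "RegisterSize / ElementSize" entry then evaluates as:
  constexpr uint8_t NumElements(OpSize RegisterSize, OpSize ElementSize) {
    return OpSizeToSize(RegisterSize) / OpSizeToSize(ElementSize);
  }

The payoff is that a bare integer can no longer be passed where a size is expected, which is exactly the class of mixup the old u8 operands invited.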
@@ -2585,7 +2585,7 @@ "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\"" ] }, - "FPR = VFCVTN2 u8:#RegisterSize, u8:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": { + "FPR = VFCVTN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": { "TiedSource": 0, "Desc": [ "Vector op: Converts float from source element size and inserting in to the high bits.", @@ -2599,14 +2599,14 @@ "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\"" ] }, - "FPR = Vector_FToI u8:#RegisterSize, u8:#ElementSize, FPR:$Vector, RoundType:$Round": { + "FPR = Vector_FToI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, RoundType:$Round": { "Desc": ["Vector op: Rounds float to integral", "Rounding mode determined by argument" ], "DestSize": "RegisterSize", "NumElements": "RegisterSize / ElementSize" }, - "FPR = Vector_F64ToI32 u8:#RegisterSize, FPR:$Vector, RoundType:$Round, i1:$EnsureZeroUpperHalf": { + "FPR = Vector_F64ToI32 OpSize:#RegisterSize, FPR:$Vector, RoundType:$Round, i1:$EnsureZeroUpperHalf": { "Desc": ["Vector op: Rounds 64-bit float to 32-bit integral with round mode", "Matches CVTPD2DQ/CVTTPD2DQ behaviour" ], @@ -2619,19 +2619,19 @@ "Desc": "Does a stage of the inverse mix column transformation", "DestSize": "16" }, - "FPR = VAESEnc u8:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { + "FPR = VAESEnc OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { "Desc": "Does a step of AES encryption", "DestSize": "RegisterSize" }, - "FPR = VAESEncLast u8:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { + "FPR = VAESEncLast OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { "Desc": "Does the last step of AES encryption", "DestSize": "RegisterSize" }, - "FPR = VAESDec u8:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { + "FPR = VAESDec OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { "Desc": "Does a step of AES decryption", "DestSize": "RegisterSize" }, - "FPR = VAESDecLast u8:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { + "FPR = VAESDecLast OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": { "Desc": "Does the last step of AES decryption", "DestSize": "RegisterSize" }, @@ -2647,12 +2647,12 @@ "Desc": "Does vector scalar VSha256U0 instruction", "DestSize": "FEXCore::IR::OpSize::i128Bit" }, - "GPR = CRC32 GPR:$Src1, GPR:$Src2, u8:$SrcSize": { + "GPR = CRC32 GPR:$Src1, GPR:$Src2, OpSize:$SrcSize": { "Desc": ["CRC32 using polynomial 0x1EDC6F41" ], "DestSize": "4" }, - "FPR = PCLMUL u8:#RegisterSize, FPR:$Src1, FPR:$Src2, u8:$Selector": { + "FPR = PCLMUL OpSize:#RegisterSize, FPR:$Src1, FPR:$Src2, u8:$Selector": { "Desc": [ "Performs carryless multiplication of 64-bit elements depending on the selector.", "Selector = 0b00000000: Uses low 64-bit elements from both input vectors", @@ -2747,7 +2747,7 @@ "X87": true, "HasSideEffects": true }, - "PushStack FPR:$X80Src, SSA:$OriginalValue, u8:$LoadSize, i1:$Float": { + "PushStack FPR:$X80Src, SSA:$OriginalValue, OpSize:$LoadSize, i1:$Float": { "Desc": [ "Pushes the provided X80Src source on to the x87 stack.", "Tracks OriginalValue as the original value of X80Src.", @@ -2769,7 +2769,7 @@ "HasSideEffects": true, "X87": true }, - "StoreStackMemory GPR:$Addr, OpSize:$SourceSize, i1:$Float, u8:$StoreSize": { + "StoreStackMemory GPR:$Addr, OpSize:$SourceSize, i1:$Float, OpSize:$StoreSize": { "Desc": [ "Takes the top value off the x87 stack and stores it to memory.", "SourceSize is 128bit for F80 
values, 64-bit for low precision.", @@ -2983,19 +2983,19 @@ "DestSize": "16", "JITDispatch": false }, - "FPR = F80CVT u8:#Size, FPR:$X80Src": { + "FPR = F80CVT OpSize:#Size, FPR:$X80Src": { "DestSize": "Size", "JITDispatch": false }, - "GPR = F80CVTInt u8:#Size, FPR:$X80Src, i1:$Truncate": { + "GPR = F80CVTInt OpSize:#Size, FPR:$X80Src, i1:$Truncate": { "DestSize": "Size", "JITDispatch": false }, - "FPR = F80CVTTo FPR:$X80Src, u8:$SrcSize": { + "FPR = F80CVTTo FPR:$X80Src, OpSize:$SrcSize": { "DestSize": "16", "JITDispatch": false }, - "FPR = F80CVTToInt GPR:$Src, u8:$SrcSize": { + "FPR = F80CVTToInt GPR:$Src, OpSize:$SrcSize": { "DestSize": "16", "JITDispatch": false }, @@ -3111,7 +3111,7 @@ "DestSize": "16", "JITDispatch": false }, - "F80VBSLStack u8:#RegisterSize, FPR:$VectorMask, u8:$SrcStack1, u8:$SrcStack2": { + "F80VBSLStack OpSize:#RegisterSize, FPR:$VectorMask, u8:$SrcStack1, u8:$SrcStack2": { "Desc": [ "Does a vector bitwise select.", "If the bit in the field is 1 then the corresponding bit is pulled from VectorTrue", diff --git a/FEXCore/Source/Interface/IR/IREmitter.h b/FEXCore/Source/Interface/IR/IREmitter.h index 5b0720e613..b826729c64 100644 --- a/FEXCore/Source/Interface/IR/IREmitter.h +++ b/FEXCore/Source/Interface/IR/IREmitter.h @@ -71,10 +71,10 @@ class IREmitter { return _Jump(InvalidNode); } IRPair _CondJump(Ref ssa0, CondClassType cond = {COND_NEQ}) { - return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, GetOpSize(ssa0)); + return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, IR::SizeToOpSize(GetOpSize(ssa0))); } IRPair _CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClassType cond = {COND_NEQ}) { - return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, GetOpSize(ssa0)); + return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, IR::SizeToOpSize(GetOpSize(ssa0))); } // TODO: Work to remove this implicit sized Select implementation. 
IRPair _Select(uint8_t Cond, Ref ssa0, Ref ssa1, Ref ssa2, Ref ssa3, uint8_t CompareSize = 0) { @@ -85,16 +85,17 @@ class IREmitter { return _Select(IR::SizeToOpSize(std::max(4, std::max(GetOpSize(ssa2), GetOpSize(ssa3)))), IR::SizeToOpSize(CompareSize), CondClassType {Cond}, ssa0, ssa1, ssa2, ssa3); } - IRPair _LoadMem(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref ssa0, uint8_t Align = 1) { + IRPair _LoadMem(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref ssa0, IR::OpSize Align = OpSize::i8Bit) { return _LoadMem(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1); } - IRPair _LoadMemTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref ssa0, uint8_t Align = 1) { + IRPair _LoadMemTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref ssa0, IR::OpSize Align = OpSize::i8Bit) { return _LoadMemTSO(Class, Size, ssa0, Invalid(), Align, MEM_OFFSET_SXTX, 1); } - IRPair _StoreMem(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Addr, Ref Value, uint8_t Align = 1) { + IRPair _StoreMem(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref Addr, Ref Value, IR::OpSize Align = OpSize::i8Bit) { return _StoreMem(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1); } - IRPair _StoreMemTSO(FEXCore::IR::RegisterClassType Class, uint8_t Size, Ref Addr, Ref Value, uint8_t Align = 1) { + IRPair + _StoreMemTSO(FEXCore::IR::RegisterClassType Class, IR::OpSize Size, Ref Addr, Ref Value, IR::OpSize Align = OpSize::i8Bit) { return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MEM_OFFSET_SXTX, 1); } Ref Invalid() { diff --git a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp index 29ee822f10..379e725a8f 100644 --- a/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp @@ -534,7 +534,7 @@ void DeadFlagCalculationEliminination::FoldBranch(IREmitter* IREmit, IRListView& IREmit->ReplaceNodeArgument(CodeNode, 0, CurrentIR.GetNode(Prev->Args[0])); IREmit->ReplaceNodeArgument(CodeNode, 1, CurrentIR.GetNode(Prev->Args[1])); Op->FromNZCV = false; - Op->CompareSize = Prev->Size; + Op->CompareSize = IR::SizeToOpSize(Prev->Size); } else { return; } diff --git a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp index b93fcde297..92d86cc120 100644 --- a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp @@ -582,7 +582,7 @@ void ConstrainedRAPass::Run(IREmitter* IREmit_) { if (Reg.Class == FPRFixedClass) { IROp_Header* Header = IR->GetOp(Old); - Copy = IREmit->_VMov(Header->Size, Map(Old)); + Copy = IREmit->_VMov(IR::SizeToOpSize(Header->Size), Map(Old)); } else { Copy = IREmit->_Copy(Map(Old)); } diff --git a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp index e2c2bcdbcb..35e2f1fa7b 100644 --- a/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp @@ -283,7 +283,8 @@ inline void X87StackOptimization::MigrateToSlowPathIf(bool ShouldMigrate) { inline Ref X87StackOptimization::GetTopWithCache_Slow() { if (!TopOffsetCache[0]) { - TopOffsetCache[0] = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, flags) + 
FEXCore::X86State::X87FLAG_TOP_LOC); + TopOffsetCache[0] = + IREmit->_LoadContext(OpSize::i8Bit, GPRClass, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC); } return TopOffsetCache[0]; } @@ -305,31 +306,32 @@ inline Ref X87StackOptimization::GetOffsetTopWithCache_Slow(uint8_t Offset) { inline void X87StackOptimization::SetTopWithCache_Slow(Ref Value) { - IREmit->_StoreContext(1, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, Value, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC); InvalidateTopOffsetCache(); TopOffsetCache[0] = Value; } inline void X87StackOptimization::SetX87ValidTag(Ref Value, bool Valid) { - Ref AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + Ref AbridgedFTW = IREmit->_LoadContext(OpSize::i8Bit, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); Ref RegMask = IREmit->_Lshl(OpSize::i32Bit, GetConstant(1), Value); Ref NewAbridgedFTW = Valid ? IREmit->_Or(OpSize::i32Bit, AbridgedFTW, RegMask) : IREmit->_Andn(OpSize::i32Bit, AbridgedFTW, RegMask); - IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } inline Ref X87StackOptimization::GetX87ValidTag_Slow(uint8_t Offset) { - Ref AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + Ref AbridgedFTW = IREmit->_LoadContext(OpSize::i8Bit, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); return IREmit->_And(OpSize::i32Bit, IREmit->_Lshr(OpSize::i32Bit, AbridgedFTW, GetOffsetTopWithCache_Slow(Offset)), GetConstant(1)); } inline Ref X87StackOptimization::LoadStackValueAtOffset_Slow(uint8_t Offset) { - return IREmit->_LoadContextIndexed(GetOffsetTopWithCache_Slow(Offset), ReducedPrecisionMode ? 8 : 16, MMBaseOffset(), 16, FPRClass); + return IREmit->_LoadContextIndexed(GetOffsetTopWithCache_Slow(Offset), ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit, + MMBaseOffset(), 16, FPRClass); } inline void X87StackOptimization::StoreStackValueAtOffset_Slow(Ref Value, uint8_t Offset, bool SetValid) { OrderedNode* TopOffset = GetOffsetTopWithCache_Slow(Offset); // store - IREmit->_StoreContextIndexed(Value, TopOffset, ReducedPrecisionMode ? 8 : 16, MMBaseOffset(), 16, FPRClass); + IREmit->_StoreContextIndexed(Value, TopOffset, ReducedPrecisionMode ? 
OpSize::i64Bit : OpSize::i128Bit, MMBaseOffset(), 16, FPRClass); // mark it valid // In some cases we might already know it has been previously set as valid so we don't need to do it again if (SetValid) { @@ -378,7 +380,7 @@ void X87StackOptimization::HandleUnop(IROps Op64, bool VFOp64, IROps Op80) { if (ReducedPrecisionMode) { if (VFOp64) { - DeriveOp(Value, Op64, IREmit->_VFSqrt(8, 8, St0)); + DeriveOp(Value, Op64, IREmit->_VFSqrt(OpSize::i64Bit, OpSize::i64Bit, St0)); } else { DeriveOp(Value, Op64, IREmit->_F64SIN(St0)); } @@ -398,10 +400,10 @@ void X87StackOptimization::HandleBinopValue(IROps Op64, bool VFOp64, IROps Op80, Ref Node = {}; if (ReducedPrecisionMode) { if (Reverse) { - DeriveOp(Node, Op64, IREmit->_VFAdd(8, 8, ValueNode, StackNode)); + DeriveOp(Node, Op64, IREmit->_VFAdd(OpSize::i64Bit, OpSize::i64Bit, ValueNode, StackNode)); } else { if (VFOp64) { - DeriveOp(Node, Op64, IREmit->_VFAdd(8, 8, StackNode, ValueNode)); + DeriveOp(Node, Op64, IREmit->_VFAdd(OpSize::i64Bit, OpSize::i64Bit, StackNode, ValueNode)); } else { DeriveOp(Node, Op64, IREmit->_F64FPREM(StackNode, ValueNode)); } @@ -475,13 +477,14 @@ Ref X87StackOptimization::SynchronizeStackValues() { } Ref TopIndex = GetOffsetTopWithCache_Slow(i); if (Valid == StackSlot::VALID) { - IREmit->_StoreContextIndexed(StackMember.StackDataNode, TopIndex, ReducedPrecisionMode ? 8 : 16, MMBaseOffset(), 16, FPRClass); + IREmit->_StoreContextIndexed(StackMember.StackDataNode, TopIndex, ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit, + MMBaseOffset(), 16, FPRClass); } } { // Set valid tags uint8_t Mask = StackData.getValidMask(); if (Mask == 0xff) { - IREmit->_StoreContext(1, GPRClass, GetConstant(Mask), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, GetConstant(Mask), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } else if (Mask != 0) { if (std::popcount(Mask) == 1) { uint8_t BitIdx = __builtin_ctz(Mask); @@ -490,16 +493,16 @@ Ref X87StackOptimization::SynchronizeStackValues() { // perform a rotate right on mask by top auto* TopValue = GetTopWithCache_Slow(); Ref RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue); - Ref AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + Ref AbridgedFTW = IREmit->_LoadContext(OpSize::i8Bit, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); Ref NewAbridgedFTW = IREmit->_Or(OpSize::i32Bit, AbridgedFTW, RotateRight8(Mask, RotAmount)); - IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } } } { // Set invalid tags uint8_t Mask = StackData.getInvalidMask(); if (Mask == 0xff) { - IREmit->_StoreContext(1, GPRClass, GetConstant(0), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, GetConstant(0), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } else if (Mask != 0) { if (std::popcount(Mask)) { uint8_t BitIdx = __builtin_ctz(Mask); @@ -508,9 +511,9 @@ Ref X87StackOptimization::SynchronizeStackValues() { // Same rotate right as above but this time on the invalid mask auto* TopValue = GetTopWithCache_Slow(); Ref RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), TopValue); - Ref AbridgedFTW = IREmit->_LoadContext(1, GPRClass, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + Ref AbridgedFTW = IREmit->_LoadContext(OpSize::i8Bit, GPRClass, 
offsetof(FEXCore::Core::CPUState, AbridgedFTW)); Ref NewAbridgedFTW = IREmit->_Andn(OpSize::i32Bit, AbridgedFTW, RotateRight8(Mask, RotAmount)); - IREmit->_StoreContext(1, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, NewAbridgedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } } } @@ -647,9 +650,9 @@ void X87StackOptimization::Run(IREmitter* Emit) { HandleUnop(OP_F64TAN, false, OP_F80TAN); Ref OneConst {}; if (ReducedPrecisionMode) { - OneConst = IREmit->_VCastFromGPR(8, 8, GetConstant(0x3FF0000000000000)); + OneConst = IREmit->_VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, GetConstant(0x3FF0000000000000)); } else { - OneConst = IREmit->_LoadNamedVectorConstant(16, NamedVectorConstant::NAMED_VECTOR_X87_ONE); + OneConst = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, NamedVectorConstant::NAMED_VECTOR_X87_ONE); } if (SlowPath) { @@ -708,7 +711,7 @@ void X87StackOptimization::Run(IREmitter* Emit) { } } else { // invalidate all if (SlowPath) { - IREmit->_StoreContext(1, GPRClass, GetConstant(0), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); + IREmit->_StoreContext(OpSize::i8Bit, GPRClass, GetConstant(0), offsetof(FEXCore::Core::CPUState, AbridgedFTW)); } else { for (size_t i = 0; i < StackData.size; i++) { StackData.setTagInvalid(i); @@ -790,26 +793,27 @@ void X87StackOptimization::Run(IREmitter* Emit) { // or similar. As long as the source size and dest size are one and the same. // This will avoid any conversions between source and stack element size and conversion back. if (!SlowPath && Value->Source && Value->Source->first == Op->StoreSize && Value->InterpretAsFloat) { - IREmit->_StoreMem(Value->InterpretAsFloat ? FPRClass : GPRClass, Op->StoreSize, AddrNode, Value->Source->second); + IREmit->_StoreMem(Value->InterpretAsFloat ? 
FPRClass : GPRClass, IR::SizeToOpSize(Op->StoreSize), AddrNode, Value->Source->second); } else { if (ReducedPrecisionMode) { switch (Op->StoreSize) { - case 4: { - StackNode = IREmit->_Float_FToF(4, 8, StackNode); - IREmit->_StoreMem(FPRClass, 4, AddrNode, StackNode); + case OpSize::i32Bit: { + StackNode = IREmit->_Float_FToF(OpSize::i32Bit, OpSize::i64Bit, StackNode); + IREmit->_StoreMem(FPRClass, OpSize::i32Bit, AddrNode, StackNode); break; } - case 8: { - IREmit->_StoreMem(FPRClass, 8, AddrNode, StackNode); + case OpSize::i64Bit: { + IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode); break; } - case 10: { - StackNode = IREmit->_F80CVTTo(StackNode, 8); - IREmit->_StoreMem(FPRClass, 8, AddrNode, StackNode); - auto Upper = IREmit->_VExtractToGPR(16, 8, StackNode, 1); - IREmit->_StoreMem(GPRClass, 2, Upper, AddrNode, GetConstant(8), 8, MEM_OFFSET_SXTX, 1); + case OpSize::f80Bit: { + StackNode = IREmit->_F80CVTTo(StackNode, OpSize::i64Bit); + IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode); + auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1); + IREmit->_StoreMem(GPRClass, OpSize::i16Bit, Upper, AddrNode, GetConstant(8), OpSize::i64Bit, MEM_OFFSET_SXTX, 1); break; } + default: ERROR_AND_DIE_FMT("Unsupported x87 size"); } } else { if (Op->StoreSize != 10) { // if it's not 80bits then convert @@ -817,12 +821,12 @@ void X87StackOptimization::Run(IREmitter* Emit) { } if (Op->StoreSize == 10) { // Part of code from StoreResult_WithOpSize() // For X87 extended doubles, split before storing - IREmit->_StoreMem(FPRClass, 8, AddrNode, StackNode); - auto Upper = IREmit->_VExtractToGPR(16, 8, StackNode, 1); + IREmit->_StoreMem(FPRClass, OpSize::i64Bit, AddrNode, StackNode); + auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1); auto DestAddr = IREmit->_Add(OpSize::i64Bit, AddrNode, GetConstant(8)); - IREmit->_StoreMem(GPRClass, 2, DestAddr, Upper, 8); + IREmit->_StoreMem(GPRClass, OpSize::i16Bit, DestAddr, Upper, OpSize::i64Bit); } else { - IREmit->_StoreMem(FPRClass, Op->StoreSize, AddrNode, StackNode); + IREmit->_StoreMem(FPRClass, IR::SizeToOpSize(Op->StoreSize), AddrNode, StackNode); } } } @@ -871,13 +875,13 @@ void X87StackOptimization::Run(IREmitter* Emit) { // of a value Ref ResultNode {}; if (ReducedPrecisionMode) { - ResultNode = IREmit->_VFNeg(8, 8, Value); + ResultNode = IREmit->_VFNeg(OpSize::i64Bit, OpSize::i64Bit, Value); } else { Ref Low = GetConstant(0); Ref High = GetConstant(0b1'000'0000'0000'0000ULL); - Ref HelperNode = IREmit->_VCastFromGPR(16, 8, Low); - HelperNode = IREmit->_VInsGPR(16, 8, 1, HelperNode, High); - ResultNode = IREmit->_VXor(16, 1, Value, HelperNode); + Ref HelperNode = IREmit->_VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, Low); + HelperNode = IREmit->_VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, HelperNode, High); + ResultNode = IREmit->_VXor(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode); } StoreStackValue(ResultNode); break; @@ -888,14 +892,14 @@ void X87StackOptimization::Run(IREmitter* Emit) { Ref ResultNode {}; if (ReducedPrecisionMode) { - ResultNode = IREmit->_VFAbs(8, 8, Value); + ResultNode = IREmit->_VFAbs(OpSize::i64Bit, OpSize::i64Bit, Value); } else { // Intermediate insts Ref Low = GetConstant(~0ULL); Ref High = GetConstant(0b0'111'1111'1111'1111ULL); - Ref HelperNode = IREmit->_VCastFromGPR(16, 8, Low); - HelperNode = IREmit->_VInsGPR(16, 8, 1, HelperNode, High); - ResultNode = IREmit->_VAnd(16, 1, Value, HelperNode); + Ref HelperNode = 
IREmit->_VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, Low); + HelperNode = IREmit->_VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, HelperNode, High); + ResultNode = IREmit->_VAnd(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode); } StoreStackValue(ResultNode); break; @@ -909,7 +913,7 @@ void X87StackOptimization::Run(IREmitter* Emit) { Ref CmpNode {}; if (ReducedPrecisionMode) { - CmpNode = IREmit->_FCmp(8, StackValue1, StackValue2); + CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackValue1, StackValue2); } else { CmpNode = IREmit->_F80Cmp(StackValue1, StackValue2); } @@ -921,11 +925,11 @@ void X87StackOptimization::Run(IREmitter* Emit) { const auto* Op = IROp->C(); auto Offset = Op->SrcStack; auto StackNode = LoadStackValue(Offset); - Ref ZeroConst = IREmit->_VCastFromGPR(ReducedPrecisionMode ? 8 : 16, 8, GetConstant(0)); + Ref ZeroConst = IREmit->_VCastFromGPR(ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit, OpSize::i64Bit, GetConstant(0)); Ref CmpNode {}; if (ReducedPrecisionMode) { - CmpNode = IREmit->_FCmp(8, StackNode, ZeroConst); + CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackNode, ZeroConst); } else { CmpNode = IREmit->_F80Cmp(StackNode, ZeroConst); } @@ -941,7 +945,7 @@ void X87StackOptimization::Run(IREmitter* Emit) { Ref CmpNode {}; if (ReducedPrecisionMode) { - CmpNode = IREmit->_FCmp(8, StackNode, Value); + CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackNode, Value); } else { CmpNode = IREmit->_F80Cmp(StackNode, Value); } @@ -984,7 +988,7 @@ void X87StackOptimization::Run(IREmitter* Emit) { Ref Value {}; if (ReducedPrecisionMode) { - Value = IREmit->_Vector_FToI(8, 8, St0, Round_Host); + Value = IREmit->_Vector_FToI(OpSize::i64Bit, OpSize::i64Bit, St0, Round_Host); } else { Value = IREmit->_F80Round(St0); } @@ -1000,7 +1004,7 @@ void X87StackOptimization::Run(IREmitter* Emit) { Ref Value1 = LoadStackValue(StackOffset1); Ref Value2 = LoadStackValue(StackOffset2); - Ref StackNode = IREmit->_VBSL(16, CurrentIR.GetNode(Op->VectorMask), Value1, Value2); + Ref StackNode = IREmit->_VBSL(OpSize::i128Bit, CurrentIR.GetNode(Op->VectorMask), Value1, Value2); StoreStackValue(StackNode, 0, StackOffset1 && StackOffset2); break; }
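The FCHS/FABS handling near the end of this pass is the one non-mechanical spot: with full 80-bit precision there is no VFNeg/VFAbs to reach for, so the pass materializes a 128-bit mask via VCastFromGPR plus VInsGPR and applies it with VXor or VAnd. A minimal plain-integer sketch of what those two sequences compute, assuming the usual x87 extended layout (sign at bit 79, i.e. bit 15 of the upper 64-bit lane):

  #include <cstdint>
  // Integer view of an x87 extended double sitting in a 128-bit register.
  struct X80InVec { uint64_t Lo, Hi; }; // Lo = mantissa, Hi[14:0] = exponent, Hi[15] = sign

  X80InVec F80Neg(X80InVec V) { // VXor with mask {Lo = 0, Hi = 0x8000}
    V.Hi ^= 0x8000; // flip the sign bit, leave everything else intact
    return V;
  }
  X80InVec F80Abs(X80InVec V) { // VAnd with mask {Lo = ~0ULL, Hi = 0x7FFF}
    V.Hi &= 0x7FFF; // clear the sign bit (and the unused bits above bit 79)
    return V;
  }

The ElementSize passed to VXor/VAnd here is i8Bit only because the operation is bitwise; any element size would yield the same bits.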