IR: Converts base IR operations to store OpSize sizes
NFC

Finally converts the IR operations themselves to store OpSize for the IR
operation size and element size.

This also finally, FINALLY, converts the one remaining `_Constant` helper to
stop taking a size specified in bits rather than bytes like every other IR op
handler. That mismatch was endlessly confusing, and now it's gone.
Sonicadvance1 committed Oct 29, 2024
1 parent 493b952 commit 3434b57
Showing 13 changed files with 273 additions and 237 deletions.
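
For context: `IR::OpSize` and its conversion helpers are what every converted call site below now trades in. The enumerator and helper names all appear in this diff; the definition sketched here is an assumption that each enumerator encodes its byte count (which is what would make the conversions cheap casts), not a copy of the real header:

namespace FEXCore::IR {
// Sketch only: enumerator values assumed to equal the operation's byte count.
// iUnsized = 0 also makes it the identity for the std::max size inference
// used by the generated allocator helpers below.
enum class OpSize : uint8_t {
  iUnsized = 0,
  i8Bit = 1,
  i16Bit = 2,
  i32Bit = 4,
  i64Bit = 8,
  i128Bit = 16,
  iInvalid = 0xFF,
};

constexpr uint8_t OpSizeToSize(OpSize Size) {  // OpSize -> bytes
  return static_cast<uint8_t>(Size);
}
constexpr OpSize SizeToOpSize(uint8_t Size) {  // bytes -> OpSize
  return static_cast<OpSize>(Size);
}
constexpr uint32_t OpSizeAsBits(OpSize Size) { // OpSize -> bits
  return static_cast<uint32_t>(OpSizeToSize(Size)) * 8;
}
} // namespace FEXCore::IR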
20 changes: 10 additions & 10 deletions FEXCore/Scripts/json_ir_generator.py
@@ -323,8 +323,8 @@ def print_ir_structs(defines):
output_file.write("struct __attribute__((packed)) IROp_Header {\n")
output_file.write("\tvoid* Data[0];\n")
output_file.write("\tIROps Op;\n\n")
output_file.write("\tuint8_t Size;\n")
output_file.write("\tuint8_t ElementSize;\n")
output_file.write("\tIR::OpSize Size;\n")
output_file.write("\tIR::OpSize ElementSize;\n")

output_file.write("\ttemplate<typename T>\n")
output_file.write("\tT const* C() const { return reinterpret_cast<T const*>(Data); }\n")
@@ -630,20 +630,20 @@ def print_ir_allocator_helpers():
output_file.write("\t\treturn IRPair<T>{Op, CreateNode(&Op->Header)};\n")
output_file.write("\t}\n\n")

output_file.write("\tuint8_t GetOpSize(const OrderedNode *Op) const {\n")
output_file.write("\tIR::OpSize GetOpSize(const OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
output_file.write("\t\treturn HeaderOp->Size;\n")
output_file.write("\t\treturn IR::SizeToOpSize(HeaderOp->Size);\n")
output_file.write("\t}\n\n")

output_file.write("\tuint8_t GetOpElementSize(const OrderedNode *Op) const {\n")
output_file.write("\tIR::OpSize GetOpElementSize(const OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
output_file.write("\t\treturn HeaderOp->ElementSize;\n")
output_file.write("\t\treturn IR::SizeToOpSize(HeaderOp->ElementSize);\n")
output_file.write("\t}\n\n")

output_file.write("\tuint8_t GetOpElements(const OrderedNode *Op) const {\n")
output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
output_file.write("\t\tLOGMAN_THROW_A_FMT(OpHasDest(Op), \"Op {} has no dest\\n\", GetName(HeaderOp->Op));\n")
output_file.write("\t\treturn HeaderOp->Size / HeaderOp->ElementSize;\n")
output_file.write("\t\treturn IR::OpSizeToSize(GetOpSize(Op)) / IR::OpSizeToSize(GetOpElementSize(Op));\n")
output_file.write("\t}\n\n")

output_file.write("\tbool OpHasDest(const OrderedNode *Op) const {\n")
@@ -728,11 +728,11 @@ def print_ir_allocator_helpers():
# We can only infer a size if we have arguments
if op.DestSize == None:
# We need to infer destination size
output_file.write("\t\tuint8_t InferSize = 0;\n")
output_file.write("\t\tIR::OpSize InferSize = OpSize::iUnsized;\n")
if len(op.Arguments) != 0:
for arg in op.Arguments:
if arg.IsSSA:
output_file.write("\t\tuint8_t Size{} = GetOpSize({});\n".format(arg.Name, arg.Name))
output_file.write("\t\tauto Size{} = GetOpSize({});\n".format(arg.Name, arg.Name))
for arg in op.Arguments:
if arg.IsSSA:
output_file.write("\t\tInferSize = std::max(InferSize, Size{});\n".format(arg.Name))
@@ -745,7 +745,7 @@ def print_ir_allocator_helpers():
output_file.write("\t\t_Op.first->Header.Size = {};\n".format(op.DestSize))

if op.NumElements == None:
output_file.write("\t\t_Op.first->Header.ElementSize = _Op.first->Header.Size / ({});\n".format(1))
output_file.write("\t\t_Op.first->Header.ElementSize = _Op.first->Header.Size;\n")
else:
output_file.write("\t\t_Op.first->Header.ElementSize = _Op.first->Header.Size / ({});\n".format(op.NumElements))

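Rendered out of the string literals above, the accessor helpers the script now emits read as follows (tabs expanded; assembled directly from the write() calls in this hunk):

IR::OpSize GetOpSize(const OrderedNode *Op) const {
  auto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());
  return IR::SizeToOpSize(HeaderOp->Size);
}

IR::OpSize GetOpElementSize(const OrderedNode *Op) const {
  auto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());
  return IR::SizeToOpSize(HeaderOp->ElementSize);
}

uint8_t GetOpElements(const OrderedNode *Op) const {
  auto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());
  LOGMAN_THROW_A_FMT(OpHasDest(Op), "Op {} has no dest\n", GetName(HeaderOp->Op));
  return IR::OpSizeToSize(GetOpSize(Op)) / IR::OpSizeToSize(GetOpElementSize(Op));
}

GetOpElements is the one that changes shape: it now divides byte counts obtained through the conversion helpers instead of dividing the header fields directly, since those fields are no longer plain integers.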
27 changes: 15 additions & 12 deletions FEXCore/Source/Interface/Core/JIT/AtomicOps.cpp
@@ -13,7 +13,7 @@ namespace FEXCore::CPU {
#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::NodeID Node)
DEF_OP(CASPair) {
auto Op = IROp->C<IR::IROp_CASPair>();
LOGMAN_THROW_AA_FMT(IROp->ElementSize == 4 || IROp->ElementSize == 8, "Wrong element size");
LOGMAN_THROW_AA_FMT(IROp->ElementSize == IR::OpSize::i32Bit || IROp->ElementSize == IR::OpSize::i64Bit, "Wrong element size");
// Size is the size of each pair element
auto Dst0 = GetReg(Op->OutLo.ID());
auto Dst1 = GetReg(Op->OutHi.ID());
@@ -23,7 +23,7 @@ DEF_OP(CASPair) {
auto Desired1 = GetReg(Op->DesiredHi.ID());
auto MemSrc = GetReg(Op->Addr.ID());

const auto EmitSize = IROp->ElementSize == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
const auto EmitSize = IROp->ElementSize == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
if (CTX->HostFeatures.SupportsAtomics) {
// RA has heuristics to try to pair sources, but we need to handle the cases
// where they fail. We do so by moving to temporaries. Note we use 64-bit
@@ -112,9 +112,9 @@ DEF_OP(CAS) {
ARMEmitter::SingleUseForwardLabel LoopExpected;
Bind(&LoopTop);
ldaxr(SubEmitSize, TMP2, MemSrc);
if (IROp->Size == 1) {
if (IROp->Size == IR::OpSize::i8Bit) {
cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTB, 0);
} else if (IROp->Size == 2) {
} else if (IROp->Size == IR::OpSize::i16Bit) {
cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTH, 0);
} else {
cmp(EmitSize, TMP2, Expected);
@@ -273,18 +273,21 @@ DEF_OP(AtomicNeg) {

DEF_OP(AtomicSwap) {
auto Op = IROp->C<IR::IROp_AtomicSwap>();
uint8_t OpSize = IROp->Size;
LOGMAN_THROW_AA_FMT(OpSize == 8 || OpSize == 4 || OpSize == 2 || OpSize == 1, "Unexpected CAS size");
const auto OpSize = IROp->Size;
LOGMAN_THROW_AA_FMT(
OpSize == IR::OpSize::i64Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i8Bit, "Unexpecte"
"d CAS "
"size");

auto MemSrc = GetReg(Op->Addr.ID());
auto Src = GetReg(Op->Value.ID());

const auto EmitSize = ConvertSize(IROp);
const auto SubEmitSize = OpSize == 8 ? ARMEmitter::SubRegSize::i64Bit :
OpSize == 4 ? ARMEmitter::SubRegSize::i32Bit :
OpSize == 2 ? ARMEmitter::SubRegSize::i16Bit :
OpSize == 1 ? ARMEmitter::SubRegSize::i8Bit :
ARMEmitter::SubRegSize::i8Bit;
const auto SubEmitSize = OpSize == IR::OpSize::i64Bit ? ARMEmitter::SubRegSize::i64Bit :
OpSize == IR::OpSize::i32Bit ? ARMEmitter::SubRegSize::i32Bit :
OpSize == IR::OpSize::i16Bit ? ARMEmitter::SubRegSize::i16Bit :
OpSize == IR::OpSize::i8Bit ? ARMEmitter::SubRegSize::i8Bit :
ARMEmitter::SubRegSize::i8Bit;

if (CTX->HostFeatures.SupportsAtomics) {
ldswpal(SubEmitSize, Src, GetReg(Node), MemSrc);
Expand All @@ -294,7 +297,7 @@ DEF_OP(AtomicSwap) {
ldaxr(SubEmitSize, TMP2, MemSrc);
stlxr(SubEmitSize, TMP4, Src, MemSrc);
cbnz(EmitSize, TMP4, &LoopTop);
ubfm(EmitSize, GetReg(Node), TMP2, 0, OpSize * 8 - 1);
ubfm(EmitSize, GetReg(Node), TMP2, 0, IR::OpSizeAsBits(OpSize) - 1);
}
}

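The nested ternary is this file's existing idiom for the size mapping; as a readability aid, a minimal equivalent of the AtomicSwap mapping written as a switch (illustrative sketch, not part of the commit):

static ARMEmitter::SubRegSize ToSubRegSize(IR::OpSize Size) {
  switch (Size) {
  case IR::OpSize::i64Bit: return ARMEmitter::SubRegSize::i64Bit;
  case IR::OpSize::i32Bit: return ARMEmitter::SubRegSize::i32Bit;
  case IR::OpSize::i16Bit: return ARMEmitter::SubRegSize::i16Bit;
  default:                 return ARMEmitter::SubRegSize::i8Bit; // i8Bit; anything else is asserted out above
  }
}

The ubfm change at the end of AtomicSwap is the same conversion in the other direction: `IR::OpSizeAsBits(OpSize) - 1` computes the top bit index exactly as the old `OpSize * 8 - 1` did, but without treating the enum as a raw byte count.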
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/JIT/JIT.cpp
@@ -626,8 +626,8 @@ bool Arm64JITCore::IsInlineEntrypointOffset(const IR::OrderedNodeWrapper& WNode,
auto Op = OpHeader->C<IR::IROp_InlineEntrypointOffset>();
if (Value) {
uint64_t Mask = ~0ULL;
uint8_t OpSize = OpHeader->Size;
if (OpSize == 4) {
const auto Size = OpHeader->Size;
if (Size == IR::OpSize::i32Bit) {
Mask = 0xFFFF'FFFFULL;
}
*Value = (Entry + Op->Offset) & Mask;
32 changes: 17 additions & 15 deletions FEXCore/Source/Interface/Core/JIT/JITClass.h
@@ -129,23 +129,25 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

[[nodiscard]]
ARMEmitter::Size ConvertSize(const IR::IROp_Header* Op) {
return Op->Size == 8 ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
return Op->Size == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
}

[[nodiscard]]
ARMEmitter::Size ConvertSize48(const IR::IROp_Header* Op) {
LOGMAN_THROW_AA_FMT(Op->Size == 4 || Op->Size == 8, "Invalid size");
LOGMAN_THROW_AA_FMT(Op->Size == IR::OpSize::i32Bit || Op->Size == IR::OpSize::i64Bit, "Invalid size");
return ConvertSize(Op);
}

[[nodiscard]]
ARMEmitter::SubRegSize ConvertSubRegSize16(uint8_t ElementSize) {
LOGMAN_THROW_AA_FMT(ElementSize == 1 || ElementSize == 2 || ElementSize == 4 || ElementSize == 8 || ElementSize == 16, "Invalid size");
return ElementSize == 1 ? ARMEmitter::SubRegSize::i8Bit :
ElementSize == 2 ? ARMEmitter::SubRegSize::i16Bit :
ElementSize == 4 ? ARMEmitter::SubRegSize::i32Bit :
ElementSize == 8 ? ARMEmitter::SubRegSize::i64Bit :
ARMEmitter::SubRegSize::i128Bit;
ARMEmitter::SubRegSize ConvertSubRegSize16(IR::OpSize ElementSize) {
LOGMAN_THROW_AA_FMT(ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit ||
ElementSize == IR::OpSize::i64Bit || ElementSize == IR::OpSize::i128Bit,
"Invalid size");
return ElementSize == IR::OpSize::i8Bit ? ARMEmitter::SubRegSize::i8Bit :
ElementSize == IR::OpSize::i16Bit ? ARMEmitter::SubRegSize::i16Bit :
ElementSize == IR::OpSize::i32Bit ? ARMEmitter::SubRegSize::i32Bit :
ElementSize == IR::OpSize::i64Bit ? ARMEmitter::SubRegSize::i64Bit :
ARMEmitter::SubRegSize::i128Bit;
}

[[nodiscard]]
@@ -154,8 +156,8 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
}

[[nodiscard]]
ARMEmitter::SubRegSize ConvertSubRegSize8(uint8_t ElementSize) {
LOGMAN_THROW_AA_FMT(ElementSize != 16, "Invalid size");
ARMEmitter::SubRegSize ConvertSubRegSize8(IR::OpSize ElementSize) {
LOGMAN_THROW_AA_FMT(ElementSize != IR::OpSize::i128Bit, "Invalid size");
return ConvertSubRegSize16(ElementSize);
}

@@ -166,13 +168,13 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

[[nodiscard]]
ARMEmitter::SubRegSize ConvertSubRegSize4(const IR::IROp_Header* Op) {
LOGMAN_THROW_AA_FMT(Op->ElementSize != 8, "Invalid size");
LOGMAN_THROW_AA_FMT(Op->ElementSize != IR::OpSize::i64Bit, "Invalid size");
return ConvertSubRegSize8(Op);
}

[[nodiscard]]
ARMEmitter::SubRegSize ConvertSubRegSize248(const IR::IROp_Header* Op) {
LOGMAN_THROW_AA_FMT(Op->ElementSize != 1, "Invalid size");
LOGMAN_THROW_AA_FMT(Op->ElementSize != IR::OpSize::i8Bit, "Invalid size");
return ConvertSubRegSize8(Op);
}

@@ -183,13 +185,13 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter {

[[nodiscard]]
ARMEmitter::VectorRegSizePair ConvertSubRegSizePair8(const IR::IROp_Header* Op) {
LOGMAN_THROW_AA_FMT(Op->ElementSize != 16, "Invalid size");
LOGMAN_THROW_AA_FMT(Op->ElementSize != IR::OpSize::i128Bit, "Invalid size");
return ConvertSubRegSizePair16(Op);
}

[[nodiscard]]
ARMEmitter::VectorRegSizePair ConvertSubRegSizePair248(const IR::IROp_Header* Op) {
LOGMAN_THROW_AA_FMT(Op->ElementSize != 1, "Invalid size");
LOGMAN_THROW_AA_FMT(Op->ElementSize != IR::OpSize::i8Bit, "Invalid size");
return ConvertSubRegSizePair8(Op);
}

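Two things worth noting about this converter family. First, `ConvertSize` maps everything that isn't `i64Bit` (including 1- and 2-byte ops) onto a 32-bit host operation, since AArch64 ALU instructions only come in W- and X-register widths; `ConvertSize48` is the asserting variant for ops where only 4- and 8-byte sizes are legal. Second, the numbered `ConvertSubRegSize*` helpers narrow the accepted element sizes by delegation: `...16` accepts i8 through i128, `...8` drops i128, `...4` additionally drops i64, and `...248` instead drops i8. A hypothetical vector handler would use them like this (the op, register helpers, and emitter call are assumptions for illustration, not lines from this commit):

DEF_OP(VAddExample) {
  auto Op = IROp->C<IR::IROp_VAdd>(); // IROp_VAdd/GetVReg/add are assumed names
  // ElementSize is an IR::OpSize now, so it feeds the converter directly;
  // ConvertSubRegSize8 asserts the element is 1/2/4/8 bytes wide.
  const auto SubRegSize = ConvertSubRegSize8(IROp->ElementSize);
  add(SubRegSize, GetVReg(Node), GetVReg(Op->Vector1.ID()), GetVReg(Op->Vector2.ID()));
}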
42 changes: 21 additions & 21 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -1388,7 +1388,7 @@ void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs, bool SHL1Bit) {
uint64_t Shift = LoadConstantShift(Op, SHL1Bit);
const auto Size = GetSrcBitSize(Op);

Ref Src = _Constant(Size, Shift);
Ref Src = _Constant(OpSizeFromSrc(Op), Shift);
Ref Result = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);

CalculateFlags_ShiftLeftImmediate(OpSizeFromSrc(Op), Result, Dest, Shift);
@@ -1411,7 +1411,7 @@

uint64_t Shift = LoadConstantShift(Op, SHR1Bit);

Ref Src = _Constant(Size, Shift);
Ref Src = _Constant(OpSizeFromSrc(Op), Shift);
auto ALUOp = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);

CalculateFlags_ShiftRightImmediate(OpSizeFromSrc(Op), ALUOp, Dest, Shift);
@@ -1664,28 +1664,28 @@ void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {
auto* Src1 = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
auto* Src2 = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

const auto Size = GetSrcSize(Op);
const auto Size = OpSizeFromSrc(Op);
const auto SrcSize = Size * 8;
const auto MaxSrcBit = SrcSize - 1;
auto MaxSrcBitOp = _Constant(SrcSize, MaxSrcBit);
auto MaxSrcBitOp = _Constant(Size, MaxSrcBit);

// Shift the operand down to the starting bit
auto Start = _Bfe(OpSizeFromSrc(Op), 8, 0, Src2);
auto Shifted = _Lshr(IR::SizeToOpSize(Size), Src1, Start);
auto Shifted = _Lshr(Size, Src1, Start);

// Shifts larger than operand size need to be set to zero.
auto SanitizedShifted = _Select(IR::COND_ULE, Start, MaxSrcBitOp, Shifted, _Constant(SrcSize, 0));
auto SanitizedShifted = _Select(IR::COND_ULE, Start, MaxSrcBitOp, Shifted, _Constant(Size, 0));

// Now handle the length specifier.
auto Length = _Bfe(OpSizeFromSrc(Op), 8, 8, Src2);
auto Length = _Bfe(Size, 8, 8, Src2);

// Now build up the mask
// (1 << Length) - 1 = ~(~0 << Length)
auto AllOnes = _Constant(~0ull);
auto InvertedMask = _Lshl(IR::SizeToOpSize(Size), AllOnes, Length);
auto InvertedMask = _Lshl(Size, AllOnes, Length);

// Now put it all together and make the result.
auto Masked = _Andn(IR::SizeToOpSize(Size), SanitizedShifted, InvertedMask);
auto Masked = _Andn(Size, SanitizedShifted, InvertedMask);

// Sanitize the length. If it is above the max, we don't do the masking.
auto Dest = _Select(IR::COND_ULE, Length, MaxSrcBitOp, Masked, SanitizedShifted);
@@ -1787,7 +1787,7 @@ void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
}

void OpDispatchBuilder::BZHI(OpcodeArgs) {
const auto Size = GetSrcSize(Op);
const auto Size = OpSizeFromSrc(Op);
const auto OperandSize = Size * 8;

// In 32-bit mode we only look at bottom 32-bit, no 8 or 16-bit BZHI so no
@@ -1799,9 +1799,9 @@
// Clear the high bits specified by the index. A64 only considers bottom bits
// of the shift, so we don't need to mask bottom 8-bits ourselves.
// Out-of-bounds results ignored after.
auto NegOne = _Constant(OperandSize, -1);
auto Mask = _Lshl(IR::SizeToOpSize(Size), NegOne, Index);
auto MaskResult = _Andn(IR::SizeToOpSize(Size), Src, Mask);
auto NegOne = _Constant(Size, -1);
auto Mask = _Lshl(Size, NegOne, Index);
auto MaskResult = _Andn(Size, Src, Mask);

// If the index is above OperandSize, we don't clear anything. BZHI only
// considers the bottom 8-bits, so we really want to know if the bottom 8-bits
@@ -1810,7 +1810,7 @@
// Because we're clobbering flags internally we ignore all carry invert
// shenanigans and use the raw versions here.
_TestNZ(OpSize::i64Bit, Index, _Constant(0xFF & ~(OperandSize - 1)));
auto Result = _NZCVSelect(IR::SizeToOpSize(Size), {COND_NEQ}, Src, MaskResult);
auto Result = _NZCVSelect(Size, {COND_NEQ}, Src, MaskResult);
StoreResult(GPRClass, Op, Result, OpSize::iInvalid);

auto Zero = _InlineConstant(0);
@@ -2065,7 +2065,7 @@ void OpDispatchBuilder::RCROp(OpcodeArgs) {
Ref Res = _Lshr(OpSize, Dest, Src);
auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

auto One = _Constant(Size, 1);
auto One = _Constant(OpSizeFromSrc(Op), 1);

// Res |= (Dest << (Size - Shift + 1));
// Expressed as Res | ((Src << (Size - Shift)) << 1) to get correct
@@ -2188,7 +2188,7 @@ void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
if (IsSrcConst) {
SetCFDirect(Tmp, SrcConst - 1, true);
} else {
auto One = _Constant(Size, 1);
auto One = _Constant(OpSizeFromSrc(Op), 1);
auto NewCF = _Lshr(OpSize::i32Bit, Tmp, _Sub(OpSize::i32Bit, Src, One));
SetCFDirect(NewCF, 0, true);
}
@@ -2337,7 +2337,7 @@ void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {

auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

Ref Tmp = _Constant(64, 0);
Ref Tmp = _Constant(OpSize::i64Bit, 0);

for (size_t i = 0; i < (32 + Size + 1); i += (Size + 1)) {
// Insert incoming value
@@ -3084,7 +3084,7 @@ void OpDispatchBuilder::INCOp(OpcodeArgs) {
Ref Dest;
Ref Result;
const auto Size = GetSrcBitSize(Op);
auto OneConst = _Constant(Size, 1);
auto OneConst = _Constant(OpSizeFromSrc(Op), 1);

const bool IsLocked = DestIsLockedMem(Op);

@@ -3125,7 +3125,7 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
Ref Dest;
Ref Result;
const auto Size = GetSrcBitSize(Op);
auto OneConst = _Constant(Size, 1);
auto OneConst = _Constant(OpSizeFromSrc(Op), 1);

const bool IsLocked = DestIsLockedMem(Op);

@@ -3135,7 +3135,7 @@ void OpDispatchBuilder::DECOp(OpcodeArgs) {
Ref DestAddress = MakeSegmentAddress(Op, Op->Dest);

// Use Add instead of Sub to avoid a NEG
Dest = _AtomicFetchAdd(OpSizeFromSrc(Op), _Constant(Size, -1), DestAddress);
Dest = _AtomicFetchAdd(OpSizeFromSrc(Op), _Constant(OpSizeFromSrc(Op), -1), DestAddress);
} else {
Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = Size >= 32});
}
@@ -3585,7 +3585,7 @@ void OpDispatchBuilder::POPFOp(OpcodeArgs) {
// Bit 1 is always 1
// Bit 9 is always 1 because we always have interrupts enabled

Src = _Or(OpSize::i64Bit, Src, _Constant(Size * 8, 0x202));
Src = _Or(OpSize::i64Bit, Src, _Constant(Size, 0x202));

SetPackedRFLAG(false, Src);
}
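The `_Constant` rewrites peppered through this file are the commit message's point made concrete: `_Constant` was the last IR helper whose size argument was a bit count (taken from `GetSrcBitSize(Op)`), while every other handler takes byte-denominated sizes. Side by side, from the SHL case above:

// Before: width in bits -- the lone odd one out among the IR helpers.
Ref Src = _Constant(Size, Shift);              // Size = GetSrcBitSize(Op)

// After: an IR::OpSize, matching every other call in the dispatcher.
Ref Src = _Constant(OpSizeFromSrc(Op), Shift);

The flags helpers (`CalculateFlags_*`) already took `OpSizeFromSrc(Op)`, which is why only the `_Constant` operands change in most of these hunks.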
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
@@ -2618,14 +2618,14 @@ OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_VPGatherImpl(OpSize Size, O
// If the address element size is half the size of the Element load size then we need to start fetching half-way through the low register.
AddrAddressing.Low = VSIB.Low;
AddrAddressing.High = VSIB.High;
IndexElementOffset = OpSize::i128Bit / AddrElementSize / 2;
IndexElementOffset = IR::NumElements(OpSize::i128Bit, AddrElementSize) / 2;
} else if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
AddrAddressing.Low = VSIB.High;
AddrAddressing.High = Invalid();
DestReg = Result.Low; ///< Start mixing with the low register.
MaskReg = Mask.Low; ///< Mask starts with the low mask here.
IndexElementOffset = 0;
DataElementOffset = OpSize::i128Bit / ElementLoadSize / 2;
DataElementOffset = IR::NumElements(OpSize::i128Bit, ElementLoadSize) / 2;
}

///< Calculate the high-half.
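`IR::NumElements` replaces the open-coded `OpSize::i128Bit / AddrElementSize` division, which no longer makes sense with `OpSize` as a real enum. Its exact definition isn't part of this diff; a minimal sketch of what it presumably computes:

// Assumed implementation: lane count = register bytes / element bytes,
// e.g. NumElements(OpSize::i128Bit, OpSize::i32Bit) == 4.
constexpr uint8_t NumElements(IR::OpSize RegisterSize, IR::OpSize ElementSize) {
  return IR::OpSizeToSize(RegisterSize) / IR::OpSizeToSize(ElementSize);
}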